1 /*
2  * kmp_atomic.cpp -- ATOMIC implementation routines
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "kmp_atomic.h"
14 #include "kmp.h" // TRUE, asm routines prototypes
15 
// Short aliases for the unsigned character and unsigned short integer types.
typedef unsigned char uchar;
typedef unsigned short int ushort;
18 
19 /*!
20 @defgroup ATOMIC_OPS Atomic Operations
21 These functions are used for implementing the many different varieties of atomic
22 operations.
23 
24 The compiler is at liberty to inline atomic operations that are naturally
25 supported by the target architecture. For instance on IA-32 architecture an
26 atomic like this can be inlined
27 @code
28 static int s = 0;
29 #pragma omp atomic
30     s++;
31 @endcode
32 using the single instruction: `lock; incl s`
33 
34 However the runtime does provide entrypoints for these operations to support
35 compilers that choose not to inline them. (For instance,
36 `__kmpc_atomic_fixed4_add` could be used to perform the increment above.)
37 
38 The names of the functions are encoded by using the data type name and the
39 operation name, as in these tables.
40 
41 Data Type  | Data type encoding
42 -----------|---------------
43 int8_t     | `fixed1`
44 uint8_t    | `fixed1u`
45 int16_t    | `fixed2`
46 uint16_t   | `fixed2u`
47 int32_t    | `fixed4`
48 uint32_t   | `fixed4u`
49 int64_t    | `fixed8`
50 uint64_t   | `fixed8u`
51 float      | `float4`
52 double     | `float8`
53 float 10 (8087 eighty bit float)  | `float10`
54 complex<float>   |  `cmplx4`
55 complex<double>  | `cmplx8`
56 complex<float10> | `cmplx10`
57 <br>
58 
59 Operation | Operation encoding
60 ----------|-------------------
61 + | add
62 - | sub
63 \* | mul
64 / | div
65 & | andb
66 << | shl
67 \>\> | shr
68 \| | orb
69 ^  | xor
70 && | andl
71 \|\| | orl
72 maximum | max
73 minimum | min
74 .eqv.   | eqv
75 .neqv.  | neqv
76 
77 <br>
78 For non-commutative operations, `_rev` can also be added for the reversed
79 operation. For the functions that capture the result, the suffix `_cpt` is
80 added.
81 
82 Update Functions
83 ================
84 The general form of an atomic function that just performs an update (without a
85 `capture`)
86 @code
87 void __kmpc_atomic_<datatype>_<operation>( ident_t *id_ref, int gtid, TYPE *
88 lhs, TYPE rhs );
89 @endcode
90 @param ident_t  a pointer to source location
91 @param gtid  the global thread id
92 @param lhs   a pointer to the left operand
93 @param rhs   the right operand
94 
95 `capture` functions
96 ===================
97 The capture functions perform an atomic update and return a result, which is
98 either the value before the update, or that after. They take an additional
99 argument to determine which result is returned.
100 Their general form is therefore
101 @code
102 TYPE __kmpc_atomic_<datatype>_<operation>_cpt( ident_t *id_ref, int gtid, TYPE *
103 lhs, TYPE rhs, int flag );
104 @endcode
105 @param ident_t  a pointer to source location
106 @param gtid  the global thread id
107 @param lhs   a pointer to the left operand
108 @param rhs   the right operand
109 @param flag  one if the result is to be captured *after* the operation, zero if
110 captured *before*.
111 
112 The one set of exceptions to this is the `complex<float>` type where the value
113 is not returned, rather an extra argument pointer is passed.
114 
115 They look like
116 @code
117 void __kmpc_atomic_cmplx4_<op>_cpt(  ident_t *id_ref, int gtid, kmp_cmplx32 *
118 lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag );
119 @endcode
120 
121 Read and Write Operations
122 =========================
123 The OpenMP<sup>*</sup> standard now supports atomic operations that simply
124 ensure that the value is read or written atomically, with no modification
125 performed. In many cases on IA-32 architecture these operations can be inlined
126 since the architecture guarantees that no tearing occurs on aligned objects
127 accessed with a single memory operation of up to 64 bits in size.
128 
129 The general form of the read operations is
130 @code
131 TYPE __kmpc_atomic_<type>_rd ( ident_t *id_ref, int gtid, TYPE * loc );
132 @endcode
133 
134 For the write operations the form is
135 @code
136 void __kmpc_atomic_<type>_wr ( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs
137 );
138 @endcode
139 
140 Full list of functions
141 ======================
142 This leads to the generation of 376 atomic functions, as follows.
143 
144 Functions for integers
145 ---------------------
146 There are versions here for integers of size 1,2,4 and 8 bytes both signed and
147 unsigned (where that matters).
148 @code
149     __kmpc_atomic_fixed1_add
150     __kmpc_atomic_fixed1_add_cpt
151     __kmpc_atomic_fixed1_add_fp
152     __kmpc_atomic_fixed1_andb
153     __kmpc_atomic_fixed1_andb_cpt
154     __kmpc_atomic_fixed1_andl
155     __kmpc_atomic_fixed1_andl_cpt
156     __kmpc_atomic_fixed1_div
157     __kmpc_atomic_fixed1_div_cpt
158     __kmpc_atomic_fixed1_div_cpt_rev
159     __kmpc_atomic_fixed1_div_float8
160     __kmpc_atomic_fixed1_div_fp
161     __kmpc_atomic_fixed1_div_rev
162     __kmpc_atomic_fixed1_eqv
163     __kmpc_atomic_fixed1_eqv_cpt
164     __kmpc_atomic_fixed1_max
165     __kmpc_atomic_fixed1_max_cpt
166     __kmpc_atomic_fixed1_min
167     __kmpc_atomic_fixed1_min_cpt
168     __kmpc_atomic_fixed1_mul
169     __kmpc_atomic_fixed1_mul_cpt
170     __kmpc_atomic_fixed1_mul_float8
171     __kmpc_atomic_fixed1_mul_fp
172     __kmpc_atomic_fixed1_neqv
173     __kmpc_atomic_fixed1_neqv_cpt
174     __kmpc_atomic_fixed1_orb
175     __kmpc_atomic_fixed1_orb_cpt
176     __kmpc_atomic_fixed1_orl
177     __kmpc_atomic_fixed1_orl_cpt
178     __kmpc_atomic_fixed1_rd
179     __kmpc_atomic_fixed1_shl
180     __kmpc_atomic_fixed1_shl_cpt
181     __kmpc_atomic_fixed1_shl_cpt_rev
182     __kmpc_atomic_fixed1_shl_rev
183     __kmpc_atomic_fixed1_shr
184     __kmpc_atomic_fixed1_shr_cpt
185     __kmpc_atomic_fixed1_shr_cpt_rev
186     __kmpc_atomic_fixed1_shr_rev
187     __kmpc_atomic_fixed1_sub
188     __kmpc_atomic_fixed1_sub_cpt
189     __kmpc_atomic_fixed1_sub_cpt_rev
190     __kmpc_atomic_fixed1_sub_fp
191     __kmpc_atomic_fixed1_sub_rev
192     __kmpc_atomic_fixed1_swp
193     __kmpc_atomic_fixed1_wr
194     __kmpc_atomic_fixed1_xor
195     __kmpc_atomic_fixed1_xor_cpt
196     __kmpc_atomic_fixed1u_add_fp
197     __kmpc_atomic_fixed1u_sub_fp
198     __kmpc_atomic_fixed1u_mul_fp
199     __kmpc_atomic_fixed1u_div
200     __kmpc_atomic_fixed1u_div_cpt
201     __kmpc_atomic_fixed1u_div_cpt_rev
202     __kmpc_atomic_fixed1u_div_fp
203     __kmpc_atomic_fixed1u_div_rev
204     __kmpc_atomic_fixed1u_shr
205     __kmpc_atomic_fixed1u_shr_cpt
206     __kmpc_atomic_fixed1u_shr_cpt_rev
207     __kmpc_atomic_fixed1u_shr_rev
208     __kmpc_atomic_fixed2_add
209     __kmpc_atomic_fixed2_add_cpt
210     __kmpc_atomic_fixed2_add_fp
211     __kmpc_atomic_fixed2_andb
212     __kmpc_atomic_fixed2_andb_cpt
213     __kmpc_atomic_fixed2_andl
214     __kmpc_atomic_fixed2_andl_cpt
215     __kmpc_atomic_fixed2_div
216     __kmpc_atomic_fixed2_div_cpt
217     __kmpc_atomic_fixed2_div_cpt_rev
218     __kmpc_atomic_fixed2_div_float8
219     __kmpc_atomic_fixed2_div_fp
220     __kmpc_atomic_fixed2_div_rev
221     __kmpc_atomic_fixed2_eqv
222     __kmpc_atomic_fixed2_eqv_cpt
223     __kmpc_atomic_fixed2_max
224     __kmpc_atomic_fixed2_max_cpt
225     __kmpc_atomic_fixed2_min
226     __kmpc_atomic_fixed2_min_cpt
227     __kmpc_atomic_fixed2_mul
228     __kmpc_atomic_fixed2_mul_cpt
229     __kmpc_atomic_fixed2_mul_float8
230     __kmpc_atomic_fixed2_mul_fp
231     __kmpc_atomic_fixed2_neqv
232     __kmpc_atomic_fixed2_neqv_cpt
233     __kmpc_atomic_fixed2_orb
234     __kmpc_atomic_fixed2_orb_cpt
235     __kmpc_atomic_fixed2_orl
236     __kmpc_atomic_fixed2_orl_cpt
237     __kmpc_atomic_fixed2_rd
238     __kmpc_atomic_fixed2_shl
239     __kmpc_atomic_fixed2_shl_cpt
240     __kmpc_atomic_fixed2_shl_cpt_rev
241     __kmpc_atomic_fixed2_shl_rev
242     __kmpc_atomic_fixed2_shr
243     __kmpc_atomic_fixed2_shr_cpt
244     __kmpc_atomic_fixed2_shr_cpt_rev
245     __kmpc_atomic_fixed2_shr_rev
246     __kmpc_atomic_fixed2_sub
247     __kmpc_atomic_fixed2_sub_cpt
248     __kmpc_atomic_fixed2_sub_cpt_rev
249     __kmpc_atomic_fixed2_sub_fp
250     __kmpc_atomic_fixed2_sub_rev
251     __kmpc_atomic_fixed2_swp
252     __kmpc_atomic_fixed2_wr
253     __kmpc_atomic_fixed2_xor
254     __kmpc_atomic_fixed2_xor_cpt
255     __kmpc_atomic_fixed2u_add_fp
256     __kmpc_atomic_fixed2u_sub_fp
257     __kmpc_atomic_fixed2u_mul_fp
258     __kmpc_atomic_fixed2u_div
259     __kmpc_atomic_fixed2u_div_cpt
260     __kmpc_atomic_fixed2u_div_cpt_rev
261     __kmpc_atomic_fixed2u_div_fp
262     __kmpc_atomic_fixed2u_div_rev
263     __kmpc_atomic_fixed2u_shr
264     __kmpc_atomic_fixed2u_shr_cpt
265     __kmpc_atomic_fixed2u_shr_cpt_rev
266     __kmpc_atomic_fixed2u_shr_rev
267     __kmpc_atomic_fixed4_add
268     __kmpc_atomic_fixed4_add_cpt
269     __kmpc_atomic_fixed4_add_fp
270     __kmpc_atomic_fixed4_andb
271     __kmpc_atomic_fixed4_andb_cpt
272     __kmpc_atomic_fixed4_andl
273     __kmpc_atomic_fixed4_andl_cpt
274     __kmpc_atomic_fixed4_div
275     __kmpc_atomic_fixed4_div_cpt
276     __kmpc_atomic_fixed4_div_cpt_rev
277     __kmpc_atomic_fixed4_div_float8
278     __kmpc_atomic_fixed4_div_fp
279     __kmpc_atomic_fixed4_div_rev
280     __kmpc_atomic_fixed4_eqv
281     __kmpc_atomic_fixed4_eqv_cpt
282     __kmpc_atomic_fixed4_max
283     __kmpc_atomic_fixed4_max_cpt
284     __kmpc_atomic_fixed4_min
285     __kmpc_atomic_fixed4_min_cpt
286     __kmpc_atomic_fixed4_mul
287     __kmpc_atomic_fixed4_mul_cpt
288     __kmpc_atomic_fixed4_mul_float8
289     __kmpc_atomic_fixed4_mul_fp
290     __kmpc_atomic_fixed4_neqv
291     __kmpc_atomic_fixed4_neqv_cpt
292     __kmpc_atomic_fixed4_orb
293     __kmpc_atomic_fixed4_orb_cpt
294     __kmpc_atomic_fixed4_orl
295     __kmpc_atomic_fixed4_orl_cpt
296     __kmpc_atomic_fixed4_rd
297     __kmpc_atomic_fixed4_shl
298     __kmpc_atomic_fixed4_shl_cpt
299     __kmpc_atomic_fixed4_shl_cpt_rev
300     __kmpc_atomic_fixed4_shl_rev
301     __kmpc_atomic_fixed4_shr
302     __kmpc_atomic_fixed4_shr_cpt
303     __kmpc_atomic_fixed4_shr_cpt_rev
304     __kmpc_atomic_fixed4_shr_rev
305     __kmpc_atomic_fixed4_sub
306     __kmpc_atomic_fixed4_sub_cpt
307     __kmpc_atomic_fixed4_sub_cpt_rev
308     __kmpc_atomic_fixed4_sub_fp
309     __kmpc_atomic_fixed4_sub_rev
310     __kmpc_atomic_fixed4_swp
311     __kmpc_atomic_fixed4_wr
312     __kmpc_atomic_fixed4_xor
313     __kmpc_atomic_fixed4_xor_cpt
314     __kmpc_atomic_fixed4u_add_fp
315     __kmpc_atomic_fixed4u_sub_fp
316     __kmpc_atomic_fixed4u_mul_fp
317     __kmpc_atomic_fixed4u_div
318     __kmpc_atomic_fixed4u_div_cpt
319     __kmpc_atomic_fixed4u_div_cpt_rev
320     __kmpc_atomic_fixed4u_div_fp
321     __kmpc_atomic_fixed4u_div_rev
322     __kmpc_atomic_fixed4u_shr
323     __kmpc_atomic_fixed4u_shr_cpt
324     __kmpc_atomic_fixed4u_shr_cpt_rev
325     __kmpc_atomic_fixed4u_shr_rev
326     __kmpc_atomic_fixed8_add
327     __kmpc_atomic_fixed8_add_cpt
328     __kmpc_atomic_fixed8_add_fp
329     __kmpc_atomic_fixed8_andb
330     __kmpc_atomic_fixed8_andb_cpt
331     __kmpc_atomic_fixed8_andl
332     __kmpc_atomic_fixed8_andl_cpt
333     __kmpc_atomic_fixed8_div
334     __kmpc_atomic_fixed8_div_cpt
335     __kmpc_atomic_fixed8_div_cpt_rev
336     __kmpc_atomic_fixed8_div_float8
337     __kmpc_atomic_fixed8_div_fp
338     __kmpc_atomic_fixed8_div_rev
339     __kmpc_atomic_fixed8_eqv
340     __kmpc_atomic_fixed8_eqv_cpt
341     __kmpc_atomic_fixed8_max
342     __kmpc_atomic_fixed8_max_cpt
343     __kmpc_atomic_fixed8_min
344     __kmpc_atomic_fixed8_min_cpt
345     __kmpc_atomic_fixed8_mul
346     __kmpc_atomic_fixed8_mul_cpt
347     __kmpc_atomic_fixed8_mul_float8
348     __kmpc_atomic_fixed8_mul_fp
349     __kmpc_atomic_fixed8_neqv
350     __kmpc_atomic_fixed8_neqv_cpt
351     __kmpc_atomic_fixed8_orb
352     __kmpc_atomic_fixed8_orb_cpt
353     __kmpc_atomic_fixed8_orl
354     __kmpc_atomic_fixed8_orl_cpt
355     __kmpc_atomic_fixed8_rd
356     __kmpc_atomic_fixed8_shl
357     __kmpc_atomic_fixed8_shl_cpt
358     __kmpc_atomic_fixed8_shl_cpt_rev
359     __kmpc_atomic_fixed8_shl_rev
360     __kmpc_atomic_fixed8_shr
361     __kmpc_atomic_fixed8_shr_cpt
362     __kmpc_atomic_fixed8_shr_cpt_rev
363     __kmpc_atomic_fixed8_shr_rev
364     __kmpc_atomic_fixed8_sub
365     __kmpc_atomic_fixed8_sub_cpt
366     __kmpc_atomic_fixed8_sub_cpt_rev
367     __kmpc_atomic_fixed8_sub_fp
368     __kmpc_atomic_fixed8_sub_rev
369     __kmpc_atomic_fixed8_swp
370     __kmpc_atomic_fixed8_wr
371     __kmpc_atomic_fixed8_xor
372     __kmpc_atomic_fixed8_xor_cpt
373     __kmpc_atomic_fixed8u_add_fp
374     __kmpc_atomic_fixed8u_sub_fp
375     __kmpc_atomic_fixed8u_mul_fp
376     __kmpc_atomic_fixed8u_div
377     __kmpc_atomic_fixed8u_div_cpt
378     __kmpc_atomic_fixed8u_div_cpt_rev
379     __kmpc_atomic_fixed8u_div_fp
380     __kmpc_atomic_fixed8u_div_rev
381     __kmpc_atomic_fixed8u_shr
382     __kmpc_atomic_fixed8u_shr_cpt
383     __kmpc_atomic_fixed8u_shr_cpt_rev
384     __kmpc_atomic_fixed8u_shr_rev
385 @endcode
386 
387 Functions for floating point
388 ----------------------------
389 There are versions here for floating point numbers of size 4, 8, 10 and 16
390 bytes. (Ten byte floats are used by X87, but are now rare).
391 @code
392     __kmpc_atomic_float4_add
393     __kmpc_atomic_float4_add_cpt
394     __kmpc_atomic_float4_add_float8
395     __kmpc_atomic_float4_add_fp
396     __kmpc_atomic_float4_div
397     __kmpc_atomic_float4_div_cpt
398     __kmpc_atomic_float4_div_cpt_rev
399     __kmpc_atomic_float4_div_float8
400     __kmpc_atomic_float4_div_fp
401     __kmpc_atomic_float4_div_rev
402     __kmpc_atomic_float4_max
403     __kmpc_atomic_float4_max_cpt
404     __kmpc_atomic_float4_min
405     __kmpc_atomic_float4_min_cpt
406     __kmpc_atomic_float4_mul
407     __kmpc_atomic_float4_mul_cpt
408     __kmpc_atomic_float4_mul_float8
409     __kmpc_atomic_float4_mul_fp
410     __kmpc_atomic_float4_rd
411     __kmpc_atomic_float4_sub
412     __kmpc_atomic_float4_sub_cpt
413     __kmpc_atomic_float4_sub_cpt_rev
414     __kmpc_atomic_float4_sub_float8
415     __kmpc_atomic_float4_sub_fp
416     __kmpc_atomic_float4_sub_rev
417     __kmpc_atomic_float4_swp
418     __kmpc_atomic_float4_wr
419     __kmpc_atomic_float8_add
420     __kmpc_atomic_float8_add_cpt
421     __kmpc_atomic_float8_add_fp
422     __kmpc_atomic_float8_div
423     __kmpc_atomic_float8_div_cpt
424     __kmpc_atomic_float8_div_cpt_rev
425     __kmpc_atomic_float8_div_fp
426     __kmpc_atomic_float8_div_rev
427     __kmpc_atomic_float8_max
428     __kmpc_atomic_float8_max_cpt
429     __kmpc_atomic_float8_min
430     __kmpc_atomic_float8_min_cpt
431     __kmpc_atomic_float8_mul
432     __kmpc_atomic_float8_mul_cpt
433     __kmpc_atomic_float8_mul_fp
434     __kmpc_atomic_float8_rd
435     __kmpc_atomic_float8_sub
436     __kmpc_atomic_float8_sub_cpt
437     __kmpc_atomic_float8_sub_cpt_rev
438     __kmpc_atomic_float8_sub_fp
439     __kmpc_atomic_float8_sub_rev
440     __kmpc_atomic_float8_swp
441     __kmpc_atomic_float8_wr
442     __kmpc_atomic_float10_add
443     __kmpc_atomic_float10_add_cpt
444     __kmpc_atomic_float10_add_fp
445     __kmpc_atomic_float10_div
446     __kmpc_atomic_float10_div_cpt
447     __kmpc_atomic_float10_div_cpt_rev
448     __kmpc_atomic_float10_div_fp
449     __kmpc_atomic_float10_div_rev
450     __kmpc_atomic_float10_mul
451     __kmpc_atomic_float10_mul_cpt
452     __kmpc_atomic_float10_mul_fp
453     __kmpc_atomic_float10_rd
454     __kmpc_atomic_float10_sub
455     __kmpc_atomic_float10_sub_cpt
456     __kmpc_atomic_float10_sub_cpt_rev
457     __kmpc_atomic_float10_sub_fp
458     __kmpc_atomic_float10_sub_rev
459     __kmpc_atomic_float10_swp
460     __kmpc_atomic_float10_wr
461     __kmpc_atomic_float16_add
462     __kmpc_atomic_float16_add_cpt
463     __kmpc_atomic_float16_div
464     __kmpc_atomic_float16_div_cpt
465     __kmpc_atomic_float16_div_cpt_rev
466     __kmpc_atomic_float16_div_rev
467     __kmpc_atomic_float16_max
468     __kmpc_atomic_float16_max_cpt
469     __kmpc_atomic_float16_min
470     __kmpc_atomic_float16_min_cpt
471     __kmpc_atomic_float16_mul
472     __kmpc_atomic_float16_mul_cpt
473     __kmpc_atomic_float16_rd
474     __kmpc_atomic_float16_sub
475     __kmpc_atomic_float16_sub_cpt
476     __kmpc_atomic_float16_sub_cpt_rev
477     __kmpc_atomic_float16_sub_rev
478     __kmpc_atomic_float16_swp
479     __kmpc_atomic_float16_wr
480 @endcode
481 
482 Functions for Complex types
483 ---------------------------
484 Functions for complex types whose component floating point variables are of size
485 4,8,10 or 16 bytes. The names here are based on the size of the component float,
486 *not* the size of the complex type. So `__kmpc_atomic_cmplx8_add` is an
487 operation on a `complex<double>` or `complex(kind=8)`, *not* `complex<float>`.
488 
489 @code
490     __kmpc_atomic_cmplx4_add
491     __kmpc_atomic_cmplx4_add_cmplx8
492     __kmpc_atomic_cmplx4_add_cpt
493     __kmpc_atomic_cmplx4_div
494     __kmpc_atomic_cmplx4_div_cmplx8
495     __kmpc_atomic_cmplx4_div_cpt
496     __kmpc_atomic_cmplx4_div_cpt_rev
497     __kmpc_atomic_cmplx4_div_rev
498     __kmpc_atomic_cmplx4_mul
499     __kmpc_atomic_cmplx4_mul_cmplx8
500     __kmpc_atomic_cmplx4_mul_cpt
501     __kmpc_atomic_cmplx4_rd
502     __kmpc_atomic_cmplx4_sub
503     __kmpc_atomic_cmplx4_sub_cmplx8
504     __kmpc_atomic_cmplx4_sub_cpt
505     __kmpc_atomic_cmplx4_sub_cpt_rev
506     __kmpc_atomic_cmplx4_sub_rev
507     __kmpc_atomic_cmplx4_swp
508     __kmpc_atomic_cmplx4_wr
509     __kmpc_atomic_cmplx8_add
510     __kmpc_atomic_cmplx8_add_cpt
511     __kmpc_atomic_cmplx8_div
512     __kmpc_atomic_cmplx8_div_cpt
513     __kmpc_atomic_cmplx8_div_cpt_rev
514     __kmpc_atomic_cmplx8_div_rev
515     __kmpc_atomic_cmplx8_mul
516     __kmpc_atomic_cmplx8_mul_cpt
517     __kmpc_atomic_cmplx8_rd
518     __kmpc_atomic_cmplx8_sub
519     __kmpc_atomic_cmplx8_sub_cpt
520     __kmpc_atomic_cmplx8_sub_cpt_rev
521     __kmpc_atomic_cmplx8_sub_rev
522     __kmpc_atomic_cmplx8_swp
523     __kmpc_atomic_cmplx8_wr
524     __kmpc_atomic_cmplx10_add
525     __kmpc_atomic_cmplx10_add_cpt
526     __kmpc_atomic_cmplx10_div
527     __kmpc_atomic_cmplx10_div_cpt
528     __kmpc_atomic_cmplx10_div_cpt_rev
529     __kmpc_atomic_cmplx10_div_rev
530     __kmpc_atomic_cmplx10_mul
531     __kmpc_atomic_cmplx10_mul_cpt
532     __kmpc_atomic_cmplx10_rd
533     __kmpc_atomic_cmplx10_sub
534     __kmpc_atomic_cmplx10_sub_cpt
535     __kmpc_atomic_cmplx10_sub_cpt_rev
536     __kmpc_atomic_cmplx10_sub_rev
537     __kmpc_atomic_cmplx10_swp
538     __kmpc_atomic_cmplx10_wr
539     __kmpc_atomic_cmplx16_add
540     __kmpc_atomic_cmplx16_add_cpt
541     __kmpc_atomic_cmplx16_div
542     __kmpc_atomic_cmplx16_div_cpt
543     __kmpc_atomic_cmplx16_div_cpt_rev
544     __kmpc_atomic_cmplx16_div_rev
545     __kmpc_atomic_cmplx16_mul
546     __kmpc_atomic_cmplx16_mul_cpt
547     __kmpc_atomic_cmplx16_rd
548     __kmpc_atomic_cmplx16_sub
549     __kmpc_atomic_cmplx16_sub_cpt
550     __kmpc_atomic_cmplx16_sub_cpt_rev
551     __kmpc_atomic_cmplx16_swp
552     __kmpc_atomic_cmplx16_wr
553 @endcode
554 */
555 
556 /*!
557 @ingroup ATOMIC_OPS
558 @{
559 */
560 
561 /*
562  * Global vars
563  */
564 
// Default atomic mode: 2 (GOMP compatibility) when KMP_GOMP_COMPAT is defined,
// otherwise 1 (Intel perf).
#ifdef KMP_GOMP_COMPAT
int __kmp_atomic_mode = 2;
#else
int __kmp_atomic_mode = 1;
#endif /* KMP_GOMP_COMPAT */
570 
// Locks used to serialize atomic updates that are implemented as critical
// sections. There is one lock per operand size/type; the ATOMIC_LOCK* macros
// map a lock identifier onto each of these variables.
571 KMP_ALIGN(128)
572 // NOTE(review): KMP_ALIGN(128) presumably covers only the next declaration.
573 // Control access to all user coded atomics in Gnu compat mode
574 kmp_atomic_lock_t __kmp_atomic_lock;
575 // Control access to all user coded atomics for 1-byte fixed data types
576 kmp_atomic_lock_t __kmp_atomic_lock_1i;
577 // Control access to all user coded atomics for 2-byte fixed data types
578 kmp_atomic_lock_t __kmp_atomic_lock_2i;
579 // Control access to all user coded atomics for 4-byte fixed data types
580 kmp_atomic_lock_t __kmp_atomic_lock_4i;
581 // Control access to all user coded atomics for kmp_real32 data type
582 kmp_atomic_lock_t __kmp_atomic_lock_4r;
583 // Control access to all user coded atomics for 8-byte fixed data types
584 kmp_atomic_lock_t __kmp_atomic_lock_8i;
585 // Control access to all user coded atomics for kmp_real64 data type
586 kmp_atomic_lock_t __kmp_atomic_lock_8r;
587 // Control access to all user coded atomics for float complex data type
588 kmp_atomic_lock_t __kmp_atomic_lock_8c;
589 // Control access to all user coded atomics for long double data type
590 kmp_atomic_lock_t __kmp_atomic_lock_10r;
591 // Control access to all user coded atomics for _Quad data type
592 kmp_atomic_lock_t __kmp_atomic_lock_16r;
593 // Control access to all user coded atomics for double complex data type
594 kmp_atomic_lock_t __kmp_atomic_lock_16c;
595 // Control access to all user coded atomics for long double complex type
596 kmp_atomic_lock_t __kmp_atomic_lock_20c;
597 // Control access to all user coded atomics for _Quad complex data type
598 kmp_atomic_lock_t __kmp_atomic_lock_32c;
599 
600 /* KMP_ATOMIC_VOLATILE expands to the `volatile` qualifier.
601    History (2007-03-02): without the "volatile" specifier in OP_CMPXCHG and
602    MIN_MAX_CMPXCHG there was a bug on *_32 and *_32e. This is just a temporary
603    workaround for the problem. It seems the right solution is writing the
604    OP_CMPXCHG and MIN_MAX_CMPXCHG routines in assembler language. */
605 #define KMP_ATOMIC_VOLATILE volatile
606 
607 #if (KMP_ARCH_X86) && KMP_HAVE_QUAD
608 
operator +(Quad_a4_t & lhs,Quad_a4_t & rhs)609 static inline Quad_a4_t operator+(Quad_a4_t &lhs, Quad_a4_t &rhs) {
610   return lhs.q + rhs.q;
611 }
operator -(Quad_a4_t & lhs,Quad_a4_t & rhs)612 static inline Quad_a4_t operator-(Quad_a4_t &lhs, Quad_a4_t &rhs) {
613   return lhs.q - rhs.q;
614 }
operator *(Quad_a4_t & lhs,Quad_a4_t & rhs)615 static inline Quad_a4_t operator*(Quad_a4_t &lhs, Quad_a4_t &rhs) {
616   return lhs.q * rhs.q;
617 }
operator /(Quad_a4_t & lhs,Quad_a4_t & rhs)618 static inline Quad_a4_t operator/(Quad_a4_t &lhs, Quad_a4_t &rhs) {
619   return lhs.q / rhs.q;
620 }
operator <(Quad_a4_t & lhs,Quad_a4_t & rhs)621 static inline bool operator<(Quad_a4_t &lhs, Quad_a4_t &rhs) {
622   return lhs.q < rhs.q;
623 }
operator >(Quad_a4_t & lhs,Quad_a4_t & rhs)624 static inline bool operator>(Quad_a4_t &lhs, Quad_a4_t &rhs) {
625   return lhs.q > rhs.q;
626 }
627 
operator +(Quad_a16_t & lhs,Quad_a16_t & rhs)628 static inline Quad_a16_t operator+(Quad_a16_t &lhs, Quad_a16_t &rhs) {
629   return lhs.q + rhs.q;
630 }
operator -(Quad_a16_t & lhs,Quad_a16_t & rhs)631 static inline Quad_a16_t operator-(Quad_a16_t &lhs, Quad_a16_t &rhs) {
632   return lhs.q - rhs.q;
633 }
operator *(Quad_a16_t & lhs,Quad_a16_t & rhs)634 static inline Quad_a16_t operator*(Quad_a16_t &lhs, Quad_a16_t &rhs) {
635   return lhs.q * rhs.q;
636 }
operator /(Quad_a16_t & lhs,Quad_a16_t & rhs)637 static inline Quad_a16_t operator/(Quad_a16_t &lhs, Quad_a16_t &rhs) {
638   return lhs.q / rhs.q;
639 }
operator <(Quad_a16_t & lhs,Quad_a16_t & rhs)640 static inline bool operator<(Quad_a16_t &lhs, Quad_a16_t &rhs) {
641   return lhs.q < rhs.q;
642 }
operator >(Quad_a16_t & lhs,Quad_a16_t & rhs)643 static inline bool operator>(Quad_a16_t &lhs, Quad_a16_t &rhs) {
644   return lhs.q > rhs.q;
645 }
646 
operator +(kmp_cmplx128_a4_t & lhs,kmp_cmplx128_a4_t & rhs)647 static inline kmp_cmplx128_a4_t operator+(kmp_cmplx128_a4_t &lhs,
648                                           kmp_cmplx128_a4_t &rhs) {
649   return lhs.q + rhs.q;
650 }
operator -(kmp_cmplx128_a4_t & lhs,kmp_cmplx128_a4_t & rhs)651 static inline kmp_cmplx128_a4_t operator-(kmp_cmplx128_a4_t &lhs,
652                                           kmp_cmplx128_a4_t &rhs) {
653   return lhs.q - rhs.q;
654 }
operator *(kmp_cmplx128_a4_t & lhs,kmp_cmplx128_a4_t & rhs)655 static inline kmp_cmplx128_a4_t operator*(kmp_cmplx128_a4_t &lhs,
656                                           kmp_cmplx128_a4_t &rhs) {
657   return lhs.q * rhs.q;
658 }
operator /(kmp_cmplx128_a4_t & lhs,kmp_cmplx128_a4_t & rhs)659 static inline kmp_cmplx128_a4_t operator/(kmp_cmplx128_a4_t &lhs,
660                                           kmp_cmplx128_a4_t &rhs) {
661   return lhs.q / rhs.q;
662 }
663 
operator +(kmp_cmplx128_a16_t & lhs,kmp_cmplx128_a16_t & rhs)664 static inline kmp_cmplx128_a16_t operator+(kmp_cmplx128_a16_t &lhs,
665                                            kmp_cmplx128_a16_t &rhs) {
666   return lhs.q + rhs.q;
667 }
operator -(kmp_cmplx128_a16_t & lhs,kmp_cmplx128_a16_t & rhs)668 static inline kmp_cmplx128_a16_t operator-(kmp_cmplx128_a16_t &lhs,
669                                            kmp_cmplx128_a16_t &rhs) {
670   return lhs.q - rhs.q;
671 }
operator *(kmp_cmplx128_a16_t & lhs,kmp_cmplx128_a16_t & rhs)672 static inline kmp_cmplx128_a16_t operator*(kmp_cmplx128_a16_t &lhs,
673                                            kmp_cmplx128_a16_t &rhs) {
674   return lhs.q * rhs.q;
675 }
operator /(kmp_cmplx128_a16_t & lhs,kmp_cmplx128_a16_t & rhs)676 static inline kmp_cmplx128_a16_t operator/(kmp_cmplx128_a16_t &lhs,
677                                            kmp_cmplx128_a16_t &rhs) {
678   return lhs.q / rhs.q;
679 }
680 
681 #endif // (KMP_ARCH_X86) && KMP_HAVE_QUAD
682 
683 // ATOMIC implementation routines -----------------------------------------
684 // One routine for each operation and operand type.
685 // All routine declarations look like
686 // void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs );
687 
// Statement macro: when the in-scope `gtid` equals KMP_GTID_UNKNOWN, replaces
// it with the value returned by __kmp_entry_gtid(). Expects a modifiable
// variable named `gtid` at the expansion site.
688 #define KMP_CHECK_GTID                                                         \
689   if (gtid == KMP_GTID_UNKNOWN) {                                              \
690     gtid = __kmp_entry_gtid();                                                 \
691   } // check and get gtid when needed
692 
693 // Beginning of a definition (provides name, parameters, debug trace)
694 //     TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
695 //     fixed)
696 //     OP_ID   - operation identifier (add, sub, mul, ...)
697 //     TYPE    - operands' type
//     RET_TYPE - return type of the generated routine
// The generated routine is named __kmpc_atomic_<TYPE_ID>_<OP_ID>. The
// expansion opens the function body with `{` and does not close it: the code
// following the macro use must supply the rest of the body and the final `}`.
698 #define ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, RET_TYPE)                           \
699   RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid,        \
700                                              TYPE *lhs, TYPE rhs) {            \
701     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
702     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
703 
704 // ------------------------------------------------------------------------
705 // Lock variables used for critical sections for various size operands.
// OP_CRITICAL and OP_UPDATE_CRITICAL select one of these by pasting their
// LCK_ID argument onto the ATOMIC_LOCK prefix (ATOMIC_LOCK##LCK_ID).
706 #define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat
707 #define ATOMIC_LOCK1i __kmp_atomic_lock_1i // 1-byte fixed (char)
708 #define ATOMIC_LOCK2i __kmp_atomic_lock_2i // 2-byte fixed (short)
709 #define ATOMIC_LOCK4i __kmp_atomic_lock_4i // 4-byte fixed
710 #define ATOMIC_LOCK4r __kmp_atomic_lock_4r // float
711 #define ATOMIC_LOCK8i __kmp_atomic_lock_8i // 8-byte fixed
712 #define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double
713 #define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex
714 #define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double
715 #define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad
716 #define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex
717 #define ATOMIC_LOCK20c __kmp_atomic_lock_20c // long double complex
718 #define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex
719 
720 // ------------------------------------------------------------------------
721 // Operation on *lhs, rhs bound by critical section
722 //     OP     - assignment operator; expands as `(*lhs) OP (rhs);`
723 //     LCK_ID - lock identifier, pasted onto ATOMIC_LOCK to pick the lock
724 // Note: don't check gtid as it should always be valid
725 // 1, 2-byte - expect valid parameter, other - check before this macro
// Uses `lhs`, `rhs` and `gtid` from the expansion scope. The expansion is a
// sequence of three statements (acquire, update, release), not one statement.
726 #define OP_CRITICAL(OP, LCK_ID)                                                \
727   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
728                                                                                \
729   (*lhs) OP(rhs);                                                              \
730                                                                                \
731   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
732 
// Update of *lhs bound by critical section, for a plain binary operator.
//     TYPE   - type the result is cast to before it is stored back to *lhs
//     OP     - binary operator; the update is `(*lhs) = (TYPE)((*lhs) OP rhs)`
//     LCK_ID - lock identifier, pasted onto ATOMIC_LOCK to pick the lock
// Uses `lhs`, `rhs` and `gtid` from the expansion scope.
733 #define OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID)                                   \
734   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
735   (*lhs) = (TYPE)((*lhs)OP rhs);                                               \
736   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
737 
738 // ------------------------------------------------------------------------
739 // For GNU compatibility, we may need to use a critical section,
740 // even though it is not required by the ISA.
741 //
742 // On IA-32 architecture, all atomic operations except for fixed 4 byte add,
743 // sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common
744 // critical section.  On Intel(R) 64, all atomic operations are done with fetch
745 // and add or compare and exchange.  Therefore, the FLAG parameter to this
746 // macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extension which
747 // require a critical section, where we predict that they will be implemented
748 // in the Gnu codegen by calling GOMP_atomic_start() / GOMP_atomic_end()).
749 //
750 // When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct,
751 // the FLAG parameter should always be 1.  If we know that we will be using
752 // a critical section, then we want to make certain that we use the generic
753 // lock __kmp_atomic_lock to protect the atomic update, and not one of the
754 // locks that are specialized based upon the size or type of the data.
755 //
756 // If FLAG is 0, then we are relying on dead code elimination by the build
757 // compiler to get rid of the useless block of code, and save a needless
758 // branch at runtime.
759 
760 #ifdef KMP_GOMP_COMPAT
// OP_GOMP_CRITICAL: when FLAG is nonzero and __kmp_atomic_mode == 2, validates
// gtid (KMP_CHECK_GTID), performs `(*lhs) OP (rhs)` under lock identifier 0
// (ATOMIC_LOCK0, i.e. __kmp_atomic_lock) and returns from the enclosing
// function. The bare `return;` means it fits only routines returning void.
761 #define OP_GOMP_CRITICAL(OP, FLAG)                                             \
762   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
763     KMP_CHECK_GTID;                                                            \
764     OP_CRITICAL(OP, 0);                                                        \
765     return;                                                                    \
766   }
767 
// OP_UPDATE_GOMP_CRITICAL: same as OP_GOMP_CRITICAL, but the update is done
// through OP_UPDATE_CRITICAL, so OP is a binary operator and the result is
// cast to TYPE.
768 #define OP_UPDATE_GOMP_CRITICAL(TYPE, OP, FLAG)                                \
769   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
770     KMP_CHECK_GTID;                                                            \
771     OP_UPDATE_CRITICAL(TYPE, OP, 0);                                           \
772     return;                                                                    \
773   }
774 #else
// Without KMP_GOMP_COMPAT both macros expand to nothing.
775 #define OP_GOMP_CRITICAL(OP, FLAG)
776 #define OP_UPDATE_GOMP_CRITICAL(TYPE, OP, FLAG)
777 #endif /* KMP_GOMP_COMPAT */
778 
// KMP_DO_PAUSE: statement executed between compare-and-store retries in
// OP_CMPXCHG. Expands to _mm_delay_32(1) when KMP_MIC is nonzero and to
// nothing otherwise.
#if !KMP_MIC
#define KMP_DO_PAUSE
#else
#define KMP_DO_PAUSE _mm_delay_32(1)
#endif /* !KMP_MIC */
784 
// ------------------------------------------------------------------------
// Update of *lhs built on a "compare_and_store" retry loop: read *lhs through
// a volatile pointer, compute `old_value OP rhs`, and try to store the result
// with KMP_COMPARE_AND_STORE_ACQ##BITS. When the store is rejected, execute
// KMP_DO_PAUSE and start over with a fresh read.
//     TYPE    - operands' type
//     BITS    - size in bits, used to distinguish low level calls
//     OP      - binary operator applied as `old_value OP rhs`
// Uses `lhs` and `rhs` from the expansion scope.
#define OP_CMPXCHG(TYPE, BITS, OP)                                             \
  {                                                                            \
    TYPE old_value, new_value;                                                 \
    for (;;) {                                                                 \
      old_value = *(TYPE volatile *)lhs;                                       \
      new_value = (TYPE)(old_value OP rhs);                                    \
      if (KMP_COMPARE_AND_STORE_ACQ##BITS(                                     \
              (kmp_int##BITS *)lhs,                                            \
              *VOLATILE_CAST(kmp_int##BITS *) & old_value,                     \
              *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                  \
        break;                                                                 \
      }                                                                        \
      KMP_DO_PAUSE;                                                            \
    }                                                                          \
  }
804 
805 #if USE_CMPXCHG_FIX
806 // 2007-06-25:
807 // workaround for C78287 (complex(kind=4) data type). lin_32, lin_32e, win_32
808 // and win_32e are affected (I verified the asm). Compiler ignores the volatile
809 // qualifier of the temp_val in the OP_CMPXCHG macro. This is a problem of the
810 // compiler. Related tracker is C76005, targeted to 11.0. I verified the asm of
811 // the workaround.
//
// Same retry loop as OP_CMPXCHG, with one difference in how the operands are
// stored: the old and new values live in a local struct holding the TYPE
// value (`cmp`) and a kmp_int##BITS pointer (`vvv`) that is set to the address
// of that same `cmp` field. The value is loaded from *lhs and handed to
// KMP_COMPARE_AND_STORE_ACQ##BITS through `vvv`, while the arithmetic is done
// on `cmp`.
// Expects `lhs` and `rhs` to be in scope at the expansion site.
812 #define OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP)                                  \
813   {                                                                            \
814     struct _sss {                                                              \
815       TYPE cmp;                                                                \
816       kmp_int##BITS *vvv;                                                      \
817     };                                                                         \
818     struct _sss old_value, new_value;                                          \
819     old_value.vvv = (kmp_int##BITS *)&old_value.cmp;                           \
820     new_value.vvv = (kmp_int##BITS *)&new_value.cmp;                           \
821     *old_value.vvv = *(volatile kmp_int##BITS *)lhs;                           \
822     new_value.cmp = (TYPE)(old_value.cmp OP rhs);                              \
823     while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
824         (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv,   \
825         *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) {                      \
826       KMP_DO_PAUSE;                                                            \
827                                                                                \
828       *old_value.vvv = *(volatile kmp_int##BITS *)lhs;                         \
829       new_value.cmp = (TYPE)(old_value.cmp OP rhs);                            \
830     }                                                                          \
831   }
832 // end of the first part of the workaround for C78287
833 #endif // USE_CMPXCHG_FIX
834 
// Overrides applied only for Windows builds targeting AArch64 or ARM: both
// OP_CMPXCHG and OP_UPDATE_CRITICAL are #undef'd and redefined here.
835 #if KMP_OS_WINDOWS && (KMP_ARCH_AARCH64 || KMP_ARCH_ARM)
836 // Undo explicit type casts to get MSVC ARM64 to build. Uses
837 // OP_CMPXCHG_WORKAROUND definition for OP_CMPXCHG
// The body matches OP_CMPXCHG_WORKAROUND except that the result of
// `old_value.cmp OP rhs` is assigned to new_value.cmp without a (TYPE) cast.
838 #undef OP_CMPXCHG
839 #define OP_CMPXCHG(TYPE, BITS, OP)                                             \
840   {                                                                            \
841     struct _sss {                                                              \
842       TYPE cmp;                                                                \
843       kmp_int##BITS *vvv;                                                      \
844     };                                                                         \
845     struct _sss old_value, new_value;                                          \
846     old_value.vvv = (kmp_int##BITS *)&old_value.cmp;                           \
847     new_value.vvv = (kmp_int##BITS *)&new_value.cmp;                           \
848     *old_value.vvv = *(volatile kmp_int##BITS *)lhs;                           \
849     new_value.cmp = old_value.cmp OP rhs;                                      \
850     while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
851         (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv,   \
852         *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) {                      \
853       KMP_DO_PAUSE;                                                            \
854                                                                                \
855       *old_value.vvv = *(volatile kmp_int##BITS *)lhs;                         \
856       new_value.cmp = old_value.cmp OP rhs;                                    \
857     }                                                                          \
858   }
859 
// Lock-protected update for the same configuration: acquire
// ATOMIC_LOCK##LCK_ID, store `(*lhs) OP rhs` back into *lhs, release the lock.
// The TYPE parameter is accepted but not used by this definition.
// Expects `lhs`, `rhs` and `gtid` to be in scope at the expansion site.
860 #undef OP_UPDATE_CRITICAL
861 #define OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID)                                   \
862   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
863   (*lhs) = (*lhs)OP rhs;                                                       \
864   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
865 
866 #endif // KMP_OS_WINDOWS && (KMP_ARCH_AARCH64 || KMP_ARCH_ARM)
867 
868 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
869 
870 // ------------------------------------------------------------------------
871 // X86 or X86_64: no alignment problems ====================================
// Entry point for integer add/sub implemented with a single
// KMP_TEST_THEN_ADD##BITS call. After the GOMP-compatibility hook, OP is
// applied as a unary sign to rhs, so `-` turns the addition into a
// subtraction. LCK_ID and MASK are accepted but not used in this variant.
// The closing `}` matches a brace that is not opened in this macro
// (presumably opened by ATOMIC_BEGIN, which is defined outside this section).
872 #define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,         \
873                          GOMP_FLAG)                                            \
874   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
875   OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
876   /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */            \
877   KMP_TEST_THEN_ADD##BITS(lhs, OP rhs);                                        \
878   }
879 // -------------------------------------------------------------------------
// Entry point whose update is done by the OP_CMPXCHG compare-and-store retry
// loop, preceded by the GOMP-compatibility hook. No alignment test is made in
// this variant; LCK_ID and MASK are accepted but not used.
// The closing `}` matches a brace that is not opened in this macro
// (presumably opened by ATOMIC_BEGIN, which is defined outside this section).
880 #define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,           \
881                        GOMP_FLAG)                                              \
882   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
883   OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
884   OP_CMPXCHG(TYPE, BITS, OP)                                                   \
885   }
886 #if USE_CMPXCHG_FIX
887 // -------------------------------------------------------------------------
888 // workaround for C78287 (complex(kind=4) data type)
// Identical in shape to ATOMIC_CMPXCHG, but the update is performed by
// OP_CMPXCHG_WORKAROUND instead of OP_CMPXCHG. LCK_ID and MASK are accepted
// but not used in this variant.
889 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID,      \
890                                   MASK, GOMP_FLAG)                             \
891   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
892   OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
893   OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP)                                        \
894   }
895 // end of the second part of the workaround for C78287
896 #endif // USE_CMPXCHG_FIX
897 
898 #else
899 // -------------------------------------------------------------------------
900 // Code for other architectures that don't handle unaligned accesses.
// Alignment-checking form of ATOMIC_FIXED_ADD. MASK is pasted after `0x` to
// form a hexadecimal mask that is ANDed with the address in lhs:
//   - no masked bits set: KMP_TEST_THEN_ADD##BITS(lhs, OP rhs), with OP used
//     as a unary sign on rhs;
//   - otherwise: KMP_CHECK_GTID followed by OP_UPDATE_CRITICAL under lock
//     LCK_ID.
// The final `}` matches a brace that is not opened in this macro (presumably
// opened by ATOMIC_BEGIN, which is defined outside this section).
901 #define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,         \
902                          GOMP_FLAG)                                            \
903   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
904   OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
905   if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
906     /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */          \
907     KMP_TEST_THEN_ADD##BITS(lhs, OP rhs);                                      \
908   } else {                                                                     \
909     KMP_CHECK_GTID;                                                            \
910     OP_UPDATE_CRITICAL(TYPE, OP,                                               \
911                        LCK_ID) /* unaligned address - use critical */          \
912   }                                                                            \
913   }
914 // -------------------------------------------------------------------------
// Alignment-checking form of ATOMIC_CMPXCHG. MASK is pasted after `0x` to
// form a hexadecimal mask that is ANDed with the address in lhs:
//   - no masked bits set: the OP_CMPXCHG compare-and-store retry loop;
//   - otherwise: KMP_CHECK_GTID followed by OP_UPDATE_CRITICAL under lock
//     LCK_ID.
// The final `}` matches a brace that is not opened in this macro (presumably
// opened by ATOMIC_BEGIN, which is defined outside this section).
915 #define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,           \
916                        GOMP_FLAG)                                              \
917   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
918   OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
919   if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
920     OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
921   } else {                                                                     \
922     KMP_CHECK_GTID;                                                            \
923     OP_UPDATE_CRITICAL(TYPE, OP,                                               \
924                        LCK_ID) /* unaligned address - use critical */          \
925   }                                                                            \
926   }
927 #if USE_CMPXCHG_FIX
928 // -------------------------------------------------------------------------
929 // workaround for C78287 (complex(kind=4) data type)
// Alignment-checking definition of ATOMIC_CMPXCHG_WORKAROUND. Its body is the
// same as the alignment-checking ATOMIC_CMPXCHG: compare-and-store loop when
// no MASK bits are set in the address, lock LCK_ID otherwise.
// NOTE(review): the aligned path expands OP_CMPXCHG, not
// OP_CMPXCHG_WORKAROUND as the x86 definition does - confirm this is intended.
930 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID,      \
931                                   MASK, GOMP_FLAG)                             \
932   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
933   OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
934   if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
935     OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
936   } else {                                                                     \
937     KMP_CHECK_GTID;                                                            \
938     OP_UPDATE_CRITICAL(TYPE, OP,                                               \
939                        LCK_ID) /* unaligned address - use critical */          \
940   }                                                                            \
941   }
942 // end of the second part of the workaround for C78287
943 #endif // USE_CMPXCHG_FIX
944 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
945 
// Add/sub instantiations. Argument order for both macros is
//   TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG
// and the trailing comment on each entry names the resulting entry point.
// Integer operands go through ATOMIC_FIXED_ADD, floating-point operands
// through ATOMIC_CMPXCHG. MASK is 3 for the 4-byte and 7 for the 8-byte
// operands. The fixed4 entries pass GOMP_FLAG 0; the rest pass KMP_ARCH_X86.
946 // Routines for ATOMIC 4-byte operands addition and subtraction
947 ATOMIC_FIXED_ADD(fixed4, add, kmp_int32, 32, +, 4i, 3,
948                  0) // __kmpc_atomic_fixed4_add
949 ATOMIC_FIXED_ADD(fixed4, sub, kmp_int32, 32, -, 4i, 3,
950                  0) // __kmpc_atomic_fixed4_sub
951 
952 ATOMIC_CMPXCHG(float4, add, kmp_real32, 32, +, 4r, 3,
953                KMP_ARCH_X86) // __kmpc_atomic_float4_add
954 ATOMIC_CMPXCHG(float4, sub, kmp_real32, 32, -, 4r, 3,
955                KMP_ARCH_X86) // __kmpc_atomic_float4_sub
956 
957 // Routines for ATOMIC 8-byte operands addition and subtraction
958 ATOMIC_FIXED_ADD(fixed8, add, kmp_int64, 64, +, 8i, 7,
959                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_add
960 ATOMIC_FIXED_ADD(fixed8, sub, kmp_int64, 64, -, 8i, 7,
961                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub
962 
963 ATOMIC_CMPXCHG(float8, add, kmp_real64, 64, +, 8r, 7,
964                KMP_ARCH_X86) // __kmpc_atomic_float8_add
965 ATOMIC_CMPXCHG(float8, sub, kmp_real64, 64, -, 8r, 7,
966                KMP_ARCH_X86) // __kmpc_atomic_float8_sub
967 
968 // ------------------------------------------------------------------------
969 // Entries definition for integer operands
970 //     TYPE_ID - operands type and size (fixed4, float4)
971 //     OP_ID   - operation identifier (add, sub, mul, ...)
972 //     TYPE    - operand type
973 //     BITS    - size in bits, used to distinguish low level calls
974 //     OP      - operator (used in critical section)
975 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
976 //     MASK    - used for alignment check
977 
978 //               TYPE_ID,OP_ID,  TYPE,   BITS,OP,LCK_ID,MASK,GOMP_FLAG
979 // ------------------------------------------------------------------------
980 // Routines for ATOMIC integer operands, other operators
981 // ------------------------------------------------------------------------
982 //             TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG
// 1-byte integer operands: BITS 8, lock id 1i, MASK 0. The unsigned variants
// (fixed1u) exist only for div and shr. The bitwise entries (andb, orb, xor)
// pass GOMP_FLAG 0; every other entry passes KMP_ARCH_X86.
983 ATOMIC_CMPXCHG(fixed1, add, kmp_int8, 8, +, 1i, 0,
984                KMP_ARCH_X86) // __kmpc_atomic_fixed1_add
985 ATOMIC_CMPXCHG(fixed1, andb, kmp_int8, 8, &, 1i, 0,
986                0) // __kmpc_atomic_fixed1_andb
987 ATOMIC_CMPXCHG(fixed1, div, kmp_int8, 8, /, 1i, 0,
988                KMP_ARCH_X86) // __kmpc_atomic_fixed1_div
989 ATOMIC_CMPXCHG(fixed1u, div, kmp_uint8, 8, /, 1i, 0,
990                KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div
991 ATOMIC_CMPXCHG(fixed1, mul, kmp_int8, 8, *, 1i, 0,
992                KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul
993 ATOMIC_CMPXCHG(fixed1, orb, kmp_int8, 8, |, 1i, 0,
994                0) // __kmpc_atomic_fixed1_orb
995 ATOMIC_CMPXCHG(fixed1, shl, kmp_int8, 8, <<, 1i, 0,
996                KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl
997 ATOMIC_CMPXCHG(fixed1, shr, kmp_int8, 8, >>, 1i, 0,
998                KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr
999 ATOMIC_CMPXCHG(fixed1u, shr, kmp_uint8, 8, >>, 1i, 0,
1000                KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr
1001 ATOMIC_CMPXCHG(fixed1, sub, kmp_int8, 8, -, 1i, 0,
1002                KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub
1003 ATOMIC_CMPXCHG(fixed1, xor, kmp_int8, 8, ^, 1i, 0,
1004                0) // __kmpc_atomic_fixed1_xor
// 2-byte integer operands: BITS 16, lock id 2i, MASK 1. The unsigned variants
// (fixed2u) exist only for div and shr. The bitwise entries (andb, orb, xor)
// pass GOMP_FLAG 0; every other entry passes KMP_ARCH_X86.
1005 ATOMIC_CMPXCHG(fixed2, add, kmp_int16, 16, +, 2i, 1,
1006                KMP_ARCH_X86) // __kmpc_atomic_fixed2_add
1007 ATOMIC_CMPXCHG(fixed2, andb, kmp_int16, 16, &, 2i, 1,
1008                0) // __kmpc_atomic_fixed2_andb
1009 ATOMIC_CMPXCHG(fixed2, div, kmp_int16, 16, /, 2i, 1,
1010                KMP_ARCH_X86) // __kmpc_atomic_fixed2_div
1011 ATOMIC_CMPXCHG(fixed2u, div, kmp_uint16, 16, /, 2i, 1,
1012                KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div
1013 ATOMIC_CMPXCHG(fixed2, mul, kmp_int16, 16, *, 2i, 1,
1014                KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul
1015 ATOMIC_CMPXCHG(fixed2, orb, kmp_int16, 16, |, 2i, 1,
1016                0) // __kmpc_atomic_fixed2_orb
1017 ATOMIC_CMPXCHG(fixed2, shl, kmp_int16, 16, <<, 2i, 1,
1018                KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl
1019 ATOMIC_CMPXCHG(fixed2, shr, kmp_int16, 16, >>, 2i, 1,
1020                KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr
1021 ATOMIC_CMPXCHG(fixed2u, shr, kmp_uint16, 16, >>, 2i, 1,
1022                KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr
1023 ATOMIC_CMPXCHG(fixed2, sub, kmp_int16, 16, -, 2i, 1,
1024                KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub
1025 ATOMIC_CMPXCHG(fixed2, xor, kmp_int16, 16, ^, 2i, 1,
1026                0) // __kmpc_atomic_fixed2_xor
// 4-byte integer operands: BITS 32, lock id 4i, MASK 3. add and sub are not
// listed here because they are instantiated with ATOMIC_FIXED_ADD. The
// bitwise entries (andb, orb, xor) pass GOMP_FLAG 0; every other entry passes
// KMP_ARCH_X86.
1027 ATOMIC_CMPXCHG(fixed4, andb, kmp_int32, 32, &, 4i, 3,
1028                0) // __kmpc_atomic_fixed4_andb
1029 ATOMIC_CMPXCHG(fixed4, div, kmp_int32, 32, /, 4i, 3,
1030                KMP_ARCH_X86) // __kmpc_atomic_fixed4_div
1031 ATOMIC_CMPXCHG(fixed4u, div, kmp_uint32, 32, /, 4i, 3,
1032                KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div
1033 ATOMIC_CMPXCHG(fixed4, mul, kmp_int32, 32, *, 4i, 3,
1034                KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul
1035 ATOMIC_CMPXCHG(fixed4, orb, kmp_int32, 32, |, 4i, 3,
1036                0) // __kmpc_atomic_fixed4_orb
1037 ATOMIC_CMPXCHG(fixed4, shl, kmp_int32, 32, <<, 4i, 3,
1038                KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl
1039 ATOMIC_CMPXCHG(fixed4, shr, kmp_int32, 32, >>, 4i, 3,
1040                KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr
1041 ATOMIC_CMPXCHG(fixed4u, shr, kmp_uint32, 32, >>, 4i, 3,
1042                KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr
1043 ATOMIC_CMPXCHG(fixed4, xor, kmp_int32, 32, ^, 4i, 3,
1044                0) // __kmpc_atomic_fixed4_xor
// 8-byte integer operands: BITS 64, lock id 8i, MASK 7. add and sub are not
// listed here because they are instantiated with ATOMIC_FIXED_ADD. Unlike the
// narrower widths, every entry - including andb, orb and xor - passes
// KMP_ARCH_X86 as GOMP_FLAG.
1045 ATOMIC_CMPXCHG(fixed8, andb, kmp_int64, 64, &, 8i, 7,
1046                KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb
1047 ATOMIC_CMPXCHG(fixed8, div, kmp_int64, 64, /, 8i, 7,
1048                KMP_ARCH_X86) // __kmpc_atomic_fixed8_div
1049 ATOMIC_CMPXCHG(fixed8u, div, kmp_uint64, 64, /, 8i, 7,
1050                KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div
1051 ATOMIC_CMPXCHG(fixed8, mul, kmp_int64, 64, *, 8i, 7,
1052                KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul
1053 ATOMIC_CMPXCHG(fixed8, orb, kmp_int64, 64, |, 8i, 7,
1054                KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb
1055 ATOMIC_CMPXCHG(fixed8, shl, kmp_int64, 64, <<, 8i, 7,
1056                KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl
1057 ATOMIC_CMPXCHG(fixed8, shr, kmp_int64, 64, >>, 8i, 7,
1058                KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr
1059 ATOMIC_CMPXCHG(fixed8u, shr, kmp_uint64, 64, >>, 8i, 7,
1060                KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr
1061 ATOMIC_CMPXCHG(fixed8, xor, kmp_int64, 64, ^, 8i, 7,
1062                KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor
// Floating-point div and mul: float4 uses BITS 32, lock id 4r, MASK 3; float8
// uses BITS 64, lock id 8r, MASK 7. All entries pass KMP_ARCH_X86 as
// GOMP_FLAG.
1063 ATOMIC_CMPXCHG(float4, div, kmp_real32, 32, /, 4r, 3,
1064                KMP_ARCH_X86) // __kmpc_atomic_float4_div
1065 ATOMIC_CMPXCHG(float4, mul, kmp_real32, 32, *, 4r, 3,
1066                KMP_ARCH_X86) // __kmpc_atomic_float4_mul
1067 ATOMIC_CMPXCHG(float8, div, kmp_real64, 64, /, 8r, 7,
1068                KMP_ARCH_X86) // __kmpc_atomic_float8_div
1069 ATOMIC_CMPXCHG(float8, mul, kmp_real64, 64, *, 8r, 7,
1070                KMP_ARCH_X86) // __kmpc_atomic_float8_mul
1071 //             TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG
1072 
1073 /* ------------------------------------------------------------------------ */
1074 /* Routines for C/C++ Reduction operators && and ||                         */
1075 
1076 // ------------------------------------------------------------------------
1077 // Need separate macros for &&, || because there is no combined assignment
1078 //   TODO: eliminate ATOMIC_CRIT_{L,EQV} macros as not used
// Lock-only entry point for the logical operators. It hands the token
// sequence `= *lhs OP` to OP_GOMP_CRITICAL and then to OP_CRITICAL with lock
// LCK_ID. OP_CRITICAL is defined outside this section; presumably it splices
// that sequence between *lhs and rhs, giving `*lhs = *lhs OP rhs` - confirm.
// No instantiation of this macro appears in this part of the file.
1079 #define ATOMIC_CRIT_L(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)             \
1080   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1081   OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG)                                       \
1082   OP_CRITICAL(= *lhs OP, LCK_ID)                                               \
1083   }
1084 
1085 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1086 
1087 // ------------------------------------------------------------------------
1088 // X86 or X86_64: no alignment problems ===================================
// Entry point for && and || built on the OP_CMPXCHG retry loop, which
// evaluates (TYPE)(old_value OP rhs) with the logical operator itself. Only
// the GOMP-compatibility hook receives the `= *lhs OP` assignment form.
// LCK_ID and MASK are accepted but not used in this variant.
1089 #define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
1090   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1091   OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG)                                       \
1092   OP_CMPXCHG(TYPE, BITS, OP)                                                   \
1093   }
1094 
1095 #else
1096 // ------------------------------------------------------------------------
1097 // Code for other architectures that don't handle unaligned accesses.
// Alignment-checking form of ATOMIC_CMPX_L. When the address in lhs has none
// of the 0x##MASK bits set, the OP_CMPXCHG retry loop is used; otherwise
// KMP_CHECK_GTID runs and the update goes through OP_CRITICAL with the
// `= *lhs OP` assignment form under lock LCK_ID.
1098 #define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
1099   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1100   OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG)                                       \
1101   if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
1102     OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
1103   } else {                                                                     \
1104     KMP_CHECK_GTID;                                                            \
1105     OP_CRITICAL(= *lhs OP, LCK_ID) /* unaligned - use critical */              \
1106   }                                                                            \
1107   }
1108 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1109 
// Logical AND (andl, operator &&) and logical OR (orl, operator ||) for 1-,
// 2-, 4- and 8-byte integers. The 1- and 2-byte entries use plain `char` and
// `short` as TYPE; the wider ones use kmp_int32 / kmp_int64. The fixed4
// entries pass GOMP_FLAG 0; the rest pass KMP_ARCH_X86.
1110 ATOMIC_CMPX_L(fixed1, andl, char, 8, &&, 1i, 0,
1111               KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl
1112 ATOMIC_CMPX_L(fixed1, orl, char, 8, ||, 1i, 0,
1113               KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl
1114 ATOMIC_CMPX_L(fixed2, andl, short, 16, &&, 2i, 1,
1115               KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl
1116 ATOMIC_CMPX_L(fixed2, orl, short, 16, ||, 2i, 1,
1117               KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl
1118 ATOMIC_CMPX_L(fixed4, andl, kmp_int32, 32, &&, 4i, 3,
1119               0) // __kmpc_atomic_fixed4_andl
1120 ATOMIC_CMPX_L(fixed4, orl, kmp_int32, 32, ||, 4i, 3,
1121               0) // __kmpc_atomic_fixed4_orl
1122 ATOMIC_CMPX_L(fixed8, andl, kmp_int64, 64, &&, 8i, 7,
1123               KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl
1124 ATOMIC_CMPX_L(fixed8, orl, kmp_int64, 64, ||, 8i, 7,
1125               KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl
1126 
1127 /* ------------------------------------------------------------------------- */
1128 /* Routines for Fortran operators that have no counterpart in C:             */
1129 /* MAX, MIN, .EQV., .NEQV.                                                   */
1130 /* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}           */
1131 /* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}  */
1132 
1133 // -------------------------------------------------------------------------
1134 // MIN and MAX need separate macros
1135 // OP - operator to check if we need any actions?
// Lock-protected min/max step: acquire ATOMIC_LOCK##LCK_ID, and if
// `*lhs OP rhs` holds while the lock is held, store rhs into *lhs; then
// release the lock. With OP `<` this keeps the larger value, with `>` the
// smaller one.
// Expects `lhs`, `rhs` and `gtid` to be in scope at the expansion site.
1136 #define MIN_MAX_CRITSECT(OP, LCK_ID)                                           \
1137   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
1138                                                                                \
1139   if (*lhs OP rhs) { /* still need actions? */                                 \
1140     *lhs = rhs;                                                                \
1141   }                                                                            \
1142   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1143 
1144 // -------------------------------------------------------------------------
// GOMP-compatibility hook for the min/max entry points. With KMP_GOMP_COMPAT
// defined it expands to an `if` taken when FLAG is non-zero and
// __kmp_atomic_mode == 2: KMP_CHECK_GTID, MIN_MAX_CRITSECT under lock id 0,
// then a return from the enclosing function. Otherwise it expands to nothing.
1145 #ifdef KMP_GOMP_COMPAT
1146 #define GOMP_MIN_MAX_CRITSECT(OP, FLAG)                                        \
1147   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
1148     KMP_CHECK_GTID;                                                            \
1149     MIN_MAX_CRITSECT(OP, 0);                                                   \
1150     return;                                                                    \
1151   }
1152 #else
1153 #define GOMP_MIN_MAX_CRITSECT(OP, FLAG)
1154 #endif /* KMP_GOMP_COMPAT */
1155 
1156 // -------------------------------------------------------------------------
// Lock-free min/max step. *lhs is copied into old_value by way of the
// KMP_ATOMIC_VOLATILE temporary temp_val. The loop then repeats while both
//   (1) `old_value OP rhs` still holds, and
//   (2) KMP_COMPARE_AND_STORE_ACQ##BITS, asked to replace old_value with rhs
//       (both reinterpreted as kmp_int##BITS), reports failure,
// re-reading *lhs on every iteration. It therefore ends either because the
// comparison no longer holds or because rhs has been stored.
// Expects `lhs` and `rhs` to be in scope; the address of rhs is taken.
1157 #define MIN_MAX_CMPXCHG(TYPE, BITS, OP)                                        \
1158   {                                                                            \
1159     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
1160     TYPE old_value;                                                            \
1161     temp_val = *lhs;                                                           \
1162     old_value = temp_val;                                                      \
1163     while (old_value OP rhs && /* still need actions? */                       \
1164            !KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
1165                (kmp_int##BITS *)lhs,                                           \
1166                *VOLATILE_CAST(kmp_int##BITS *) & old_value,                    \
1167                *VOLATILE_CAST(kmp_int##BITS *) & rhs)) {                       \
1168       temp_val = *lhs;                                                         \
1169       old_value = temp_val;                                                    \
1170     }                                                                          \
1171   }
1172 
1173 // -------------------------------------------------------------------------
1174 // 1-byte, 2-byte operands - use critical section
// Min/max entry point that always updates under a lock. `*lhs OP rhs` is
// first tested without the lock; only when it holds does the macro run the
// GOMP-compatibility hook and then MIN_MAX_CRITSECT, which tests the
// condition again once the lock is held.
// In this part of the file it is instantiated for long double and quad types.
1175 #define MIN_MAX_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)          \
1176   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1177   if (*lhs OP rhs) { /* need actions? */                                       \
1178     GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG)                                       \
1179     MIN_MAX_CRITSECT(OP, LCK_ID)                                               \
1180   }                                                                            \
1181   }
1182 
1183 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1184 
1185 // -------------------------------------------------------------------------
1186 // X86 or X86_64: no alignment problems ====================================
// Min/max entry point using the lock-free MIN_MAX_CMPXCHG loop. Nothing is
// done unless the initial `*lhs OP rhs` test holds; inside that branch the
// GOMP-compatibility hook runs first. LCK_ID and MASK are accepted but not
// used in this variant.
1187 #define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,         \
1188                          GOMP_FLAG)                                            \
1189   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1190   if (*lhs OP rhs) {                                                           \
1191     GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG)                                       \
1192     MIN_MAX_CMPXCHG(TYPE, BITS, OP)                                            \
1193   }                                                                            \
1194   }
1195 
1196 #else
1197 // -------------------------------------------------------------------------
1198 // Code for other architectures that don't handle unaligned accesses.
// Alignment-checking form of MIN_MAX_COMPXCHG. Inside the `*lhs OP rhs`
// branch, after the GOMP-compatibility hook, the address in lhs is tested
// against 0x##MASK: with no masked bits set the MIN_MAX_CMPXCHG loop is used,
// otherwise KMP_CHECK_GTID runs and MIN_MAX_CRITSECT updates under lock
// LCK_ID.
1199 #define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,         \
1200                          GOMP_FLAG)                                            \
1201   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1202   if (*lhs OP rhs) {                                                           \
1203     GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG)                                       \
1204     if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                    \
1205       MIN_MAX_CMPXCHG(TYPE, BITS, OP) /* aligned address */                    \
1206     } else {                                                                   \
1207       KMP_CHECK_GTID;                                                          \
1208       MIN_MAX_CRITSECT(OP, LCK_ID) /* unaligned address */                     \
1209     }                                                                          \
1210   }                                                                            \
1211   }
1212 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1213 
// Compare-and-store min/max instantiations for 1-, 2-, 4- and 8-byte integers
// and for float4/float8. OP is the "update needed" test: `<` for max (replace
// when *lhs is smaller than rhs) and `>` for min. The fixed4 entries pass
// GOMP_FLAG 0; the rest pass KMP_ARCH_X86.
1214 MIN_MAX_COMPXCHG(fixed1, max, char, 8, <, 1i, 0,
1215                  KMP_ARCH_X86) // __kmpc_atomic_fixed1_max
1216 MIN_MAX_COMPXCHG(fixed1, min, char, 8, >, 1i, 0,
1217                  KMP_ARCH_X86) // __kmpc_atomic_fixed1_min
1218 MIN_MAX_COMPXCHG(fixed2, max, short, 16, <, 2i, 1,
1219                  KMP_ARCH_X86) // __kmpc_atomic_fixed2_max
1220 MIN_MAX_COMPXCHG(fixed2, min, short, 16, >, 2i, 1,
1221                  KMP_ARCH_X86) // __kmpc_atomic_fixed2_min
1222 MIN_MAX_COMPXCHG(fixed4, max, kmp_int32, 32, <, 4i, 3,
1223                  0) // __kmpc_atomic_fixed4_max
1224 MIN_MAX_COMPXCHG(fixed4, min, kmp_int32, 32, >, 4i, 3,
1225                  0) // __kmpc_atomic_fixed4_min
1226 MIN_MAX_COMPXCHG(fixed8, max, kmp_int64, 64, <, 8i, 7,
1227                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_max
1228 MIN_MAX_COMPXCHG(fixed8, min, kmp_int64, 64, >, 8i, 7,
1229                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_min
1230 MIN_MAX_COMPXCHG(float4, max, kmp_real32, 32, <, 4r, 3,
1231                  KMP_ARCH_X86) // __kmpc_atomic_float4_max
1232 MIN_MAX_COMPXCHG(float4, min, kmp_real32, 32, >, 4r, 3,
1233                  KMP_ARCH_X86) // __kmpc_atomic_float4_min
1234 MIN_MAX_COMPXCHG(float8, max, kmp_real64, 64, <, 8r, 7,
1235                  KMP_ARCH_X86) // __kmpc_atomic_float8_max
1236 MIN_MAX_COMPXCHG(float8, min, kmp_real64, 64, >, 8r, 7,
1237                  KMP_ARCH_X86) // __kmpc_atomic_float8_min
// Lock-based min/max instantiations for the extended floating-point types,
// all with GOMP_FLAG 1:
//   - long double (float10, lock id 10r): x86 / x86_64 builds only;
//   - QUAD_LEGACY (float16, lock id 16r): only when KMP_HAVE_QUAD is non-zero;
//   - Quad_a16_t (max_a16 / min_a16): additionally requires KMP_ARCH_X86.
1238 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1239 MIN_MAX_CRITICAL(float10, max, long double, <, 10r,
1240                  1) // __kmpc_atomic_float10_max
1241 MIN_MAX_CRITICAL(float10, min, long double, >, 10r,
1242                  1) // __kmpc_atomic_float10_min
1243 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
1244 #if KMP_HAVE_QUAD
1245 MIN_MAX_CRITICAL(float16, max, QUAD_LEGACY, <, 16r,
1246                  1) // __kmpc_atomic_float16_max
1247 MIN_MAX_CRITICAL(float16, min, QUAD_LEGACY, >, 16r,
1248                  1) // __kmpc_atomic_float16_min
1249 #if (KMP_ARCH_X86)
1250 MIN_MAX_CRITICAL(float16, max_a16, Quad_a16_t, <, 16r,
1251                  1) // __kmpc_atomic_float16_max_a16
1252 MIN_MAX_CRITICAL(float16, min_a16, Quad_a16_t, >, 16r,
1253                  1) // __kmpc_atomic_float16_min_a16
1254 #endif // (KMP_ARCH_X86)
1255 #endif // KMP_HAVE_QUAD
1256 // ------------------------------------------------------------------------
1257 // Need separate macros for .EQV. because of the need of complement (~)
1258 // OP ignored for critical sections, ^=~ used instead
// Lock-only entry point for .EQV.: the token sequence `^= (TYPE) ~` is handed
// to OP_GOMP_CRITICAL and to OP_CRITICAL (lock LCK_ID). The OP parameter is
// not referenced in the body. No instantiation of this macro appears in this
// part of the file.
1259 #define ATOMIC_CRIT_EQV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)           \
1260   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1261   OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) /* send assignment */               \
1262   OP_CRITICAL(^= (TYPE) ~, LCK_ID) /* send assignment and complement */        \
1263   }
1264 
1265 // ------------------------------------------------------------------------
1266 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1267 // ------------------------------------------------------------------------
1268 // X86 or X86_64: no alignment problems ===================================
// Entry point for .EQV. built on the OP_CMPXCHG retry loop. OP is passed
// through to OP_CMPXCHG (the instantiations in this file use `^~`, so the
// loop computes old_value ^ ~rhs), while the GOMP-compatibility hook is given
// the assignment form `^= (TYPE) ~`. LCK_ID and MASK are accepted but not
// used in this variant.
1269 #define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,          \
1270                         GOMP_FLAG)                                             \
1271   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1272   OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) /* send assignment */               \
1273   OP_CMPXCHG(TYPE, BITS, OP)                                                   \
1274   }
1275 // ------------------------------------------------------------------------
1276 #else
1277 // ------------------------------------------------------------------------
1278 // Code for other architectures that don't handle unaligned accesses.
// Alignment-checking form of ATOMIC_CMPX_EQV. When the address in lhs has
// none of the 0x##MASK bits set, the OP_CMPXCHG retry loop is used with OP;
// otherwise KMP_CHECK_GTID runs and OP_CRITICAL is given `^= (TYPE) ~` under
// lock LCK_ID.
1279 #define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,          \
1280                         GOMP_FLAG)                                             \
1281   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1282   OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG)                                     \
1283   if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
1284     OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
1285   } else {                                                                     \
1286     KMP_CHECK_GTID;                                                            \
1287     OP_CRITICAL(^= (TYPE) ~, LCK_ID) /* unaligned address - use critical */    \
1288   }                                                                            \
1289   }
1290 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1291 
// .NEQV. and .EQV. for 1-, 2-, 4- and 8-byte integers. neqv is instantiated
// through ATOMIC_CMPXCHG with operator `^`; eqv goes through ATOMIC_CMPX_EQV
// with the token pair `^~`. All entries pass KMP_ARCH_X86 as GOMP_FLAG.
1292 ATOMIC_CMPXCHG(fixed1, neqv, kmp_int8, 8, ^, 1i, 0,
1293                KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv
1294 ATOMIC_CMPXCHG(fixed2, neqv, kmp_int16, 16, ^, 2i, 1,
1295                KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv
1296 ATOMIC_CMPXCHG(fixed4, neqv, kmp_int32, 32, ^, 4i, 3,
1297                KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv
1298 ATOMIC_CMPXCHG(fixed8, neqv, kmp_int64, 64, ^, 8i, 7,
1299                KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv
1300 ATOMIC_CMPX_EQV(fixed1, eqv, kmp_int8, 8, ^~, 1i, 0,
1301                 KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv
1302 ATOMIC_CMPX_EQV(fixed2, eqv, kmp_int16, 16, ^~, 2i, 1,
1303                 KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv
1304 ATOMIC_CMPX_EQV(fixed4, eqv, kmp_int32, 32, ^~, 4i, 3,
1305                 KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv
1306 ATOMIC_CMPX_EQV(fixed8, eqv, kmp_int64, 64, ^~, 8i, 7,
1307                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv
1308 
// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
//     TYPE_ID, OP_ID, TYPE - detailed above
//     OP        - operator; the instantiations below pass a bare binary
//                 operator (+, -, *, /), not an assignment
//     LCK_ID    - lock identifier, used to possibly distinguish lock variable
//     GOMP_FLAG - forwarded unchanged to OP_UPDATE_GOMP_CRITICAL
// The expansion opens the routine with ATOMIC_BEGIN, hands TYPE and OP first
// to OP_UPDATE_GOMP_CRITICAL and then to OP_UPDATE_CRITICAL (both defined
// earlier in the file), and closes the function body with the final brace.
#define ATOMIC_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)           \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) /* send assignment */           \
  OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) /* send assignment */                   \
  }
1320 
/* ------------------------------------------------------------------------- */
// add / sub / mul / div entry points for the extended types. Every entry is
// built by ATOMIC_CRITICAL with GOMP_FLAG = 1, except cmplx4, which uses
// ATOMIC_CMPXCHG_WORKAROUND when USE_CMPXCHG_FIX is set.
// Columns for ATOMIC_CRITICAL: TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// routines for long double type
ATOMIC_CRITICAL(float10, add, long double, +, 10r,
                1) // __kmpc_atomic_float10_add
ATOMIC_CRITICAL(float10, sub, long double, -, 10r,
                1) // __kmpc_atomic_float10_sub
ATOMIC_CRITICAL(float10, mul, long double, *, 10r,
                1) // __kmpc_atomic_float10_mul
ATOMIC_CRITICAL(float10, div, long double, /, 10r,
                1) // __kmpc_atomic_float10_div
#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
#if KMP_HAVE_QUAD
// routines for _Quad type
ATOMIC_CRITICAL(float16, add, QUAD_LEGACY, +, 16r,
                1) // __kmpc_atomic_float16_add
ATOMIC_CRITICAL(float16, sub, QUAD_LEGACY, -, 16r,
                1) // __kmpc_atomic_float16_sub
ATOMIC_CRITICAL(float16, mul, QUAD_LEGACY, *, 16r,
                1) // __kmpc_atomic_float16_mul
ATOMIC_CRITICAL(float16, div, QUAD_LEGACY, /, 16r,
                1) // __kmpc_atomic_float16_div
#if (KMP_ARCH_X86)
// _a16 variants, IA-32 only: same operations on Quad_a16_t operands
// (presumably the 16-byte-aligned flavour of the type - confirm in
// kmp_atomic.h)
ATOMIC_CRITICAL(float16, add_a16, Quad_a16_t, +, 16r,
                1) // __kmpc_atomic_float16_add_a16
ATOMIC_CRITICAL(float16, sub_a16, Quad_a16_t, -, 16r,
                1) // __kmpc_atomic_float16_sub_a16
ATOMIC_CRITICAL(float16, mul_a16, Quad_a16_t, *, 16r,
                1) // __kmpc_atomic_float16_mul_a16
ATOMIC_CRITICAL(float16, div_a16, Quad_a16_t, /, 16r,
                1) // __kmpc_atomic_float16_div_a16
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD
// routines for complex types

#if USE_CMPXCHG_FIX
// workaround for C78287 (complex(kind=4) data type)
ATOMIC_CMPXCHG_WORKAROUND(cmplx4, add, kmp_cmplx32, 64, +, 8c, 7,
                          1) // __kmpc_atomic_cmplx4_add
ATOMIC_CMPXCHG_WORKAROUND(cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7,
                          1) // __kmpc_atomic_cmplx4_sub
ATOMIC_CMPXCHG_WORKAROUND(cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7,
                          1) // __kmpc_atomic_cmplx4_mul
ATOMIC_CMPXCHG_WORKAROUND(cmplx4, div, kmp_cmplx32, 64, /, 8c, 7,
                          1) // __kmpc_atomic_cmplx4_div
// end of the workaround for C78287
#else
ATOMIC_CRITICAL(cmplx4, add, kmp_cmplx32, +, 8c, 1) // __kmpc_atomic_cmplx4_add
ATOMIC_CRITICAL(cmplx4, sub, kmp_cmplx32, -, 8c, 1) // __kmpc_atomic_cmplx4_sub
ATOMIC_CRITICAL(cmplx4, mul, kmp_cmplx32, *, 8c, 1) // __kmpc_atomic_cmplx4_mul
ATOMIC_CRITICAL(cmplx4, div, kmp_cmplx32, /, 8c, 1) // __kmpc_atomic_cmplx4_div
#endif // USE_CMPXCHG_FIX

ATOMIC_CRITICAL(cmplx8, add, kmp_cmplx64, +, 16c, 1) // __kmpc_atomic_cmplx8_add
ATOMIC_CRITICAL(cmplx8, sub, kmp_cmplx64, -, 16c, 1) // __kmpc_atomic_cmplx8_sub
ATOMIC_CRITICAL(cmplx8, mul, kmp_cmplx64, *, 16c, 1) // __kmpc_atomic_cmplx8_mul
ATOMIC_CRITICAL(cmplx8, div, kmp_cmplx64, /, 16c, 1) // __kmpc_atomic_cmplx8_div
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
ATOMIC_CRITICAL(cmplx10, add, kmp_cmplx80, +, 20c,
                1) // __kmpc_atomic_cmplx10_add
ATOMIC_CRITICAL(cmplx10, sub, kmp_cmplx80, -, 20c,
                1) // __kmpc_atomic_cmplx10_sub
ATOMIC_CRITICAL(cmplx10, mul, kmp_cmplx80, *, 20c,
                1) // __kmpc_atomic_cmplx10_mul
ATOMIC_CRITICAL(cmplx10, div, kmp_cmplx80, /, 20c,
                1) // __kmpc_atomic_cmplx10_div
#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL(cmplx16, add, CPLX128_LEG, +, 32c,
                1) // __kmpc_atomic_cmplx16_add
ATOMIC_CRITICAL(cmplx16, sub, CPLX128_LEG, -, 32c,
                1) // __kmpc_atomic_cmplx16_sub
ATOMIC_CRITICAL(cmplx16, mul, CPLX128_LEG, *, 32c,
                1) // __kmpc_atomic_cmplx16_mul
ATOMIC_CRITICAL(cmplx16, div, CPLX128_LEG, /, 32c,
                1) // __kmpc_atomic_cmplx16_div
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL(cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c,
                1) // __kmpc_atomic_cmplx16_add_a16
ATOMIC_CRITICAL(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
                1) // __kmpc_atomic_cmplx16_sub_a16
ATOMIC_CRITICAL(cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c,
                1) // __kmpc_atomic_cmplx16_mul_a16
ATOMIC_CRITICAL(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
                1) // __kmpc_atomic_cmplx16_div_a16
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD
1408 
1409 // OpenMP 4.0: x = expr binop x for non-commutative operations.
1410 // Supported only on IA-32 architecture and Intel(R) 64
1411 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1412 
// ------------------------------------------------------------------------
// Reversed operation on *lhs, rhs bound by critical section:
//     *lhs = (TYPE)(rhs OP *lhs)
//     TYPE   - type the result is cast to before it is stored
//     OP     - bare binary operator (no assignment; the macro supplies the
//              assignment itself)
//     LCK_ID - lock identifier, pasted onto ATOMIC_LOCK to name the lock
// Expects lhs, rhs and gtid to be in scope at the expansion site.
// Note: don't check gtid as it should always be valid
// 1, 2-byte - expect valid parameter, other - check before this macro
#define OP_CRITICAL_REV(TYPE, OP, LCK_ID)                                      \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
                                                                               \
  (*lhs) = (TYPE)((rhs)OP(*lhs));                                              \
                                                                               \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1425 
// GOMP-compatibility prologue for the reversed routines.
// With KMP_GOMP_COMPAT defined: when FLAG is non-zero and __kmp_atomic_mode
// is 2, run KMP_CHECK_GTID, perform the reversed operation under lock
// ATOMIC_LOCK0 (LCK_ID 0) and return from the enclosing routine, so nothing
// after this macro executes. Without KMP_GOMP_COMPAT it expands to nothing.
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_REV(TYPE, OP, FLAG)                                   \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
    KMP_CHECK_GTID;                                                            \
    OP_CRITICAL_REV(TYPE, OP, 0);                                              \
    return;                                                                    \
  }

#else
#define OP_GOMP_CRITICAL_REV(TYPE, OP, FLAG)
#endif /* KMP_GOMP_COMPAT */
1437 
// Beginning of a definition (provides name, parameters, debug trace)
//     TYPE_ID  - operands type and size (fixed*, fixed*u for signed, unsigned
//     fixed)
//     OP_ID    - operation identifier (add, sub, mul, ...)
//     TYPE     - operands' type
//     RET_TYPE - return type of the generated routine
// Generates the header of
//     RET_TYPE __kmpc_atomic_<TYPE_ID>_<OP_ID>_rev(ident_t *id_ref, int gtid,
//                                                  TYPE *lhs, TYPE rhs)
// followed by an assertion on __kmp_init_serial and a KA_TRACE call. The
// function body is left open: the macro that uses ATOMIC_BEGIN_REV supplies
// the closing brace.
#define ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, RET_TYPE)                       \
  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev(ident_t *id_ref, int gtid,  \
                                                   TYPE *lhs, TYPE rhs) {      \
    KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid));
1448 
// ------------------------------------------------------------------------
// Reversed operation on *lhs, rhs using the "compare_and_store" routine:
// keeps retrying until *lhs has been replaced by (TYPE)(rhs OP *lhs).
//     TYPE    - operands' type
//     BITS    - size in bits, selects the KMP_COMPARE_AND_STORE_ACQ<BITS>
//               call and the kmp_int<BITS> view of the operands
//     OP      - bare binary operator
// Expects lhs and rhs to be in scope at the expansion site.
// Note: *lhs is read once per attempt into `seen` (declared with
//       KMP_ATOMIC_VOLATILE) and copied to `expected`, so that the value the
//       result is computed from is the one handed to the compare-and-store.
#define OP_CMPXCHG_REV(TYPE, BITS, OP)                                         \
  {                                                                            \
    TYPE KMP_ATOMIC_VOLATILE seen;                                             \
    TYPE expected, desired;                                                    \
    for (;;) {                                                                 \
      seen = *lhs;                                                             \
      expected = seen;                                                         \
      desired = (TYPE)(rhs OP expected);                                       \
      if (KMP_COMPARE_AND_STORE_ACQ##BITS(                                     \
              (kmp_int##BITS *)lhs,                                            \
              *VOLATILE_CAST(kmp_int##BITS *) & expected,                      \
              *VOLATILE_CAST(kmp_int##BITS *) & desired)) {                    \
        break; /* store succeeded */                                           \
      }                                                                        \
      KMP_DO_PAUSE; /* lost the race: pause, then re-read and recompute */     \
    }                                                                          \
  }
1473 
// -------------------------------------------------------------------------
// Defines one complete reversed routine, __kmpc_atomic_<TYPE_ID>_<OP_ID>_rev:
// header from ATOMIC_BEGIN_REV, then the GOMP-compatibility prologue
// (OP_GOMP_CRITICAL_REV, which may return early), then the compare-and-store
// loop (OP_CMPXCHG_REV), then the closing brace of the function.
// LCK_ID is accepted but not used by this expansion.
#define ATOMIC_CMPXCHG_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG)  \
  ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void)                                 \
  OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG)                                    \
  OP_CMPXCHG_REV(TYPE, BITS, OP)                                               \
  }
1480 
// ------------------------------------------------------------------------
// Reversed entry points (x = expr OP x) built by ATOMIC_CMPXCHG_REV for the
// fixed1/2/4/8 integer types (div, shl, shr, sub) and for float4/float8
// (div, sub).
//     TYPE_ID   - operands type and size (fixed4, float4)
//     OP_ID     - operation identifier (div, shl, shr, sub)
//     TYPE      - operand type
//     BITS      - size in bits, used to distinguish low level calls
//     OP        - operator (used by both the GOMP critical path and the
//                 compare-and-store loop)
//     LCK_ID    - lock identifier (not used by ATOMIC_CMPXCHG_REV itself)
//     GOMP_FLAG - enables the GOMP-compatibility critical path when non-zero
// ------------------------------------------------------------------------
//                  TYPE_ID,OP_ID, TYPE,    BITS, OP, LCK_ID, GOMP_FLAG
ATOMIC_CMPXCHG_REV(fixed1, div, kmp_int8, 8, /, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev
ATOMIC_CMPXCHG_REV(fixed1u, div, kmp_uint8, 8, /, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev
ATOMIC_CMPXCHG_REV(fixed1, shl, kmp_int8, 8, <<, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_rev
ATOMIC_CMPXCHG_REV(fixed1, shr, kmp_int8, 8, >>, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_rev
ATOMIC_CMPXCHG_REV(fixed1u, shr, kmp_uint8, 8, >>, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_rev
ATOMIC_CMPXCHG_REV(fixed1, sub, kmp_int8, 8, -, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev

ATOMIC_CMPXCHG_REV(fixed2, div, kmp_int16, 16, /, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev
ATOMIC_CMPXCHG_REV(fixed2u, div, kmp_uint16, 16, /, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev
ATOMIC_CMPXCHG_REV(fixed2, shl, kmp_int16, 16, <<, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_rev
ATOMIC_CMPXCHG_REV(fixed2, shr, kmp_int16, 16, >>, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_rev
ATOMIC_CMPXCHG_REV(fixed2u, shr, kmp_uint16, 16, >>, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_rev
ATOMIC_CMPXCHG_REV(fixed2, sub, kmp_int16, 16, -, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev

ATOMIC_CMPXCHG_REV(fixed4, div, kmp_int32, 32, /, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_rev
ATOMIC_CMPXCHG_REV(fixed4u, div, kmp_uint32, 32, /, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_rev
ATOMIC_CMPXCHG_REV(fixed4, shl, kmp_int32, 32, <<, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_rev
ATOMIC_CMPXCHG_REV(fixed4, shr, kmp_int32, 32, >>, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_rev
ATOMIC_CMPXCHG_REV(fixed4u, shr, kmp_uint32, 32, >>, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_rev
ATOMIC_CMPXCHG_REV(fixed4, sub, kmp_int32, 32, -, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_rev

ATOMIC_CMPXCHG_REV(fixed8, div, kmp_int64, 64, /, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev
ATOMIC_CMPXCHG_REV(fixed8u, div, kmp_uint64, 64, /, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev
ATOMIC_CMPXCHG_REV(fixed8, shl, kmp_int64, 64, <<, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_rev
ATOMIC_CMPXCHG_REV(fixed8, shr, kmp_int64, 64, >>, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_rev
ATOMIC_CMPXCHG_REV(fixed8u, shr, kmp_uint64, 64, >>, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_rev
ATOMIC_CMPXCHG_REV(fixed8, sub, kmp_int64, 64, -, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev

ATOMIC_CMPXCHG_REV(float4, div, kmp_real32, 32, /, 4r,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev
ATOMIC_CMPXCHG_REV(float4, sub, kmp_real32, 32, -, 4r,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev

ATOMIC_CMPXCHG_REV(float8, div, kmp_real64, 64, /, 8r,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev
ATOMIC_CMPXCHG_REV(float8, sub, kmp_real64, 64, -, 8r,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev
1556 //                  TYPE_ID,OP_ID, TYPE,     BITS,OP,LCK_ID, GOMP_FLAG
1557 
// ------------------------------------------------------------------------
// Reversed routines for Extended types: long double, _Quad, complex flavours
// (use critical section)
//     TYPE_ID, OP_ID, TYPE - detailed above
//     OP        - bare binary operator
//     LCK_ID    - lock identifier, used to possibly distinguish lock variable
//     GOMP_FLAG - enables the GOMP-compatibility path when non-zero
// Defines __kmpc_atomic_<TYPE_ID>_<OP_ID>_rev: header from ATOMIC_BEGIN_REV,
// GOMP-compatibility prologue (may return early), then the reversed update
// *lhs = (TYPE)(rhs OP *lhs) under lock ATOMIC_LOCK<LCK_ID>, then the closing
// brace of the function.
#define ATOMIC_CRITICAL_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)       \
  ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void)                                 \
  OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG)                                    \
  OP_CRITICAL_REV(TYPE, OP, LCK_ID)                                            \
  }
1569 
/* ------------------------------------------------------------------------- */
// Reversed sub / div entry points for the extended types, all built by
// ATOMIC_CRITICAL_REV with GOMP_FLAG = 1.
// Columns: TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG
// routines for long double type
ATOMIC_CRITICAL_REV(float10, sub, long double, -, 10r,
                    1) // __kmpc_atomic_float10_sub_rev
ATOMIC_CRITICAL_REV(float10, div, long double, /, 10r,
                    1) // __kmpc_atomic_float10_div_rev
#if KMP_HAVE_QUAD
// routines for _Quad type
ATOMIC_CRITICAL_REV(float16, sub, QUAD_LEGACY, -, 16r,
                    1) // __kmpc_atomic_float16_sub_rev
ATOMIC_CRITICAL_REV(float16, div, QUAD_LEGACY, /, 16r,
                    1) // __kmpc_atomic_float16_div_rev
#if (KMP_ARCH_X86)
// _a16 variants, IA-32 only; OP_ID carries the _a16 suffix, so "_rev" is
// appended after it in the generated name
ATOMIC_CRITICAL_REV(float16, sub_a16, Quad_a16_t, -, 16r,
                    1) // __kmpc_atomic_float16_sub_a16_rev
ATOMIC_CRITICAL_REV(float16, div_a16, Quad_a16_t, /, 16r,
                    1) // __kmpc_atomic_float16_div_a16_rev
#endif // KMP_ARCH_X86
#endif // KMP_HAVE_QUAD

// routines for complex types
ATOMIC_CRITICAL_REV(cmplx4, sub, kmp_cmplx32, -, 8c,
                    1) // __kmpc_atomic_cmplx4_sub_rev
ATOMIC_CRITICAL_REV(cmplx4, div, kmp_cmplx32, /, 8c,
                    1) // __kmpc_atomic_cmplx4_div_rev
ATOMIC_CRITICAL_REV(cmplx8, sub, kmp_cmplx64, -, 16c,
                    1) // __kmpc_atomic_cmplx8_sub_rev
ATOMIC_CRITICAL_REV(cmplx8, div, kmp_cmplx64, /, 16c,
                    1) // __kmpc_atomic_cmplx8_div_rev
ATOMIC_CRITICAL_REV(cmplx10, sub, kmp_cmplx80, -, 20c,
                    1) // __kmpc_atomic_cmplx10_sub_rev
ATOMIC_CRITICAL_REV(cmplx10, div, kmp_cmplx80, /, 20c,
                    1) // __kmpc_atomic_cmplx10_div_rev
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_REV(cmplx16, sub, CPLX128_LEG, -, 32c,
                    1) // __kmpc_atomic_cmplx16_sub_rev
ATOMIC_CRITICAL_REV(cmplx16, div, CPLX128_LEG, /, 32c,
                    1) // __kmpc_atomic_cmplx16_div_rev
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_REV(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
                    1) // __kmpc_atomic_cmplx16_sub_a16_rev
ATOMIC_CRITICAL_REV(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
                    1) // __kmpc_atomic_cmplx16_div_a16_rev
#endif // KMP_ARCH_X86
#endif // KMP_HAVE_QUAD
1615 
1616 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
1617 // End of OpenMP 4.0: x = expr binop x for non-commutative operations.
1618 
1619 /* ------------------------------------------------------------------------ */
1620 /* Routines for mixed types of LHS and RHS, when RHS is "larger"            */
/* Note: in order to reduce the total number of type combinations, the      */
/*       compiler is expected to convert RHS to the longest floating type,  */
/*       that is _Quad, before calling any of these routines.               */
/* Conversion to _Quad will be done by the compiler during calculation,     */
/*    conversion back to TYPE - before the assignment, like:                */
/*    *lhs = (TYPE)( (_Quad)(*lhs) OP rhs )                                 */
/* A performance penalty is expected because of SW emulation use.           */
1628 /* ------------------------------------------------------------------------ */
1629 
// Beginning of a mixed-type definition (provides name, parameters, debug
// trace)
//     TYPE_ID  - LHS type identifier, first part of the routine name
//     TYPE     - LHS type (lhs is TYPE *)
//     OP_ID    - operation identifier, second part of the routine name
//     RTYPE_ID - RHS type identifier, last part of the routine name
//     RTYPE    - RHS type (rhs is passed by value)
// Generates the header of
//     void __kmpc_atomic_<TYPE_ID>_<OP_ID>_<RTYPE_ID>(ident_t *id_ref,
//                                                     int gtid, TYPE *lhs,
//                                                     RTYPE rhs)
// followed by an assertion on __kmp_init_serial and a KA_TRACE call. The
// function body is left open for the macro that uses ATOMIC_BEGIN_MIX.
#define ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                \
  void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID(                         \
      ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs) {                       \
    KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
    KA_TRACE(100,                                                              \
             ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n",   \
              gtid));
1637 
// -------------------------------------------------------------------------
// Mixed-type routine that always goes through the lock-based update macros:
// header from ATOMIC_BEGIN_MIX, then OP_UPDATE_GOMP_CRITICAL and
// OP_UPDATE_CRITICAL (both defined earlier in the file) with lock LCK_ID,
// then the closing brace of the function.
// OP is a bare binary operator in the instantiations of this macro.
#define ATOMIC_CRITICAL_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, LCK_ID,  \
                           GOMP_FLAG)                                          \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) /* send assignment */           \
  OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) /* send assignment */                   \
  }
1645 
// -------------------------------------------------------------------------
// ATOMIC_CMPXCHG_MIX: mixed-type routine (LHS of TYPE, RHS of RTYPE) whose
// update normally goes through the compare-and-store path.
//     TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE - passed on to ATOMIC_BEGIN_MIX
//     BITS, OP  - passed on to OP_CMPXCHG
//     LCK_ID    - lock used for unaligned addresses (second variant only)
//     MASK      - hex digits of the alignment mask (second variant only)
//     GOMP_FLAG - passed on to OP_UPDATE_GOMP_CRITICAL
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// -------------------------------------------------------------------------
// IA-32 architecture and Intel(R) 64: no alignment test, LCK_ID and MASK are
// never expanded.
#define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,    \
                           LCK_ID, MASK, GOMP_FLAG)                            \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
  OP_CMPXCHG(TYPE, BITS, OP)                                                   \
  }
#else
// -------------------------------------------------------------------------
// Remaining architectures (no unaligned accesses): an address with any bit
// of 0x<MASK> set is updated under lock LCK_ID; every other address takes
// the compare-and-store path.
#define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,    \
                           LCK_ID, MASK, GOMP_FLAG)                            \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
  if ((kmp_uintptr_t)lhs & 0x##MASK) {                                         \
    KMP_CHECK_GTID;                                                            \
    OP_UPDATE_CRITICAL(TYPE, OP,                                               \
                       LCK_ID) /* unaligned address - use critical */          \
  } else {                                                                     \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
  }                                                                            \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1673 
// -------------------------------------------------------------------------
// Reversed mixed-type routines (*lhs = (TYPE)(rhs OP *lhs) with rhs of
// RTYPE), defined only for IA-32 architecture and Intel(R) 64. Both macros
// take the routine name from ATOMIC_BEGIN_MIX, so the "_rev" part has to be
// supplied by the caller inside OP_ID.
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// -------------------------------------------------------------------------
// Compare-and-store flavour; LCK_ID and MASK are accepted but not expanded.
#define ATOMIC_CMPXCHG_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID,       \
                               RTYPE, LCK_ID, MASK, GOMP_FLAG)                 \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
  OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG)                                    \
  OP_CMPXCHG_REV(TYPE, BITS, OP)                                               \
  }
// Critical-section flavour; the update runs under lock ATOMIC_LOCK<LCK_ID>.
#define ATOMIC_CRITICAL_REV_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE,      \
                               LCK_ID, GOMP_FLAG)                              \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
  OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG)                                    \
  OP_CRITICAL_REV(TYPE, OP, LCK_ID)                                            \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1690 
// RHS=float8
// Mixed-type entry points whose right-hand side is kmp_real64; names follow
// __kmpc_atomic_<TYPE_ID>_<OP_ID>_float8. Integer LHS types get mul and div,
// float4 gets add, sub, mul and div.
// Columns: TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, LCK_ID, MASK,
//          GOMP_FLAG
// Note: the fixed4 entries pass 0 as GOMP_FLAG, all others pass KMP_ARCH_X86.
ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_float8
ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_float8
ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_float8
ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_float8
ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3,
                   0) // __kmpc_atomic_fixed4_mul_float8
ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3,
                   0) // __kmpc_atomic_fixed4_div_float8
ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_float8
ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_float8
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_add_float8
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_sub_float8
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_mul_float8
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_div_float8
1716 
1717 // RHS=float16 (deprecated, to be removed when we are sure the compiler does not
1718 // use them)
1719 #if KMP_HAVE_QUAD
// Mixed-type entry points whose right-hand side is _Quad (RTYPE_ID "fp");
// names follow __kmpc_atomic_<TYPE_ID>_<OP_ID>_fp. Each LHS type gets add,
// sub, mul and div.
// Columns: TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, LCK_ID, MASK,
//          GOMP_FLAG
// Note: the fixed4/fixed4u entries pass 0 as GOMP_FLAG, all others pass
// KMP_ARCH_X86.
ATOMIC_CMPXCHG_MIX(fixed1, char, add, 8, +, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_fp
ATOMIC_CMPXCHG_MIX(fixed1u, uchar, add, 8, +, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_fp
ATOMIC_CMPXCHG_MIX(fixed1, char, sub, 8, -, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_fp
ATOMIC_CMPXCHG_MIX(fixed1u, uchar, sub, 8, -, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_fp
ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_fp
ATOMIC_CMPXCHG_MIX(fixed1u, uchar, mul, 8, *, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_fp
ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_fp
ATOMIC_CMPXCHG_MIX(fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_fp

ATOMIC_CMPXCHG_MIX(fixed2, short, add, 16, +, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_fp
ATOMIC_CMPXCHG_MIX(fixed2u, ushort, add, 16, +, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_fp
ATOMIC_CMPXCHG_MIX(fixed2, short, sub, 16, -, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_fp
ATOMIC_CMPXCHG_MIX(fixed2u, ushort, sub, 16, -, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_fp
ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_fp
ATOMIC_CMPXCHG_MIX(fixed2u, ushort, mul, 16, *, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_fp
ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_fp
ATOMIC_CMPXCHG_MIX(fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_fp

ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4_add_fp
ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, add, 32, +, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4u_add_fp
ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4_sub_fp
ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, sub, 32, -, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4u_sub_fp
ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4_mul_fp
ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, mul, 32, *, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4u_mul_fp
ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4_div_fp
ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4u_div_fp

ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_fp
ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, add, 64, +, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_fp
ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_fp
ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, sub, 64, -, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_fp
ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_fp
ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, mul, 64, *, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_fp
ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_fp
ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_fp

ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_add_fp
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, fp, _Quad, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_sub_fp
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_mul_fp
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_div_fp

ATOMIC_CMPXCHG_MIX(float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_add_fp
ATOMIC_CMPXCHG_MIX(float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_sub_fp
ATOMIC_CMPXCHG_MIX(float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_mul_fp
ATOMIC_CMPXCHG_MIX(float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_div_fp
1805 
1806 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
// long double LHS with _Quad RHS: add, sub, mul and div, built by
// ATOMIC_CRITICAL_FP with lock identifier 10r and GOMP_FLAG = 1.
// Columns: TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, LCK_ID, GOMP_FLAG
ATOMIC_CRITICAL_FP(float10, long double, add, +, fp, _Quad, 10r,
                   1) // __kmpc_atomic_float10_add_fp
ATOMIC_CRITICAL_FP(float10, long double, sub, -, fp, _Quad, 10r,
                   1) // __kmpc_atomic_float10_sub_fp
ATOMIC_CRITICAL_FP(float10, long double, mul, *, fp, _Quad, 10r,
                   1) // __kmpc_atomic_float10_mul_fp
ATOMIC_CRITICAL_FP(float10, long double, div, /, fp, _Quad, 10r,
                   1) // __kmpc_atomic_float10_div_fp
1815 
// Reverse operations
// Reversed sub / div with a _Quad right-hand side, built by
// ATOMIC_CMPXCHG_REV_MIX (*lhs = (TYPE)(rhs OP *lhs)). OP_ID already carries
// the "_rev" suffix, so the names come out as
// __kmpc_atomic_<TYPE_ID>_<op>_rev_fp.
// Columns: TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, LCK_ID, MASK,
//          GOMP_FLAG
ATOMIC_CMPXCHG_REV_MIX(fixed1, char, sub_rev, 8, -, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, sub_rev, 8, -, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed1, char, div_rev, 8, /, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, div_rev, 8, /, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev_fp

ATOMIC_CMPXCHG_REV_MIX(fixed2, short, sub_rev, 16, -, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, sub_rev, 16, -, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed2, short, div_rev, 16, /, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, div_rev, 16, /, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev_fp

ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, sub_rev, 32, -, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, sub_rev, 32, -, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4u_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, div_rev, 32, /, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4_div_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, div_rev, 32, /, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4u_div_rev_fp

ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, sub_rev, 64, -, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, sub_rev, 64, -, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_rev_fp
1848 ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, div_rev, 64, /, fp, _Quad, 8i, 7,
1849                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev_fp
1850 ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, div_rev, 64, /, fp, _Quad, 8i, 7,
1851                        KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev_fp
1852 
1853 ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, sub_rev, 32, -, fp, _Quad, 4r, 3,
1854                        KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev_fp
1855 ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, div_rev, 32, /, fp, _Quad, 4r, 3,
1856                        KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev_fp
1857 
1858 ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, sub_rev, 64, -, fp, _Quad, 8r, 7,
1859                        KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev_fp
1860 ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, div_rev, 64, /, fp, _Quad, 8r, 7,
1861                        KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev_fp
1862 
1863 ATOMIC_CRITICAL_REV_FP(float10, long double, sub_rev, -, fp, _Quad, 10r,
1864                        1) // __kmpc_atomic_float10_sub_rev_fp
1865 ATOMIC_CRITICAL_REV_FP(float10, long double, div_rev, /, fp, _Quad, 10r,
1866                        1) // __kmpc_atomic_float10_div_rev_fp
1867 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1868 
1869 #endif // KMP_HAVE_QUAD
1870 
// ATOMIC_CMPXCHG_CMPLX defines a mixed-type update routine and has three
// mutually exclusive definitions, selected by architecture and USE_CMPXCHG_FIX.
// Each one opens the function with ATOMIC_BEGIN_MIX, emits the GOMP critical
// path via OP_UPDATE_GOMP_CRITICAL, performs the update, and closes the
// function body with the final '}'.
//     TYPE_ID, TYPE   - identifier and C type of the updated location
//     OP_ID, OP       - operation identifier and operator
//     BITS            - size in bits, used to distinguish low level calls
//     RTYPE_ID, RTYPE - identifier and C type of the right-hand operand
//     LCK_ID          - lock identifier (used only by the non-x86 variant)
//     MASK            - alignment mask (used only by the non-x86 variant)
//     GOMP_FLAG       - forwarded to OP_UPDATE_GOMP_CRITICAL
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// ------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ====================================
#if USE_CMPXCHG_FIX
// workaround for C78287 (complex(kind=4) data type)
// This variant performs the update with OP_CMPXCHG_WORKAROUND.
#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,  \
                             LCK_ID, MASK, GOMP_FLAG)                          \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
  OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP)                                        \
  }
// end of the second part of the workaround for C78287
#else
// x86 without the workaround: plain OP_CMPXCHG, no alignment check.
#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,  \
                             LCK_ID, MASK, GOMP_FLAG)                          \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
  OP_CMPXCHG(TYPE, BITS, OP)                                                   \
  }
#endif // USE_CMPXCHG_FIX
#else
// ------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
// The address is tested against 0x<MASK>: when no masked bit is set the
// OP_CMPXCHG path is taken, otherwise the update is done by
// OP_UPDATE_CRITICAL under the lock selected by LCK_ID.
#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,  \
                             LCK_ID, MASK, GOMP_FLAG)                          \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
  } else {                                                                     \
    KMP_CHECK_GTID;                                                            \
    OP_UPDATE_CRITICAL(TYPE, OP,                                               \
                       LCK_ID) /* unaligned address - use critical */          \
  }                                                                            \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1907 
// cmplx4 (kmp_cmplx32) location updated with a cmplx8 (kmp_cmplx64) operand.
// BITS is 64 and MASK is 7, so the non-x86 variant of ATOMIC_CMPXCHG_CMPLX
// takes the compare-and-store path only when (lhs & 0x7) == 0 and otherwise
// falls back to the critical section guarded by lock 8c.
ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c,
                     7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_add_cmplx8
ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c,
                     7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_sub_cmplx8
ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c,
                     7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_mul_cmplx8
ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c,
                     7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_div_cmplx8
1916 
1917 // READ, WRITE, CAPTURE
1918 
1919 // ------------------------------------------------------------------------
1920 // Atomic READ routines
1921 
1922 // ------------------------------------------------------------------------
1923 // Beginning of a definition (provides name, parameters, gebug trace)
1924 //     TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
1925 //     fixed)
1926 //     OP_ID   - operation identifier (add, sub, mul, ...)
1927 //     TYPE    - operands' type
1928 #define ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, RET_TYPE)                      \
1929   RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid,        \
1930                                              TYPE *loc) {                      \
1931     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
1932     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
1933 
1934 // ------------------------------------------------------------------------
1935 // Operation on *lhs, rhs using "compare_and_store_ret" routine
1936 //     TYPE    - operands' type
1937 //     BITS    - size in bits, used to distinguish low level calls
1938 //     OP      - operator
1939 // Note: temp_val introduced in order to force the compiler to read
1940 //       *lhs only once (w/o it the compiler reads *lhs twice)
1941 // TODO: check if it is still necessary
1942 // Return old value regardless of the result of "compare & swap# operation
1943 #define OP_CMPXCHG_READ(TYPE, BITS, OP)                                        \
1944   {                                                                            \
1945     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
1946     union f_i_union {                                                          \
1947       TYPE f_val;                                                              \
1948       kmp_int##BITS i_val;                                                     \
1949     };                                                                         \
1950     union f_i_union old_value;                                                 \
1951     temp_val = *loc;                                                           \
1952     old_value.f_val = temp_val;                                                \
1953     old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS(                         \
1954         (kmp_int##BITS *)loc,                                                  \
1955         *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val,                     \
1956         *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val);                    \
1957     new_value = old_value.f_val;                                               \
1958     return new_value;                                                          \
1959   }
1960 
1961 // -------------------------------------------------------------------------
1962 // Operation on *lhs, rhs bound by critical section
1963 //     OP     - operator (it's supposed to contain an assignment)
1964 //     LCK_ID - lock identifier
1965 // Note: don't check gtid as it should always be valid
1966 // 1, 2-byte - expect valid parameter, other - check before this macro
1967 #define OP_CRITICAL_READ(OP, LCK_ID)                                           \
1968   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
1969                                                                                \
1970   new_value = (*loc);                                                          \
1971                                                                                \
1972   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1973 
1974 // -------------------------------------------------------------------------
1975 #ifdef KMP_GOMP_COMPAT
1976 #define OP_GOMP_CRITICAL_READ(OP, FLAG)                                        \
1977   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
1978     KMP_CHECK_GTID;                                                            \
1979     OP_CRITICAL_READ(OP, 0);                                                   \
1980     return new_value;                                                          \
1981   }
1982 #else
1983 #define OP_GOMP_CRITICAL_READ(OP, FLAG)
1984 #endif /* KMP_GOMP_COMPAT */
1985 
1986 // -------------------------------------------------------------------------
1987 #define ATOMIC_FIXED_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)           \
1988   ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE)                                \
1989   TYPE new_value;                                                              \
1990   OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG)                                     \
1991   new_value = KMP_TEST_THEN_ADD##BITS(loc, OP 0);                              \
1992   return new_value;                                                            \
1993   }
1994 // -------------------------------------------------------------------------
1995 #define ATOMIC_CMPXCHG_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)         \
1996   ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE)                                \
1997   TYPE new_value;                                                              \
1998   OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG)                                     \
1999   OP_CMPXCHG_READ(TYPE, BITS, OP)                                              \
2000   }
2001 // ------------------------------------------------------------------------
2002 // Routines for Extended types: long double, _Quad, complex flavours (use
2003 // critical section)
2004 //     TYPE_ID, OP_ID, TYPE - detailed above
2005 //     OP      - operator
2006 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
2007 #define ATOMIC_CRITICAL_READ(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)      \
2008   ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE)                                \
2009   TYPE new_value;                                                              \
2010   OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) /* send assignment */               \
2011   OP_CRITICAL_READ(OP, LCK_ID) /* send assignment */                           \
2012   return new_value;                                                            \
2013   }
2014 
2015 // ------------------------------------------------------------------------
2016 // Fix for cmplx4 read (CQ220361) on Windows* OS. Regular routine with return
2017 // value doesn't work.
2018 // Let's return the read value through the additional parameter.
2019 #if (KMP_OS_WINDOWS)
2020 
// Critical-section read for the out-parameter variant: acquires the atomic
// lock selected by LCK_ID, copies *loc into *out, and releases the lock.
// OP is not referenced by the expansion.
#define OP_CRITICAL_READ_WRK(OP, LCK_ID)                                       \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
                                                                               \
  (*out) = (*loc);                                                             \
                                                                               \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
2027 // ------------------------------------------------------------------------
// GOMP compatibility path for the out-parameter ("_WRK") READ routine.
// When KMP_GOMP_COMPAT is defined, FLAG is non-zero and __kmp_atomic_mode == 2,
// the value is copied to *out under lock 0 (after KMP_CHECK_GTID) and the
// routine returns at once, mirroring the early return in
// OP_GOMP_CRITICAL_READ. Without that return, ATOMIC_CRITICAL_READ_WRK would
// fall through and overwrite *out with a second read of *loc taken under
// LCK_ID rather than under lock 0, the lock the GOMP-mode paths visible in
// this file use.
// The enclosing routine (see ATOMIC_BEGIN_READ_WRK) returns void, hence the
// bare "return;". Without KMP_GOMP_COMPAT the macro expands to nothing.
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG)                                    \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
    KMP_CHECK_GTID;                                                            \
    OP_CRITICAL_READ_WRK(OP, 0);                                               \
    return;                                                                    \
  }
#else
#define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */
2037 // ------------------------------------------------------------------------
// Beginning of an out-parameter READ routine definition (provides name,
// parameters, debug trace). The generated routine returns void and receives
// the destination pointer 'out' as its first parameter, ahead of id_ref, gtid
// and loc. The function body is left open for the using macro to close.
#define ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE)                            \
  void __kmpc_atomic_##TYPE_ID##_##OP_ID(TYPE *out, ident_t *id_ref, int gtid, \
                                         TYPE *loc) {                          \
    KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2043 
2044 // ------------------------------------------------------------------------
// Defines an out-parameter READ routine: opens the function with
// ATOMIC_BEGIN_READ_WRK, expands OP_GOMP_CRITICAL_READ_WRK, then
// OP_CRITICAL_READ_WRK with the lock selected by LCK_ID, and closes the body.
//     TYPE_ID, OP_ID, TYPE - see ATOMIC_BEGIN_READ_WRK
//     OP        - operator (forwarded to the helper macros)
//     LCK_ID    - lock identifier
//     GOMP_FLAG - enables the GOMP critical path
#define ATOMIC_CRITICAL_READ_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)  \
  ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE)                                  \
  OP_GOMP_CRITICAL_READ_WRK(OP## =, GOMP_FLAG) /* send assignment */           \
  OP_CRITICAL_READ_WRK(OP, LCK_ID) /* send assignment */                       \
  }
2050 
2051 #endif // KMP_OS_WINDOWS
2052 
2053 // ------------------------------------------------------------------------
2054 //                  TYPE_ID,OP_ID, TYPE,      OP, GOMP_FLAG
2055 ATOMIC_FIXED_READ(fixed4, rd, kmp_int32, 32, +, 0) // __kmpc_atomic_fixed4_rd
2056 ATOMIC_FIXED_READ(fixed8, rd, kmp_int64, 64, +,
2057                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_rd
2058 ATOMIC_CMPXCHG_READ(float4, rd, kmp_real32, 32, +,
2059                     KMP_ARCH_X86) // __kmpc_atomic_float4_rd
2060 ATOMIC_CMPXCHG_READ(float8, rd, kmp_real64, 64, +,
2061                     KMP_ARCH_X86) // __kmpc_atomic_float8_rd
2062 
2063 // !!! TODO: Remove lock operations for "char" since it can't be non-atomic
2064 ATOMIC_CMPXCHG_READ(fixed1, rd, kmp_int8, 8, +,
2065                     KMP_ARCH_X86) // __kmpc_atomic_fixed1_rd
2066 ATOMIC_CMPXCHG_READ(fixed2, rd, kmp_int16, 16, +,
2067                     KMP_ARCH_X86) // __kmpc_atomic_fixed2_rd
2068 
2069 ATOMIC_CRITICAL_READ(float10, rd, long double, +, 10r,
2070                      1) // __kmpc_atomic_float10_rd
2071 #if KMP_HAVE_QUAD
2072 ATOMIC_CRITICAL_READ(float16, rd, QUAD_LEGACY, +, 16r,
2073                      1) // __kmpc_atomic_float16_rd
2074 #endif // KMP_HAVE_QUAD
2075 
2076 // Fix for CQ220361 on Windows* OS
2077 #if (KMP_OS_WINDOWS)
2078 ATOMIC_CRITICAL_READ_WRK(cmplx4, rd, kmp_cmplx32, +, 8c,
2079                          1) // __kmpc_atomic_cmplx4_rd
2080 #else
2081 ATOMIC_CRITICAL_READ(cmplx4, rd, kmp_cmplx32, +, 8c,
2082                      1) // __kmpc_atomic_cmplx4_rd
2083 #endif // (KMP_OS_WINDOWS)
2084 ATOMIC_CRITICAL_READ(cmplx8, rd, kmp_cmplx64, +, 16c,
2085                      1) // __kmpc_atomic_cmplx8_rd
2086 ATOMIC_CRITICAL_READ(cmplx10, rd, kmp_cmplx80, +, 20c,
2087                      1) // __kmpc_atomic_cmplx10_rd
2088 #if KMP_HAVE_QUAD
2089 ATOMIC_CRITICAL_READ(cmplx16, rd, CPLX128_LEG, +, 32c,
2090                      1) // __kmpc_atomic_cmplx16_rd
2091 #if (KMP_ARCH_X86)
2092 ATOMIC_CRITICAL_READ(float16, a16_rd, Quad_a16_t, +, 16r,
2093                      1) // __kmpc_atomic_float16_a16_rd
2094 ATOMIC_CRITICAL_READ(cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c,
2095                      1) // __kmpc_atomic_cmplx16_a16_rd
2096 #endif // (KMP_ARCH_X86)
2097 #endif // KMP_HAVE_QUAD
2098 
2099 // ------------------------------------------------------------------------
2100 // Atomic WRITE routines
2101 
// Defines a WRITE routine for integer operands: after the GOMP critical path
// (OP_GOMP_CRITICAL) the store is done with KMP_XCHG_FIXED<BITS>(lhs, rhs);
// the result of that call is discarded.
//     TYPE_ID, OP_ID, TYPE - forwarded to ATOMIC_BEGIN (defined elsewhere;
//                            presumably opens the routine with lhs / rhs
//                            parameters -- confirm there)
//     BITS      - size in bits, selects the low level exchange routine
//     OP        - operator, forwarded to OP_GOMP_CRITICAL
//     GOMP_FLAG - enables the GOMP critical path
#define ATOMIC_XCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)              \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  OP_GOMP_CRITICAL(OP, GOMP_FLAG)                                              \
  KMP_XCHG_FIXED##BITS(lhs, rhs);                                              \
  }
2107 // ------------------------------------------------------------------------
2108 #define ATOMIC_XCHG_FLOAT_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)        \
2109   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
2110   OP_GOMP_CRITICAL(OP, GOMP_FLAG)                                              \
2111   KMP_XCHG_REAL##BITS(lhs, rhs);                                               \
2112   }
2113 
2114 // ------------------------------------------------------------------------
2115 // Operation on *lhs, rhs using "compare_and_store" routine
2116 //     TYPE    - operands' type
2117 //     BITS    - size in bits, used to distinguish low level calls
2118 //     OP      - operator
2119 // Note: temp_val introduced in order to force the compiler to read
2120 //       *lhs only once (w/o it the compiler reads *lhs twice)
2121 #define OP_CMPXCHG_WR(TYPE, BITS, OP)                                          \
2122   {                                                                            \
2123     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
2124     TYPE old_value, new_value;                                                 \
2125     temp_val = *lhs;                                                           \
2126     old_value = temp_val;                                                      \
2127     new_value = rhs;                                                           \
2128     while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
2129         (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
2130         *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
2131       temp_val = *lhs;                                                         \
2132       old_value = temp_val;                                                    \
2133       new_value = rhs;                                                         \
2134     }                                                                          \
2135   }
2136 
2137 // -------------------------------------------------------------------------
2138 #define ATOMIC_CMPXCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)           \
2139   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
2140   OP_GOMP_CRITICAL(OP, GOMP_FLAG)                                              \
2141   OP_CMPXCHG_WR(TYPE, BITS, OP)                                                \
2142   }
2143 
2144 // ------------------------------------------------------------------------
2145 // Routines for Extended types: long double, _Quad, complex flavours (use
2146 // critical section)
2147 //     TYPE_ID, OP_ID, TYPE - detailed above
2148 //     OP      - operator
2149 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
2150 #define ATOMIC_CRITICAL_WR(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)        \
2151   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
2152   OP_GOMP_CRITICAL(OP, GOMP_FLAG) /* send assignment */                        \
2153   OP_CRITICAL(OP, LCK_ID) /* send assignment */                                \
2154   }
2155 // -------------------------------------------------------------------------
2156 
// WRITE entry points for integer types. Argument order:
//     TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG
ATOMIC_XCHG_WR(fixed1, wr, kmp_int8, 8, =,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_wr
ATOMIC_XCHG_WR(fixed2, wr, kmp_int16, 16, =,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_wr
ATOMIC_XCHG_WR(fixed4, wr, kmp_int32, 32, =,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_wr
// 8-byte integer: compare-and-store loop when KMP_ARCH_X86 is set, plain
// exchange otherwise.
#if (KMP_ARCH_X86)
ATOMIC_CMPXCHG_WR(fixed8, wr, kmp_int64, 64, =,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
#else
ATOMIC_XCHG_WR(fixed8, wr, kmp_int64, 64, =,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
#endif // (KMP_ARCH_X86)

// WRITE entry points for floating-point types.
ATOMIC_XCHG_FLOAT_WR(float4, wr, kmp_real32, 32, =,
                     KMP_ARCH_X86) // __kmpc_atomic_float4_wr
// 8-byte real: same architecture split as fixed8 above.
#if (KMP_ARCH_X86)
ATOMIC_CMPXCHG_WR(float8, wr, kmp_real64, 64, =,
                  KMP_ARCH_X86) // __kmpc_atomic_float8_wr
#else
ATOMIC_XCHG_FLOAT_WR(float8, wr, kmp_real64, 64, =,
                     KMP_ARCH_X86) // __kmpc_atomic_float8_wr
#endif // (KMP_ARCH_X86)

// Extended types are written inside a critical section. Argument order:
//     TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG
ATOMIC_CRITICAL_WR(float10, wr, long double, =, 10r,
                   1) // __kmpc_atomic_float10_wr
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_WR(float16, wr, QUAD_LEGACY, =, 16r,
                   1) // __kmpc_atomic_float16_wr
#endif // KMP_HAVE_QUAD
ATOMIC_CRITICAL_WR(cmplx4, wr, kmp_cmplx32, =, 8c, 1) // __kmpc_atomic_cmplx4_wr
ATOMIC_CRITICAL_WR(cmplx8, wr, kmp_cmplx64, =, 16c,
                   1) // __kmpc_atomic_cmplx8_wr
ATOMIC_CRITICAL_WR(cmplx10, wr, kmp_cmplx80, =, 20c,
                   1) // __kmpc_atomic_cmplx10_wr
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_WR(cmplx16, wr, CPLX128_LEG, =, 32c,
                   1) // __kmpc_atomic_cmplx16_wr
// The a16 variants (Quad_a16_t / kmp_cmplx128_a16_t) exist only on 32-bit x86.
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_WR(float16, a16_wr, Quad_a16_t, =, 16r,
                   1) // __kmpc_atomic_float16_a16_wr
ATOMIC_CRITICAL_WR(cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c,
                   1) // __kmpc_atomic_cmplx16_a16_wr
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD
2202 
2203 // ------------------------------------------------------------------------
2204 // Atomic CAPTURE routines
2205 
// Beginning of a CAPTURE routine definition (provides name, parameters, debug
// trace). The macro opens the function body and leaves it open; the macro
// that uses it supplies the closing brace.
//     TYPE_ID  - operands type and size (fixed*, fixed*u for signed, unsigned
//     fixed)
//     OP_ID    - operation identifier (add_cpt, sub_cpt, mul_cpt, ...)
//     TYPE     - operands' type
//     RET_TYPE - return type of the generated routine
// The generated routine takes an extra 'int flag' argument; the capture
// helpers in this file return the updated value when flag is non-zero and the
// value from before the update when it is zero.
#define ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, RET_TYPE)                       \
  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid,        \
                                             TYPE *lhs, TYPE rhs, int flag) {  \
    KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2216 
2217 // -------------------------------------------------------------------------
2218 // Operation on *lhs, rhs bound by critical section
2219 //     OP     - operator (it's supposed to contain an assignment)
2220 //     LCK_ID - lock identifier
2221 // Note: don't check gtid as it should always be valid
2222 // 1, 2-byte - expect valid parameter, other - check before this macro
2223 #define OP_CRITICAL_CPT(OP, LCK_ID)                                            \
2224   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2225                                                                                \
2226   if (flag) {                                                                  \
2227     (*lhs) OP rhs;                                                             \
2228     new_value = (*lhs);                                                        \
2229   } else {                                                                     \
2230     new_value = (*lhs);                                                        \
2231     (*lhs) OP rhs;                                                             \
2232   }                                                                            \
2233                                                                                \
2234   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2235   return new_value;
2236 
// Capture form of "*lhs = (TYPE)(*lhs OP rhs)", executed while holding the
// atomic lock selected by LCK_ID. Unlike OP_CRITICAL_CPT, OP is a plain binary
// operator here and the result is cast back to TYPE before being stored.
// new_value receives the updated *lhs when flag is non-zero and the previous
// *lhs when flag is zero; it is then returned.
#define OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID)                               \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
                                                                               \
  if (!flag)                                                                   \
    new_value = (*lhs); /* value before the update */                          \
  (*lhs) = (TYPE)((*lhs)OP rhs);                                               \
  if (flag)                                                                    \
    new_value = (*lhs); /* value after the update */                           \
                                                                               \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
  return new_value;
2250 
2251 // ------------------------------------------------------------------------
2252 #ifdef KMP_GOMP_COMPAT
2253 #define OP_GOMP_CRITICAL_CPT(TYPE, OP, FLAG)                                   \
2254   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
2255     KMP_CHECK_GTID;                                                            \
2256     OP_UPDATE_CRITICAL_CPT(TYPE, OP, 0);                                       \
2257   }
2258 #else
2259 #define OP_GOMP_CRITICAL_CPT(TYPE, OP, FLAG)
2260 #endif /* KMP_GOMP_COMPAT */
2261 
2262 // ------------------------------------------------------------------------
2263 // Operation on *lhs, rhs using "compare_and_store" routine
2264 //     TYPE    - operands' type
2265 //     BITS    - size in bits, used to distinguish low level calls
2266 //     OP      - operator
2267 // Note: temp_val introduced in order to force the compiler to read
2268 //       *lhs only once (w/o it the compiler reads *lhs twice)
2269 #define OP_CMPXCHG_CPT(TYPE, BITS, OP)                                         \
2270   {                                                                            \
2271     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
2272     TYPE old_value, new_value;                                                 \
2273     temp_val = *lhs;                                                           \
2274     old_value = temp_val;                                                      \
2275     new_value = (TYPE)(old_value OP rhs);                                      \
2276     while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
2277         (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
2278         *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
2279       temp_val = *lhs;                                                         \
2280       old_value = temp_val;                                                    \
2281       new_value = (TYPE)(old_value OP rhs);                                    \
2282     }                                                                          \
2283     if (flag) {                                                                \
2284       return new_value;                                                        \
2285     } else                                                                     \
2286       return old_value;                                                        \
2287   }
2288 
2289 // -------------------------------------------------------------------------
2290 #define ATOMIC_CMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)          \
2291   ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2292   TYPE new_value;                                                              \
2293   (void)new_value;                                                             \
2294   OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG)                                    \
2295   OP_CMPXCHG_CPT(TYPE, BITS, OP)                                               \
2296   }
2297 
2298 // -------------------------------------------------------------------------
2299 #define ATOMIC_FIXED_ADD_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)        \
2300   ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2301   TYPE old_value, new_value;                                                   \
2302   (void)new_value;                                                             \
2303   OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG)                                    \
2304   /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */            \
2305   old_value = KMP_TEST_THEN_ADD##BITS(lhs, OP rhs);                            \
2306   if (flag) {                                                                  \
2307     return old_value OP rhs;                                                   \
2308   } else                                                                       \
2309     return old_value;                                                          \
2310   }
2311 // -------------------------------------------------------------------------
2312 
// CAPTURE entry points for add / sub. Argument order:
//     TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG
// 4- and 8-byte integers use the fetch-and-add based ATOMIC_FIXED_ADD_CPT.
ATOMIC_FIXED_ADD_CPT(fixed4, add_cpt, kmp_int32, 32, +,
                     0) // __kmpc_atomic_fixed4_add_cpt
ATOMIC_FIXED_ADD_CPT(fixed4, sub_cpt, kmp_int32, 32, -,
                     0) // __kmpc_atomic_fixed4_sub_cpt
ATOMIC_FIXED_ADD_CPT(fixed8, add_cpt, kmp_int64, 64, +,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt
ATOMIC_FIXED_ADD_CPT(fixed8, sub_cpt, kmp_int64, 64, -,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt

// float4 / float8 use the compare-and-store based ATOMIC_CMPXCHG_CPT.
ATOMIC_CMPXCHG_CPT(float4, add_cpt, kmp_real32, 32, +,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt
ATOMIC_CMPXCHG_CPT(float4, sub_cpt, kmp_real32, 32, -,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt
ATOMIC_CMPXCHG_CPT(float8, add_cpt, kmp_real64, 64, +,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt
ATOMIC_CMPXCHG_CPT(float8, sub_cpt, kmp_real64, 64, -,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt
2330 
// ------------------------------------------------------------------------
// Capture routines generated by ATOMIC_CMPXCHG_CPT (defined outside this
// section). Arguments of every entry, in order:
//     TYPE_ID   - operands type and size (fixed4, float4)
//     OP_ID     - operation identifier (add, sub, mul, ...)
//     TYPE      - operand type
//     BITS      - size in bits, used to distinguish low level calls
//     OP        - operator (used in critical section)
//     GOMP_FLAG - last argument of each entry, forwarded to the macro
// ------------------------------------------------------------------------
// Routines for ATOMIC integer operands (1, 2, 4 and 8 bytes) followed by the
// float4/float8 mul and div entries. The fixed4/fixed8 add and sub entries are
// not listed here: they are generated by ATOMIC_FIXED_ADD_CPT instead.
// ------------------------------------------------------------------------
//              TYPE_ID,OP_ID, TYPE,   BITS, OP,  GOMP_FLAG
ATOMIC_CMPXCHG_CPT(fixed1, add_cpt, kmp_int8, 8, +,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt
ATOMIC_CMPXCHG_CPT(fixed1, andb_cpt, kmp_int8, 8, &,
                   0) // __kmpc_atomic_fixed1_andb_cpt
ATOMIC_CMPXCHG_CPT(fixed1, div_cpt, kmp_int8, 8, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt
ATOMIC_CMPXCHG_CPT(fixed1u, div_cpt, kmp_uint8, 8, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt
ATOMIC_CMPXCHG_CPT(fixed1, mul_cpt, kmp_int8, 8, *,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt
ATOMIC_CMPXCHG_CPT(fixed1, orb_cpt, kmp_int8, 8, |,
                   0) // __kmpc_atomic_fixed1_orb_cpt
ATOMIC_CMPXCHG_CPT(fixed1, shl_cpt, kmp_int8, 8, <<,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt
ATOMIC_CMPXCHG_CPT(fixed1, shr_cpt, kmp_int8, 8, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed1u, shr_cpt, kmp_uint8, 8, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed1, sub_cpt, kmp_int8, 8, -,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt
ATOMIC_CMPXCHG_CPT(fixed1, xor_cpt, kmp_int8, 8, ^,
                   0) // __kmpc_atomic_fixed1_xor_cpt
ATOMIC_CMPXCHG_CPT(fixed2, add_cpt, kmp_int16, 16, +,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt
ATOMIC_CMPXCHG_CPT(fixed2, andb_cpt, kmp_int16, 16, &,
                   0) // __kmpc_atomic_fixed2_andb_cpt
ATOMIC_CMPXCHG_CPT(fixed2, div_cpt, kmp_int16, 16, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt
ATOMIC_CMPXCHG_CPT(fixed2u, div_cpt, kmp_uint16, 16, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt
ATOMIC_CMPXCHG_CPT(fixed2, mul_cpt, kmp_int16, 16, *,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt
ATOMIC_CMPXCHG_CPT(fixed2, orb_cpt, kmp_int16, 16, |,
                   0) // __kmpc_atomic_fixed2_orb_cpt
ATOMIC_CMPXCHG_CPT(fixed2, shl_cpt, kmp_int16, 16, <<,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt
ATOMIC_CMPXCHG_CPT(fixed2, shr_cpt, kmp_int16, 16, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed2u, shr_cpt, kmp_uint16, 16, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed2, sub_cpt, kmp_int16, 16, -,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt
ATOMIC_CMPXCHG_CPT(fixed2, xor_cpt, kmp_int16, 16, ^,
                   0) // __kmpc_atomic_fixed2_xor_cpt
ATOMIC_CMPXCHG_CPT(fixed4, andb_cpt, kmp_int32, 32, &,
                   0) // __kmpc_atomic_fixed4_andb_cpt
ATOMIC_CMPXCHG_CPT(fixed4, div_cpt, kmp_int32, 32, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt
ATOMIC_CMPXCHG_CPT(fixed4u, div_cpt, kmp_uint32, 32, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt
ATOMIC_CMPXCHG_CPT(fixed4, mul_cpt, kmp_int32, 32, *,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul_cpt
ATOMIC_CMPXCHG_CPT(fixed4, orb_cpt, kmp_int32, 32, |,
                   0) // __kmpc_atomic_fixed4_orb_cpt
ATOMIC_CMPXCHG_CPT(fixed4, shl_cpt, kmp_int32, 32, <<,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt
ATOMIC_CMPXCHG_CPT(fixed4, shr_cpt, kmp_int32, 32, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed4u, shr_cpt, kmp_uint32, 32, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed4, xor_cpt, kmp_int32, 32, ^,
                   0) // __kmpc_atomic_fixed4_xor_cpt
ATOMIC_CMPXCHG_CPT(fixed8, andb_cpt, kmp_int64, 64, &,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb_cpt
ATOMIC_CMPXCHG_CPT(fixed8, div_cpt, kmp_int64, 64, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt
ATOMIC_CMPXCHG_CPT(fixed8u, div_cpt, kmp_uint64, 64, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt
ATOMIC_CMPXCHG_CPT(fixed8, mul_cpt, kmp_int64, 64, *,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt
ATOMIC_CMPXCHG_CPT(fixed8, orb_cpt, kmp_int64, 64, |,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb_cpt
ATOMIC_CMPXCHG_CPT(fixed8, shl_cpt, kmp_int64, 64, <<,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt
ATOMIC_CMPXCHG_CPT(fixed8, shr_cpt, kmp_int64, 64, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed8u, shr_cpt, kmp_uint64, 64, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed8, xor_cpt, kmp_int64, 64, ^,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor_cpt
ATOMIC_CMPXCHG_CPT(float4, div_cpt, kmp_real32, 32, /,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt
ATOMIC_CMPXCHG_CPT(float4, mul_cpt, kmp_real32, 32, *,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt
ATOMIC_CMPXCHG_CPT(float8, div_cpt, kmp_real64, 64, /,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt
ATOMIC_CMPXCHG_CPT(float8, mul_cpt, kmp_real64, 64, *,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt
//              TYPE_ID,OP_ID, TYPE,   BITS, OP,  GOMP_FLAG
2432 
2433 // CAPTURE routines for mixed types RHS=float16
2434 #if KMP_HAVE_QUAD
2435 
// Beginning of a definition (provides name, parameters, debug trace)
//     TYPE_ID  - operands type and size (fixed*, fixed*u for signed, unsigned
//                fixed)
//     OP_ID    - operation identifier (add, sub, mul, ...)
//     TYPE     - type of *lhs; also the return type of the routine
//     RTYPE_ID - tag for the rhs type, appended to the routine name
//     RTYPE    - type of the rhs parameter
// Emits the header of __kmpc_atomic_<TYPE_ID>_<OP_ID>_<RTYPE_ID> and leaves its
// body open; the macro that uses this one supplies the closing brace.
#define ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)            \
  TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID(                         \
      ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs, int flag) {             \
    KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
    KA_TRACE(100,                                                              \
             ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n",   \
              gtid));
2448 
// -------------------------------------------------------------------------
// Mixed-type capture routine whose update is delegated to OP_CMPXCHG_CPT
// (defined outside this section).
//     TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE - forwarded to ATOMIC_BEGIN_CPT_MIX
//     BITS      - operand width, forwarded to OP_CMPXCHG_CPT
//     OP        - operator, forwarded to both helper macros
//     GOMP_FLAG - forwarded to OP_GOMP_CRITICAL_CPT
//     LCK_ID, MASK - accepted but not referenced in this macro body
// Note the parameter order: TYPE comes before OP_ID here, unlike
// ATOMIC_BEGIN_CPT_MIX.
#define ATOMIC_CMPXCHG_CPT_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID,       \
                               RTYPE, LCK_ID, MASK, GOMP_FLAG)                 \
  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)                  \
  TYPE new_value;                                                              \
  (void)new_value;                                                             \
  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG)                                    \
  OP_CMPXCHG_CPT(TYPE, BITS, OP)                                               \
  }
2458 
// -------------------------------------------------------------------------
// Mixed-type capture routine whose update is delegated to
// OP_UPDATE_CRITICAL_CPT (defined outside this section).
//     TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE - forwarded to ATOMIC_BEGIN_CPT_MIX
//     OP        - operator, forwarded to both helper macros
//     LCK_ID    - lock identifier, forwarded to OP_UPDATE_CRITICAL_CPT
//     GOMP_FLAG - forwarded to OP_GOMP_CRITICAL_CPT
#define ATOMIC_CRITICAL_CPT_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE,     \
                                LCK_ID, GOMP_FLAG)                             \
  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)                  \
  TYPE new_value;                                                              \
  (void)new_value;                                                             \
  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) /* send assignment */              \
  OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) /* send assignment */               \
  }
2468 
// Capture routines with a _Quad right-hand side (routine names end in _fp).
// ATOMIC_CMPXCHG_CPT_MIX arguments, in order:
//     TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, LCK_ID, MASK, GOMP_FLAG

// 1-byte integer left-hand side
ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, add_cpt, 8, +, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, add_cpt, 8, +, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, sub_cpt, 8, -, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, sub_cpt, 8, -, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, mul_cpt, 8, *, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, mul_cpt, 8, *, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, div_cpt, 8, /, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, div_cpt, 8, /, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_fp

// 2-byte integer left-hand side
ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, add_cpt, 16, +, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, add_cpt, 16, +, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, sub_cpt, 16, -, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, sub_cpt, 16, -, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, mul_cpt, 16, *, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, mul_cpt, 16, *, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, div_cpt, 16, /, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, div_cpt, 16, /, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_fp

// 4-byte integer left-hand side
ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, add_cpt, 32, +, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, add_cpt, 32, +, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4u_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4u_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4u_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, div_cpt, 32, /, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4_div_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, div_cpt, 32, /, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4u_div_cpt_fp

// 8-byte integer left-hand side
ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, add_cpt, 64, +, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, add_cpt, 64, +, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, div_cpt, 64, /, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, div_cpt, 64, /, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_fp

// 4-byte floating-point left-hand side
ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, add_cpt, 32, +, fp, _Quad, 4r, 3,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, sub_cpt, 32, -, fp, _Quad, 4r, 3,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, mul_cpt, 32, *, fp, _Quad, 4r, 3,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, div_cpt, 32, /, fp, _Quad, 4r, 3,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_fp

// 8-byte floating-point left-hand side
ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, add_cpt, 64, +, fp, _Quad, 8r, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, sub_cpt, 64, -, fp, _Quad, 8r, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, mul_cpt, 64, *, fp, _Quad, 8r, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, div_cpt, 64, /, fp, _Quad, 8r, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_fp

// long double left-hand side; these go through ATOMIC_CRITICAL_CPT_MIX, whose
// arguments are: TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, LCK_ID, GOMP_FLAG
ATOMIC_CRITICAL_CPT_MIX(float10, long double, add_cpt, +, fp, _Quad, 10r,
                        1) // __kmpc_atomic_float10_add_cpt_fp
ATOMIC_CRITICAL_CPT_MIX(float10, long double, sub_cpt, -, fp, _Quad, 10r,
                        1) // __kmpc_atomic_float10_sub_cpt_fp
ATOMIC_CRITICAL_CPT_MIX(float10, long double, mul_cpt, *, fp, _Quad, 10r,
                        1) // __kmpc_atomic_float10_mul_cpt_fp
ATOMIC_CRITICAL_CPT_MIX(float10, long double, div_cpt, /, fp, _Quad, 10r,
                        1) // __kmpc_atomic_float10_div_cpt_fp
2563 
2564 #endif // KMP_HAVE_QUAD
2565 
2566 // ------------------------------------------------------------------------
2567 // Routines for C/C++ Reduction operators && and ||
2568 
// -------------------------------------------------------------------------
// Lock-protected update of *lhs with capture into new_value.
//     OP     - token sequence that already contains the assignment; it is
//              placed between the destination and rhs
//     LCK_ID - suffix selecting the ATOMIC_LOCK##LCK_ID lock
// With a non-zero 'flag' the value written to *lhs is captured, otherwise the
// value *lhs held before the update.
// Expects gtid, flag, lhs, rhs and new_value to be in scope at the point of
// use; gtid is not validated here.
#define OP_CRITICAL_L_CPT(OP, LCK_ID)                                          \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
                                                                               \
  if (!flag) {                                                                 \
    /* capture first, then update */                                           \
    new_value = (*lhs);                                                        \
    (*lhs) OP rhs;                                                             \
  } else {                                                                     \
    /* compute the result first, then publish it */                            \
    new_value OP rhs;                                                          \
    (*lhs) = new_value;                                                        \
  }                                                                            \
                                                                               \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
2587 
// ------------------------------------------------------------------------
// Optional early-exit path for the && / || capture routines.
// When KMP_GOMP_COMPAT is defined: if FLAG is non-zero and __kmp_atomic_mode
// equals 2, run KMP_CHECK_GTID, perform the update via OP_CRITICAL_L_CPT with
// lock id 0 and return new_value from the enclosing routine.
// Otherwise the macro expands to nothing.
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_L_CPT(OP, FLAG)                                       \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
    KMP_CHECK_GTID;                                                            \
    OP_CRITICAL_L_CPT(OP, 0);                                                  \
    return new_value;                                                          \
  }
#else
#define OP_GOMP_CRITICAL_L_CPT(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */
2599 
// ------------------------------------------------------------------------
// Need separate macros for &&, || because there is no combined assignment
//     TYPE_ID, OP_ID, TYPE - forwarded to ATOMIC_BEGIN_CPT
//     BITS      - operand width, forwarded to OP_CMPXCHG_CPT
//     OP        - logical operator (&& or ||)
//     GOMP_FLAG - forwarded to OP_GOMP_CRITICAL_L_CPT
// The lock-based path receives "= *lhs OP" so that OP_CRITICAL_L_CPT can form
// "<destination> = *lhs OP rhs"; the remaining path is delegated to
// OP_CMPXCHG_CPT (defined outside this section).
#define ATOMIC_CMPX_L_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)           \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
  TYPE new_value;                                                              \
  (void)new_value;                                                             \
  OP_GOMP_CRITICAL_L_CPT(= *lhs OP, GOMP_FLAG)                                 \
  OP_CMPXCHG_CPT(TYPE, BITS, OP)                                               \
  }
2609 
// Logical AND / OR capture routines for 1-, 2-, 4- and 8-byte integers.
// Arguments: TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG
ATOMIC_CMPX_L_CPT(fixed1, andl_cpt, char, 8, &&,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl_cpt
ATOMIC_CMPX_L_CPT(fixed1, orl_cpt, char, 8, ||,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl_cpt
ATOMIC_CMPX_L_CPT(fixed2, andl_cpt, short, 16, &&,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl_cpt
ATOMIC_CMPX_L_CPT(fixed2, orl_cpt, short, 16, ||,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl_cpt
ATOMIC_CMPX_L_CPT(fixed4, andl_cpt, kmp_int32, 32, &&,
                  0) // __kmpc_atomic_fixed4_andl_cpt
ATOMIC_CMPX_L_CPT(fixed4, orl_cpt, kmp_int32, 32, ||,
                  0) // __kmpc_atomic_fixed4_orl_cpt
ATOMIC_CMPX_L_CPT(fixed8, andl_cpt, kmp_int64, 64, &&,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl_cpt
ATOMIC_CMPX_L_CPT(fixed8, orl_cpt, kmp_int64, 64, ||,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl_cpt
2626 
// -------------------------------------------------------------------------
// Routines for Fortran operators that have no direct counterpart in C:
// MAX, MIN, .EQV., .NEQV.
// Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt
// Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt
2632 
// -------------------------------------------------------------------------
// Lock-protected MIN/MAX with capture; ends by returning new_value from the
// enclosing routine.
//     OP     - comparison that is true while *lhs still has to be replaced
//              by rhs
//     LCK_ID - suffix selecting the ATOMIC_LOCK##LCK_ID lock
// Expects gtid, flag, lhs, rhs, old_value and new_value to be in scope.
#define MIN_MAX_CRITSECT_CPT(OP, LCK_ID)                                       \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
                                                                               \
  if (!(*lhs OP rhs)) {                                                        \
    /* nothing to do any more: old and new value coincide */                   \
    new_value = *lhs;                                                          \
  } else {                                                                     \
    old_value = *lhs;                                                          \
    *lhs = rhs;                                                                \
    /* 'flag' selects the value after (rhs) or before the store */             \
    new_value = flag ? rhs : old_value;                                        \
  }                                                                            \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
  return new_value;
2651 
// -------------------------------------------------------------------------
// Optional early-exit path for the MIN/MAX capture routines.
// When KMP_GOMP_COMPAT is defined: if FLAG is non-zero and __kmp_atomic_mode
// equals 2, run KMP_CHECK_GTID and perform the operation via
// MIN_MAX_CRITSECT_CPT with lock id 0 (that macro itself returns from the
// enclosing routine). Otherwise the macro expands to nothing.
#ifdef KMP_GOMP_COMPAT
#define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG)                                    \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
    KMP_CHECK_GTID;                                                            \
    MIN_MAX_CRITSECT_CPT(OP, 0);                                               \
  }
#else
#define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */
2662 
// -------------------------------------------------------------------------
// MIN/MAX with capture based on a compare-and-store retry loop; always returns
// from the enclosing routine.
//     TYPE - operand type
//     BITS - operand width, selects KMP_COMPARE_AND_STORE_ACQ##BITS and the
//            kmp_int##BITS type used to reinterpret the operands
//     OP   - comparison that is true while *lhs still has to be replaced by
//            rhs
// Expects lhs, rhs, flag and old_value to be in scope.
// The loop ends in one of two ways:
//   - the compare-and-store succeeded: *lhs went from old_value to rhs, and
//     "old_value OP rhs" is still true because old_value was not touched
//     after it was tested;
//   - "old_value OP rhs" became false because *lhs was changed concurrently:
//     nothing was stored, so old_value is the value both before and after
//     this operation.
// Hence rhs may be reported as the captured new value only in the first case.
#define MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP)                                    \
  {                                                                            \
    TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
    /*TYPE old_value; */                                                       \
    temp_val = *lhs;                                                           \
    old_value = temp_val;                                                      \
    while (old_value OP rhs && /* still need actions? */                       \
           !KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
               (kmp_int##BITS *)lhs,                                           \
               *VOLATILE_CAST(kmp_int##BITS *) & old_value,                    \
               *VOLATILE_CAST(kmp_int##BITS *) & rhs)) {                       \
      temp_val = *lhs;                                                         \
      old_value = temp_val;                                                    \
    }                                                                          \
    if (flag && (old_value OP rhs)) /* rhs was actually stored */              \
      return rhs;                                                              \
    else                                                                       \
      return old_value;                                                        \
  }
2683 
// -------------------------------------------------------------------------
// Lock-based MIN/MAX capture routine; in this section it is instantiated for
// the float10 and float16 operand types.
//     TYPE_ID, OP_ID, TYPE - forwarded to ATOMIC_BEGIN_CPT
//     OP        - comparison that is true when *lhs has to be replaced by rhs
//     LCK_ID    - lock identifier, forwarded to MIN_MAX_CRITSECT_CPT
//     GOMP_FLAG - forwarded to GOMP_MIN_MAX_CRITSECT_CPT
// *lhs is first tested without taking the lock; only if the test succeeds is
// the locked path entered (which re-tests and returns). Otherwise the current
// *lhs is returned unchanged.
#define MIN_MAX_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)      \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
  TYPE new_value, old_value;                                                   \
  if (*lhs OP rhs) { /* need actions? */                                       \
    GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG)                                   \
    MIN_MAX_CRITSECT_CPT(OP, LCK_ID)                                           \
  }                                                                            \
  return *lhs;                                                                 \
  }
2695 
// MIN/MAX capture routine based on MIN_MAX_CMPXCHG_CPT.
//     TYPE_ID, OP_ID, TYPE - forwarded to ATOMIC_BEGIN_CPT
//     BITS      - operand width, forwarded to MIN_MAX_CMPXCHG_CPT
//     OP        - comparison that is true when *lhs has to be replaced by rhs
//     GOMP_FLAG - forwarded to GOMP_MIN_MAX_CRITSECT_CPT
// *lhs is first tested once; only if the test succeeds is the update path
// entered (which returns on its own). Otherwise the current *lhs is returned
// unchanged.
#define MIN_MAX_COMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)        \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
  TYPE new_value, old_value;                                                   \
  (void)new_value;                                                             \
  if (*lhs OP rhs) {                                                           \
    GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG)                                   \
    MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP)                                        \
  }                                                                            \
  return *lhs;                                                                 \
  }
2706 
// MIN/MAX capture routines. OP is the condition under which rhs replaces
// *lhs: '<' for max, '>' for min.
// MIN_MAX_COMPXCHG_CPT arguments: TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG
MIN_MAX_COMPXCHG_CPT(fixed1, max_cpt, char, 8, <,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed1_max_cpt
MIN_MAX_COMPXCHG_CPT(fixed1, min_cpt, char, 8, >,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed1_min_cpt
MIN_MAX_COMPXCHG_CPT(fixed2, max_cpt, short, 16, <,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed2_max_cpt
MIN_MAX_COMPXCHG_CPT(fixed2, min_cpt, short, 16, >,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed2_min_cpt
MIN_MAX_COMPXCHG_CPT(fixed4, max_cpt, kmp_int32, 32, <,
                     0) // __kmpc_atomic_fixed4_max_cpt
MIN_MAX_COMPXCHG_CPT(fixed4, min_cpt, kmp_int32, 32, >,
                     0) // __kmpc_atomic_fixed4_min_cpt
MIN_MAX_COMPXCHG_CPT(fixed8, max_cpt, kmp_int64, 64, <,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed8_max_cpt
MIN_MAX_COMPXCHG_CPT(fixed8, min_cpt, kmp_int64, 64, >,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed8_min_cpt
MIN_MAX_COMPXCHG_CPT(float4, max_cpt, kmp_real32, 32, <,
                     KMP_ARCH_X86) // __kmpc_atomic_float4_max_cpt
MIN_MAX_COMPXCHG_CPT(float4, min_cpt, kmp_real32, 32, >,
                     KMP_ARCH_X86) // __kmpc_atomic_float4_min_cpt
MIN_MAX_COMPXCHG_CPT(float8, max_cpt, kmp_real64, 64, <,
                     KMP_ARCH_X86) // __kmpc_atomic_float8_max_cpt
MIN_MAX_COMPXCHG_CPT(float8, min_cpt, kmp_real64, 64, >,
                     KMP_ARCH_X86) // __kmpc_atomic_float8_min_cpt
// Lock-based variants.
// MIN_MAX_CRITICAL_CPT arguments: TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG
MIN_MAX_CRITICAL_CPT(float10, max_cpt, long double, <, 10r,
                     1) // __kmpc_atomic_float10_max_cpt
MIN_MAX_CRITICAL_CPT(float10, min_cpt, long double, >, 10r,
                     1) // __kmpc_atomic_float10_min_cpt
#if KMP_HAVE_QUAD
MIN_MAX_CRITICAL_CPT(float16, max_cpt, QUAD_LEGACY, <, 16r,
                     1) // __kmpc_atomic_float16_max_cpt
MIN_MAX_CRITICAL_CPT(float16, min_cpt, QUAD_LEGACY, >, 16r,
                     1) // __kmpc_atomic_float16_min_cpt
#if (KMP_ARCH_X86)
MIN_MAX_CRITICAL_CPT(float16, max_a16_cpt, Quad_a16_t, <, 16r,
                     1) // __kmpc_atomic_float16_max_a16_cpt
MIN_MAX_CRITICAL_CPT(float16, min_a16_cpt, Quad_a16_t, >, 16r,
                     1) // __kmpc_atomic_float16_min_a16_cpt
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD
2747 
// ------------------------------------------------------------------------
// Optional lock-based path for the .EQV. capture routines.
// When KMP_GOMP_COMPAT is defined: if FLAG is non-zero and __kmp_atomic_mode
// equals 2, run KMP_CHECK_GTID and hand OP to OP_CRITICAL_CPT with lock id 0.
// Otherwise the macro expands to nothing.
// NOTE(review): OP_CRITICAL_CPT is defined outside this section; presumably it
// returns from the enclosing routine, since nothing here does - confirm.
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG)                                     \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
    KMP_CHECK_GTID;                                                            \
    OP_CRITICAL_CPT(OP, 0);                                                    \
  }
#else
#define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */
// ------------------------------------------------------------------------
// .EQV. capture routine.
//     TYPE_ID, OP_ID, TYPE - forwarded to ATOMIC_BEGIN_CPT
//     BITS      - operand width, forwarded to OP_CMPXCHG_CPT
//     OP        - operator tokens, forwarded to OP_CMPXCHG_CPT (the entries in
//                 this file pass ^~)
//     GOMP_FLAG - forwarded to OP_GOMP_CRITICAL_EQV_CPT
// The lock-based path is given the assignment form "^= (TYPE) ~" instead of
// OP, i.e. the operator is applied to the complement of rhs cast to TYPE.
#define ATOMIC_CMPX_EQV_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)         \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
  TYPE new_value;                                                              \
  (void)new_value;                                                             \
  OP_GOMP_CRITICAL_EQV_CPT(^= (TYPE) ~, GOMP_FLAG) /* send assignment */       \
  OP_CMPXCHG_CPT(TYPE, BITS, OP)                                               \
  }
2766 
// ------------------------------------------------------------------------
// .NEQV. capture routines: generated by ATOMIC_CMPXCHG_CPT with the ^ operator.
// Arguments: TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG

ATOMIC_CMPXCHG_CPT(fixed1, neqv_cpt, kmp_int8, 8, ^,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv_cpt
ATOMIC_CMPXCHG_CPT(fixed2, neqv_cpt, kmp_int16, 16, ^,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv_cpt
ATOMIC_CMPXCHG_CPT(fixed4, neqv_cpt, kmp_int32, 32, ^,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv_cpt
ATOMIC_CMPXCHG_CPT(fixed8, neqv_cpt, kmp_int64, 64, ^,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv_cpt
// .EQV. capture routines: generated by ATOMIC_CMPX_EQV_CPT with the ^~ tokens.
ATOMIC_CMPX_EQV_CPT(fixed1, eqv_cpt, kmp_int8, 8, ^~,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv_cpt
ATOMIC_CMPX_EQV_CPT(fixed2, eqv_cpt, kmp_int16, 16, ^~,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv_cpt
ATOMIC_CMPX_EQV_CPT(fixed4, eqv_cpt, kmp_int32, 32, ^~,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv_cpt
ATOMIC_CMPX_EQV_CPT(fixed8, eqv_cpt, kmp_int64, 64, ^~,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv_cpt
2785 
// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
//     TYPE_ID, OP_ID, TYPE - detailed above
//     OP        - operator
//     LCK_ID    - lock identifier, used to possibly distinguish lock variable
//     GOMP_FLAG - forwarded to OP_GOMP_CRITICAL_CPT
// The update itself is delegated to OP_UPDATE_CRITICAL_CPT (defined outside
// this section), which is given TYPE, OP and LCK_ID.
#define ATOMIC_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)       \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
  TYPE new_value;                                                              \
  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) /* send assignment */              \
  OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) /* send assignment */               \
  }
2798 
// ------------------------------------------------------------------------
// Workaround for cmplx4. Regular routines with return value don't work
// on Win_32e, so the captured value is handed back through the extra 'out'
// pointer and the enclosing routine returns nothing.
//     OP     - compound assignment operator applied as "(*lhs) OP rhs"
//     LCK_ID - suffix selecting the ATOMIC_LOCK##LCK_ID lock
// A non-zero 'flag' captures *lhs after the update, a zero 'flag' before it.
// Expects gtid, flag, lhs, rhs and out to be in scope.
#define OP_CRITICAL_CPT_WRK(OP, LCK_ID)                                        \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
                                                                               \
  if (!flag) { /* capture the value before the update */                       \
    (*out) = (*lhs);                                                           \
  }                                                                            \
  (*lhs) OP rhs;                                                               \
  if (flag) { /* capture the value after the update */                         \
    (*out) = (*lhs);                                                           \
  }                                                                            \
                                                                               \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
  return;
2815 // ------------------------------------------------------------------------
2816 
#if defined(KMP_GOMP_COMPAT)
// GOMP compatibility path for the cmplx4 workaround routines: when FLAG is
// non-zero and __kmp_atomic_mode is 2, validate gtid and perform the whole
// operation under lock 0 (OP_CRITICAL_CPT_WRK returns from the routine).
//     OP   - bare operator; `=` is pasted on to form the compound assignment
//     FLAG - compile-time switch enabling this path
#define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG)                                     \
  if (FLAG) {                                                                  \
    if (__kmp_atomic_mode == 2) {                                              \
      KMP_CHECK_GTID;                                                          \
      OP_CRITICAL_CPT_WRK(OP## =, 0);                                          \
    }                                                                          \
  }
#else
// Without GOMP compatibility the path expands to nothing.
#define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */
2826 // ------------------------------------------------------------------------
2827 
// Prologue of a cmplx4 workaround routine. Emits the header of
//   void __kmpc_atomic_<TYPE_ID>_<OP_ID>(ident_t *id_ref, int gtid,
//                                        TYPE *lhs, TYPE rhs, TYPE *out,
//                                        int flag)
// followed by a debug assertion on __kmp_init_serial and a level-100 trace.
// The function body is intentionally left open; the macro using this prologue
// must supply the closing `}`.
#define ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE)                                 \
  void __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, TYPE *lhs, \
                                         TYPE rhs, TYPE *out, int flag) {      \
    KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2833 // ------------------------------------------------------------------------
2834 
// Defines a complete void-returning capture routine for the cmplx4
// workaround: the ATOMIC_BEGIN_WRK prologue, the optional GOMP path, the
// lock-protected update (`OP` with `=` pasted on), and the closing `}`.
//     TYPE_ID, OP_ID - pasted into the routine name
//     TYPE           - operand type
//     OP             - bare binary operator (+, -, *, /)
//     LCK_ID         - lock identifier
//     GOMP_FLAG      - enables the GOMP compatibility path when non-zero
#define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)   \
  ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE)                                       \
  OP_GOMP_CRITICAL_CPT_WRK(OP, GOMP_FLAG)                                      \
  OP_CRITICAL_CPT_WRK(OP## =, LCK_ID)                                          \
  }
// End of the workaround for cmplx4
2841 
2842 /* ------------------------------------------------------------------------- */
2843 // routines for long double type
2844 ATOMIC_CRITICAL_CPT(float10, add_cpt, long double, +, 10r,
2845                     1) // __kmpc_atomic_float10_add_cpt
2846 ATOMIC_CRITICAL_CPT(float10, sub_cpt, long double, -, 10r,
2847                     1) // __kmpc_atomic_float10_sub_cpt
2848 ATOMIC_CRITICAL_CPT(float10, mul_cpt, long double, *, 10r,
2849                     1) // __kmpc_atomic_float10_mul_cpt
2850 ATOMIC_CRITICAL_CPT(float10, div_cpt, long double, /, 10r,
2851                     1) // __kmpc_atomic_float10_div_cpt
2852 #if KMP_HAVE_QUAD
2853 // routines for _Quad type
2854 ATOMIC_CRITICAL_CPT(float16, add_cpt, QUAD_LEGACY, +, 16r,
2855                     1) // __kmpc_atomic_float16_add_cpt
2856 ATOMIC_CRITICAL_CPT(float16, sub_cpt, QUAD_LEGACY, -, 16r,
2857                     1) // __kmpc_atomic_float16_sub_cpt
2858 ATOMIC_CRITICAL_CPT(float16, mul_cpt, QUAD_LEGACY, *, 16r,
2859                     1) // __kmpc_atomic_float16_mul_cpt
2860 ATOMIC_CRITICAL_CPT(float16, div_cpt, QUAD_LEGACY, /, 16r,
2861                     1) // __kmpc_atomic_float16_div_cpt
2862 #if (KMP_ARCH_X86)
2863 ATOMIC_CRITICAL_CPT(float16, add_a16_cpt, Quad_a16_t, +, 16r,
2864                     1) // __kmpc_atomic_float16_add_a16_cpt
2865 ATOMIC_CRITICAL_CPT(float16, sub_a16_cpt, Quad_a16_t, -, 16r,
2866                     1) // __kmpc_atomic_float16_sub_a16_cpt
2867 ATOMIC_CRITICAL_CPT(float16, mul_a16_cpt, Quad_a16_t, *, 16r,
2868                     1) // __kmpc_atomic_float16_mul_a16_cpt
2869 ATOMIC_CRITICAL_CPT(float16, div_a16_cpt, Quad_a16_t, /, 16r,
2870                     1) // __kmpc_atomic_float16_div_a16_cpt
2871 #endif // (KMP_ARCH_X86)
2872 #endif // KMP_HAVE_QUAD
2873 
// routines for complex types
// Arguments of every instantiation below:
//     (TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)
// The trailing comment on each one names the generated entry point.

// cmplx4 routines to return void (captured value goes through `out`)
ATOMIC_CRITICAL_CPT_WRK(cmplx4, add_cpt, kmp_cmplx32, +, 8c,
                        1) // __kmpc_atomic_cmplx4_add_cpt
ATOMIC_CRITICAL_CPT_WRK(cmplx4, sub_cpt, kmp_cmplx32, -, 8c,
                        1) // __kmpc_atomic_cmplx4_sub_cpt
ATOMIC_CRITICAL_CPT_WRK(cmplx4, mul_cpt, kmp_cmplx32, *, 8c,
                        1) // __kmpc_atomic_cmplx4_mul_cpt
ATOMIC_CRITICAL_CPT_WRK(cmplx4, div_cpt, kmp_cmplx32, /, 8c,
                        1) // __kmpc_atomic_cmplx4_div_cpt

// wider complex types use the regular value-returning capture routines
ATOMIC_CRITICAL_CPT(cmplx8, add_cpt, kmp_cmplx64, +, 16c,
                    1) // __kmpc_atomic_cmplx8_add_cpt
ATOMIC_CRITICAL_CPT(cmplx8, sub_cpt, kmp_cmplx64, -, 16c,
                    1) // __kmpc_atomic_cmplx8_sub_cpt
ATOMIC_CRITICAL_CPT(cmplx8, mul_cpt, kmp_cmplx64, *, 16c,
                    1) // __kmpc_atomic_cmplx8_mul_cpt
ATOMIC_CRITICAL_CPT(cmplx8, div_cpt, kmp_cmplx64, /, 16c,
                    1) // __kmpc_atomic_cmplx8_div_cpt
ATOMIC_CRITICAL_CPT(cmplx10, add_cpt, kmp_cmplx80, +, 20c,
                    1) // __kmpc_atomic_cmplx10_add_cpt
ATOMIC_CRITICAL_CPT(cmplx10, sub_cpt, kmp_cmplx80, -, 20c,
                    1) // __kmpc_atomic_cmplx10_sub_cpt
ATOMIC_CRITICAL_CPT(cmplx10, mul_cpt, kmp_cmplx80, *, 20c,
                    1) // __kmpc_atomic_cmplx10_mul_cpt
ATOMIC_CRITICAL_CPT(cmplx10, div_cpt, kmp_cmplx80, /, 20c,
                    1) // __kmpc_atomic_cmplx10_div_cpt
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_CPT(cmplx16, add_cpt, CPLX128_LEG, +, 32c,
                    1) // __kmpc_atomic_cmplx16_add_cpt
ATOMIC_CRITICAL_CPT(cmplx16, sub_cpt, CPLX128_LEG, -, 32c,
                    1) // __kmpc_atomic_cmplx16_sub_cpt
ATOMIC_CRITICAL_CPT(cmplx16, mul_cpt, CPLX128_LEG, *, 32c,
                    1) // __kmpc_atomic_cmplx16_mul_cpt
ATOMIC_CRITICAL_CPT(cmplx16, div_cpt, CPLX128_LEG, /, 32c,
                    1) // __kmpc_atomic_cmplx16_div_cpt
#if (KMP_ARCH_X86)
// variants taking kmp_cmplx128_a16_t operands, built only for KMP_ARCH_X86
ATOMIC_CRITICAL_CPT(cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c,
                    1) // __kmpc_atomic_cmplx16_add_a16_cpt
ATOMIC_CRITICAL_CPT(cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c,
                    1) // __kmpc_atomic_cmplx16_sub_a16_cpt
ATOMIC_CRITICAL_CPT(cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c,
                    1) // __kmpc_atomic_cmplx16_mul_a16_cpt
ATOMIC_CRITICAL_CPT(cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c,
                    1) // __kmpc_atomic_cmplx16_div_a16_cpt
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD
2922 
// OpenMP 4.0 capture forms with reversed operands, for non-commutative
// operations:
//     v = x = expr binop x;
//     { v = x; x = expr binop x; }
//     { x = expr binop x; v = x; }
// Supported only on IA-32 architecture and Intel(R) 64.
2926 
2927 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
2928 // -------------------------------------------------------------------------
2929 // Operation on *lhs, rhs bound by critical section
2930 //     OP     - operator (it's supposed to contain an assignment)
2931 //     LCK_ID - lock identifier
2932 // Note: don't check gtid as it should always be valid
2933 // 1, 2-byte - expect valid parameter, other - check before this macro
2934 #define OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID)                                  \
2935   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2936                                                                                \
2937   if (flag) {                                                                  \
2938     /*temp_val = (*lhs);*/                                                     \
2939     (*lhs) = (TYPE)((rhs)OP(*lhs));                                            \
2940     new_value = (*lhs);                                                        \
2941   } else {                                                                     \
2942     new_value = (*lhs);                                                        \
2943     (*lhs) = (TYPE)((rhs)OP(*lhs));                                            \
2944   }                                                                            \
2945   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2946   return new_value;
2947 
2948 // ------------------------------------------------------------------------
2949 #ifdef KMP_GOMP_COMPAT
2950 #define OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, FLAG)                               \
2951   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
2952     KMP_CHECK_GTID;                                                            \
2953     OP_CRITICAL_CPT_REV(TYPE, OP, 0);                                          \
2954   }
2955 #else
2956 #define OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, FLAG)
2957 #endif /* KMP_GOMP_COMPAT */
2958 
2959 // ------------------------------------------------------------------------
2960 // Operation on *lhs, rhs using "compare_and_store" routine
2961 //     TYPE    - operands' type
2962 //     BITS    - size in bits, used to distinguish low level calls
2963 //     OP      - operator
2964 // Note: temp_val introduced in order to force the compiler to read
2965 //       *lhs only once (w/o it the compiler reads *lhs twice)
2966 #define OP_CMPXCHG_CPT_REV(TYPE, BITS, OP)                                     \
2967   {                                                                            \
2968     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
2969     TYPE old_value, new_value;                                                 \
2970     temp_val = *lhs;                                                           \
2971     old_value = temp_val;                                                      \
2972     new_value = (TYPE)(rhs OP old_value);                                      \
2973     while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
2974         (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
2975         *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
2976       temp_val = *lhs;                                                         \
2977       old_value = temp_val;                                                    \
2978       new_value = (TYPE)(rhs OP old_value);                                    \
2979     }                                                                          \
2980     if (flag) {                                                                \
2981       return new_value;                                                        \
2982     } else                                                                     \
2983       return old_value;                                                        \
2984   }
2985 
2986 // -------------------------------------------------------------------------
2987 #define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)      \
2988   ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2989   TYPE new_value;                                                              \
2990   (void)new_value;                                                             \
2991   OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG)                                \
2992   OP_CMPXCHG_CPT_REV(TYPE, BITS, OP)                                           \
2993   }
2994 
// Reverse-capture routines for 1-, 2-, 4- and 8-byte integers and for
// float/double. Arguments: (TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG).
// GOMP_FLAG is KMP_ARCH_X86 throughout, so the GOMP critical-section path is
// taken only where that macro is non-zero. The trailing comment on each
// instantiation names the generated entry point.
ATOMIC_CMPXCHG_CPT_REV(fixed1, div_cpt_rev, kmp_int8, 8, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed1u, div_cpt_rev, kmp_uint8, 8, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed1, shl_cpt_rev, kmp_int8, 8, <<,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed1, shr_cpt_rev, kmp_int8, 8, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed1u, shr_cpt_rev, kmp_uint8, 8, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed1, sub_cpt_rev, kmp_int8, 8, -,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2, div_cpt_rev, kmp_int16, 16, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2u, div_cpt_rev, kmp_uint16, 16, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2, shl_cpt_rev, kmp_int16, 16, <<,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2, shr_cpt_rev, kmp_int16, 16, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2u, shr_cpt_rev, kmp_uint16, 16, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2, sub_cpt_rev, kmp_int16, 16, -,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4, div_cpt_rev, kmp_int32, 32, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4u, div_cpt_rev, kmp_uint32, 32, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4, shl_cpt_rev, kmp_int32, 32, <<,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4, shr_cpt_rev, kmp_int32, 32, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4u, shr_cpt_rev, kmp_uint32, 32, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4, sub_cpt_rev, kmp_int32, 32, -,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8, div_cpt_rev, kmp_int64, 64, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8u, div_cpt_rev, kmp_uint64, 64, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8, shl_cpt_rev, kmp_int64, 64, <<,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8, shr_cpt_rev, kmp_int64, 64, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8u, shr_cpt_rev, kmp_uint64, 64, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8, sub_cpt_rev, kmp_int64, 64, -,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(float4, div_cpt_rev, kmp_real32, 32, /,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(float4, sub_cpt_rev, kmp_real32, 32, -,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(float8, div_cpt_rev, kmp_real64, 64, /,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(float8, sub_cpt_rev, kmp_real64, 64, -,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev
//              TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG
3052 
3053 // ------------------------------------------------------------------------
3054 // Routines for Extended types: long double, _Quad, complex flavours (use
3055 // critical section)
3056 //     TYPE_ID, OP_ID, TYPE - detailed above
3057 //     OP      - operator
3058 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
3059 #define ATOMIC_CRITICAL_CPT_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)   \
3060   ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
3061   TYPE new_value;                                                              \
3062   /*printf("__kmp_atomic_mode = %d\n", __kmp_atomic_mode);*/                   \
3063   OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG)                                \
3064   OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID)                                        \
3065   }
3066 
3067 /* ------------------------------------------------------------------------- */
3068 // routines for long double type
3069 ATOMIC_CRITICAL_CPT_REV(float10, sub_cpt_rev, long double, -, 10r,
3070                         1) // __kmpc_atomic_float10_sub_cpt_rev
3071 ATOMIC_CRITICAL_CPT_REV(float10, div_cpt_rev, long double, /, 10r,
3072                         1) // __kmpc_atomic_float10_div_cpt_rev
3073 #if KMP_HAVE_QUAD
3074 // routines for _Quad type
3075 ATOMIC_CRITICAL_CPT_REV(float16, sub_cpt_rev, QUAD_LEGACY, -, 16r,
3076                         1) // __kmpc_atomic_float16_sub_cpt_rev
3077 ATOMIC_CRITICAL_CPT_REV(float16, div_cpt_rev, QUAD_LEGACY, /, 16r,
3078                         1) // __kmpc_atomic_float16_div_cpt_rev
3079 #if (KMP_ARCH_X86)
3080 ATOMIC_CRITICAL_CPT_REV(float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r,
3081                         1) // __kmpc_atomic_float16_sub_a16_cpt_rev
3082 ATOMIC_CRITICAL_CPT_REV(float16, div_a16_cpt_rev, Quad_a16_t, /, 16r,
3083                         1) // __kmpc_atomic_float16_div_a16_cpt_rev
3084 #endif // (KMP_ARCH_X86)
3085 #endif // KMP_HAVE_QUAD
3086 
// routines for complex types

// ------------------------------------------------------------------------
// cmplx4 workaround for reverse capture: regular routines that return the
// captured value don't work on Win_32e, so the captured value is written
// through the additional `out` parameter instead.
//     OP      - bare binary operator, used as `(*lhs) = (rhs)OP(*lhs)`
//     LCK_ID  - lock identifier, pasted onto ATOMIC_LOCK to select the lock
// Relies on gtid, lhs, rhs, out and flag being in scope where it expands.
// A non-zero flag captures the updated value, a zero flag the previous one.
// The expansion finishes with `return;`.
#define OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID)                                    \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
                                                                               \
  if (!flag) {                                                                 \
    (*out) = (*lhs);                                                           \
  }                                                                            \
  (*lhs) = (rhs)OP(*lhs);                                                      \
  if (flag) {                                                                  \
    (*out) = (*lhs);                                                           \
  }                                                                            \
                                                                               \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
  return;
3105 // ------------------------------------------------------------------------
3106 
#if defined(KMP_GOMP_COMPAT)
// GOMP compatibility path for the cmplx4 reverse-capture workaround: when
// FLAG is non-zero and __kmp_atomic_mode is 2, validate gtid and perform the
// whole operation under lock 0 (OP_CRITICAL_CPT_REV_WRK returns from the
// routine).
#define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG)                                 \
  if (FLAG) {                                                                  \
    if (__kmp_atomic_mode == 2) {                                              \
      KMP_CHECK_GTID;                                                          \
      OP_CRITICAL_CPT_REV_WRK(OP, 0);                                          \
    }                                                                          \
  }
#else
// Without GOMP compatibility the path expands to nothing.
#define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */
3116 // ------------------------------------------------------------------------
3117 
// Defines a complete void-returning reverse-capture routine for the cmplx4
// workaround: the ATOMIC_BEGIN_WRK prologue, the optional GOMP path, the
// lock-protected update, and the closing `}`.
//     TYPE_ID, OP_ID - pasted into the routine name
//     TYPE           - operand type
//     OP             - bare binary operator
//     LCK_ID         - lock identifier
//     GOMP_FLAG      - enables the GOMP compatibility path when non-zero
#define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID,          \
                                    GOMP_FLAG)                                 \
  ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE)                                       \
  OP_GOMP_CRITICAL_CPT_REV_WRK(OP, GOMP_FLAG)                                  \
  OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID)                                          \
  }
// End of the workaround for cmplx4
3125 
// Reverse-capture routines (sub and div only) for the complex types.
// Arguments: (TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG); the trailing
// comment names the generated entry point.

// !!! TODO: check if we need to return void for cmplx4 routines
// cmplx4 routines to return void (captured value goes through `out`)
ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c,
                            1) // __kmpc_atomic_cmplx4_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c,
                            1) // __kmpc_atomic_cmplx4_div_cpt_rev

// wider complex types use the regular value-returning routines
ATOMIC_CRITICAL_CPT_REV(cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c,
                        1) // __kmpc_atomic_cmplx8_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c,
                        1) // __kmpc_atomic_cmplx8_div_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c,
                        1) // __kmpc_atomic_cmplx10_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c,
                        1) // __kmpc_atomic_cmplx10_div_cpt_rev
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c,
                        1) // __kmpc_atomic_cmplx16_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c,
                        1) // __kmpc_atomic_cmplx16_div_cpt_rev
#if (KMP_ARCH_X86)
// variants taking kmp_cmplx128_a16_t operands, built only for KMP_ARCH_X86
ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c,
                        1) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c,
                        1) // __kmpc_atomic_cmplx16_div_a16_cpt_rev
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD
3153 
3154 // Capture reverse for mixed type: RHS=float16
3155 #if KMP_HAVE_QUAD
3156 
// Definition of a mixed-type reverse-capture routine based on
// compare-and-store (provides name, parameters, debug trace through
// ATOMIC_BEGIN_CPT_MIX, which is defined elsewhere in this file).
//     TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
//     fixed)
//     TYPE    - operands' type
//     OP_ID   - operation identifier (sub_cpt_rev, div_cpt_rev, ...)
//     BITS    - operand size in bits
//     OP      - bare binary operator
//     RTYPE_ID, RTYPE - identifier and type of the right-hand side operand
//     LCK_ID, MASK    - accepted but not referenced in this macro's body
//     GOMP_FLAG - enables the GOMP critical-section path when non-zero
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID,   \
                                   RTYPE, LCK_ID, MASK, GOMP_FLAG)             \
  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)                  \
  TYPE new_value;                                                              \
  (void)new_value;                                                             \
  OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG)                                \
  OP_CMPXCHG_CPT_REV(TYPE, BITS, OP)                                           \
  }
3171 
3172 // -------------------------------------------------------------------------
3173 #define ATOMIC_CRITICAL_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
3174                                     LCK_ID, GOMP_FLAG)                         \
3175   ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)                  \
3176   TYPE new_value;                                                              \
3177   (void)new_value;                                                             \
3178   OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) /* send assignment */          \
3179   OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) /* send assignment */                  \
3180   }
3181 
// Mixed-type reverse-capture routines with a _Quad right-hand side.
// ATOMIC_CMPXCHG_CPT_REV_MIX arguments:
//     (TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, LCK_ID, MASK,
//      GOMP_FLAG)
// ATOMIC_CRITICAL_CPT_REV_MIX arguments:
//     (TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, LCK_ID, GOMP_FLAG)
// The trailing comment on each instantiation names the generated entry point.

// 1-byte integers
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev_fp

// 2-byte integers
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, sub_cpt_rev, 16, -, fp, _Quad, 2i, 1,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, sub_cpt_rev, 16, -, fp, _Quad, 2i,
                           1,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, div_cpt_rev, 16, /, fp, _Quad, 2i, 1,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, div_cpt_rev, 16, /, fp, _Quad, 2i,
                           1,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev_fp

// 4-byte integers (GOMP_FLAG is 0 here, unlike the other sizes)
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, sub_cpt_rev, 32, -, fp, _Quad, 4i,
                           3, 0) // __kmpc_atomic_fixed4_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, sub_cpt_rev, 32, -, fp, _Quad,
                           4i, 3, 0) // __kmpc_atomic_fixed4u_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, div_cpt_rev, 32, /, fp, _Quad, 4i,
                           3, 0) // __kmpc_atomic_fixed4_div_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, div_cpt_rev, 32, /, fp, _Quad,
                           4i, 3, 0) // __kmpc_atomic_fixed4u_div_cpt_rev_fp

// 8-byte integers
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, sub_cpt_rev, 64, -, fp, _Quad, 8i,
                           7,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, sub_cpt_rev, 64, -, fp, _Quad,
                           8i, 7,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, div_cpt_rev, 64, /, fp, _Quad, 8i,
                           7,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, div_cpt_rev, 64, /, fp, _Quad,
                           8i, 7,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev_fp

// 4-byte reals
ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, sub_cpt_rev, 32, -, fp, _Quad,
                           4r, 3,
                           KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, div_cpt_rev, 32, /, fp, _Quad,
                           4r, 3,
                           KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev_fp

// 8-byte reals
ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, sub_cpt_rev, 64, -, fp, _Quad,
                           8r, 7,
                           KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, div_cpt_rev, 64, /, fp, _Quad,
                           8r, 7,
                           KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev_fp

// long double goes through the critical-section variant
ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, sub_cpt_rev, -, fp, _Quad,
                            10r, 1) // __kmpc_atomic_float10_sub_cpt_rev_fp
ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, div_cpt_rev, /, fp, _Quad,
                            10r, 1) // __kmpc_atomic_float10_div_cpt_rev_fp
3242 
3243 #endif // KMP_HAVE_QUAD
3244 
//   OpenMP 4.0 Capture-write (swap): {v = x; x = expr;}

// Prologue of a swap routine. Emits the header of
//   TYPE __kmpc_atomic_<TYPE_ID>_swp(ident_t *id_ref, int gtid, TYPE *lhs,
//                                    TYPE rhs)
// followed by a debug assertion on __kmp_init_serial and a level-100 trace.
// The function body is intentionally left open; the macro using this prologue
// must supply the closing `}`.
#define ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                        \
  TYPE __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs,     \
                                     TYPE rhs) {                               \
    KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));
3252 
// Swap under a critical section: with ATOMIC_LOCK##LCK_ID held, save *lhs
// into `old_value`, store rhs into *lhs, then release the lock and return
// the saved value.
//     LCK_ID - lock identifier
// Expects gtid, lhs, rhs and a declared `old_value` in scope where it
// expands.
#define CRITICAL_SWP(LCK_ID)                                                   \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
                                                                               \
  old_value = (*lhs);                                                          \
  (*lhs) = rhs;                                                                \
                                                                               \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
  return old_value;
3261 
3262 // ------------------------------------------------------------------------
3263 #ifdef KMP_GOMP_COMPAT
3264 #define GOMP_CRITICAL_SWP(FLAG)                                                \
3265   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
3266     KMP_CHECK_GTID;                                                            \
3267     CRITICAL_SWP(0);                                                           \
3268   }
3269 #else
3270 #define GOMP_CRITICAL_SWP(FLAG)
3271 #endif /* KMP_GOMP_COMPAT */
3272 
// Defines a complete swap routine for fixed-point types using the
// KMP_XCHG_FIXED##BITS exchange primitive.
//     TYPE_ID   - pasted into the routine name
//     TYPE      - operand and return type
//     BITS      - operand size in bits, selects the exchange primitive
//     GOMP_FLAG - enables the GOMP critical-section path when non-zero
// Returns the value produced by the exchange primitive.
#define ATOMIC_XCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG)                        \
  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                              \
  TYPE old_value;                                                              \
  GOMP_CRITICAL_SWP(GOMP_FLAG)                                                 \
  old_value = KMP_XCHG_FIXED##BITS(lhs, rhs);                                  \
  return old_value;                                                            \
  }
3280 // ------------------------------------------------------------------------
// Defines a complete swap routine for floating-point types using the
// KMP_XCHG_REAL##BITS exchange primitive.
//     TYPE_ID   - pasted into the routine name
//     TYPE      - operand and return type
//     BITS      - operand size in bits, selects the exchange primitive
//     GOMP_FLAG - enables the GOMP critical-section path when non-zero
// Returns the value produced by the exchange primitive.
#define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG)                  \
  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                              \
  TYPE old_value;                                                              \
  GOMP_CRITICAL_SWP(GOMP_FLAG)                                                 \
  old_value = KMP_XCHG_REAL##BITS(lhs, rhs);                                   \
  return old_value;                                                            \
  }
3288 
3289 // ------------------------------------------------------------------------
3290 #define CMPXCHG_SWP(TYPE, BITS)                                                \
3291   {                                                                            \
3292     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
3293     TYPE old_value, new_value;                                                 \
3294     temp_val = *lhs;                                                           \
3295     old_value = temp_val;                                                      \
3296     new_value = rhs;                                                           \
3297     while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
3298         (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
3299         *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
3300       temp_val = *lhs;                                                         \
3301       old_value = temp_val;                                                    \
3302       new_value = rhs;                                                         \
3303     }                                                                          \
3304     return old_value;                                                          \
3305   }
3306 
3307 // -------------------------------------------------------------------------
3308 #define ATOMIC_CMPXCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG)                     \
3309   ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                              \
3310   TYPE old_value;                                                              \
3311   (void)old_value;                                                             \
3312   GOMP_CRITICAL_SWP(GOMP_FLAG)                                                 \
3313   CMPXCHG_SWP(TYPE, BITS)                                                      \
3314   }
3315 
// 1-, 2- and 4-byte swap routines are generated from ATOMIC_XCHG_SWP /
// ATOMIC_XCHG_FLOAT_SWP (both defined outside this section).
ATOMIC_XCHG_SWP(fixed1, kmp_int8, 8, KMP_ARCH_X86) // __kmpc_atomic_fixed1_swp
ATOMIC_XCHG_SWP(fixed2, kmp_int16, 16, KMP_ARCH_X86) // __kmpc_atomic_fixed2_swp
ATOMIC_XCHG_SWP(fixed4, kmp_int32, 32, KMP_ARCH_X86) // __kmpc_atomic_fixed4_swp

ATOMIC_XCHG_FLOAT_SWP(float4, kmp_real32, 32,
                      KMP_ARCH_X86) // __kmpc_atomic_float4_swp

// 8-byte swap routines: when KMP_ARCH_X86 is set they are generated from the
// compare-and-store loop (ATOMIC_CMPXCHG_SWP); on every other target they use
// the same XCHG-based generators as the narrower types.
#if (KMP_ARCH_X86)
ATOMIC_CMPXCHG_SWP(fixed8, kmp_int64, 64,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
ATOMIC_CMPXCHG_SWP(float8, kmp_real64, 64,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_swp
#else
ATOMIC_XCHG_SWP(fixed8, kmp_int64, 64, KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
ATOMIC_XCHG_FLOAT_SWP(float8, kmp_real64, 64,
                      KMP_ARCH_X86) // __kmpc_atomic_float8_swp
#endif // (KMP_ARCH_X86)
3333 
// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
// ATOMIC_CRITICAL_SWP(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) expands to, in order:
// the ATOMIC_BEGIN_SWP header, a local old_value of TYPE, the
// GOMP_CRITICAL_SWP hook and CRITICAL_SWP(LCK_ID).
// NOTE(review): ATOMIC_BEGIN_SWP, GOMP_CRITICAL_SWP and CRITICAL_SWP are
// defined outside this section; the trailing `}` presumably closes the
// function body opened by ATOMIC_BEGIN_SWP -- confirm there.
#define ATOMIC_CRITICAL_SWP(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG)                  \
  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                              \
  TYPE old_value;                                                              \
  GOMP_CRITICAL_SWP(GOMP_FLAG)                                                 \
  CRITICAL_SWP(LCK_ID)                                                         \
  }
3343 
3344 // ------------------------------------------------------------------------
3345 // !!! TODO: check if we need to return void for cmplx4 routines
3346 // Workaround for cmplx4. Regular routines with return value don't work
3347 // on Win_32e. Let's return captured values through the additional parameter.
3348 
// ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE): opens the definition of
// __kmpc_atomic_<TYPE_ID>_swp in its void-returning form, which takes an extra
// `TYPE *out` parameter. The expansion asserts __kmp_init_serial through
// KMP_DEBUG_ASSERT and emits a level-100 KA_TRACE line with the gtid.
// The function body is left open: the macro that uses this one must supply the
// closing brace.
#define ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE)                                    \
  void __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs,     \
                                     TYPE rhs, TYPE *out) {                    \
    KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));
3354 
// CRITICAL_SWP_WRK(LCK_ID): lock-protected swap for the void-returning form.
// Requires `lhs`, `rhs`, `out`, `tmp` and `gtid` in scope at the expansion
// site. While holding ATOMIC_LOCK##LCK_ID it saves *lhs in tmp, stores rhs
// into *lhs and writes the saved value to *out; it then releases the lock and
// returns from the enclosing function.
#define CRITICAL_SWP_WRK(LCK_ID)                                               \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
                                                                               \
  tmp = (*lhs);                                                                \
  (*lhs) = (rhs);                                                              \
  (*out) = tmp;                                                                \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
  return;
3363 // ------------------------------------------------------------------------
3364 
// GOMP_CRITICAL_SWP_WRK(FLAG): GOMP-compatibility hook for the void-returning
// swap. With KMP_GOMP_COMPAT defined, when FLAG is non-zero and
// __kmp_atomic_mode == 2 it runs KMP_CHECK_GTID and then CRITICAL_SWP_WRK(0),
// i.e. the swap is done under ATOMIC_LOCK0 and the enclosing function returns.
// Without KMP_GOMP_COMPAT the macro expands to nothing.
#ifdef KMP_GOMP_COMPAT
#define GOMP_CRITICAL_SWP_WRK(FLAG)                                            \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
    KMP_CHECK_GTID;                                                            \
    CRITICAL_SWP_WRK(0);                                                       \
  }
#else
#define GOMP_CRITICAL_SWP_WRK(FLAG)
#endif /* KMP_GOMP_COMPAT */
3374 // ------------------------------------------------------------------------
3375 
// ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG): expands to one
// complete void-returning swap routine: the ATOMIC_BEGIN_SWP_WRK header, the
// `tmp` local used by CRITICAL_SWP_WRK, the GOMP-compatibility hook, and the
// lock-protected swap under ATOMIC_LOCK##LCK_ID. The trailing `}` closes the
// function body opened by ATOMIC_BEGIN_SWP_WRK.
#define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG)              \
  ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE)                                          \
  TYPE tmp;                                                                    \
  GOMP_CRITICAL_SWP_WRK(GOMP_FLAG)                                             \
  CRITICAL_SWP_WRK(LCK_ID)                                                     \
  }
3382 // The end of workaround for cmplx4
3383 
// Swap routines for the extended types, generated from the lock-based macros.
// The third argument is the LCK_ID passed on to the critical-section macro;
// the last argument is the GOMP_FLAG.
ATOMIC_CRITICAL_SWP(float10, long double, 10r, 1) // __kmpc_atomic_float10_swp
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_SWP(float16, QUAD_LEGACY, 16r, 1) // __kmpc_atomic_float16_swp
#endif // KMP_HAVE_QUAD
// cmplx4 routine to return void
// (generated from the _WRK macro, so the old value is written through `out`)
ATOMIC_CRITICAL_SWP_WRK(cmplx4, kmp_cmplx32, 8c, 1) // __kmpc_atomic_cmplx4_swp

// ATOMIC_CRITICAL_SWP( cmplx4, kmp_cmplx32,  8c,   1 )           //
// __kmpc_atomic_cmplx4_swp

ATOMIC_CRITICAL_SWP(cmplx8, kmp_cmplx64, 16c, 1) // __kmpc_atomic_cmplx8_swp
ATOMIC_CRITICAL_SWP(cmplx10, kmp_cmplx80, 20c, 1) // __kmpc_atomic_cmplx10_swp
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_SWP(cmplx16, CPLX128_LEG, 32c, 1) // __kmpc_atomic_cmplx16_swp
// The _a16 variants are only generated when KMP_ARCH_X86 is set.
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_SWP(float16_a16, Quad_a16_t, 16r,
                    1) // __kmpc_atomic_float16_a16_swp
ATOMIC_CRITICAL_SWP(cmplx16_a16, kmp_cmplx128_a16_t, 32c,
                    1) // __kmpc_atomic_cmplx16_a16_swp
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD
3405 
3406 // End of OpenMP 4.0 Capture
3407 
3408 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
3409 
3410 #undef OP_CRITICAL
3411 
3412 /* ------------------------------------------------------------------------ */
3413 /* Generic atomic routines                                                  */
3414 
__kmpc_atomic_1(ident_t * id_ref,int gtid,void * lhs,void * rhs,void (* f)(void *,void *,void *))3415 void __kmpc_atomic_1(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3416                      void (*f)(void *, void *, void *)) {
3417   KMP_DEBUG_ASSERT(__kmp_init_serial);
3418 
3419   if (
3420 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3421       FALSE /* must use lock */
3422 #else
3423       TRUE
3424 #endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3425   ) {
3426     kmp_int8 old_value, new_value;
3427 
3428     old_value = *(kmp_int8 *)lhs;
3429     (*f)(&new_value, &old_value, rhs);
3430 
3431     /* TODO: Should this be acquire or release? */
3432     while (!KMP_COMPARE_AND_STORE_ACQ8((kmp_int8 *)lhs, *(kmp_int8 *)&old_value,
3433                                        *(kmp_int8 *)&new_value)) {
3434       KMP_CPU_PAUSE();
3435 
3436       old_value = *(kmp_int8 *)lhs;
3437       (*f)(&new_value, &old_value, rhs);
3438     }
3439 
3440     return;
3441   } else {
3442     // All 1-byte data is of integer data type.
3443 
3444 #ifdef KMP_GOMP_COMPAT
3445     if (__kmp_atomic_mode == 2) {
3446       __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3447     } else
3448 #endif /* KMP_GOMP_COMPAT */
3449       __kmp_acquire_atomic_lock(&__kmp_atomic_lock_1i, gtid);
3450 
3451     (*f)(lhs, lhs, rhs);
3452 
3453 #ifdef KMP_GOMP_COMPAT
3454     if (__kmp_atomic_mode == 2) {
3455       __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3456     } else
3457 #endif /* KMP_GOMP_COMPAT */
3458       __kmp_release_atomic_lock(&__kmp_atomic_lock_1i, gtid);
3459   }
3460 }
3461 
__kmpc_atomic_2(ident_t * id_ref,int gtid,void * lhs,void * rhs,void (* f)(void *,void *,void *))3462 void __kmpc_atomic_2(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3463                      void (*f)(void *, void *, void *)) {
3464   if (
3465 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3466       FALSE /* must use lock */
3467 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64
3468       TRUE /* no alignment problems */
3469 #else
3470       !((kmp_uintptr_t)lhs & 0x1) /* make sure address is 2-byte aligned */
3471 #endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3472   ) {
3473     kmp_int16 old_value, new_value;
3474 
3475     old_value = *(kmp_int16 *)lhs;
3476     (*f)(&new_value, &old_value, rhs);
3477 
3478     /* TODO: Should this be acquire or release? */
3479     while (!KMP_COMPARE_AND_STORE_ACQ16(
3480         (kmp_int16 *)lhs, *(kmp_int16 *)&old_value, *(kmp_int16 *)&new_value)) {
3481       KMP_CPU_PAUSE();
3482 
3483       old_value = *(kmp_int16 *)lhs;
3484       (*f)(&new_value, &old_value, rhs);
3485     }
3486 
3487     return;
3488   } else {
3489     // All 2-byte data is of integer data type.
3490 
3491 #ifdef KMP_GOMP_COMPAT
3492     if (__kmp_atomic_mode == 2) {
3493       __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3494     } else
3495 #endif /* KMP_GOMP_COMPAT */
3496       __kmp_acquire_atomic_lock(&__kmp_atomic_lock_2i, gtid);
3497 
3498     (*f)(lhs, lhs, rhs);
3499 
3500 #ifdef KMP_GOMP_COMPAT
3501     if (__kmp_atomic_mode == 2) {
3502       __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3503     } else
3504 #endif /* KMP_GOMP_COMPAT */
3505       __kmp_release_atomic_lock(&__kmp_atomic_lock_2i, gtid);
3506   }
3507 }
3508 
__kmpc_atomic_4(ident_t * id_ref,int gtid,void * lhs,void * rhs,void (* f)(void *,void *,void *))3509 void __kmpc_atomic_4(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3510                      void (*f)(void *, void *, void *)) {
3511   KMP_DEBUG_ASSERT(__kmp_init_serial);
3512 
3513   if (
3514 // FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints.
3515 // Gomp compatibility is broken if this routine is called for floats.
3516 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
3517       TRUE /* no alignment problems */
3518 #else
3519       !((kmp_uintptr_t)lhs & 0x3) /* make sure address is 4-byte aligned */
3520 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
3521   ) {
3522     kmp_int32 old_value, new_value;
3523 
3524     old_value = *(kmp_int32 *)lhs;
3525     (*f)(&new_value, &old_value, rhs);
3526 
3527     /* TODO: Should this be acquire or release? */
3528     while (!KMP_COMPARE_AND_STORE_ACQ32(
3529         (kmp_int32 *)lhs, *(kmp_int32 *)&old_value, *(kmp_int32 *)&new_value)) {
3530       KMP_CPU_PAUSE();
3531 
3532       old_value = *(kmp_int32 *)lhs;
3533       (*f)(&new_value, &old_value, rhs);
3534     }
3535 
3536     return;
3537   } else {
3538     // Use __kmp_atomic_lock_4i for all 4-byte data,
3539     // even if it isn't of integer data type.
3540 
3541 #ifdef KMP_GOMP_COMPAT
3542     if (__kmp_atomic_mode == 2) {
3543       __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3544     } else
3545 #endif /* KMP_GOMP_COMPAT */
3546       __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4i, gtid);
3547 
3548     (*f)(lhs, lhs, rhs);
3549 
3550 #ifdef KMP_GOMP_COMPAT
3551     if (__kmp_atomic_mode == 2) {
3552       __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3553     } else
3554 #endif /* KMP_GOMP_COMPAT */
3555       __kmp_release_atomic_lock(&__kmp_atomic_lock_4i, gtid);
3556   }
3557 }
3558 
__kmpc_atomic_8(ident_t * id_ref,int gtid,void * lhs,void * rhs,void (* f)(void *,void *,void *))3559 void __kmpc_atomic_8(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3560                      void (*f)(void *, void *, void *)) {
3561   KMP_DEBUG_ASSERT(__kmp_init_serial);
3562   if (
3563 
3564 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3565       FALSE /* must use lock */
3566 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64
3567       TRUE /* no alignment problems */
3568 #else
3569       !((kmp_uintptr_t)lhs & 0x7) /* make sure address is 8-byte aligned */
3570 #endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3571   ) {
3572     kmp_int64 old_value, new_value;
3573 
3574     old_value = *(kmp_int64 *)lhs;
3575     (*f)(&new_value, &old_value, rhs);
3576     /* TODO: Should this be acquire or release? */
3577     while (!KMP_COMPARE_AND_STORE_ACQ64(
3578         (kmp_int64 *)lhs, *(kmp_int64 *)&old_value, *(kmp_int64 *)&new_value)) {
3579       KMP_CPU_PAUSE();
3580 
3581       old_value = *(kmp_int64 *)lhs;
3582       (*f)(&new_value, &old_value, rhs);
3583     }
3584 
3585     return;
3586   } else {
3587     // Use __kmp_atomic_lock_8i for all 8-byte data,
3588     // even if it isn't of integer data type.
3589 
3590 #ifdef KMP_GOMP_COMPAT
3591     if (__kmp_atomic_mode == 2) {
3592       __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3593     } else
3594 #endif /* KMP_GOMP_COMPAT */
3595       __kmp_acquire_atomic_lock(&__kmp_atomic_lock_8i, gtid);
3596 
3597     (*f)(lhs, lhs, rhs);
3598 
3599 #ifdef KMP_GOMP_COMPAT
3600     if (__kmp_atomic_mode == 2) {
3601       __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3602     } else
3603 #endif /* KMP_GOMP_COMPAT */
3604       __kmp_release_atomic_lock(&__kmp_atomic_lock_8i, gtid);
3605   }
3606 }
3607 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
__kmpc_atomic_10(ident_t * id_ref,int gtid,void * lhs,void * rhs,void (* f)(void *,void *,void *))3608 void __kmpc_atomic_10(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3609                       void (*f)(void *, void *, void *)) {
3610   KMP_DEBUG_ASSERT(__kmp_init_serial);
3611 
3612 #ifdef KMP_GOMP_COMPAT
3613   if (__kmp_atomic_mode == 2) {
3614     __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3615   } else
3616 #endif /* KMP_GOMP_COMPAT */
3617     __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid);
3618 
3619   (*f)(lhs, lhs, rhs);
3620 
3621 #ifdef KMP_GOMP_COMPAT
3622   if (__kmp_atomic_mode == 2) {
3623     __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3624   } else
3625 #endif /* KMP_GOMP_COMPAT */
3626     __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid);
3627 }
3628 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
3629 
__kmpc_atomic_16(ident_t * id_ref,int gtid,void * lhs,void * rhs,void (* f)(void *,void *,void *))3630 void __kmpc_atomic_16(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3631                       void (*f)(void *, void *, void *)) {
3632   KMP_DEBUG_ASSERT(__kmp_init_serial);
3633 
3634 #ifdef KMP_GOMP_COMPAT
3635   if (__kmp_atomic_mode == 2) {
3636     __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3637   } else
3638 #endif /* KMP_GOMP_COMPAT */
3639     __kmp_acquire_atomic_lock(&__kmp_atomic_lock_16c, gtid);
3640 
3641   (*f)(lhs, lhs, rhs);
3642 
3643 #ifdef KMP_GOMP_COMPAT
3644   if (__kmp_atomic_mode == 2) {
3645     __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3646   } else
3647 #endif /* KMP_GOMP_COMPAT */
3648     __kmp_release_atomic_lock(&__kmp_atomic_lock_16c, gtid);
3649 }
3650 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
__kmpc_atomic_20(ident_t * id_ref,int gtid,void * lhs,void * rhs,void (* f)(void *,void *,void *))3651 void __kmpc_atomic_20(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3652                       void (*f)(void *, void *, void *)) {
3653   KMP_DEBUG_ASSERT(__kmp_init_serial);
3654 
3655 #ifdef KMP_GOMP_COMPAT
3656   if (__kmp_atomic_mode == 2) {
3657     __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3658   } else
3659 #endif /* KMP_GOMP_COMPAT */
3660     __kmp_acquire_atomic_lock(&__kmp_atomic_lock_20c, gtid);
3661 
3662   (*f)(lhs, lhs, rhs);
3663 
3664 #ifdef KMP_GOMP_COMPAT
3665   if (__kmp_atomic_mode == 2) {
3666     __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3667   } else
3668 #endif /* KMP_GOMP_COMPAT */
3669     __kmp_release_atomic_lock(&__kmp_atomic_lock_20c, gtid);
3670 }
3671 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
__kmpc_atomic_32(ident_t * id_ref,int gtid,void * lhs,void * rhs,void (* f)(void *,void *,void *))3672 void __kmpc_atomic_32(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3673                       void (*f)(void *, void *, void *)) {
3674   KMP_DEBUG_ASSERT(__kmp_init_serial);
3675 
3676 #ifdef KMP_GOMP_COMPAT
3677   if (__kmp_atomic_mode == 2) {
3678     __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3679   } else
3680 #endif /* KMP_GOMP_COMPAT */
3681     __kmp_acquire_atomic_lock(&__kmp_atomic_lock_32c, gtid);
3682 
3683   (*f)(lhs, lhs, rhs);
3684 
3685 #ifdef KMP_GOMP_COMPAT
3686   if (__kmp_atomic_mode == 2) {
3687     __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3688   } else
3689 #endif /* KMP_GOMP_COMPAT */
3690     __kmp_release_atomic_lock(&__kmp_atomic_lock_32c, gtid);
3691 }
3692 
// AC: the same two routines as GOMP_atomic_start/end, but called by our
// compiler; duplicated so that pure Intel code does not use third-party names.
// TODO: consider adding GTID parameter after consultation with Ernesto/Xinmin.
__kmpc_atomic_start(void)3696 void __kmpc_atomic_start(void) {
3697   int gtid = __kmp_entry_gtid();
3698   KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid));
3699   __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3700 }
3701 
__kmpc_atomic_end(void)3702 void __kmpc_atomic_end(void) {
3703   int gtid = __kmp_get_gtid();
3704   KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid));
3705   __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3706 }
3707 
3708 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
3709 
3710 // OpenMP 5.1 compare and swap
3711 
3712 /*!
3713 @param loc Source code location
3714 @param gtid Global thread id
3715 @param x Memory location to operate on
3716 @param e Expected value
3717 @param d Desired value
3718 @return Result of comparison
3719 
3720 Implements Compare And Swap atomic operation.
3721 
3722 Sample code:
3723 #pragma omp atomic compare update capture
3724   { r = x == e; if(r) { x = d; } }
3725 */
__kmpc_atomic_bool_1_cas(ident_t * loc,int gtid,char * x,char e,char d)3726 bool __kmpc_atomic_bool_1_cas(ident_t *loc, int gtid, char *x, char e, char d) {
3727   return KMP_COMPARE_AND_STORE_ACQ8(x, e, d);
3728 }
__kmpc_atomic_bool_2_cas(ident_t * loc,int gtid,short * x,short e,short d)3729 bool __kmpc_atomic_bool_2_cas(ident_t *loc, int gtid, short *x, short e,
3730                               short d) {
3731   return KMP_COMPARE_AND_STORE_ACQ16(x, e, d);
3732 }
__kmpc_atomic_bool_4_cas(ident_t * loc,int gtid,kmp_int32 * x,kmp_int32 e,kmp_int32 d)3733 bool __kmpc_atomic_bool_4_cas(ident_t *loc, int gtid, kmp_int32 *x, kmp_int32 e,
3734                               kmp_int32 d) {
3735   return KMP_COMPARE_AND_STORE_ACQ32(x, e, d);
3736 }
__kmpc_atomic_bool_8_cas(ident_t * loc,int gtid,kmp_int64 * x,kmp_int64 e,kmp_int64 d)3737 bool __kmpc_atomic_bool_8_cas(ident_t *loc, int gtid, kmp_int64 *x, kmp_int64 e,
3738                               kmp_int64 d) {
3739   return KMP_COMPARE_AND_STORE_ACQ64(x, e, d);
3740 }
3741 
3742 /*!
3743 @param loc Source code location
3744 @param gtid Global thread id
3745 @param x Memory location to operate on
3746 @param e Expected value
3747 @param d Desired value
3748 @return Old value of x
3749 
3750 Implements Compare And Swap atomic operation.
3751 
3752 Sample code:
3753 #pragma omp atomic compare update capture
3754   { v = x; if (x == e) { x = d; } }
3755 */
__kmpc_atomic_val_1_cas(ident_t * loc,int gtid,char * x,char e,char d)3756 char __kmpc_atomic_val_1_cas(ident_t *loc, int gtid, char *x, char e, char d) {
3757   return KMP_COMPARE_AND_STORE_RET8(x, e, d);
3758 }
__kmpc_atomic_val_2_cas(ident_t * loc,int gtid,short * x,short e,short d)3759 short __kmpc_atomic_val_2_cas(ident_t *loc, int gtid, short *x, short e,
3760                               short d) {
3761   return KMP_COMPARE_AND_STORE_RET16(x, e, d);
3762 }
__kmpc_atomic_val_4_cas(ident_t * loc,int gtid,kmp_int32 * x,kmp_int32 e,kmp_int32 d)3763 kmp_int32 __kmpc_atomic_val_4_cas(ident_t *loc, int gtid, kmp_int32 *x,
3764                                   kmp_int32 e, kmp_int32 d) {
3765   return KMP_COMPARE_AND_STORE_RET32(x, e, d);
3766 }
__kmpc_atomic_val_8_cas(ident_t * loc,int gtid,kmp_int64 * x,kmp_int64 e,kmp_int64 d)3767 kmp_int64 __kmpc_atomic_val_8_cas(ident_t *loc, int gtid, kmp_int64 *x,
3768                                   kmp_int64 e, kmp_int64 d) {
3769   return KMP_COMPARE_AND_STORE_RET64(x, e, d);
3770 }
3771 
3772 /*!
3773 @param loc Source code location
3774 @param gtid Global thread id
3775 @param x Memory location to operate on
3776 @param e Expected value
3777 @param d Desired value
3778 @param pv Captured value location
3779 @return Result of comparison
3780 
3781 Implements Compare And Swap + Capture atomic operation.
3782 
v gets old value of x if comparison failed, untouched otherwise.
3784 Sample code:
3785 #pragma omp atomic compare update capture
3786   { r = x == e; if(r) { x = d; } else { v = x; } }
3787 */
__kmpc_atomic_bool_1_cas_cpt(ident_t * loc,int gtid,char * x,char e,char d,char * pv)3788 bool __kmpc_atomic_bool_1_cas_cpt(ident_t *loc, int gtid, char *x, char e,
3789                                   char d, char *pv) {
3790   char old = KMP_COMPARE_AND_STORE_RET8(x, e, d);
3791   if (old == e)
3792     return true;
3793   KMP_ASSERT(pv != NULL);
3794   *pv = old;
3795   return false;
3796 }
__kmpc_atomic_bool_2_cas_cpt(ident_t * loc,int gtid,short * x,short e,short d,short * pv)3797 bool __kmpc_atomic_bool_2_cas_cpt(ident_t *loc, int gtid, short *x, short e,
3798                                   short d, short *pv) {
3799   short old = KMP_COMPARE_AND_STORE_RET16(x, e, d);
3800   if (old == e)
3801     return true;
3802   KMP_ASSERT(pv != NULL);
3803   *pv = old;
3804   return false;
3805 }
__kmpc_atomic_bool_4_cas_cpt(ident_t * loc,int gtid,kmp_int32 * x,kmp_int32 e,kmp_int32 d,kmp_int32 * pv)3806 bool __kmpc_atomic_bool_4_cas_cpt(ident_t *loc, int gtid, kmp_int32 *x,
3807                                   kmp_int32 e, kmp_int32 d, kmp_int32 *pv) {
3808   kmp_int32 old = KMP_COMPARE_AND_STORE_RET32(x, e, d);
3809   if (old == e)
3810     return true;
3811   KMP_ASSERT(pv != NULL);
3812   *pv = old;
3813   return false;
3814 }
__kmpc_atomic_bool_8_cas_cpt(ident_t * loc,int gtid,kmp_int64 * x,kmp_int64 e,kmp_int64 d,kmp_int64 * pv)3815 bool __kmpc_atomic_bool_8_cas_cpt(ident_t *loc, int gtid, kmp_int64 *x,
3816                                   kmp_int64 e, kmp_int64 d, kmp_int64 *pv) {
3817   kmp_int64 old = KMP_COMPARE_AND_STORE_RET64(x, e, d);
3818   if (old == e)
3819     return true;
3820   KMP_ASSERT(pv != NULL);
3821   *pv = old;
3822   return false;
3823 }
3824 
3825 /*!
3826 @param loc Source code location
3827 @param gtid Global thread id
3828 @param x Memory location to operate on
3829 @param e Expected value
3830 @param d Desired value
3831 @param pv Captured value location
3832 @return Old value of x
3833 
3834 Implements Compare And Swap + Capture atomic operation.
3835 
v gets new value of x.
3837 Sample code:
3838 #pragma omp atomic compare update capture
3839   { if (x == e) { x = d; }; v = x; }
3840 */
__kmpc_atomic_val_1_cas_cpt(ident_t * loc,int gtid,char * x,char e,char d,char * pv)3841 char __kmpc_atomic_val_1_cas_cpt(ident_t *loc, int gtid, char *x, char e,
3842                                  char d, char *pv) {
3843   char old = KMP_COMPARE_AND_STORE_RET8(x, e, d);
3844   KMP_ASSERT(pv != NULL);
3845   *pv = old == e ? d : old;
3846   return old;
3847 }
__kmpc_atomic_val_2_cas_cpt(ident_t * loc,int gtid,short * x,short e,short d,short * pv)3848 short __kmpc_atomic_val_2_cas_cpt(ident_t *loc, int gtid, short *x, short e,
3849                                   short d, short *pv) {
3850   short old = KMP_COMPARE_AND_STORE_RET16(x, e, d);
3851   KMP_ASSERT(pv != NULL);
3852   *pv = old == e ? d : old;
3853   return old;
3854 }
__kmpc_atomic_val_4_cas_cpt(ident_t * loc,int gtid,kmp_int32 * x,kmp_int32 e,kmp_int32 d,kmp_int32 * pv)3855 kmp_int32 __kmpc_atomic_val_4_cas_cpt(ident_t *loc, int gtid, kmp_int32 *x,
3856                                       kmp_int32 e, kmp_int32 d, kmp_int32 *pv) {
3857   kmp_int32 old = KMP_COMPARE_AND_STORE_RET32(x, e, d);
3858   KMP_ASSERT(pv != NULL);
3859   *pv = old == e ? d : old;
3860   return old;
3861 }
__kmpc_atomic_val_8_cas_cpt(ident_t * loc,int gtid,kmp_int64 * x,kmp_int64 e,kmp_int64 d,kmp_int64 * pv)3862 kmp_int64 __kmpc_atomic_val_8_cas_cpt(ident_t *loc, int gtid, kmp_int64 *x,
3863                                       kmp_int64 e, kmp_int64 d, kmp_int64 *pv) {
3864   kmp_int64 old = KMP_COMPARE_AND_STORE_RET64(x, e, d);
3865   KMP_ASSERT(pv != NULL);
3866   *pv = old == e ? d : old;
3867   return old;
3868 }
3869 
3870 // End OpenMP 5.1 compare + capture
3871 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
3872 
3873 /*!
3874 @}
3875 */
3876 
3877 // end of file
3878