1 /*
2  * Copyright (c) 2015-2019, NVIDIA CORPORATION.  All rights reserved.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  *
16  */
17 
18 /** \file
19  * \brief OpenMP/OpenACC/C++11 atomics expander routines; all targets including
20  * LLVM
21  */
22 
23 #include "expatomics.h"
24 #include "exputil.h"
25 #include "error.h"
26 #include "dtypeutl.h"
27 #include "regutil.h"
28 #include "machreg.h"
29 #include "ilmtp.h"
30 #include "ilm.h"
31 #include "ili.h"
32 #define EXPANDER_DECLARE_INTERNAL
33 #include "expand.h"
34 #include "machar.h"
35 #include "ccffinfo.h"
36 #include "pd.h"
37 #include "symfun.h"
38 
/* File-scope state for atomic expansion.  Unless noted otherwise, each
 * variable is exposed through a get_<name>()/set_<name>() accessor pair
 * defined later in this file; the accessors store and return the value
 * unchanged. */
static int atomic_capture_created;
/* No accessor or other use is visible in this part of the file. */
static int atomic_capture_update_first;
static int atomic_store_created;
static int is_in_atomic;
static int is_in_atomic_read;
static int is_in_atomic_write;
static int is_in_atomic_capture;

/* Stored and returned verbatim by the capture_*_ili accessors.
 * NOTE(review): presumably the ILI indices of the read and update
 * statements of an atomic capture -- confirm against the callers. */
static int capture_read_ili;
static int capture_update_ili;
/* Compared against conversion opcodes (IL_FIX, IL_DFIX, IL_SNGL, ...) by
 * get_atomic_function_ex() to select the prototype family.  It has no
 * accessor in this part of the file. */
static int atomic_typecast_operand;
// This is for the non-commutative operators.
// If it is non-zero, AtomicOp.ili_operand is the
// 1st operand of the atomic binary operator.
// get_atomic_function_ex() uses it to pick the "...r" (reverse) routine
// names for subtract and divide.
static int is_atomic_operand1 = 0;
54 
55 static int cmplx_atomic_opcodes[] = {IL_SCMPLXADD, IL_SCMPLXSUB};
56 static int num_cmplx_opcodes = sizeof(cmplx_atomic_opcodes) / sizeof(int);
57 
58 static int float_atomic_opcodes[] = {IL_FADD, IL_FSUB, IL_FDIV,
59                                      IL_FMUL, IL_FMAX, IL_FMIN};
60 static int num_float_opcodes = sizeof(float_atomic_opcodes) / sizeof(int);
61 
62 static int double_atomic_opcodes[] = {IL_DADD, IL_DSUB, IL_DDIV,
63                                       IL_DMUL, IL_DMAX, IL_DMIN};
64 static int num_double_opcodes = sizeof(double_atomic_opcodes) / sizeof(int);
65 
66 // FIX: 		integer*4 <-- real*4
67 // UFIX: 	unsigned integer*4 <-- real*4
68 // DFIX: 	integer*4 <-- real*8
69 // DFIXU: 	unsigned integer*4 <-- real*8
70 static int int_atomic_opcodes[] = {
71     IL_IADD,    IL_ISUB,   IL_LEQV,   IL_XOR,   IL_IMUL,  IL_UIMUL,
72     IL_AND,     IL_OR,     IL_UIADD,  IL_UISUB, IL_UIMUL, IL_ULSHIFT,
73     IL_URSHIFT, IL_LSHIFT, IL_RSHIFT, IL_IDIV,  IL_UIDIV, IL_IMAX,
74     IL_IMIN,    IL_NOT,    IL_INEG,   IL_LD};
75 static int num_int_opcodes = sizeof(int_atomic_opcodes) / sizeof(int);
76 
77 // FIXK:		integer*8 <-- real*4
78 // FIXUK:	unsigned integer*8 <-- real*4
79 // DFIXK:	integer*8 <-- real*8
80 // DFIXUK:	unsigned integer*8 <-- real*8
81 static int long_atomic_opcodes[] = {IL_KADD, IL_KSUB, IL_KXOR, IL_KMUL,
82                                     IL_UKMUL, IL_KAND, IL_KOR, IL_UKADD,
83                                     IL_UKSUB, IL_KDIV, IL_UKDIV
84                                     ,
85                                     IL_KMAX, IL_KMIN
86 };
87 
/* Slot numbers within the int array passed to GetAtomicOp()/GetSPTRVal().
 * The values are spelled out so that the layout is visible at a glance;
 * they match what implicit enumeration would assign.
 * Only AOP_IDX and TMP_SPTR_IDX are read in this part of the file. */
typedef enum MP_ATOMIC_IDX {
  LHS_IDX = 0,
  RHS_IDX = 1,
  MO_IDX = 2,
  AOP_IDX = 3,      /* slot read by GetAtomicOp() */
  UNUSED1 = 4,
  TMP_SPTR_IDX = 5  /* slot read by GetSPTRVal() */
} MP_ATOMIC_IDX;
96 
97 static int num_long_opcodes = sizeof(long_atomic_opcodes) / sizeof(int);
98 
/* Scratch record describing the atomic load/store currently being expanded.
 * Within this part of the file it is filled in by get_atomic_write_opcode()
 * and get_atomic_read_opcode(), and ili_operand is consumed by
 * create_atomic_write_seq(). */
static struct {
  int atomic_operand; /* read: the load ILI; write: set to 0 */
  int ldst_point;     /* address operand of the load or store ILI */
  int ldst_nme;       /* NME operand of the load or store ILI */
  int ili_operand;    /* write: the value being stored; read: set to 0 */
} AtomicOp;
105 
/* Accessors for two slots of an atomic descriptor array (an int array
 * indexed by MP_ATOMIC_IDX):
 *   GetAtomicOp(A) - the AOP_IDX slot
 *   GetSPTRVal(A)  - the TMP_SPTR_IDX slot
 * The C++ build uses typed inline functions; the C build uses macros. */
#ifdef __cplusplus

inline static ATOMIC_RMW_OP GetAtomicOp(int *array) {
  return static_cast<ATOMIC_RMW_OP>(array[AOP_IDX]);
}

inline static SPTR GetSPTRVal(int *array) {
  return static_cast<SPTR>(array[TMP_SPTR_IDX]);
}

#else // ! C++

/* The argument is parenthesized so that an expression argument such as
 * `base + 1` is subscripted as a whole instead of mis-expanding. */
#define GetAtomicOp(A)  ((A)[AOP_IDX])
#define GetSPTRVal(A)   ((A)[TMP_SPTR_IDX])

#endif // C++
122 
123 int
get_atomic_function_ex(ILI_OP opcode)124 get_atomic_function_ex(ILI_OP opcode)
125 {
126   // the last two/three(if "r" is the last letter) letters of the function name
127   // i: integer 32bit
128   // f: float, single precision 32bit
129   // d: double,double precision 64bit
130   // u: unsigned integer 32bit
131   // k: integer 64bit
132   // l: unsigned 64bit
133   // r: reverse, non-commutable operator
134   // 32bit integer
135   if (atomic_typecast_operand == IL_FIX) {
136     switch (opcode) {
137     case IL_FADD:
138       return mk_prototype("atomicaddif", "pure", DT_INT, 2, DT_CPTR, DT_FLOAT);
139     case IL_FSUB:
140       if (is_atomic_operand1)
141         return mk_prototype("atomicsubifr", "pure", DT_INT, 2, DT_CPTR, DT_FLOAT);
142       else
143         return mk_prototype("atomicsubif", "pure", DT_INT, 2, DT_CPTR, DT_FLOAT);
144     case IL_FMUL:
145       return mk_prototype("atomicmulif", "pure", DT_INT, 2, DT_CPTR, DT_FLOAT);
146     case IL_FDIV:
147       if (is_atomic_operand1)
148         return mk_prototype("atomicdivifr", "pure", DT_INT, 2, DT_CPTR, DT_FLOAT);
149       else
150         return mk_prototype("atomicdivif", "pure", DT_INT, 2, DT_CPTR, DT_FLOAT);
151     case IL_FMAX:
152       return mk_prototype("atomicmaxif", "pure", DT_INT, 2, DT_CPTR, DT_FLOAT);
153     case IL_FMIN:
154       return mk_prototype("atomicminif", "pure", DT_INT, 2, DT_CPTR, DT_FLOAT);
155     default:;
156     }
157   } else if (atomic_typecast_operand == IL_UFIX) {
158     switch (opcode) {
159     case IL_FADD:
160       return mk_prototype("atomicadduf", "pure", DT_UINT, 2, DT_CPTR, DT_FLOAT);
161     case IL_FSUB:
162       if (is_atomic_operand1)
163         return mk_prototype("atomicsubufr", "pure", DT_UINT, 2, DT_CPTR, DT_FLOAT);
164       else
165         return mk_prototype("atomicsubuf", "pure", DT_UINT, 2, DT_CPTR, DT_FLOAT);
166     case IL_FMUL:
167       return mk_prototype("atomicmuluf", "pure", DT_UINT, 2, DT_CPTR, DT_FLOAT);
168     case IL_FDIV:
169       if (is_atomic_operand1)
170         return mk_prototype("atomicdivufr", "pure", DT_UINT, 2, DT_CPTR, DT_FLOAT);
171       else
172         return mk_prototype("atomicdivuf", "pure", DT_UINT, 2, DT_CPTR, DT_FLOAT);
173     case IL_FMAX:
174       return mk_prototype("atomicmaxuf", "pure", DT_UINT, 2, DT_CPTR, DT_FLOAT);
175     case IL_FMIN:
176       return mk_prototype("atomicminuf", "pure", DT_UINT, 2, DT_CPTR, DT_FLOAT);
177     default:;
178     }
179   } else if (atomic_typecast_operand == IL_DFIX) {
180     switch (opcode) {
181     case IL_DADD:
182       return mk_prototype("atomicaddid", "pure", DT_INT, 2, DT_CPTR, DT_DBLE);
183     case IL_DSUB:
184       if (is_atomic_operand1)
185         return mk_prototype("atomicsubidr", "pure", DT_INT, 2, DT_CPTR, DT_DBLE);
186       else
187         return mk_prototype("atomicsubid", "pure", DT_INT, 2, DT_CPTR, DT_DBLE);
188     case IL_DMUL:
189       return mk_prototype("atomicmulid", "pure", DT_INT, 2, DT_CPTR, DT_DBLE);
190     case IL_DDIV:
191       if (is_atomic_operand1)
192         return mk_prototype("atomicdividr", "pure", DT_INT, 2, DT_CPTR, DT_DBLE);
193       else
194         return mk_prototype("atomicdivid", "pure", DT_INT, 2, DT_CPTR, DT_DBLE);
195     case IL_DMAX:
196       return mk_prototype("atomicmaxid", "pure", DT_INT, 2, DT_CPTR, DT_DBLE);
197     case IL_DMIN:
198       return mk_prototype("atomicminid", "pure", DT_INT, 2, DT_CPTR, DT_DBLE);
199     default:;
200     }
201   } else if (atomic_typecast_operand == IL_DFIXU) {
202     switch (opcode) {
203     case IL_DADD:
204       return mk_prototype("atomicaddud", "pure", DT_UINT, 2, DT_CPTR, DT_DBLE);
205     case IL_DSUB:
206       if (is_atomic_operand1)
207         return mk_prototype("atomicsubudr", "pure", DT_UINT, 2, DT_CPTR, DT_DBLE);
208       else
209         return mk_prototype("atomicsubud", "pure", DT_UINT, 2, DT_CPTR, DT_DBLE);
210     case IL_DMUL:
211       return mk_prototype("atomicmulud", "pure", DT_UINT, 2, DT_CPTR, DT_DBLE);
212     case IL_DDIV:
213       if (is_atomic_operand1)
214         return mk_prototype("atomicdivudr", "pure", DT_UINT, 2, DT_CPTR, DT_DBLE);
215       else
216         return mk_prototype("atomicdivud", "pure", DT_UINT, 2, DT_CPTR, DT_DBLE);
217     case IL_DMAX:
218       return mk_prototype("atomicmaxud", "pure", DT_UINT, 2, DT_CPTR, DT_DBLE);
219     case IL_DMIN:
220       return mk_prototype("atomicminud", "pure", DT_UINT, 2, DT_CPTR, DT_DBLE);
221     default:;
222     }
223   }
224   // 64bits integer
225   else if (atomic_typecast_operand == IL_FIXK) {
226     switch (opcode) {
227     case IL_FADD:
228       return mk_prototype("atomicaddkf", "pure", DT_INT8, 2, DT_CPTR, DT_FLOAT);
229     case IL_FSUB:
230       if (is_atomic_operand1)
231         return mk_prototype("atomicsubkfr", "pure", DT_INT8, 2, DT_CPTR, DT_FLOAT);
232       else
233         return mk_prototype("atomicsubkf", "pure", DT_INT8, 2, DT_CPTR, DT_FLOAT);
234     case IL_FMUL:
235       return mk_prototype("atomicmulkf", "pure", DT_INT8, 2, DT_CPTR, DT_FLOAT);
236     case IL_FDIV:
237       if (is_atomic_operand1)
238         return mk_prototype("atomicdivkfr", "pure", DT_INT8, 2, DT_CPTR, DT_FLOAT);
239       else
240         return mk_prototype("atomicdivkf", "pure", DT_INT8, 2, DT_CPTR, DT_FLOAT);
241     case IL_FMAX:
242       return mk_prototype("atomicmaxkf", "pure", DT_INT8, 2, DT_CPTR, DT_FLOAT);
243     case IL_FMIN:
244       return mk_prototype("atomicminkf", "pure", DT_INT8, 2, DT_CPTR, DT_FLOAT);
245     default:;
246     }
247   } else if (atomic_typecast_operand == IL_FIXUK) {
248     switch (opcode) {
249     case IL_FADD:
250       return mk_prototype("atomicaddlf", "pure", DT_UINT8, 2, DT_CPTR, DT_FLOAT);
251     case IL_FSUB:
252       if (is_atomic_operand1)
253         return mk_prototype("atomicsublfr", "pure", DT_UINT8, 2, DT_CPTR, DT_FLOAT);
254       else
255         return mk_prototype("atomicsublf", "pure", DT_UINT8, 2, DT_CPTR, DT_FLOAT);
256     case IL_FMUL:
257       return mk_prototype("atomicmullf", "pure", DT_UINT8, 2, DT_CPTR, DT_FLOAT);
258     case IL_FDIV:
259       if (is_atomic_operand1)
260         return mk_prototype("atomicdivlfr", "pure", DT_UINT8, 2, DT_CPTR, DT_FLOAT);
261       else
262         return mk_prototype("atomicdivlf", "pure", DT_UINT8, 2, DT_CPTR, DT_FLOAT);
263     case IL_FMAX:
264       return mk_prototype("atomicmaxlf", "pure", DT_UINT8, 2, DT_CPTR, DT_FLOAT);
265     case IL_FMIN:
266       return mk_prototype("atomicminlf", "pure", DT_UINT8, 2, DT_CPTR, DT_FLOAT);
267     default:;
268     }
269   } else if (atomic_typecast_operand == IL_DFIXK) {
270     switch (opcode) {
271     case IL_DADD:
272       return mk_prototype("atomicaddkd", "pure", DT_INT8, 2, DT_CPTR, DT_DBLE);
273     case IL_DSUB:
274       if (is_atomic_operand1)
275         return mk_prototype("atomicsubkdr", "pure", DT_INT8, 2, DT_CPTR, DT_DBLE);
276       else
277         return mk_prototype("atomicsubkd", "pure", DT_INT8, 2, DT_CPTR, DT_DBLE);
278     case IL_DMUL:
279       return mk_prototype("atomicmulkd", "pure", DT_INT8, 2, DT_CPTR, DT_DBLE);
280     case IL_DDIV:
281       if (is_atomic_operand1)
282         return mk_prototype("atomicdivkdr", "pure", DT_INT8, 2, DT_CPTR, DT_DBLE);
283       else
284         return mk_prototype("atomicdivkd", "pure", DT_INT8, 2, DT_CPTR, DT_DBLE);
285     case IL_DMAX:
286       return mk_prototype("atomicmaxkd", "pure", DT_INT8, 2, DT_CPTR, DT_DBLE);
287     case IL_DMIN:
288       return mk_prototype("atomicminkd", "pure", DT_INT8, 2, DT_CPTR, DT_DBLE);
289     default:;
290     }
291   } else if (atomic_typecast_operand == IL_DFIXUK) {
292     switch (opcode) {
293     case IL_DADD:
294       return mk_prototype("atomicaddld", "pure", DT_UINT8, 2, DT_CPTR, DT_DBLE);
295     case IL_DSUB:
296       if (is_atomic_operand1)
297         return mk_prototype("atomicsubldr", "pure", DT_UINT8, 2, DT_CPTR, DT_DBLE);
298       else
299         return mk_prototype("atomicsubld", "pure", DT_UINT8, 2, DT_CPTR, DT_DBLE);
300     case IL_DMUL:
301       return mk_prototype("atomicmulld", "pure", DT_UINT8, 2, DT_CPTR, DT_DBLE);
302     case IL_DDIV:
303       if (is_atomic_operand1)
304         return mk_prototype("atomicdivldr", "pure", DT_UINT8, 2, DT_CPTR, DT_DBLE);
305       else
306         return mk_prototype("atomicdivld", "pure", DT_UINT8, 2, DT_CPTR, DT_DBLE);
307     case IL_DMAX:
308       return mk_prototype("atomicmaxld", "pure", DT_UINT8, 2, DT_CPTR, DT_DBLE);
309     case IL_DMIN:
310       return mk_prototype("atomicminld", "pure", DT_UINT8, 2, DT_CPTR, DT_DBLE);
311     default:;
312     }
313   } else if (atomic_typecast_operand == IL_SNGL) {
314     switch (opcode) {
315     case IL_DADD:
316      return mk_prototype("atomicaddfd", "pure", DT_FLOAT, 2, DT_CPTR, DT_DBLE);
317     case IL_DSUB:
318       if (is_atomic_operand1)
319         return mk_prototype("atomicsubfdr", "pure", DT_FLOAT, 2, DT_CPTR, DT_DBLE);
320       else
321         return mk_prototype("atomicsubfd", "pure", DT_FLOAT, 2, DT_CPTR, DT_DBLE);
322     case IL_DMUL:
323       return mk_prototype("atomicmulfd", "pure", DT_FLOAT, 2, DT_CPTR, DT_DBLE);
324     case IL_DDIV:
325       if (is_atomic_operand1)
326         return mk_prototype("atomicdivfdr", "pure", DT_FLOAT, 2, DT_CPTR, DT_DBLE);
327       else
328         return mk_prototype("atomicdivfd", "pure", DT_FLOAT, 2, DT_CPTR, DT_DBLE);
329     case IL_DMAX:
330       return mk_prototype("atomicmaxfd", "pure", DT_FLOAT, 2, DT_CPTR, DT_DBLE);
331     case IL_DMIN:
332       return mk_prototype("atomicminfd", "pure", DT_FLOAT, 2, DT_CPTR, DT_DBLE);
333     default:;
334     }
335   }
336 
337   error(S_0155_OP1_OP2, ERR_Severe, gbl.lineno, "Invalid atomic operation.", CNULL);
338   return 0;
339 }
340 
341 int
get_atomic_function(ILI_OP opcode)342 get_atomic_function(ILI_OP opcode)
343 {
344   switch (opcode) {
345   /*
346    * Update:
347    */
348   case IL_IMUL:
349     return mk_prototype("atomicmuli", "pure", DT_INT, 2, DT_CPTR, DT_INT);
350   case IL_UIMUL:
351     return mk_prototype("atomicmulu", "pure", DT_UINT, 2, DT_CPTR, DT_UINT);
352   case IL_KMUL:
353     return mk_prototype("atomicmulil", "pure", DT_INT8, 2, DT_CPTR, DT_INT8);
354   case IL_UKMUL:
355     return mk_prototype("atomicmulul", "pure", DT_UINT8, 2, DT_CPTR, DT_UINT8);
356   case IL_FMUL:
357     return mk_prototype("atomicmulf", "pure", DT_FLOAT, 2, DT_CPTR, DT_FLOAT);
358   case IL_DMUL:
359     return mk_prototype("atomicmuld", "pure", DT_DBLE, 2, DT_CPTR, DT_DBLE);
360   case IL_IMAX:
361     return mk_prototype("atomicmaxi", "pure", DT_INT, 2, DT_CPTR, DT_INT);
362   case IL_IMIN:
363     return mk_prototype("atomicmini", "pure", DT_INT, 2, DT_CPTR, DT_INT);
364   case IL_KMAX:
365     return mk_prototype("atomicmaxil", "pure", DT_INT8, 2, DT_CPTR, DT_INT8);
366   case IL_KMIN:
367     return mk_prototype("atomicminil", "pure", DT_INT8, 2, DT_CPTR, DT_INT8);
368   case IL_NOT:
369     return mk_prototype("atomicnoti", "pure", DT_INT, 2, DT_CPTR, DT_INT);
370   case IL_XOR:
371     return mk_prototype("atomicxori", "pure", DT_INT, 2, DT_CPTR, DT_INT);
372   case IL_LEQV:
373     return mk_prototype("atomicleqvi", "pure", DT_INT, 2, DT_CPTR, DT_INT);
374   case IL_KXOR:
375     return mk_prototype("atomicxorll", "pure", DT_INT8, 2, DT_CPTR, DT_INT8);
376   case IL_AND:
377     return mk_prototype("atomicandi", "pure", DT_INT, 2, DT_CPTR, DT_INT);
378   case IL_KAND:
379     return mk_prototype("atomicandll", "pure", DT_INT8, 2, DT_CPTR, DT_INT8);
380   case IL_OR:
381     return mk_prototype("atomicori", "pure", DT_INT, 2, DT_CPTR, DT_INT);
382   case IL_KOR:
383     return mk_prototype("atomicorll", "pure", DT_INT8, 2, DT_CPTR, DT_INT8);
384   case IL_IADD:
385     return mk_prototype("atomicaddi", "pure", DT_INT, 2, DT_CPTR, DT_INT);
386   case IL_KADD:
387     return mk_prototype("atomicaddil", "pure", DT_INT8, 2, DT_CPTR, DT_INT8);
388   case IL_ISUB:
389     return mk_prototype("atomicsubi", "pure", DT_INT, 2, DT_CPTR, DT_INT);
390   case IL_KSUB:
391     return mk_prototype("atomicsubil", "pure", DT_INT8, 2, DT_CPTR, DT_INT8);
392   case IL_UIADD:
393     return mk_prototype("atomicaddu", "pure", DT_UINT, 2, DT_CPTR, DT_UINT);
394   case IL_UKADD:
395     return mk_prototype("atomicaddul", "pure", DT_UINT8, 2, DT_CPTR, DT_UINT8);
396   case IL_UISUB:
397     return mk_prototype("atomicsubu", "pure", DT_UINT, 2, DT_CPTR, DT_UINT);
398   case IL_UKSUB:
399     return mk_prototype("atomicsubull", "pure", DT_UINT8, 2, DT_CPTR, DT_UINT8);
400   case IL_INEG:
401     return mk_prototype("atomicnegi", "pure", DT_INT, 2, DT_CPTR, DT_INT);
402   case IL_ULSHIFT:
403     return mk_prototype("atomiclshiftu", "pure", DT_UINT, 2, DT_CPTR, DT_UINT);
404   case IL_URSHIFT:
405     return mk_prototype("atomicrshiftu", "pure", DT_UINT, 2, DT_CPTR, DT_UINT);
406   case IL_LSHIFT:
407     return mk_prototype("atomiclshifti", "pure", DT_INT, 2, DT_CPTR, DT_INT);
408   case IL_RSHIFT:
409     return mk_prototype("atomicrshifti", "pure", DT_INT, 2, DT_CPTR, DT_INT);
410   case IL_FADD:
411     return mk_prototype("atomicaddf", "pure", DT_FLOAT, 2, DT_CPTR, DT_FLOAT);
412   case IL_FSUB:
413     return mk_prototype("atomicsubf", "pure", DT_FLOAT, 2, DT_CPTR, DT_FLOAT);
414   case IL_FMAX:
415     return mk_prototype("atomicmaxf", "pure", DT_FLOAT, 2, DT_CPTR, DT_FLOAT);
416   case IL_FMIN:
417     return mk_prototype("atomicminf", "pure", DT_FLOAT, 2, DT_CPTR, DT_FLOAT);
418   case IL_DADD:
419     return mk_prototype("atomicaddd", "pure", DT_DBLE, 2, DT_CPTR, DT_DBLE);
420   case IL_DSUB:
421     return mk_prototype("atomicsubd", "pure", DT_DBLE, 2, DT_CPTR, DT_DBLE);
422   case IL_DMAX:
423     return mk_prototype("atomicmaxd", "pure", DT_DBLE, 2, DT_CPTR, DT_DBLE);
424   case IL_DMIN:
425     return mk_prototype("atomicmind", "pure", DT_DBLE, 2, DT_CPTR, DT_DBLE);
426   case IL_IDIV:
427     return mk_prototype("atomicdivi", "pure", DT_INT, 2, DT_CPTR, DT_INT);
428   case IL_KDIV:
429     return mk_prototype("atomicdivil", "pure", DT_INT8, 2, DT_CPTR, DT_INT8);
430   case IL_UIDIV:
431     return mk_prototype("atomicdivu", "pure", DT_UINT, 2, DT_CPTR, DT_UINT);
432   case IL_UKDIV:
433     return mk_prototype("atomicdivul", "pure", DT_UINT8, 2, DT_CPTR, DT_UINT8);
434   case IL_FDIV:
435     return mk_prototype("atomicdivf", "pure", DT_FLOAT, 2, DT_CPTR, DT_FLOAT);
436   case IL_DDIV:
437     return mk_prototype("atomicdivd", "pure", DT_DBLE, 2, DT_CPTR, DT_DBLE);
438   /*
439    * Read:
440    */
441 #define DT_VOID_NONE DT_NONE
442   case IL_LD:
443     return mk_prototype("atomicloadi", "pure", DT_VOID_NONE, 2, DT_CPTR, DT_CPTR);
444   case IL_LDSP:
445     return mk_prototype("atomicloadf", "pure", DT_VOID_NONE, 2, DT_CPTR, DT_CPTR);
446   case IL_LDDP:
447     return mk_prototype("atomicloadd", "pure", DT_VOID_NONE, 2, DT_CPTR, DT_CPTR);
448   case IL_LDKR:
449     return mk_prototype("atomicloadl", "pure", DT_VOID_NONE, 2, DT_CPTR, DT_CPTR);
450   /*
451    * Write:
452    */
453   case IL_ST:
454     return mk_prototype("atomicexchi", "pure", DT_INT, 2, DT_CPTR, DT_INT);
455   case IL_STSP:
456     return mk_prototype("atomicexchf", "pure", DT_FLOAT, 2, DT_CPTR, DT_FLOAT);
457   case IL_STDP:
458     return mk_prototype("atomicexchd", "pure", DT_DBLE, 2, DT_CPTR, DT_DBLE);
459   case IL_STKR:
460     return mk_prototype("atomicexchul", "pure", DT_INT8, 2, DT_CPTR, DT_INT8);
461   case IL_SCMPLXADD:
462     return mk_prototype("atomicaddcmplx", "pure", DT_VOID_NONE, 3, DT_CPTR, DT_FLOAT, DT_FLOAT);
463   case IL_SCMPLXSUB:
464     return mk_prototype("atomicsubcmplx", "pure", DT_VOID_NONE, 3, DT_CPTR, DT_FLOAT, DT_FLOAT);
465   default:
466     interr("Unsupported atomic opcode: ", opcode, ERR_Severe);
467     return 0;
468   }
469 }
470 
471 int
get_capture_read_ili(void)472 get_capture_read_ili(void)
473 {
474   return capture_read_ili;
475 }
476 
477 void
set_capture_read_ili(int x)478 set_capture_read_ili(int x)
479 {
480   capture_read_ili = x;
481 }
482 
483 int
get_capture_update_ili(void)484 get_capture_update_ili(void)
485 {
486   return capture_update_ili;
487 }
488 
489 void
set_capture_update_ili(int x)490 set_capture_update_ili(int x)
491 {
492   capture_update_ili = x;
493 }
494 
495 int
get_is_in_atomic(void)496 get_is_in_atomic(void)
497 {
498   return is_in_atomic;
499 }
500 
501 void
set_is_in_atomic(int x)502 set_is_in_atomic(int x)
503 {
504   is_in_atomic = x;
505 }
506 
507 int
get_is_in_atomic_read(void)508 get_is_in_atomic_read(void)
509 {
510   return is_in_atomic_read;
511 }
512 
513 void
set_is_in_atomic_read(int x)514 set_is_in_atomic_read(int x)
515 {
516   is_in_atomic_read = x;
517 }
518 
519 int
get_is_in_atomic_write(void)520 get_is_in_atomic_write(void)
521 {
522   return is_in_atomic_write;
523 }
524 
525 void
set_is_in_atomic_write(int x)526 set_is_in_atomic_write(int x)
527 {
528   is_in_atomic_write = x;
529 }
530 
531 int
get_is_in_atomic_capture(void)532 get_is_in_atomic_capture(void)
533 {
534   return is_in_atomic_capture;
535 }
536 
537 void
set_is_in_atomic_capture(int x)538 set_is_in_atomic_capture(int x)
539 {
540   is_in_atomic_capture = x;
541 }
542 
543 int
get_atomic_capture_created(void)544 get_atomic_capture_created(void)
545 {
546   return atomic_capture_created;
547 }
548 
549 void
set_atomic_capture_created(int x)550 set_atomic_capture_created(int x)
551 {
552   atomic_capture_created = x;
553 }
554 
555 int
get_atomic_store_created(void)556 get_atomic_store_created(void)
557 {
558   return atomic_store_created;
559 }
560 
561 void
set_atomic_store_created(int x)562 set_atomic_store_created(int x)
563 {
564   atomic_store_created = x;
565 }
566 
567 int
get_atomic_write_opcode(int current_ili)568 get_atomic_write_opcode(int current_ili)
569 {
570   int ili = current_ili;
571   int store_opcode;
572   int store_value;
573   int store_pt, store_nme;
574 
575   store_opcode = ILI_OPC(ili);
576   store_value = ILI_OPND(ili, 1);
577   store_pt = ILI_OPND(ili, 2);
578   store_nme = ILI_OPND(ili, 3);
579 
580   if (store_opcode != IL_ST && store_opcode != IL_STDP &&
581       store_opcode != IL_STSP && store_opcode != IL_STKR) {
582     /* Rely on the caller to issue an error if necessary */
583     return 0;
584   }
585 
586   AtomicOp.atomic_operand = 0;
587   AtomicOp.ldst_point = store_pt;
588   AtomicOp.ldst_nme = store_nme;
589   AtomicOp.ili_operand = store_value;
590 
591   return store_opcode;
592 }
593 
594 /* This function is used to set the address token flag which
595  * will be used in the later accelerator code generation
596  * For example, if the atomic operates on scalar variable
597  * this flag may indicate an optimization to place this variable
598  * in accelerator's shared memory */
set_store_pt_addrtkn_flg(int store_pt)599 static void set_store_pt_addrtkn_flg(int store_pt)
600 {
601   int store_symbol;
602   if (ILI_OPC(store_pt) == IL_ACON) {
603     store_symbol = ILI_OPND(store_pt, 1);
604     store_symbol = CONVAL1G(store_symbol);
605     ADDRTKNP(store_symbol, 1);
606   } else if (ILI_OPC(store_pt) == IL_AADD || ILI_OPC(store_pt) == IL_ASUB) {
607     int acon_ili;
608     acon_ili = ILI_OPND(store_pt, 1);
609     if (ILI_OPC(acon_ili) != IL_ACON) {
610       while (ILI_OPC(acon_ili) == IL_AADD || ILI_OPC(acon_ili) == IL_ASUB) {
611         acon_ili = ILI_OPND(acon_ili, 1);
612       }
613       /* If the base is not a constant (perhaps it is a compiler temp)
614        ** then don't try and mark.
615        **/
616       if (ILI_OPC(acon_ili) == IL_ACON) {
617         store_symbol = ILI_OPND(acon_ili, 1);
618         store_symbol = CONVAL1G(store_symbol);
619         ADDRTKNP(store_symbol, 1);
620       }
621     } else {
622       store_symbol = ILI_OPND(acon_ili, 1);
623       store_symbol = CONVAL1G(store_symbol);
624       ADDRTKNP(store_symbol, 1);
625     }
626   }
627 }
628 
629 int
create_atomic_capture_seq(int update_ili,int read_ili,int capture_first)630 create_atomic_capture_seq(int update_ili, int read_ili, int capture_first)
631 {
632   int function;
633   ILI_OP intarg_opcode, floatarg_opcode, doublearg_opcode, longarg_opcode;
634   int ld_opcode;
635   ILI_OP st_opcode, arg_opcode;
636   int store_pt, store_nme, arg, garg;
637   int store_symbol;
638   int argreg = 0;
639   int update_operand;
640   int load_pt1, load_pt2;
641   int op1, op2, opc;
642   int update_op;
643   ILI_OP return_op;
644   int result;
645   int result_arg;
646   int msize;
647   int allow_capture_last = 1;
648   int arg_dt = 0;
649 
650 #if defined(TARGET_X8664)
651   intarg_opcode = IL_DAIR;
652   floatarg_opcode = IL_DASP;
653   doublearg_opcode = IL_DADP;
654   longarg_opcode = IL_DAKR;
655 #else
656   intarg_opcode = IL_ARGIR;
657   floatarg_opcode = IL_ARGSP;
658   doublearg_opcode = IL_ARGDP;
659   longarg_opcode = IL_ARGKR;
660 #endif
661 
662   st_opcode = ILI_OPC(read_ili);
663   if (st_opcode != ILI_OPC(update_ili)) {
664     /* This is not a legal atomic capture--data type mismatch */
665     interr("Atomic Capture: Mismatched storage operations.", 0, ERR_Severe);
666   }
667 
668   switch (st_opcode) {
669   case IL_ST:
670     arg_opcode = intarg_opcode;
671     ld_opcode = IL_LD;
672     return_op = IL_DFRIR;
673     result_arg = IR_RETVAL;
674     arg_dt = DT_INT;
675 #if defined(TARGET_X8664)
676     argreg = ARG_IR(1);
677 #else
678 #endif
679     break;
680   case IL_STDP:
681     arg_opcode = doublearg_opcode;
682     ld_opcode = IL_LDDP;
683     arg_dt = DT_DBLE;
684 #if defined(TARGET_X8664)
685     argreg = ARG_XR(0);
686     return_op = IL_DFRDP;
687     result_arg = FR_RETVAL;
688 #else
689     return_op = IL_DFRDP;
690     result_arg = FR_RETVAL;
691 #endif
692     break;
693   case IL_STSP:
694     arg_opcode = floatarg_opcode;
695     ld_opcode = IL_LDSP;
696     arg_dt = DT_FLOAT;
697 #if defined(TARGET_X8664)
698     argreg = ARG_XR(0);
699     return_op = IL_DFRSP;
700     result_arg = FR_RETVAL;
701 #else
702     return_op = IL_DFRSP;
703     result_arg = FR_RETVAL;
704 #endif
705     break;
706   case IL_STKR:
707     arg_opcode = longarg_opcode;
708     ld_opcode = IL_LDKR;
709     arg_dt = DT_INT8;
710 #if defined(TARGET_X8664)
711     return_op = IL_DFRKR;
712     result_arg = KR_RETVAL;
713     argreg = ARG_IR(1);
714 #else
715     return_op = IL_DFRKR;
716     result_arg = KR_RETVAL;
717 #endif
718     break;
719   default:
720     interr("Create: Unexpected atomic store opcode", st_opcode, ERR_Severe);
721     break;
722   }
723 
724   op1 = ILI_OPND(update_ili, 1);
725   op2 = ILI_OPND(update_ili, 2);
726   store_nme = ILI_OPND(update_ili, 3);
727 
728   update_op = op1;
729   function = get_atomic_function(ILI_OPC(op1));
730 
731   load_pt1 = load_pt2 = -1;
732 
733   if (ILI_OPC(ILI_OPND(update_op, 1)) == IL_CSEIR) {
734     /* Look through the CSEIR to the "real" load.
735      * If the read_ili is done off of a CSE, make sure
736      * it is the same.
737      */
738     if (ILI_OPC(ILI_OPND(read_ili, 1)) == IL_CSEIR) {
739       if (ILI_OPND(read_ili, 1) != ILI_OPND(update_op, 1)) {
740         interr("Mismatched CSE (1).\n", 0, ERR_unused);
741       } else {
742         allow_capture_last = 0;
743       }
744     }
745     load_pt1 = ILI_OPND(ILI_OPND(update_op, 1), 1);
746   } else if (ILI_OPC(ILI_OPND(update_op, 1)) == ld_opcode) {
747     load_pt1 = ILI_OPND(update_op, 1);
748   }
749 
750   if (ILI_OPC(ILI_OPND(update_op, 2)) == IL_CSEIR) {
751     /* Look through the CSEIR to the "real" load.
752      * If the read_ili is done off of a CSE, make sure
753      * it is the same.
754      */
755     if (ILI_OPC(ILI_OPND(read_ili, 1)) == IL_CSEIR) {
756       if (ILI_OPND(read_ili, 1) != ILI_OPND(update_op, 2)) {
757         interr("Mismatched CSE (2).\n", 0, ERR_unused);
758       } else {
759         allow_capture_last = 0;
760       }
761     }
762     load_pt2 = ILI_OPND(ILI_OPND(update_op, 2), 1);
763   } else if (ILI_OPC(ILI_OPND(update_op, 2)) == ld_opcode) {
764     load_pt2 = ILI_OPND(update_op, 2);
765   }
766 
767   if (load_pt1 == -1 && load_pt2 == -1) {
768     interr("Can't find matching load operation in atomic capture.", 0, ERR_Severe);
769     return 0;
770   }
771 
772   store_pt = op2;
773   if (ILI_OPC(store_pt) == IL_CSEAR) {
774     store_pt = ILI_OPND(store_pt, 1);
775   }
776   set_store_pt_addrtkn_flg(store_pt);
777 
778   /* Determine which operand from update_op comes from the load,
779    * and which operand comes from the "updating" part.
780    */
781   if (ILI_OPND(load_pt1, 1) == store_pt) {
782     /* The first argument for the update_op was the load/store point.
783      * Use the second as the update_operand.
784      */
785     update_operand = ILI_OPND(update_op, 2);
786   } else if (ILI_OPND(load_pt2, 1) == store_pt) {
787     /* The second argument for the update_op was the load/store point.
788      * Use the first as the update_operand.
789      */
790     update_operand = ILI_OPND(update_op, 1);
791   } else {
792     interr("Can't find load operation in atomic capture.", 0, ERR_Severe);
793     return 0;
794   }
795 
796   arg = ad1ili(IL_NULL, 0);
797 #if defined(TARGET_X8664)
798   arg = ad3ili(arg_opcode, update_operand, argreg, arg);
799   arg = ad3ili(IL_DAAR, store_pt, ARG_IR(0), arg);
800 #else
801   arg = ad3ili(arg_opcode, update_operand, arg, arg);
802   arg = ad3ili(IL_ARGAR, store_pt, arg, 0);
803 #endif
804   garg = ad1ili(IL_NULL, 0);
805   garg = ad4ili(IL_GARG, update_operand, garg, arg_dt, 0);
806   garg = ad4ili(IL_GARG, store_pt, garg, DT_CPTR, store_nme);
807   arg = ad2ili(IL_JSR, function, arg);
808   garg = ad3ili(IL_GJSR, function, garg, 0);
809   ILI_ALT(arg) = garg;
810   arg = ad2ili(return_op, arg, result_arg);
811   if (!capture_first && allow_capture_last) {
812     arg = ad2ili(ILI_OPC(op1), arg, update_operand);
813   }
814   /* Replicate the store for the original read_ili, except the
815    * value that is being stored.
816    */
817   result = ad4ili(st_opcode, arg, ILI_OPND(read_ili, 2), ILI_OPND(read_ili, 3),
818                   ILI_OPND(read_ili, 4));
819   return result;
820 }
821 
822 int
create_atomic_write_seq(int store_ili)823 create_atomic_write_seq(int store_ili)
824 {
825   int arg, garg;
826   int function;
827   int store_pt, store_nme;
828   ILI_OP arg_opcode;
829   ILI_OP intarg_opcode, floatarg_opcode, doublearg_opcode, longarg_opcode;
830   int argreg;
831   int arg_dt = 0;
832 
833 #if defined(TARGET_X8664)
834   intarg_opcode = IL_DAIR;
835   floatarg_opcode = IL_DASP;
836   doublearg_opcode = IL_DADP;
837   longarg_opcode = IL_DAKR;
838 #else
839   intarg_opcode = IL_ARGIR;
840   floatarg_opcode = IL_ARGSP;
841   doublearg_opcode = IL_ARGDP;
842   longarg_opcode = IL_ARGKR;
843 #endif
844 
845   switch (ILI_OPC(store_ili)) {
846   case IL_ST:
847     arg_dt = DT_INT;
848     arg_opcode = intarg_opcode;
849 #if defined(TARGET_X8664)
850     argreg = ARG_IR(1);
851 #endif
852     break;
853   case IL_STDP:
854     arg_dt = DT_DBLE;
855     arg_opcode = doublearg_opcode;
856 #if defined(TARGET_X8664)
857     argreg = ARG_XR(0);
858 #endif
859     break;
860   case IL_STSP:
861     arg_dt = DT_FLOAT;
862     arg_opcode = floatarg_opcode;
863 #if defined(TARGET_X8664)
864     argreg = ARG_XR(0);
865 #endif
866     break;
867   case IL_STKR:
868     arg_dt = DT_INT8;
869     arg_opcode = longarg_opcode;
870 #if defined(TARGET_X8664)
871     argreg = ARG_IR(1);
872 #endif
873     break;
874   default:
875     interr("Create: Unexpected atomic store opcode", ILI_OPC(store_ili), ERR_Severe);
876     break;
877   }
878 
879   /* Create a call to:
880    * atomicexch*(store_pt, load_val)
881    * which stores val atomically into store_pt.
882    */
883 
884   store_pt = ILI_OPND(store_ili, 2);
885   store_nme = ILI_OPND(store_ili, 3);
886   set_store_pt_addrtkn_flg(store_pt);
887   function = get_atomic_function(ILI_OPC(store_ili));
888   arg = ad1ili(IL_NULL, 0);
889   garg = ad1ili(IL_NULL, 0);
890 #if defined(TARGET_X8664)
891   arg = ad3ili(arg_opcode, AtomicOp.ili_operand, argreg, arg);
892   arg = ad3ili(IL_DAAR, store_pt, ARG_IR(0), arg);
893 #else
894   arg = ad3ili(arg_opcode, AtomicOp.ili_operand, arg, arg);
895   arg = ad3ili(IL_ARGAR, store_pt, arg, 0);
896 #endif
897   garg = ad4ili(IL_GARG, AtomicOp.ili_operand, garg, arg_dt, 0);
898   garg = ad4ili(IL_GARG, store_pt, garg, DT_CPTR, store_nme);
899   arg = ad2ili(IL_JSR, function, arg);
900   garg = ad3ili(IL_GJSR, function, garg, 0);
901   ILI_ALT(arg) = garg;
902   return arg;
903 }
904 
905 int
get_atomic_read_opcode(int current_ili)906 get_atomic_read_opcode(int current_ili)
907 {
908   int ili = current_ili;
909   int load_opcode, store_opcode;
910   int ld_op;
911   int load_pt, load_nme, store_pt;
912 
913   store_opcode = ILI_OPC(ili);
914   ld_op = ILI_OPND(ili, 1);
915   store_pt = ILI_OPND(ili, 2);
916 
917   load_opcode = ILI_OPC(ld_op);
918 
919   if (load_opcode == IL_CSEIR) {
920     /* Look through the CSEIR opcode */
921     ld_op = ILI_OPND(ILI_OPND(ili, 1), 1);
922     load_opcode = ILI_OPC(ld_op);
923   }
924 
925   if (load_opcode != IL_LD && load_opcode != IL_LDDP &&
926       load_opcode != IL_LDSP && load_opcode != IL_LDKR) {
927     /* Rely on the caller to issue an error if necessary */
928     return 0;
929   }
930 
931   load_pt = ILI_OPND(ld_op, 1);
932   load_nme = ILI_OPND(ld_op, 2);
933   AtomicOp.atomic_operand = ld_op;
934   AtomicOp.ldst_point = load_pt;
935   AtomicOp.ldst_nme = load_nme;
936   AtomicOp.ili_operand = 0;
937 
938   return store_opcode;
939 }
940 
941 int
create_atomic_read_seq(int store_ili)942 create_atomic_read_seq(int store_ili)
943 {
944   int arg, garg;
945   int function;
946   int store_pt, store_nme;
947 
948   /* Create a call to:
949    * atomicload*(store_pt, load_pt)
950    * which loads (atomically from load_pt), and stores (non-atomically)
951    * into store_pt.
952    */
953   store_pt = ILI_OPND(store_ili, 2);
954   store_nme = ILI_OPND(store_ili, 3);
955   set_store_pt_addrtkn_flg(store_pt);
956   function = get_atomic_function(ILI_OPC(AtomicOp.atomic_operand));
957   arg = ad1ili(IL_NULL, 0);
958 #if defined(TARGET_X8664)
959   arg = ad3ili(IL_DAAR, AtomicOp.ldst_point, ARG_IR(1), arg);
960   arg = ad3ili(IL_DAAR, store_pt, ARG_IR(0), arg);
961 #else
962   arg = ad3ili(IL_ARGAR, AtomicOp.ldst_point, arg, 0);
963   arg = ad3ili(IL_ARGAR, store_pt, arg, 0);
964 #endif
965   garg = ad1ili(IL_NULL, 0);
966   garg = ad4ili(IL_GARG, AtomicOp.ldst_point, garg, DT_CPTR, AtomicOp.ldst_nme);
967   garg = ad4ili(IL_GARG, store_pt, garg, DT_CPTR, store_nme);
968   arg = ad2ili(IL_JSR, function, arg);
969   garg = ad3ili(IL_GJSR, function, garg, 0);
970   ILI_ALT(arg) = garg;
971   return arg;
972 }
973 
974 /*setup the atomic operands and opcode
975 when high-precision to low-precision conversion happens
976 */
977 static void
set_atomic_typecast_h2l(int tcast_ili)978 set_atomic_typecast_h2l(int tcast_ili)
979 {
980   atomic_typecast_operand = tcast_ili;
981 }
982 
983 static void
reset_atomic_typecast_h2l()984 reset_atomic_typecast_h2l()
985 {
986   atomic_typecast_operand = 0;
987   is_atomic_operand1 = 0;
988 }
989 
990 static int
is_atomic_typcast_h2l()991 is_atomic_typcast_h2l()
992 {
993   return atomic_typecast_operand != 0;
994 }
995 
996 ILI_OP
get_atomic_update_opcode(int current_ili)997 get_atomic_update_opcode(int current_ili)
998 {
999   int ili = current_ili;
1000   int bin_op, op1, op2, store_pt, store_nme, load_pt1, load_pt2;
1001   int opc;
1002   ILI_OP store_opcode, load_opcode;
1003 
1004   load_pt1 = 0;
1005   load_pt2 = 0;
1006 
1007   store_opcode = ILI_OPC(ili);
1008 
1009   if (store_opcode == IL_FREEIR) {
1010     AtomicOp.atomic_operand = ili;
1011     AtomicOp.ldst_point = 0;
1012     AtomicOp.ldst_nme = 0;
1013     AtomicOp.ili_operand = 0;
1014     return IL_FREEIR;
1015   }
1016 
1017   if (store_opcode != IL_ST && store_opcode != IL_STDP &&
1018       store_opcode != IL_STSP && store_opcode != IL_STKR &&
1019       store_opcode != IL_STSCMPLX) {
1020     if(store_opcode == IL_STDCMPLX)
1021        error(S_0155_OP1_OP2, ERR_Severe, gbl.lineno, "Double precision complex data type are not supported in atomic region within accelerator region.", CNULL);
1022     else
1023        interr("Error: Detected unexpected atomic store opcode.", store_opcode, ERR_Severe);
1024     return IL_NONE;
1025   }
1026 
1027   bin_op = ILI_OPND(ili, 1);
1028   store_pt = ILI_OPND(ili, 2);
1029   store_nme = ILI_OPND(ili, 3);
1030 
1031   if (ILI_OPC(store_pt) == IL_CSEAR) {
1032     store_pt = ILI_OPND(store_pt, 1);
1033   }
1034   // check the high precision to low precision type cast first
1035   // a demo example of double to unsigned long long translation
1036   // 106  LDKR           48^    12~ <array[0]>    i8
1037   // 107  DAKR          106^ kr( 5)     1^
1038   // 108  QJSR          212~<__mth_i_dfloatuk>   107^
1039   // 109  DFRDP         108^ dp( 1)
1040   // 110  DFLOATUK      106^   109^-alt
1041   // 111  DADD          105^   110^
1042   // 112  DADP          111^ dp( 1)     1^
1043   // 113  QJSR          213~<__mth_i_dfixuk>   112^
1044   // 114  DFRKR         113^ kr( 1)
1045   // 115  STKR          114^    48^    12~ <array[0]>    i8
1046   // The following if statement takes care of the ili 115 ~ ili 111
1047   // There is another if statement (in this function) takes care of
1048   // ili 110 ~ ili 106
1049   if (store_opcode == IL_STKR && ILI_OPC(bin_op) == IL_DFRKR &&
1050       ILI_OPC(ILI_OPND(bin_op, 1)) == IL_QJSR &&
1051       (strcmp(SYMNAME(ILI_OPND(ILI_OPND(bin_op, 1), 1)), "__mth_i_fixuk") ==
1052            0 ||
1053        strcmp(SYMNAME(ILI_OPND(ILI_OPND(bin_op, 1), 1)), "__mth_i_fixukx") ==
1054            0)) {
1055     set_atomic_typecast_h2l(IL_FIXUK);
1056     bin_op = ILI_OPND(ILI_OPND(ILI_OPND(bin_op, 1), 2), 1);
1057     store_opcode = IL_STSP;
1058   } else if (store_opcode == IL_STKR && ILI_OPC(bin_op) == IL_DFRKR &&
1059              ILI_OPC(ILI_OPND(bin_op, 1)) == IL_QJSR &&
1060              (strcmp(SYMNAME(ILI_OPND(ILI_OPND(bin_op, 1), 1)),
1061                      "__mth_i_dfixuk") == 0 ||
1062               strcmp(SYMNAME(ILI_OPND(ILI_OPND(bin_op, 1), 1)),
1063                      "__mth_i_dfixukx") == 0)) {
1064     set_atomic_typecast_h2l(IL_DFIXUK);
1065     bin_op = ILI_OPND(ILI_OPND(ILI_OPND(bin_op, 1), 2), 1);
1066     store_opcode = IL_STDP;
1067   } else if (store_opcode == IL_STKR && ILI_OPC(bin_op) == IL_DFRKR &&
1068              ILI_OPC(ILI_OPND(bin_op, 1)) == IL_QJSR &&
1069              strcmp(SYMNAME(ILI_OPND(ILI_OPND(bin_op, 1), 1)),
1070                     "__mth_i_kfixx") == 0) {
1071     set_atomic_typecast_h2l(IL_FIXK);
1072     bin_op = ILI_OPND(ILI_OPND(ILI_OPND(bin_op, 1), 2), 1);
1073     store_opcode = IL_STSP;
1074   } else if (store_opcode == IL_STKR && ILI_OPC(bin_op) == IL_DFRKR &&
1075              ILI_OPC(ILI_OPND(bin_op, 1)) == IL_QJSR &&
1076              strcmp(SYMNAME(ILI_OPND(ILI_OPND(bin_op, 1), 1)),
1077                     "__mth_i_dkfixx") == 0) {
1078     set_atomic_typecast_h2l(IL_DFIXK);
1079     bin_op = ILI_OPND(ILI_OPND(ILI_OPND(bin_op, 1), 2), 1);
1080     store_opcode = IL_STDP;
1081   } else if (ILI_OPC(bin_op) == IL_FIX || ILI_OPC(bin_op) == IL_UFIX ||
1082              ILI_OPC(bin_op) == IL_FIXK || ILI_OPC(bin_op) == IL_FIXUK) {
1083     set_atomic_typecast_h2l(ILI_OPC(bin_op));
1084     bin_op = ILI_OPND(bin_op, 1);
1085     store_opcode = IL_STSP;
1086   } else if (ILI_OPC(bin_op) == IL_DFIX || ILI_OPC(bin_op) == IL_DFIXU ||
1087              ILI_OPC(bin_op) == IL_DFIXK || ILI_OPC(bin_op) == IL_DFIXUK ||
1088              ILI_OPC(bin_op) == IL_SNGL) {
1089     set_atomic_typecast_h2l(ILI_OPC(bin_op));
1090     bin_op = ILI_OPND(bin_op, 1);
1091     store_opcode = IL_STDP;
1092   } else if (ILI_OPC(bin_op) == IL_DFRIR &&
1093              strcmp(SYMNAME(ILI_OPND(ILI_OPND(bin_op, 1), 1)),
1094                     "__mth_i_fixux") == 0) {
1095     // float 32bit to unsigned 32bit on 32bit machine x86 machine
1096     set_atomic_typecast_h2l(IL_UFIX);
1097     bin_op = ILI_OPND(ILI_OPND(ILI_OPND(bin_op, 1), 2), 1);
1098     store_opcode = IL_STSP;
1099   } else if (ILI_OPC(bin_op) == IL_DFRIR &&
1100              strcmp(SYMNAME(ILI_OPND(ILI_OPND(bin_op, 1), 1)),
1101                     "__mth_i_dfixux") == 0) {
1102     // float 32bit to unsigned 32bit on 32bit machine x86 machine
1103     set_atomic_typecast_h2l(IL_DFIXU);
1104     bin_op = ILI_OPND(ILI_OPND(ILI_OPND(bin_op, 1), 2), 1);
1105     store_opcode = IL_STDP;
1106   }
1107   // check if it is translated from float/double to unsigned
1108   else if (ILI_OPC(bin_op) == IL_KIMV) {
1109     bin_op = ILI_OPND(bin_op, 1);
1110     if (ILI_OPC(bin_op) == IL_FIXK) {
1111       set_atomic_typecast_h2l(IL_UFIX);
1112       store_opcode = IL_STSP;
1113     } else if (ILI_OPC(bin_op) == IL_DFIXK) {
1114       set_atomic_typecast_h2l(IL_DFIXU);
1115       store_opcode = IL_STDP;
1116     }
1117     bin_op = ILI_OPND(bin_op, 1);
1118   }
1119 
1120   if (store_opcode == IL_ST) {
1121     /* Look through the int opcodes */
1122     for (opc = 0; opc < num_int_opcodes; opc++) {
1123       if (int_atomic_opcodes[opc] == ILI_OPC(bin_op))
1124         break;
1125     }
1126     if (opc == num_int_opcodes) {
1127       return IL_NONE;
1128     }
1129     AtomicOp.atomic_operand = bin_op;
1130     AtomicOp.ldst_point = store_pt;
1131     AtomicOp.ldst_nme = store_nme;
1132     load_opcode = IL_LD;
1133   } else if (store_opcode == IL_STSP) {
1134     /* Look through the float opcodes */
1135     for (opc = 0; opc < num_float_opcodes; opc++) {
1136       if (float_atomic_opcodes[opc] == ILI_OPC(bin_op))
1137         break;
1138     }
1139     if (opc == num_float_opcodes) {
1140       return IL_NONE;
1141     }
1142     AtomicOp.atomic_operand = bin_op;
1143     AtomicOp.ldst_point = store_pt;
1144     AtomicOp.ldst_nme = store_nme;
1145     if (atomic_typecast_operand == IL_FIX || atomic_typecast_operand == IL_UFIX)
1146       load_opcode = IL_LD;
1147     else if (atomic_typecast_operand == IL_FIXK ||
1148              atomic_typecast_operand == IL_FIXUK)
1149       load_opcode = IL_LDKR;
1150     else
1151       load_opcode = IL_LDSP;
1152   } else if (store_opcode == IL_STDP) {
1153     /* Look through the double opcodes */
1154     for (opc = 0; opc < num_double_opcodes; opc++) {
1155       if (double_atomic_opcodes[opc] == ILI_OPC(bin_op))
1156         break;
1157     }
1158     if (opc == num_double_opcodes) {
1159       return IL_NONE;
1160     }
1161     AtomicOp.atomic_operand = bin_op;
1162     AtomicOp.ldst_point = store_pt;
1163     AtomicOp.ldst_nme = store_nme;
1164     if (atomic_typecast_operand == IL_DFIX ||
1165         atomic_typecast_operand == IL_DFIXU)
1166       load_opcode = IL_LD;
1167     else if (atomic_typecast_operand == IL_DFIXK ||
1168              atomic_typecast_operand == IL_DFIXUK)
1169       load_opcode = IL_LDKR;
1170     else if (atomic_typecast_operand == IL_SNGL)
1171       load_opcode = IL_LDSP;
1172     else
1173       load_opcode = IL_LDDP;
1174   } else if (store_opcode == IL_STKR) {
1175     for (opc = 0; opc < num_long_opcodes; opc++) {
1176       if (long_atomic_opcodes[opc] == ILI_OPC(bin_op))
1177         break;
1178     }
1179     if (opc == num_long_opcodes) {
1180       return IL_NONE;
1181     }
1182     AtomicOp.atomic_operand = bin_op;
1183     AtomicOp.ldst_point = store_pt;
1184     AtomicOp.ldst_nme = store_nme;
1185     load_opcode = IL_LDKR;
1186   } else if(store_opcode == IL_STSCMPLX) {
1187     for (opc = 0; opc < num_cmplx_opcodes; opc++) {
1188       if (cmplx_atomic_opcodes[opc] == ILI_OPC(bin_op))
1189         break;
1190     }
1191     if (opc == num_cmplx_opcodes) {
1192       return IL_NONE;
1193     }
1194     AtomicOp.atomic_operand = bin_op;
1195     AtomicOp.ldst_point = store_pt;
1196     AtomicOp.ldst_nme = store_nme;
1197     load_opcode = IL_LDSCMPLX;
1198   }
1199 
1200   op1 = ILI_OPND(bin_op, 1);
1201   if (ILI_OPC(op1) == IL_FLOAT || ILI_OPC(op1) == IL_DFLOAT ||
1202       ILI_OPC(op1) == IL_FLOATU || ILI_OPC(op1) == IL_DFLOATU ||
1203       ILI_OPC(op1) == IL_FLOATUK || ILI_OPC(op1) == IL_DFLOATUK ||
1204       ILI_OPC(op1) == IL_FLOATK || ILI_OPC(op1) == IL_DFLOATK ||
1205       ILI_OPC(op1) == IL_DBLE)
1206     op1 = ILI_OPND(op1, 1);
1207   // if the conversion is from unsigned integer to float/double
1208   // Locate the real LD
1209   if (ILI_OPC(op1) == IL_UIKMV)
1210     op1 = ILI_OPND(op1, 1);
1211   if ((ILI_OPC(op1) == IL_DFRSP || ILI_OPC(op1) == IL_DFRDP
1212 #ifdef IL_DFRDPX87
1213        || ILI_OPC(op1) == IL_DFRDPX87
1214 #endif
1215 #ifdef IL_DFRSPX87
1216        || ILI_OPC(op1) == IL_DFRSPX87
1217 #endif
1218        ) &&
1219       ILI_OPC(ILI_OPND(op1, 1)) == IL_QJSR &&
1220       (strcmp(SYMNAME(ILI_OPND(ILI_OPND(op1, 1), 1)), "__mth_i_dfloatuk") ==
1221            0 ||
1222        strcmp(SYMNAME(ILI_OPND(ILI_OPND(op1, 1), 1)), "__mth_i_floatuk") == 0 ||
1223        strcmp(SYMNAME(ILI_OPND(ILI_OPND(op1, 1), 1)), "__mth_i_floatux") == 0 ||
1224        strcmp(SYMNAME(ILI_OPND(ILI_OPND(op1, 1), 1)), "__mth_i_dfloatux") ==
1225            0 ||
1226        strcmp(SYMNAME(ILI_OPND(ILI_OPND(op1, 1), 1)), "__mth_i_dfloatk") == 0 ||
1227        strcmp(SYMNAME(ILI_OPND(ILI_OPND(op1, 1), 1)), "__mth_i_floatk") == 0)) {
1228     op1 = ILI_OPND(ILI_OPND(ILI_OPND(op1, 1), 2), 1);
1229   }
1230 
1231   op2 = ILI_OPND(bin_op, 2);
1232   if (ILI_OPC(op2) == IL_FLOAT || ILI_OPC(op2) == IL_DFLOAT ||
1233       ILI_OPC(op2) == IL_FLOATU || ILI_OPC(op2) == IL_DFLOATU ||
1234       ILI_OPC(op2) == IL_FLOATUK || ILI_OPC(op2) == IL_DFLOATUK ||
1235       ILI_OPC(op2) == IL_FLOATK || ILI_OPC(op2) == IL_DFLOATK ||
1236       ILI_OPC(op2) == IL_DBLE)
1237     op2 = ILI_OPND(op2, 1);
1238   // if the conversion is from unsigned integer to float/double
1239   // Locate the real LD
1240   if (ILI_OPC(op2) == IL_UIKMV)
1241     op2 = ILI_OPND(op2, 1);
1242   if ((ILI_OPC(op2) == IL_DFRSP || ILI_OPC(op2) == IL_DFRDP
1243 #ifdef IL_DFRDPX87
1244        || ILI_OPC(op2) == IL_DFRDPX87
1245 #endif
1246 #ifdef IL_DFRSPX87
1247        || ILI_OPC(op2) == IL_DFRSPX87
1248 #endif
1249        ) &&
1250       ILI_OPC(ILI_OPND(op2, 1)) == IL_QJSR &&
1251       (strcmp(SYMNAME(ILI_OPND(ILI_OPND(op2, 1), 1)), "__mth_i_dfloatuk") ==
1252            0 ||
1253        strcmp(SYMNAME(ILI_OPND(ILI_OPND(op2, 1), 1)), "__mth_i_floatuk") == 0 ||
1254        strcmp(SYMNAME(ILI_OPND(ILI_OPND(op2, 1), 1)), "__mth_i_floatux") == 0 ||
1255        strcmp(SYMNAME(ILI_OPND(ILI_OPND(op2, 1), 1)), "__mth_i_dfloatux") ==
1256            0 ||
1257        strcmp(SYMNAME(ILI_OPND(ILI_OPND(op2, 1), 1)), "__mth_i_dfloatk") == 0 ||
1258        strcmp(SYMNAME(ILI_OPND(ILI_OPND(op2, 1), 1)), "__mth_i_floatk") == 0)) {
1259     op2 = ILI_OPND(ILI_OPND(ILI_OPND(op2, 1), 2), 1);
1260   }
1261 
1262   if (ILI_OPC(op1) == IL_CSEIR) {
1263     /* Look through the CSEIR to the "real" load */
1264     op1 = ILI_OPND(op1, 1);
1265   }
1266 
1267   if (ILI_OPC(op1) == load_opcode) {
1268     load_pt1 = ILI_OPND(op1, 1);
1269   }
1270 
1271   if (ILI_OPC(op2) == load_opcode) {
1272     load_pt2 = ILI_OPND(op2, 1);
1273   }
1274 
1275   if (load_pt1 == 0 && load_pt2 == 0) {
1276     /* This is an error */
1277     return IL_NONE;
1278   }
1279 
1280   /*
1281    * If the operation operand 1 is the same as the store point, then we need the
1282    * second operand for the modifier for the atomic operation.
1283    */
1284   if (load_pt1 == store_pt) {
1285     AtomicOp.ili_operand = op2;
1286     return ILI_OPC(bin_op);
1287   }
1288 
1289   /*
1290    * If the operation operand 2 is the same as the store point, then we need the
1291    * first operand for the modifier for the atomic operation.
1292    */
1293   if (load_pt2 == store_pt) {
1294     AtomicOp.ili_operand = op1;
1295     is_atomic_operand1 = 1;
1296     return ILI_OPC(bin_op);
1297   }
1298 
1299   /* This is also an error */
1300   AtomicOp.atomic_operand = 0;
1301   return IL_NONE;
1302 }
1303 
1304 int
create_atomic_seq(int store_ili)1305 create_atomic_seq(int store_ili)
1306 {
1307   int ili = store_ili;
1308   int arg, garg;
1309   int function;
1310   int store_symbol;
1311   int atomic_mod, op2, const_val, load_op, store_op, store_pt, load_pt,
1312       store_nme;
1313   int realilix, imagilix;
1314   ILI_OP intarg_opcode, floatarg_opcode, doublearg_opcode, longarg_opcode, scmplx_opcode;
1315   ILI_OP arg_opcode;
1316   int is_add;
1317   int arg_dt = 0;
1318 
1319 #if defined(TARGET_X8664)
1320   intarg_opcode = IL_DAIR;
1321   floatarg_opcode = IL_DASP;
1322   doublearg_opcode = IL_DADP;
1323   longarg_opcode = IL_DAKR;
1324   scmplx_opcode = IL_DASP;
1325 #else
1326   intarg_opcode = IL_ARGIR;
1327   floatarg_opcode = IL_ARGSP;
1328   doublearg_opcode = IL_ARGDP;
1329   longarg_opcode = IL_ARGKR;
1330   /* As now we only support single precision complex */
1331   scmplx_opcode = IL_ARGSP;
1332 #endif
1333 
1334   atomic_mod = AtomicOp.ili_operand;
1335   store_pt = ILI_OPND(ili, 2);
1336   store_nme = ILI_OPND(ili, 3);
1337 
1338   if (ILI_OPC(store_pt) == IL_CSEAR) {
1339     store_pt = ILI_OPND(store_pt, 1);
1340   } else if (ILI_OPC(store_pt) == IL_ACON) {
1341     store_symbol = ILI_OPND(store_pt, 1);
1342     store_symbol = CONVAL1G(store_symbol);
1343     ADDRTKNP(store_symbol, 1);
1344   } else if (ILI_OPC(store_pt) == IL_AADD || ILI_OPC(store_pt) == IL_ASUB) {
1345     int acon_ili;
1346     acon_ili = ILI_OPND(store_pt, 1);
1347     if (ILI_OPC(acon_ili) != IL_ACON) {
1348       while (ILI_OPC(acon_ili) == IL_AADD || ILI_OPC(acon_ili) == IL_ASUB) {
1349         acon_ili = ILI_OPND(acon_ili, 1);
1350       }
1351 
1352       /* If the base is not a constant (perhaps it is a compiler temp)
1353        * then don't try and mark.
1354        */
1355       if (ILI_OPC(acon_ili) == IL_ACON) {
1356         store_symbol = ILI_OPND(acon_ili, 1);
1357         store_symbol = CONVAL1G(store_symbol);
1358         ADDRTKNP(store_symbol, 1);
1359       }
1360     } else {
1361       store_symbol = ILI_OPND(acon_ili, 1);
1362       store_symbol = CONVAL1G(store_symbol);
1363       ADDRTKNP(store_symbol, 1);
1364     }
1365   }
1366 
1367   arg = ad1ili(IL_NULL, 0);
1368   garg = ad1ili(IL_NULL, 0);
1369   if (is_atomic_typcast_h2l())
1370     function = get_atomic_function_ex(ILI_OPC(AtomicOp.atomic_operand));
1371   else
1372     function = get_atomic_function(ILI_OPC(AtomicOp.atomic_operand));
1373 
1374   switch (ILI_OPC(store_ili)) {
1375   case IL_ST:
1376     if (atomic_typecast_operand == IL_DFIX ||
1377         atomic_typecast_operand == IL_DFIXU) {
1378       arg_opcode = doublearg_opcode;
1379       arg_dt = DT_DBLE;
1380     }
1381     else if (atomic_typecast_operand == IL_FIX ||
1382              atomic_typecast_operand == IL_UFIX) {
1383       arg_opcode = floatarg_opcode;
1384       arg_dt = DT_FLOAT;
1385     }
1386     else  {
1387       arg_opcode = intarg_opcode;
1388       arg_dt = DT_INT;
1389     }
1390     break;
1391   case IL_STDP:
1392     arg_opcode = doublearg_opcode;
1393     arg_dt = DT_DBLE;
1394     break;
1395   case IL_STSP:
1396     if (atomic_typecast_operand == IL_SNGL) {
1397       arg_opcode = doublearg_opcode;
1398       arg_dt = DT_DBLE;
1399     }
1400     else {
1401       arg_opcode = floatarg_opcode;
1402       arg_dt = DT_FLOAT;
1403     }
1404     break;
1405   case IL_STKR:
1406     if (atomic_typecast_operand == IL_DFIXK ||
1407         atomic_typecast_operand == IL_DFIXUK) {
1408       arg_opcode = doublearg_opcode;
1409       arg_dt = DT_DBLE;
1410     }
1411     else if (atomic_typecast_operand == IL_FIXK ||
1412              atomic_typecast_operand == IL_FIXUK) {
1413       arg_opcode = floatarg_opcode;
1414       arg_dt = DT_FLOAT;
1415     }
1416     else {
1417       arg_opcode = longarg_opcode;
1418       arg_dt = DT_INT8;
1419     }
1420     break;
1421   case IL_STSCMPLX:
1422     arg_opcode = scmplx_opcode;
1423     arg_dt = DT_FLOAT;
1424     break;
1425   default:
1426     interr("Create: Unexpected atomic store opcode", ILI_OPC(store_ili), ERR_Severe);
1427     break;
1428   }
1429 #if defined(TARGET_X8664)
1430   if(ILI_OPC(store_ili) == IL_STSCMPLX)  {
1431     /* split the real and img parts */
1432     /* real part */
1433     realilix = ad1ili(IL_SCMPLX2REAL, atomic_mod);
1434     /* imag part */
1435     imagilix = ad1ili(IL_SCMPLX2IMAG, atomic_mod);
1436     /* call the ili utlity function to gen param list */
1437     initcallargs(3);
1438     addcallarg(store_pt, 0, DT_CPTR);
1439     addcallarg(realilix, 0, arg_dt);
1440     addcallarg(imagilix, 0, arg_dt);
1441     /* create argument list */
1442     arg = gencallargs();
1443   }
1444   else {
1445     initcallargs(2);
1446     addcallarg(store_pt, 0, DT_CPTR);
1447     addcallarg(atomic_mod, 0, arg_dt);
1448     arg = gencallargs();
1449   }
1450 #else
1451   if(ILI_OPC(store_ili) == IL_STSCMPLX)  {
1452     /* split the real and img parts */
1453     /* real part */
1454     realilix = ad1ili(IL_SCMPLX2REAL, atomic_mod);
1455     /* imag part */
1456     imagilix = ad1ili(IL_SCMPLX2IMAG, atomic_mod);
1457     /* create argument list */
1458     arg = ad2ili(arg_opcode, imagilix, arg);
1459     arg = ad2ili(arg_opcode, realilix, arg);
1460   }
1461   else {
1462     arg = ad2ili(arg_opcode, atomic_mod, arg);
1463   }
1464   arg = ad3ili(IL_ARGAR, store_pt, arg, 0);
1465 #endif
1466   arg = ad2ili(IL_JSR, function, arg);
1467   if(ILI_OPC(store_ili) == IL_STSCMPLX)  {
1468     /* split the real and img parts */
1469     /* real part */
1470     realilix = ad1ili(IL_SCMPLX2REAL, atomic_mod);
1471     /* imag part */
1472     imagilix = ad1ili(IL_SCMPLX2IMAG, atomic_mod);
1473     /* create argument list */
1474     garg = ad4ili(IL_GARG, imagilix, garg, arg_dt, 0);
1475     garg = ad4ili(IL_GARG, realilix, garg, arg_dt, 0);
1476   }
1477   else {
1478     garg = ad4ili(IL_GARG, atomic_mod, garg, arg_dt, 0);
1479   }
1480   garg = ad4ili(IL_GARG, store_pt, garg, DT_CPTR, store_nme);
1481   garg = ad3ili(IL_GJSR, function, garg, 0);
1482   ILI_ALT(arg) = garg;
1483   return arg;
1484 }
1485 
1486 bool
exp_end_atomic(int store,int curilm)1487 exp_end_atomic(int store, int curilm)
1488 {
1489   if (is_in_atomic) {
1490     int atomic_opcode;
1491     atomic_opcode = get_atomic_update_opcode(store);
1492     if (atomic_opcode != 0) {
1493       if (get_atomic_store_created()) {
1494         error(S_0155_OP1_OP2, ERR_Severe, gbl.lineno, "Invalid atomic expression", CNULL);
1495       } else if (atomic_opcode != IL_FREEIR) {
1496         int atomic_seq;
1497         atomic_seq = create_atomic_seq(store);
1498         chk_block(atomic_seq);
1499         ILM_RESULT(curilm) = atomic_seq;
1500         ILM_BLOCK(curilm) = expb.curbih;
1501         set_atomic_store_created(1);
1502         reset_atomic_typecast_h2l();
1503       } else {
1504         /* Is there anything to do with FREEIR */
1505       }
1506     } else {
1507       error(S_0155_OP1_OP2, ERR_Severe, gbl.lineno, "Invalid atomic expression", CNULL);
1508     }
1509     return true;
1510   }
1511   if (is_in_atomic_read) {
1512     int atomic_opcode;
1513     atomic_opcode = get_atomic_read_opcode(store);
1514     if (atomic_opcode != 0) {
1515       if (get_atomic_store_created()) {
1516         error(S_0155_OP1_OP2, ERR_Severe, gbl.lineno, "Invalid atomic read expression", CNULL);
1517       } else if (atomic_opcode != IL_FREEIR) {
1518         int atomic_seq;
1519         atomic_seq = create_atomic_read_seq(store);
1520         chk_block(atomic_seq);
1521         ILM_RESULT(curilm) = atomic_seq;
1522         ILM_BLOCK(curilm) = expb.curbih;
1523         set_atomic_store_created(1);
1524       } else {
1525         error(S_0155_OP1_OP2, ERR_Severe, gbl.lineno, "Invalid atomic read expression", CNULL);
1526       }
1527     } else {
1528       error(S_0155_OP1_OP2, ERR_Severe, gbl.lineno, "Invalid atomic read expression", CNULL);
1529     }
1530     return true;
1531   }
1532   if (is_in_atomic_write) {
1533     int atomic_opcode;
1534     atomic_opcode = get_atomic_write_opcode(store);
1535     if (atomic_opcode != 0) {
1536       if (get_atomic_store_created()) {
1537         error(S_0155_OP1_OP2, ERR_Severe, gbl.lineno, "Invalid atomic write expression", CNULL);
1538       } else if (atomic_opcode != IL_FREEIR) {
1539         int atomic_seq;
1540         atomic_seq = create_atomic_write_seq(store);
1541         chk_block(atomic_seq);
1542         ILM_RESULT(curilm) = atomic_seq;
1543         ILM_BLOCK(curilm) = expb.curbih;
1544         set_atomic_store_created(1);
1545       } else {
1546         error(S_0155_OP1_OP2, ERR_Severe, gbl.lineno, "Invalid atomic write expression", CNULL);
1547       }
1548     } else {
1549       error(S_0155_OP1_OP2, ERR_Severe, gbl.lineno, "Invalid atomic write expression", CNULL);
1550     }
1551     return true;
1552   }
1553   if (is_in_atomic_capture) {
1554     int atomic_opcode;
1555     atomic_opcode = get_atomic_read_opcode(store);
1556     if (atomic_opcode != 0 && atomic_opcode != IL_FREEIR) {
1557       if (capture_read_ili != 0) {
1558         error(S_0155_OP1_OP2, ERR_Severe, gbl.lineno,
1559               "Invalid atomic capture block, multiple reads.", CNULL);
1560       } else {
1561         capture_read_ili = store;
1562         if (capture_update_ili != 0) {
1563           int atomic_seq;
1564           /* We have both parts of the capture, capture (write) is
1565            * not first. */
1566           atomic_seq = create_atomic_capture_seq(capture_update_ili,
1567                                                  capture_read_ili, 0);
1568           chk_block(atomic_seq);
1569           ILM_RESULT(curilm) = atomic_seq;
1570           ILM_BLOCK(curilm) = expb.curbih;
1571           set_atomic_capture_created(1);
1572         }
1573       }
1574     }
1575 
1576     atomic_opcode = get_atomic_update_opcode(store);
1577     if (atomic_opcode != 0) {
1578       if (capture_update_ili != 0 && atomic_opcode != IL_FREEIR) {
1579         error(S_0155_OP1_OP2, ERR_Severe, gbl.lineno,
1580               "Invalid atomic capture block, multiple updates.", CNULL);
1581       } else if (atomic_opcode != IL_FREEIR) {
1582         capture_update_ili = store;
1583         if (capture_read_ili != 0) {
1584           /* We have both parts of the capture, capture (write) is
1585            * first. */
1586           int atomic_seq;
1587           atomic_seq = create_atomic_capture_seq(capture_update_ili,
1588                                                  capture_read_ili, 1);
1589           chk_block(atomic_seq);
1590           ILM_RESULT(curilm) = atomic_seq;
1591           ILM_BLOCK(curilm) = expb.curbih;
1592           set_atomic_capture_created(1);
1593         }
1594       } else {
1595         /* Set the result of the FREEIR ILM to the regular
1596          * storage element (basically, a fallthrough).
1597          */
1598         chk_block(store);
1599         ILM_RESULT(curilm) = store;
1600         ILM_BLOCK(curilm) = expb.curbih;
1601       }
1602     }
1603     return true;
1604   }
1605   return false;
1606 }
1607 
1608 
/* Set a TARGET_*_ATOMICS macro that specifies the intrinsics/run-time library
 * to target. */
1611 #if defined(TARGET_OSX)
1612 #define TARGET_LLVM_ATOMICS 1
1613 #else
1614 #define TARGET_GNU_ATOMICS 1
1615 #endif
1616 
/** Categorization of atomic intrinsics that abstracts out details.
    Each class corresponds to a general code-generation schema.
    AOC_OP_FETCH, AOC_TEST_AND_SET and AOC_CLEAR exist only when
    TARGET_GNU_ATOMICS is non-zero, so the numeric values of the enumerators
    that follow them depend on that setting. */
typedef enum ATOMIC_OP_CATEGORY {
  AOC_LOAD,
  AOC_STORE,
  AOC_EXCHANGE,
  AOC_COMPARE_EXCHANGE,
  AOC_FETCH_OP,
#if TARGET_GNU_ATOMICS
  AOC_OP_FETCH,
  AOC_TEST_AND_SET,
  AOC_CLEAR,
#endif
  AOC_FENCE
} ATOMIC_OP_CATEGORY;
1632 
/* Macro for generating case labels for C++11 read-modify-write intrinsics.
 * Written after the keyword `case`, EACH_SUBOP(s,t) expands to the five
 * labels s_add_t, s_sub_t, s_and_t, s_or_t and s_xor_t; the first label has
 * no `case` of its own and the last has no trailing colon, so the use site
 * supplies both: `case EACH_SUBOP(s,t):`. */
/* clang-format off */
#define EACH_SUBOP(s,t) \
       s##_add_##t: \
  case s##_sub_##t: \
  case s##_and_##t: \
  case s##_or_##t: \
  case s##_xor_##t
/* clang-format on */
1642 
1643 /* Use PD_IS_ATOMIC to detect presence of atomic intrinsics */
1644 #ifdef PD_IS_ATOMIC
1645 /** Return true if pd is an atomic intrinsic with a size operand. */
1646 static bool
atomic_pd_has_size_operand(PD_KIND pd)1647 atomic_pd_has_size_operand(PD_KIND pd)
1648 {
1649   switch (pd) {
1650 #if TARGET_LLVM_ATOMICS
1651   case PD_atomic_load:
1652   case PD_atomic_store:
1653   case PD_atomic_exchange:
1654   case PD_atomic_compare_exchange:
1655     return true;
1656 #endif
1657   default:
1658     return false;
1659   }
1660 }
1661 
1662 /** Return MSZ for location atomically operated on by an atomic intrinsic.
1663     Return MSZ_UNDEF if intrinsic does not operate on a location. */
1664 static MSZ
msz_from_atomic_pd(PD_KIND pd)1665 msz_from_atomic_pd(PD_KIND pd)
1666 {
1667   switch (pd) {
1668   default:
1669     assert(0, "msz_from_atomic_pd: pd not atomic or not implemented", pd, ERR_Fatal);
1670 
1671 #if TARGET_GNU_ATOMICS
1672   case PD_atomic_load_1:
1673   case PD_atomic_store_1:
1674   case PD_atomic_exchange_1:
1675   case PD_atomic_compare_exchange_1:
1676   case EACH_SUBOP(PD_atomic, fetch_1):
1677   case PD_atomic_test_and_set:
1678   case PD_atomic_clear:
1679 #endif
1680   case EACH_SUBOP(PD_atomic_fetch, 1):
1681     return MSZ_SBYTE;
1682 
1683 #if TARGET_GNU_ATOMICS
1684   case PD_atomic_load_2:
1685   case PD_atomic_store_2:
1686   case PD_atomic_exchange_2:
1687   case PD_atomic_compare_exchange_2:
1688   case EACH_SUBOP(PD_atomic, fetch_2):
1689 #endif
1690   case EACH_SUBOP(PD_atomic_fetch, 2):
1691     return MSZ_SHWORD;
1692 
1693 #if TARGET_GNU_ATOMICS
1694   case PD_atomic_load_4:
1695   case PD_atomic_store_4:
1696   case PD_atomic_exchange_4:
1697   case PD_atomic_compare_exchange_4:
1698   case EACH_SUBOP(PD_atomic, fetch_4):
1699 #endif
1700   case EACH_SUBOP(PD_atomic_fetch, 4):
1701     return MSZ_SWORD;
1702 
1703 #if TARGET_GNU_ATOMICS
1704   case PD_atomic_load_8:
1705   case PD_atomic_store_8:
1706   case PD_atomic_exchange_8:
1707   case PD_atomic_compare_exchange_8:
1708   case EACH_SUBOP(PD_atomic, fetch_8):
1709 #endif
1710   case EACH_SUBOP(PD_atomic_fetch, 8):
1711     return MSZ_SLWORD;
1712 
1713   case PD_atomic_thread_fence:
1714   case PD_atomic_signal_fence:
1715     return MSZ_UNDEF;
1716   }
1717 }
1718 
1719 #endif
1720 
1721 static MSZ
msz_from_atomic_dtype(DTYPE dtype)1722 msz_from_atomic_dtype(DTYPE dtype)
1723 {
1724   switch(zsize_of(dtype)) {
1725   case 1:
1726     return MSZ_BYTE;
1727   case 2:
1728     return MSZ_SHWORD;
1729   case 4:
1730     return MSZ_SWORD;
1731   case 8:
1732     return MSZ_I8;
1733   default:
1734     return MSZ_UNDEF;
1735   }
1736 }
1737 
/** ILI operations of a given "link" kind.
    One opcode for each of: plain load, plain store, atomic load, atomic
    store, atomic read-modify-write, compare-exchange, and the "old value"
    form of compare-exchange.  get_ops() initializes these tables
    positionally, so the member order must not change. */
typedef struct OPCODES {
  ILI_OP ld, st, atomicld, atomicst, atomicrmw, cmpxchg, cmpxchg_old;
} OPCODES;
1742 
1743 /** Get operations suitable for a given MSZ.
1744     The MSZ must correspond to an integer type. */
1745 static const OPCODES *
get_ops(MSZ msz,int is_openmp)1746 get_ops(MSZ msz, int is_openmp)
1747 {
1748   static const OPCODES ir_ops = {IL_LD,          IL_ST,         IL_ATOMICLDI,
1749                                  IL_ATOMICSTI,   IL_ATOMICRMWI, IL_CMPXCHGI,
1750                                  IL_CMPXCHG_OLDI};
1751   static const OPCODES kr_ops = {IL_LDKR,         IL_STKR,        IL_ATOMICLDKR,
1752                                  IL_ATOMICSTKR,   IL_ATOMICRMWKR, IL_CMPXCHGKR,
1753                                  IL_CMPXCHG_OLDKR};
1754   /* read-modify-write does not allow float/dble types */
1755   static const OPCODES sp_ops = {IL_LD,          IL_ST,         IL_ATOMICLDSP,
1756                                  IL_ATOMICSTSP,   IL_ATOMICRMWI, IL_CMPXCHGI,
1757                                  IL_CMPXCHG_OLDI};
1758   static const OPCODES dp_ops = {IL_LDKR,        IL_STKR,        IL_ATOMICLDDP,
1759                                  IL_ATOMICSTDP,   IL_ATOMICRMWKR, IL_CMPXCHGKR,
1760                                  IL_CMPXCHG_OLDKR};
1761   static const OPCODES ar_ops = {IL_LDKR,         IL_STKR,       IL_ATOMICLDA,
1762                                  IL_ATOMICSTA,   IL_ATOMICRMWKR, IL_CMPXCHGKR,
1763                                  IL_CMPXCHG_OLDKR};
1764   switch (msz) {
1765   case MSZ_F8:
1766     if (is_openmp) {
1767       return &dp_ops;
1768     }
1769   case MSZ_PTR:
1770     if (is_openmp) {
1771       return &ar_ops;
1772     }
1773   case MSZ_SLWORD:
1774   case MSZ_ULWORD:
1775   case MSZ_I8:
1776     return &kr_ops;
1777   case MSZ_F4:
1778     if (is_openmp)
1779       return &sp_ops;
1780   default:
1781     return &ir_ops;
1782   }
1783 }
1784 
1785 #if TARGET_LLVM_ATOMICS
1786 /** Given a size operand, return corresponding MSZ if operand is a constant.
1787     Otherwise return MSZ_UNDEF. */
1788 static MSZ
msz_from_size_argument(int ilix)1789 msz_from_size_argument(int ilix)
1790 {
1791   INT value;
1792 
1793   /* See if ilix represents a small constant. */
1794   switch (ILI_OPC(ilix)) {
1795   case IL_KCON:
1796     /* Punt if any high-order bits are set. */
1797     value = CONVAL1G(ILI_OPND(ilix, 1));
1798     if (value != 0)
1799       return MSZ_UNDEF;
1800   /* drop through to read of low-order bits. */
1801   case IL_ICON:
1802     value = CONVAL2G(ILI_OPND(ilix, 1));
1803     break;
1804   default:;
1805     return MSZ_UNDEF;
1806   }
1807 
1808   /* Return MSZ corresponding to the constant. */
1809   switch (value) {
1810   case 1:
1811     return MSZ_UBYTE;
1812   case 2:
1813     return MSZ_UHWORD;
1814   case 4:
1815     return MSZ_UWORD;
1816   case 8:
1817     return MSZ_ULWORD;
1818   default:;
1819     return MSZ_UNDEF;
1820   }
1821 }
1822 
1823 /** \brief Remove "weak" parameter from atomic_compare_exchange.
1824 
1825     Called when we cannot map the atomic_compare_exchange onto an ILI operation.
1826     Though present in the atomic_compare_exchange used in the OSX <atomic>
1827     header, the parameter is not present in LLVM's run-time library. */
1828 static void
remove_weak_parameter(ILM * ilmp)1829 remove_weak_parameter(ILM *ilmp)
1830 {
1831   DEBUG_ASSERT(ILM_OPC(ilmp) == IM_FAPPLY, "FAPPLY expected");
1832   /* Number of parmeters changes from 7 to 6. */
1833   DEBUG_ASSERT(ILM_OPND(ilmp, 1) == 7, "wrong number of parameters?");
1834   ILM_OPND(ilmp, 1) = 6;
1835   int callee_index = 3;
1836   /* Remove 5th parameter. */
1837   ILM_OPND(ilmp, callee_index + 5) = ILM_OPND(ilmp, callee_index + 6);
1838   ILM_OPND(ilmp, callee_index + 6) = ILM_OPND(ilmp, callee_index + 7);
1839   ILM_OPND(ilmp, callee_index + 7) = IM_NOP;
1840 }
1841 #endif
1842 
1843 #ifdef PD_IS_ATOMIC
/** Given a PD_KIND, get its category.

    The set of recognised predefined ops depends on the target: the sized
    "_1/_2/_4/_8" load/store/exchange/compare_exchange forms, the "op_fetch"
    forms, test_and_set and clear are only handled under TARGET_GNU_ATOMICS,
    while the unsized load/store/exchange/compare_exchange forms are only
    handled under TARGET_LLVM_ATOMICS.  The sized "fetch_op" forms and the two
    fences are handled for both.

    \param pd predefined-op kind to classify
    \return the ATOMIC_OP_CATEGORY for \p pd; an internal error (severity 4) is
            raised for any other value */
static ATOMIC_OP_CATEGORY
atomic_op_category_from_pd(PD_KIND pd)
{
  switch (pd) {
  default:
    interr("atomic_op_category_from_pd: pd not atomic or not implemented", pd,
           4);
    /* No return or break here: should interr() ever return, control continues
       into the load cases below and AOC_LOAD is returned. */

  /* load */
#if TARGET_GNU_ATOMICS
  case PD_atomic_load_1:
  case PD_atomic_load_2:
  case PD_atomic_load_4:
  case PD_atomic_load_8:
#endif
#if TARGET_LLVM_ATOMICS
  case PD_atomic_load:
#endif
    return AOC_LOAD;

  /* store */
#if TARGET_GNU_ATOMICS
  case PD_atomic_store_1:
  case PD_atomic_store_2:
  case PD_atomic_store_4:
  case PD_atomic_store_8:
#endif
#if TARGET_LLVM_ATOMICS
  case PD_atomic_store:
#endif
    return AOC_STORE;

  /* exchange */
#if TARGET_GNU_ATOMICS
  case PD_atomic_exchange_1:
  case PD_atomic_exchange_2:
  case PD_atomic_exchange_4:
  case PD_atomic_exchange_8:
#endif
#if TARGET_LLVM_ATOMICS
  case PD_atomic_exchange:
#endif
    return AOC_EXCHANGE;

  /* compare_exchange */
#if TARGET_GNU_ATOMICS
  case PD_atomic_compare_exchange_1:
  case PD_atomic_compare_exchange_2:
  case PD_atomic_compare_exchange_4:
  case PD_atomic_compare_exchange_8:
#endif
#if TARGET_LLVM_ATOMICS
  case PD_atomic_compare_exchange:
#endif
    return AOC_COMPARE_EXCHANGE;

  /* fetch_op (result is the value before the operation) */
  case EACH_SUBOP(PD_atomic_fetch, 1):
  case EACH_SUBOP(PD_atomic_fetch, 2):
  case EACH_SUBOP(PD_atomic_fetch, 4):
  case EACH_SUBOP(PD_atomic_fetch, 8):
    return AOC_FETCH_OP;

#if TARGET_GNU_ATOMICS
  /* op_fetch (the expander replays the operation to obtain the new value) */
  case EACH_SUBOP(PD_atomic, fetch_1):
  case EACH_SUBOP(PD_atomic, fetch_2):
  case EACH_SUBOP(PD_atomic, fetch_4):
  case EACH_SUBOP(PD_atomic, fetch_8):
    return AOC_OP_FETCH;

  /* test and set */
  case PD_atomic_test_and_set:
    return AOC_TEST_AND_SET;

  /* clear */
  case PD_atomic_clear:
    return AOC_CLEAR;
#endif

  /* fence */
  case PD_atomic_thread_fence:
  case PD_atomic_signal_fence:
    return AOC_FENCE;
  }
}
1931 
/** Return ATOMIC_RMW_OP for given predefined op that is either an atomic
    "op_fetch" or "fetch_op".  Set *replay to the operation required to "replay"
    the operation.

    The 1-, 2- and 4-byte forms replay with the I/unsuffixed opcode (IL_IADD,
    IL_ISUB, IL_AND, IL_OR, IL_XOR); the 8-byte forms replay with the K opcode
    (IL_KADD, IL_KSUB, IL_KAND, IL_KOR, IL_KXOR).  The "op_fetch" spellings are
    only recognised under TARGET_GNU_ATOMICS.

    \param pd      predefined-op kind; must be an add/sub/and/or/xor fetch op
    \param replay  [out] opcode that recomputes the operation on plain values
    \return AOP_ADD, AOP_SUB, AOP_AND, AOP_OR or AOP_XOR */
static ATOMIC_RMW_OP
atomic_rmw_op_from_pd(PD_KIND pd, ILI_OP *replay)
{
  switch (pd) {
  default:
    /* NOTE(review): the message names "op_for_replay", not this function. */
    assert(0, "op_for_replay: pd not an atomic_op_fetch or not implemented", pd,
           4);
    /* No return or break here: should the assert ever return, control
       continues into the add cases below. */

  case PD_atomic_fetch_add_1:
  case PD_atomic_fetch_add_2:
  case PD_atomic_fetch_add_4:
#if TARGET_GNU_ATOMICS
  case PD_atomic_add_fetch_1:
  case PD_atomic_add_fetch_2:
  case PD_atomic_add_fetch_4:
#endif
    *replay = IL_IADD;
    return AOP_ADD;
  case PD_atomic_fetch_add_8:
#if TARGET_GNU_ATOMICS
  case PD_atomic_add_fetch_8:
#endif
    *replay = IL_KADD;
    return AOP_ADD;

  case PD_atomic_fetch_sub_1:
  case PD_atomic_fetch_sub_2:
  case PD_atomic_fetch_sub_4:
#if TARGET_GNU_ATOMICS
  case PD_atomic_sub_fetch_1:
  case PD_atomic_sub_fetch_2:
  case PD_atomic_sub_fetch_4:
#endif
    *replay = IL_ISUB;
    return AOP_SUB;
  case PD_atomic_fetch_sub_8:
#if TARGET_GNU_ATOMICS
  case PD_atomic_sub_fetch_8:
#endif
    *replay = IL_KSUB;
    return AOP_SUB;

  case PD_atomic_fetch_and_1:
  case PD_atomic_fetch_and_2:
  case PD_atomic_fetch_and_4:
#if TARGET_GNU_ATOMICS
  case PD_atomic_and_fetch_1:
  case PD_atomic_and_fetch_2:
  case PD_atomic_and_fetch_4:
#endif
    *replay = IL_AND;
    return AOP_AND;

  case PD_atomic_fetch_and_8:
#if TARGET_GNU_ATOMICS
  case PD_atomic_and_fetch_8:
#endif
    *replay = IL_KAND;
    return AOP_AND;

  case PD_atomic_fetch_or_1:
  case PD_atomic_fetch_or_2:
  case PD_atomic_fetch_or_4:
#if TARGET_GNU_ATOMICS
  case PD_atomic_or_fetch_1:
  case PD_atomic_or_fetch_2:
  case PD_atomic_or_fetch_4:
#endif
    *replay = IL_OR;
    return AOP_OR;
  case PD_atomic_fetch_or_8:
#if TARGET_GNU_ATOMICS
  case PD_atomic_or_fetch_8:
#endif
    *replay = IL_KOR;
    return AOP_OR;

  case PD_atomic_fetch_xor_1:
  case PD_atomic_fetch_xor_2:
  case PD_atomic_fetch_xor_4:
#if TARGET_GNU_ATOMICS
  case PD_atomic_xor_fetch_1:
  case PD_atomic_xor_fetch_2:
  case PD_atomic_xor_fetch_4:
#endif
    *replay = IL_XOR;
    return AOP_XOR;
  case PD_atomic_fetch_xor_8:
#if TARGET_GNU_ATOMICS
  case PD_atomic_xor_fetch_8:
#endif
    *replay = IL_KXOR;
    return AOP_XOR;
  }
}
2030 
2031 #endif
2032 
/** Object that assists generation of temporaries.
    auto_stash() fills it in and auto_retrieve() reads it back. */
typedef struct auto_temp {
  /** An ilix for a store into a temporary, or ilix of a constant. */
  int expr;
} auto_temp;
2038 
2039 /** \brief Generate ILI so that value of an ILI expression can be retrieved
2040    later.
2041 
2042     \param temp pointer to object that remembers how to recover the value
2043     \param ilix ILI expression to be stashed/retrieved
2044     \param st_op IL_STx operation to be used to store value if necessary
2045     \param msz machine size of value to be stored.
2046   */
2047 static void
auto_stash(auto_temp * temp,int ilix,ILI_OP st_op,MSZ msz)2048 auto_stash(auto_temp *temp, int ilix, ILI_OP st_op, MSZ msz)
2049 {
2050   int nme, acon, store;
2051   SPTR sym;
2052   switch (ILI_OPC(ilix)) {
2053   case IL_ACON:
2054   case IL_ICON:
2055     /* Do not need a temporary */
2056     temp->expr = ilix;
2057     return;
2058   default:
2059     break;
2060   }
2061   sym = mkrtemp(ilix);
2062   acon = ad_acon(sym, (INT)0);
2063   nme = addnme(NT_VAR, sym, 0, (INT)0);
2064   store = ad4ili(st_op, ilix, acon, nme, msz);
2065   chk_block(store);
2066   temp->expr = store;
2067 }
2068 
2069 /** \brief Generate ILI to retrieve previously stashed value.
2070 
2071     \param temp pointer to object set by routine auto_stash
2072   */
2073 static int
auto_retrieve(auto_temp * temp)2074 auto_retrieve(auto_temp *temp)
2075 {
2076   switch (IL_TYPE(ILI_OPC(temp->expr))) {
2077   default:
2078     interr("auto_retrieve: unexpected IL_TYPE", IL_TYPE(temp->expr), ERR_Fatal);
2079   case ILTY_STORE:
2080   case ILTY_PSTORE:
2081     return ad_load(temp->expr);
2082   case ILTY_CONS:
2083     return temp->expr;
2084   }
2085 }
2086 
/* Per-flavour argument layout of the atomic intrinsics.
   MAX_ATOMIC_ARGS sizes the opnd[]/nme[] scratch arrays used by the expanders
   in this file; COMPARAND_INDEX is the position of the "expected" pointer
   among the compare-exchange arguments. */
#if TARGET_GNU_ATOMICS
#define MAX_ATOMIC_ARGS 6
#define COMPARAND_INDEX 1
#elif TARGET_LLVM_ATOMICS
#define MAX_ATOMIC_ARGS 7
#define COMPARAND_INDEX 2
#else
#error "expected TARGET_GNU_ATOMICS or TARGET_LLVM_ATOMICS"
#endif
2096 
2097 #ifdef PD_IS_ATOMIC
/** \brief Expand a GNU or LLVM atomic intrinsic.
    Return true if intrinsic is expanded, false if intrinsic should be rendered
    as plain call. In the latter case, the ILM call may have had its parameters
    changed slightly.

    Argument positions differ between the two flavours: in the GNU forms the
    atomic object's address is argument 0, whereas in the LLVM forms argument 0
    is the size and the object's address is argument 1, with values passed
    through pointers that are loaded from / stored to here.

   \param pd - a PD_... value from pd.h for which PD_IS_ATOMIC is true.
   \param ilmp - pointer to call site for an atomic intrinsic
   \param curilm - ILM index whose ILM_RESULT receives the result ILI when the
                   call site is an IM_FAPPLY */
bool
exp_atomic_intrinsic(PD_KIND pd, ILM *ilmp, int curilm)
{
  int i, n;
  int opnd[MAX_ATOMIC_ARGS]; /* ILI "ptrs". */
  int nme[MAX_ATOMIC_ARGS];
  int callee_index, stc, result;
  const OPCODES *o;
  ILI_OP ili_op_for_replay;
  MSZ msz;
  ATOMIC_OP_CATEGORY aoc;
  DEBUG_ASSERT(ilmp->opc == IM_FAPPLY || ilmp->opc == IM_VAPPLY,
               "atomic ops cannot throw");

  /* Get # of operands. */
  n = ILM_OPND(ilmp, 1);
  /* FIXME - do we need to check argument count and issue error message to
     user if there are the wrong number of arguments, or did the front-end
     already deal with that? */
  DEBUG_ASSERT(0 <= n && n <= MAX_ATOMIC_ARGS, "exp_atomic_intrinsic: bad ILM");
  callee_index = ilm_callee_index(ilmp->opc);
  /* Collect the ILI and NME of every actual argument. */
  for (i = 0; i < n; ++i) {
    int ilmx = ILM_OPND(ilmp, callee_index + 1 + i); /* locates ARG ilm */
    ILM *ilmpx = (ILM *)(ilmb.ilm_base + ilmx);
    ilmx = ILM_OPND(ilmpx, 2);
    nme[i] = NME_OF(ilmx);
    opnd[i] = ILI_OF(ilmx);
  }

  /* Determine size of location operated on by the atomic op. */
#if TARGET_LLVM_ATOMICS
  if (atomic_pd_has_size_operand(pd)) {
    msz = msz_from_size_argument(opnd[0]);
    if (msz == MSZ_UNDEF) {
      /* Size is not a usable constant: leave the call to the run-time
         library, adjusting the argument list for compare_exchange first. */
      if (pd == PD_atomic_compare_exchange) {
        remove_weak_parameter(ilmp);
      }
      return false;
    }
  } else
#endif
  {
    msz = msz_from_atomic_pd(pd);
  }

  /* Get operations suitable for this msz. */
  /* o is NULL when msz is MSZ_UNDEF; the fence case below never reads o. */
  o = msz != MSZ_UNDEF ? get_ops(msz, 0) : NULL;

  aoc = atomic_op_category_from_pd(pd);
  switch (aoc) {
  default:
    assert(false, "exp_atomic_intrinsic: unimplemented op class", aoc, ERR_Fatal);
    /* No break: if the assert returns, control continues into AOC_LOAD. */

  case AOC_LOAD:
    stc = atomic_encode(msz, SS_PROCESS, AORG_CPLUS);
#if TARGET_GNU_ATOMICS
    result = ad4ili(o->atomicld, opnd[0], nme[0], stc, opnd[1]);
#elif TARGET_LLVM_ATOMICS
    /* Load atomically from argument 1 and store the value through the
       pointer in argument 2. */
    result = ad4ili(o->atomicld, opnd[1], nme[1], stc, opnd[3]);
    result = ad4ili(o->st, result, opnd[2], nme[2], msz);
    chk_block(result);
#else
#error "expected TARGET_GNU_ATOMICS or TARGET_LLVM_ATOMICS"
#endif
    break;

  case AOC_STORE:
    stc = atomic_encode(msz, SS_PROCESS, AORG_CPLUS);
#if TARGET_GNU_ATOMICS
    result = ad5ili(o->atomicst, opnd[1], opnd[0], nme[0], stc, opnd[2]);
    chk_block(result);
#elif TARGET_LLVM_ATOMICS
    /* Fetch the value through the pointer in argument 2, then store it
       atomically to argument 1. */
    result = ad3ili(o->ld, opnd[2], nme[2], msz);
    result = ad5ili(o->atomicst, result, opnd[1], nme[1], stc, opnd[3]);
    chk_block(result);
#else
#error "expected TARGET_GNU_ATOMICS or TARGET_LLVM_ATOMICS"
#endif
    break;

  case AOC_EXCHANGE:
    stc = atomic_encode_rmw(msz, SS_PROCESS, AORG_CPLUS, AOP_XCHG);
#if TARGET_GNU_ATOMICS
    result = ad5ili(o->atomicrmw, opnd[1], opnd[0], nme[0], stc, opnd[2]);
#elif TARGET_LLVM_ATOMICS
    /* New value comes from *argument 2; old value goes to *argument 3. */
    result = ad3ili(o->ld, opnd[2], nme[2], msz);
    result = ad5ili(o->atomicrmw, result, opnd[1], nme[1], stc, opnd[4]);
    result = ad4ili(o->st, result, opnd[3], nme[3], msz);
    chk_block(result);
#else
#error "expected TARGET_GNU_ATOMICS or TARGET_LLVM_ATOMICS"
#endif
    break;

  case AOC_COMPARE_EXCHANGE: {
    int expected_ptr, comparand, cmpxchg, succ, oldval;
    int comparand_nme, label;
    int desired;
    auto_temp expected_ptr_save, oldval_save, succ_save;
    stc = atomic_encode(msz, SS_PROCESS, AORG_CPLUS);

    /* Get the comparand ("expected") */
    comparand_nme = addnme(NT_IND, 0, nme[COMPARAND_INDEX], (INT)0);
    comparand = ad3ili(o->ld, opnd[COMPARAND_INDEX], comparand_nme, msz);

    /* Save the expected_ptr */
    expected_ptr = ad_cse(opnd[COMPARAND_INDEX]);
    auto_stash(&expected_ptr_save, expected_ptr, IL_STA, MSZ_PTR);

    /* Do the compare-exchange */
#if TARGET_GNU_ATOMICS
    desired = opnd[2];
    cmpxchg = ad_cmpxchg(o->cmpxchg, desired, opnd[0], nme[0], stc, comparand,
                         opnd[3], opnd[4], opnd[5]);
#elif TARGET_LLVM_ATOMICS
    desired = ad3ili(o->ld, opnd[3], nme[3], msz);
    cmpxchg = ad_cmpxchg(o->cmpxchg, desired, opnd[1], nme[1], stc, comparand,
                         opnd[4], opnd[5], opnd[6]);
#else
#error "expected TARGET_GNU_ATOMICS or TARGET_LLVM_ATOMICS"
#endif

    /* The cmpxchg result is consumed twice below (old value, success flag). */
    cmpxchg = ad_cse(cmpxchg);

    /* Stash old value returned by cmpxchg */
    oldval = ad1ili(o->cmpxchg_old, cmpxchg);
    auto_stash(&oldval_save, oldval, o->st, msz);

    /* Stash success flag returned by cmpxchg */
    succ = ad1ili(IL_CMPXCHG_SUCCESS, cmpxchg);
    succ = ad_cse(succ);
    auto_stash(&succ_save, succ, IL_ST, MSZ_SBYTE);

    /* Branch on success. */
    label = getlab();
    chk_block(ad3ili(IL_ICJMPZ, succ, CC_NE, label));

    /* Store old value into *expected_ptr. */
    expected_ptr = auto_retrieve(&expected_ptr_save);
    oldval = auto_retrieve(&oldval_save);
    /* NOTE(review): this store uses nme[1], whereas the matching load above
       uses comparand_nme (derived from nme[COMPARAND_INDEX]).  With
       TARGET_LLVM_ATOMICS COMPARAND_INDEX is 2, so nme[1] names the atomic
       object rather than the "expected" location - confirm intended. */
    chk_block(ad4ili(o->st, oldval, expected_ptr, nme[1], msz));

    /* Emit label */
    wr_block();
    cr_block();
    BIH_LABEL(expb.curbih) = label;
    ILIBLKP(label, expb.curbih);
    RFCNTP(label, 1);

    /* Result is value of flag. */
    result = auto_retrieve(&succ_save);
  } break;

  case AOC_FETCH_OP:
    /* FIXME - should use IL_ATOMICRMWA if first operand is pointer to
       pointer.  Use DTYPE to tell?  Or is IL_ATOMICRMWA fundmentally a bad
       idea? */
    /* ili_op_for_replay is filled in here but not needed: the rmw result is
       already the value wanted. */
    stc = atomic_encode_rmw(msz, SS_PROCESS, AORG_CPLUS,
                            atomic_rmw_op_from_pd(pd, &ili_op_for_replay));
    result = ad5ili(o->atomicrmw, opnd[1], opnd[0], nme[0], stc, opnd[2]);
    break;

#if TARGET_GNU_ATOMICS
  case AOC_OP_FETCH:
    stc = atomic_encode_rmw(msz, SS_PROCESS, AORG_CPLUS,
                            atomic_rmw_op_from_pd(pd, &ili_op_for_replay));
    /* Need to "replay" operation to get final result, so we need to use opnd[1]
     * twice. */
    opnd[1] = ad_cse(opnd[1]);
    result = ad5ili(o->atomicrmw, opnd[1], opnd[0], nme[0], stc, opnd[2]);
    result = ad2ili(ili_op_for_replay, result, opnd[1]);
    break;

  case AOC_TEST_AND_SET:
    /* Treat as atomic exchange on a byte. */
    stc = atomic_encode_rmw(msz, SS_PROCESS, AORG_CPLUS, AOP_XCHG);
    result = ad5ili(IL_ATOMICRMWI, ad_icon(1), opnd[0], nme[0], stc, opnd[1]);
    break;

  case AOC_CLEAR:
    /* Treat as atomic store of a zero byte. */
    stc = atomic_encode(msz, SS_PROCESS, AORG_CPLUS);
    result = ad5ili(IL_ATOMICSTI, ad_icon(0), opnd[0], nme[0], stc, opnd[1]);
    chk_block(result);
    break;
#endif /* TARGET_GNU_ATOMICS */

  case AOC_FENCE: {
    /* signal_fence is scoped to the single thread, thread_fence to the
       process. */
    SYNC_SCOPE ss = pd == PD_atomic_signal_fence ? SS_SINGLETHREAD : SS_PROCESS;
    stc = atomic_encode(MSZ_UNDEF, ss, AORG_CPLUS);
    result = ad2ili(IL_FENCE, stc, opnd[0]);
  } break;
  }
  if (ilmp->opc == IM_VAPPLY) {
    /* result not used */
    switch (aoc) {
    /* Categories listed here get no extra handling when the value is
       unused. */
    case AOC_FENCE:
    case AOC_STORE:
#if TARGET_GNU_ATOMICS
    case AOC_CLEAR:
#endif
#if TARGET_LLVM_ATOMICS
    case AOC_LOAD:
    case AOC_EXCHANGE:
    case AOC_COMPARE_EXCHANGE:
#endif
      break;
    default:
      /* result was produced, but not used. */
      result = ad_free(result);
      chk_block(result);
      break;
    }
  } else {
    DEBUG_ASSERT(aoc != AOC_FENCE, "IM_VAPPLY expected for fence intrinsics");
    ILM_RESULT(curilm) = result;
  }
  return true;
}
2324 #endif
2325 
2326 static SPTR
mkatomictemp(DTYPE dtype)2327 mkatomictemp(DTYPE dtype)
2328 {
2329   static int cnt;
2330   SPTR tmp_sptr = getnewccsym('a', cnt++, ST_VAR);
2331   SCP(tmp_sptr, (gbl.outlined? SC_PRIVATE:SC_AUTO));
2332   ENCLFUNCP(tmp_sptr, GBL_CURRFUNC);
2333   DTYPEP(tmp_sptr, dtype);
2334   return tmp_sptr;
2335 }
2336 
2337 static int
ll_make_atomic_load(int size_ili,int lhs,int rhs,int mem_order)2338 ll_make_atomic_load(int size_ili, int lhs, int rhs, int mem_order)
2339 {
2340   int result, altili;
2341   int func, arg;
2342   int garg[4];
2343   int args[4], arg_types[4] = {DT_UINT8, DT_CPTR, DT_CPTR, DT_INT};
2344 
2345   func = mkfunc("__atomic_load");
2346   SCP(func, SC_EXTERN);
2347 
2348   arg = ad1ili(IL_NULL, 0);
2349   arg = ad2ili(IL_ARGIR, mem_order, arg);
2350   arg = ad2ili(IL_ARGAR, rhs, arg);
2351   arg = ad2ili(IL_ARGAR, lhs, arg);
2352   arg = ad2ili(IL_ARGKR, ikmove(size_ili), arg);
2353   result = ad2ili(IL_JSR, func, arg);
2354 
2355   return result;
2356 }
2357 
2358 
2359 static int
ll_make_atomic_store(int size_ili,int lhs,int rhs,int mem_order)2360 ll_make_atomic_store(int size_ili, int lhs, int rhs, int mem_order)
2361 {
2362   int result, altili;
2363   int func, arg;
2364   int size, stc;
2365   int garg[4];
2366   int args[4], arg_types[4] = {DT_UINT8, DT_CPTR, DT_CPTR, DT_INT};
2367 
2368   func = mkfunc("__atomic_store");
2369   SCP(func, SC_EXTERN);
2370   arg = ad1ili(IL_NULL, 0);
2371   arg = ad2ili(IL_ARGIR, mem_order, arg);
2372   arg = ad2ili(IL_ARGAR, rhs, arg);
2373   arg = ad2ili(IL_ARGAR, lhs, arg);
2374   arg = ad2ili(IL_ARGKR, ikmove(size_ili), arg);
2375   result = ad2ili(IL_JSR, func, arg);
2376   return result;
2377 
2378   return result;
2379 }
2380 
2381 static int
ll_make_atomic_compare_xchg(int size_ili,int lhs,int expected,int desired,int success,int failure)2382 ll_make_atomic_compare_xchg(int size_ili, int lhs, int expected,
2383                             int desired, int success, int failure)
2384 {
2385   int result, altili;
2386   int func, arg;
2387   int size, stc;
2388   int garg[6];
2389   int args[6], arg_types[6] = {DT_UINT8, DT_CPTR, DT_CPTR, DT_CPTR, DT_INT, DT_INT};
2390 
2391   func = mkfunc("__atomic_compare_exchange");
2392   SCP(func, SC_EXTERN);
2393   arg = ad1ili(IL_NULL, 0);
2394   arg = ad2ili(IL_ARGIR, failure, arg);
2395   arg = ad2ili(IL_ARGIR, success, arg);
2396   arg = ad2ili(IL_ARGAR, desired, arg);
2397   arg = ad2ili(IL_ARGAR, expected, arg);
2398   arg = ad2ili(IL_ARGAR, lhs, arg);
2399   arg = ad2ili(IL_ARGKR, ikmove(size_ili), arg);
2400   result = ad2ili(IL_JSR, func, arg);
2401   result = ad2ili(IL_DFRIR, result, IR_RETVAL);
2402   return result;
2403 
2404   return result;
2405 }
2406 
2407 static int
ll_make_atomic_xchg(int lhs,int expected,int desired,int mem_order)2408 ll_make_atomic_xchg(int lhs, int expected, int desired, int mem_order)
2409 {
2410   int result, altili;
2411   int func, arg;
2412   int size, stc;
2413   int garg[4];
2414   int args[4], arg_types[4] = {DT_CPTR, DT_CPTR, DT_CPTR, DT_INT};
2415 
2416   func = mkfunc("__atomic_exchange");
2417   SCP(func, SC_EXTERN);
2418   arg = ad1ili(IL_NULL, 0);
2419   arg = ad2ili(IL_ARGIR, mem_order, arg);
2420   arg = ad2ili(IL_ARGAR, desired, arg);
2421   arg = ad2ili(IL_ARGAR, expected, arg);
2422   arg = ad2ili(IL_ARGKR, lhs, arg);
2423   result = ad2ili(IL_JSR, func, arg);
2424   return result;
2425 
2426   return result;
2427 }
2428 
2429 static int
_exp_mp_atomic_read(int stc,DTYPE dtype,int * opnd,int * nme)2430 _exp_mp_atomic_read(int stc, DTYPE dtype, int* opnd, int* nme)
2431 {
2432   int result, size_ili;
2433   SPTR tmp_sptr;
2434   ILI_OP ld, st;
2435   MSZ msz;
2436   ISZ_T size;
2437 
2438 #if use_kmpc_rte
2439   /* FIX ME */
2440   result = ll_make_kmpc_atomic_read(opnd, dtype);
2441   if (result) {
2442     ldst_msz(dtype, &ld, &st, &msz);
2443     result = ad4ili(st, result, opnd[LHS_IDX], nme[LHS_IDX], msz);
2444     iltb.callfg = 1;
2445     chk_block(result);
2446   }
2447   return result;
2448 #endif
2449 
2450   size = zsize_of(dtype);
2451   if (dtype == DT_CMPLX ||
2452       dtype == DT_DCMPLX
2453       || (size !=1 && size != 2 && size != 4 && size != 8))
2454   {
2455     tmp_sptr = GetSPTRVal(opnd);
2456     if (tmp_sptr <= NOSYM)  /* atomic capture may have set this already */
2457       tmp_sptr = mkatomictemp(dtype);
2458     size_ili = ad_icon(size);
2459     ADDRTKNP(tmp_sptr, 1);
2460     loc_of(nme[LHS_IDX]);
2461     result = ll_make_atomic_load(size_ili, opnd[LHS_IDX],
2462                                  mk_address(tmp_sptr), opnd[MO_IDX]);
2463     iltb.callfg = 1;
2464     chk_block(result);
2465     opnd[TMP_SPTR_IDX] = tmp_sptr;
2466     nme[TMP_SPTR_IDX] = addnme(NT_VAR, tmp_sptr, 0, (INT)0);
2467     ldst_msz(DTYPEG(tmp_sptr), &ld, &st, &msz);
2468     result = ad3ili(ld, mk_address(tmp_sptr), nme[TMP_SPTR_IDX], msz);
2469     return result;
2470   } else if (dtype != DT_NONE) {
2471     OPCODES const * ops;
2472     ldst_msz(dtype, &ld, &st, &msz);
2473     ops = get_ops(msz, 1);
2474     opnd[TMP_SPTR_IDX] = 0;
2475     result = ad4ili(ops->atomicld, opnd[LHS_IDX], nme[LHS_IDX], stc, opnd[MO_IDX]);
2476     return result;
2477   }
2478   return 0;
2479 }
2480 
2481 /* Obtain dtype from sym from ILMs */
2482 static DTYPE
get_dtype_from_ilm(ILM * ilmp)2483 get_dtype_from_ilm(ILM *ilmp)
2484 {
2485   SPTR sym;
2486   DTYPE dtype;
2487   ILM *ilmp2;
2488 
2489   ilmp2 = (ILM *)(ilmb.ilm_base+ILM_OPND(ilmp, 1));
2490   switch (ILM_OPC(ilmp2)) {
2491   case IM_BASE:
2492     sym = ILM_SymOPND(ilmp2, 1);
2493     dtype = DTYPEG(sym);
2494     break;
2495   case IM_PLD:
2496   case IM_MEMBER:
2497     sym = ILM_SymOPND(ilmp2, 2);
2498     dtype = DTYPEG(sym);
2499     break;
2500   case IM_ELEMENT:
2501   case IM_INLELEM:
2502     dtype = ILM_DTyOPND(ilmp2, 3);
2503     break;
2504   default:
2505     interr("get_dtype_from_ilm: unexpected ILM opc", ILM_OPND(ilmp2, 1), ERR_Severe);
2506   }
2507   return dtype;
2508 }
2509 
2510 int
exp_mp_atomic_read(ILM * ilmp)2511 exp_mp_atomic_read(ILM *ilmp)
2512 {
2513   int stc;
2514   ILI_OP ld,st;
2515   MSZ msz;
2516   DTYPE dtype;
2517   int opnd[MAX_ATOMIC_ARGS];
2518   int nme[MAX_ATOMIC_ARGS];
2519 
2520   nme[LHS_IDX] = NME_OF(ILM_OPND(ilmp, 1));
2521   dtype = dt_nme(nme[LHS_IDX]);
2522   if (!dtype) {
2523     dtype = get_dtype_from_ilm(ilmp);
2524   }
2525   ldst_msz(dtype, &ld, &st, &msz);
2526   stc = atomic_encode(msz, SS_PROCESS, AORG_OPENMP);
2527   opnd[LHS_IDX] = ILI_OF(ILM_OPND(ilmp, 1));
2528 
2529   if (ILM_OPND(ilmp, 2) == MO_SEQ_CST)
2530     opnd[MO_IDX] = ad_icon(5);
2531   else
2532     opnd[MO_IDX] = ad_icon(0);
2533 
2534   opnd[TMP_SPTR_IDX] = 0;
2535   return  _exp_mp_atomic_read(stc, dtype, opnd, nme);
2536 }
2537 
2538 static void
_exp_mp_atomic_write(int stc,DTYPE dtype,int * opnd,int * nme)2539 _exp_mp_atomic_write(int stc, DTYPE dtype, int* opnd, int* nme)
2540 {
2541   int rmw, result;
2542   ISZ_T size;
2543   int size_ili;
2544   OPCODES const * ops;
2545   SPTR tmp_sptr = NOSYM;
2546   ILI_OP ld, st;
2547   MSZ msz;
2548 
2549 #if use_kmpc_rte
2550    /* FIXME */
2551   result = ll_make_kmpc_atomic_write(opnd, dtype);
2552   if (result) {
2553     iltb.callfg = 1;
2554     chk_block(result);
2555   }
2556   return;
2557 #endif
2558 
2559   size = zsize_of(dtype);
2560   if (dtype == DT_CMPLX ||
2561       dtype == DT_DCMPLX
2562       || (size !=1 && size != 2 && size != 4 && size != 8))
2563   {
2564     tmp_sptr = mkatomictemp(dtype);
2565     nme[TMP_SPTR_IDX] = addnme(NT_VAR, tmp_sptr, 0, (INT)0);
2566 
2567     ldst_msz(dtype, &ld, &st, &msz);
2568     result = ad4ili(st, opnd[RHS_IDX], mk_address(tmp_sptr),
2569                    nme[TMP_SPTR_IDX], msz);
2570     chk_block(result);
2571     size_ili = ad_icon(size);
2572     ADDRTKNP(tmp_sptr, 1);
2573     loc_of(nme[LHS_IDX]);
2574     result = ll_make_atomic_store(size_ili, opnd[LHS_IDX],
2575                                   mk_address(tmp_sptr), opnd[MO_IDX]);
2576     iltb.callfg = 1;
2577     chk_block(result);
2578     return;
2579   } else if (dtype != DT_VOID_NONE) {
2580     ldst_msz(dtype, &ld, &st, &msz);
2581     ops = get_ops(msz, 1);
2582     result = ad5ili(ops->atomicst, opnd[RHS_IDX], opnd[LHS_IDX],
2583                     nme[LHS_IDX], stc, opnd[MO_IDX]);
2584     chk_block(result);
2585   }
2586   return;
2587 }
2588 
2589 void
exp_mp_atomic_write(ILM * ilmp)2590 exp_mp_atomic_write(ILM *ilmp)
2591 {
2592   int rmw, result;
2593   int size, stc;
2594   int size_ili;
2595   int opnd[MAX_ATOMIC_ARGS];
2596   int nme[MAX_ATOMIC_ARGS];
2597   SPTR tmp_sptr;
2598   DTYPE dtype;
2599 
2600   nme[LHS_IDX] = NME_OF(ILM_OPND(ilmp, 1));
2601   dtype = dt_nme(nme[LHS_IDX]);
2602   if (!dtype) {
2603     dtype = get_dtype_from_ilm(ilmp);
2604   }
2605   stc = atomic_encode(mem_size(DTY(dtype)),
2606                       SS_PROCESS, AORG_OPENMP);
2607 
2608   opnd[LHS_IDX] = ILI_OF(ILM_OPND(ilmp, 1));
2609   opnd[RHS_IDX] = ILI_OF(ILM_OPND(ilmp, 2));
2610 
2611   /* TODO: assert opnd[LHS_IDX] != opnd[RHS_IDX] */
2612   if (ILM_OPND(ilmp, 3) == MO_SEQ_CST)
2613     opnd[MO_IDX] = ad_icon(5);
2614   else
2615     opnd[MO_IDX] = ad_icon(0);
2616   _exp_mp_atomic_write(stc, dtype, opnd, nme);
2617 
2618 }
2619 
2620 static bool
can_use_rmw(DTYPE dtype,ATOMIC_RMW_OP aop)2621 can_use_rmw(DTYPE dtype, ATOMIC_RMW_OP aop)
2622 {
2623   if ((unsigned)aop > (unsigned)AOP_MAX_DEF)
2624     return false;
2625 
2626   if (zsize_of(dtype) > 8)
2627     return false;
2628 
2629   switch(dtype) {
2630   case DT_BLOG:
2631   case DT_SLOG:
2632   case DT_LOG:
2633   case DT_LOG8:
2634   case DT_BINT:
2635   case DT_SINT:
2636   case DT_INT:
2637   case DT_INT8:
2638   case DT_CPTR:
2639     return true;
2640   default:
2641     return false;
2642   }
2643 }
2644 
2645 static bool
is_cse(int ilix)2646 is_cse(int ilix)
2647 {
2648   switch(ILI_OPC(ilix)) {
2649   case IL_CSEIR:
2650   case IL_CSESP:
2651   case IL_CSEDP:
2652   case IL_CSECS:
2653   case IL_CSECD:
2654   case IL_CSEAR:
2655   case IL_CSEKR:
2656   case IL_CSE:
2657   case IL_CSETB:
2658    return true;
2659   default:
2660    return false;
2661   }
2662 }
2663 
2664 
/** \brief Recognise an update of the form "x = x op expr" / "x = expr op x".

    opnd[RHS_IDX] is taken to be a binary ILI.  If one of its first two
    operands (looking through one level of CSE) is a load whose address
    operand is opnd[LHS_IDX], the other operand is the candidate result.  The
    opcode of the right-hand side is then mapped onto an AOP_xxx value, which
    is written to opnd[AOP_IDX].

    \param opnd  in: opnd[LHS_IDX] (address ILI), opnd[RHS_IDX] (value ILI);
                 out: opnd[AOP_IDX] when the opcode is recognised
    \param ilmp  not referenced by this function
    \return ilix of the non-x operand, or 0 if the pattern does not match or
            the opcode is not one of those listed below */
static int
get_simple_update_operand(int* opnd, ILM* ilmp)
{
  int lhs, rhs, acon;
  int op1, op2, expr;
  ILI_OP opc;

  expr = 0;
  lhs = opnd[LHS_IDX];
  rhs = opnd[RHS_IDX];
  op1 = ILI_OPND(rhs, 1);
  op2 = ILI_OPND(rhs, 2);
  opc = ILI_OPC(rhs);

  /* Case 1: x is the first operand (possibly wrapped in a CSE). */
  if (is_cse(op1)) {
    op1 = ILI_OPND(op1, 1);
  }
  if (IL_TYPE(ILI_OPC(op1)) == ILTY_LOAD) {
    acon = ILI_OPND(op1, 1); /* address operand of the load */
    if (acon == lhs) {
      /* make sure that lhs is also not present in op2 as iliutil can also
       * change x = 2*x to x = x + x
       */
      if (find_ili(lhs, op2))
        return expr;
      expr = op2;
      goto check_opc;
    }
  }

  /* Case 2: x is the second operand (possibly wrapped in a CSE). */
  if (is_cse(op2)) {
    op2 = ILI_OPND(op2, 1);
  }
  if (IL_TYPE(ILI_OPC(op2)) == ILTY_LOAD) {
    acon = ILI_OPND(op2, 1); /* address operand of the load */
    if (acon == lhs) {
      /* NOTE(review): this tests op2, the load that was just matched, whereas
         case 1 tests the *other* operand.  By symmetry op1 looks like the
         intended argument - confirm against find_ili's contract. */
      if (find_ili(lhs, op2))
        return expr;
      expr = op1;
      goto check_opc;
    }
  }

  /* Neither operand matched: control still reaches check_opc with expr == 0,
     so opnd[AOP_IDX] may be written even though 0 is returned. */

check_opc:
  /* check second operand */
  /* NOTE(review): the operand position of x is not recorded here, so
     "x = expr - x" maps to AOP_SUB just like "x = x - expr"; presumably the
     caller distinguishes the two - verify. */
  switch(opc) {
  case IL_IADD:
  case IL_UIADD:
  case IL_KADD:
  case IL_UKADD:
    opnd[AOP_IDX] = AOP_ADD;
    break;
  case IL_ISUB:
  case IL_UISUB:
  case IL_KSUB:
  case IL_UKSUB:
    opnd[AOP_IDX] = AOP_SUB;
    break;
  case IL_AND:
  case IL_KAND:
    opnd[AOP_IDX] = AOP_AND;
    break;
  case IL_OR:
  case IL_KOR:
    opnd[AOP_IDX] = AOP_OR;
    break;
  /* NOTE(review): IL_KXOR is not listed although IL_KAND and IL_KOR are, so
     a K-register xor is reported as "not simple" - confirm whether that is
     deliberate. */
  case IL_XOR:
    opnd[AOP_IDX]= AOP_XOR;
    break;
  case IL_IMIN:
  case IL_UIMIN:
  case IL_KMIN:
  case IL_UKMIN:
    opnd[AOP_IDX] = AOP_MIN;
    break;
  case IL_IMAX:
  case IL_UIMAX:
  case IL_KMAX:
  case IL_UKMAX:
    opnd[AOP_IDX] = AOP_MAX;
    break;
  default:
    return 0;
  }

  return expr;
}
2753 
2754 
2755 static bool
lhs_match_rhs(int lop,int rop)2756 lhs_match_rhs(int lop, int rop)
2757 {
2758   int j, noprs;
2759   ILI_OP opc = ILI_OPC(lop);
2760 
2761   if (opc != ILI_OPC(rop))
2762     return false;
2763 
2764   noprs =  IL_OPRS(ILI_OPC(lop));
2765   for (j = 1; j <= noprs; ++j) {
2766     if  (IL_ISLINK(opc, j)) {
2767       return lhs_match_rhs(ILI_OPND(lop, j), ILI_OPND(rop, j));
2768     }
2769     if (ILI_OPND(lop, j) != ILI_OPND(rop, j)) {
2770       return false;
2771     }
2772   }
2773   return true;
2774 }
2775 
2776 
2777 
2778 /* if there is a call on lhs, make sure ili trees are exactly the same
2779  * except that the ili number may be different because we issue different
2780  * ili for calls.
2781  */
2782 
2783 static void
_ilis_are_matched(int rhs,int lhs,int * res,int * load)2784 _ilis_are_matched(int rhs, int lhs, int* res, int* load)
2785 {
2786   int rop1, rop2, j;
2787   ILI_OP lopc;
2788   int noprs;
2789   int lop1, lop2, opc;
2790 
2791   lop1 = ILI_OPND(lhs, 1);
2792   lop2 = ILI_OPND(lhs, 2);
2793   opc = ILI_OPC(lhs);
2794 
2795   if (ILI_OPC(rhs) == opc) {
2796     rop1 = ILI_OPND(rhs, 1);
2797     rop2 = ILI_OPND(rhs, 2);
2798     if (rop1 == lop1) {
2799       if (lhs_match_rhs(lop2, rop2)) {
2800         if (*res) {
2801           /* multiple occurrences of lhs on rhs */
2802           error(S_0155_OP1_OP2, ERR_Severe, gbl.lineno, "Invalid atomic statement.", CNULL);
2803         }
2804         *res = rhs;
2805         return;
2806       }
2807     }
2808   }
2809 
2810   lopc = ILI_OPC(rhs);
2811   noprs =  IL_OPRS(lopc);
2812   for (j = 1; j <= noprs; ++j) {
2813     if (IL_ISLINK(lopc, j)) {
2814       int opnd = ILI_OPND(rhs, j);
2815 
2816       _ilis_are_matched(opnd, lhs, res, load);
2817       if (*res && *load == 0) {
2818         if (IL_TYPE(ILI_OPC(rhs)) == ILTY_LOAD)
2819           *load = rhs;
2820       }
2821     }
2822   }
2823 
2824   return;
2825 }
2826 
2827 static int
load_op_match_lhs(int lhs,int rhs)2828 load_op_match_lhs(int lhs, int rhs)
2829 {
2830   int res, v, nxt, op1, op2, load;
2831   if (lhs == rhs)
2832     return 0;
2833 
2834   res = 0;
2835   load = 0;
2836 
2837   /* We check for calls and IL_AADD/ASUB are the only opc
2838    * we are looking because calls on lhs occurs when we are
2839    * in addressing mode,i.e., a[sub()...]=, a+sub()...=.
2840    * We assume that calls with same arguments will return
2841    * the same value.
2842    */
2843   if (ILI_OPC(lhs) == IL_AADD || ILI_OPC(lhs) == IL_ASUB) {
2844     op1 = ILI_OPND(lhs, 1);
2845     op2 = ILI_OPND(lhs, 2);
2846     if (find_ili(rhs, op1)) {
2847       ili_unvisit();
2848       _ilis_are_matched(rhs, lhs, &res, &load);
2849     } else
2850       ili_unvisit();
2851   }
2852 
2853   return load;
2854 
2855 }
2856 
2857 static int
get_complex_update_operand(int * opnd,ILM * ilmp,int * nme,DTYPE dtype)2858 get_complex_update_operand(int* opnd, ILM* ilmp, int* nme, DTYPE dtype)
2859 {
2860   int lhs, rhs, ili, stc, load, op1, lop;
2861   int expected_val;
2862   ILI_OP ld, st;
2863   MSZ msz;
2864   SPTR tmp_sptr;
2865   OPCODES const* ops;
2866 
2867   lhs = opnd[LHS_IDX];
2868   rhs = opnd[RHS_IDX];
2869   ldst_msz(dtype, &ld, &st, &msz);
2870   load = ad3ili(ld, lhs, nme[LHS_IDX], msz);
2871   if (!find_ili(rhs, load)) {
2872     ili_unvisit();
2873     if (find_ili(rhs, lhs)) {
2874       ili_unvisit(); /* illlegel update statement */
2875       return 0;
2876     } else {
2877       ili_unvisit();
2878       if ((lop = load_op_match_lhs(lhs, rhs)) == 0)
2879         return 0;
2880 
2881       load = lop;
2882     }
2883   } else
2884     ili_unvisit();
2885 
2886   stc = atomic_encode(msz, SS_PROCESS, AORG_OPENMP);
2887   expected_val = _exp_mp_atomic_read(stc, dtype, opnd, nme);
2888 
2889   tmp_sptr = GetSPTRVal(opnd);
2890   if (!tmp_sptr) {
2891     opnd[TMP_SPTR_IDX] = tmp_sptr = mkatomictemp(dtype);
2892     nme[TMP_SPTR_IDX] = addnme(NT_VAR, tmp_sptr, 0, 0);
2893     ldst_msz(dtype, &ld, &st, &msz);
2894     ili = ad4ili(st, expected_val, mk_address(tmp_sptr),
2895                    nme[TMP_SPTR_IDX], msz);
2896     chk_block(ili);
2897   }
2898   expected_val = ad3ili(ld, mk_address(tmp_sptr), nme[5], msz);
2899   opnd[RHS_IDX] = rewr_ili(rhs, load, expected_val);
2900   rewr_cln_ili();
2901   return expected_val;
2902 }
2903 
2904 static void
_exp_mp_atomic_update(DTYPE dtype,int * opnd,int * nme)2905 _exp_mp_atomic_update(DTYPE dtype, int* opnd, int* nme)
2906 {
2907   int rmw, result, stc;
2908   int size_ili;
2909   SPTR label;
2910   int expected_val, desired_val, cmpxchg;
2911   ILI_OP ld, st;
2912   SPTR expected_sptr, desired_sptr;
2913   MSZ msz;
2914   OPCODES const * ops;
2915   ISZ_T size = zsize_of(dtype);
2916 
2917   expected_sptr = GetSPTRVal(opnd);
2918   {
2919     label = getlab();
2920     BIH_LABEL(expb.curbih) = label;
2921     ILIBLKP(label, expb.curbih);
2922     desired_sptr = mkatomictemp(dtype);
2923     desired_val = opnd[RHS_IDX];
2924     ldst_msz(DTYPEG(desired_sptr), &ld, &st, &msz);
2925     result = ad4ili(st, desired_val, mk_address(desired_sptr),
2926                     addnme(NT_VAR, desired_sptr, 0, (INT)0),
2927                     msz);
2928     ASSNP(desired_sptr, 1);
2929     chk_block(result);
2930 
2931     if (dtype == DT_CMPLX ||
2932         dtype == DT_DCMPLX
2933         || (size != 1 && size != 2 && size !=4 && size !=8))
2934     {
2935       size_ili = ad_icon(size);
2936       ADDRTKNP(expected_sptr, 1);
2937       ADDRTKNP(desired_sptr, 1);
2938       loc_of(nme[LHS_IDX]);
2939       result = ll_make_atomic_compare_xchg(size_ili, opnd[LHS_IDX],
2940                                          mk_address(expected_sptr),
2941                                          mk_address(desired_sptr),
2942                                          opnd[MO_IDX], ad_icon(0));
2943       iltb.callfg = 1;
2944     } else {
2945       /* need to load and store as integer and with exact same size
2946        original : float 		loadi   MSZ_WORD
2947                   double 		loadK   MSZ_I8
2948                   short  		loads   MSZ_USWORD
2949                   unsigned short  	loads   MSZ_USWORD
2950 		  byte  		loadb   MSZ_BYTE
2951        */
2952       msz = msz_from_atomic_dtype(dtype);
2953       ops = get_ops(msz, 1);
2954       ADDRTKNP(expected_sptr, 1);
2955       ADDRTKNP(desired_sptr, 1);
2956       desired_val = ad3ili(ops->ld,
2957                      mk_address(desired_sptr),
2958                      addnme(NT_VAR, desired_sptr, 0, (INT)0),
2959                      msz);
2960       expected_val = ad3ili(ops->ld,
2961                      mk_address(expected_sptr),
2962                      addnme(NT_VAR, expected_sptr, 0, (INT)0),
2963                      msz);
2964       stc = atomic_encode(msz, SS_PROCESS, AORG_OPENMP);
2965       loc_of(nme[LHS_IDX]);
2966       cmpxchg = ad_cmpxchg(ops->cmpxchg, desired_val, opnd[LHS_IDX], nme[LHS_IDX],
2967                            stc, expected_val, ad_icon(0), opnd[MO_IDX],
2968                            ad_icon(0));
2969 
2970       cmpxchg = ad_cse(cmpxchg);
2971       expected_val = ad1ili(ops->cmpxchg_old, cmpxchg);
2972       result = ad4ili(ops->st, expected_val, mk_address(expected_sptr),
2973                       addnme(NT_VAR, expected_sptr, 0, (INT)0),
2974                       msz);
2975       chk_block(result);
2976 
2977       result = ad1ili(IL_CMPXCHG_SUCCESS, cmpxchg);
2978       result = ad_cse(result);
2979     }
2980 
2981     result = ad3ili(IL_ICJMPZ, result, CC_EQ, label);
2982     RFCNTI(label);
2983     chk_block(result);
2984   }
2985 }
2986 
2987 
2988 void
exp_mp_atomic_update(ILM * ilmp)2989 exp_mp_atomic_update(ILM *ilmp)
2990 {
2991   int rmw, result;
2992   int size, stc, rhs;
2993   int size_ili, label, op1;
2994   int expected_val, desired_val;
2995   int opnd[MAX_ATOMIC_ARGS];
2996   int nme[MAX_ATOMIC_ARGS];
2997   DTYPE dtype;
2998   ATOMIC_RMW_OP aop = (ATOMIC_RMW_OP) ILM_OPND(ilmp, 4); // ???
2999   ILI_OP opc, ld, st;
3000   SPTR expected_sptr, desired_sptr;
3001   MSZ msz;
3002   OPCODES const * ops;
3003 
3004   opnd[LHS_IDX] = ILI_OF(ILM_OPND(ilmp, 1));
3005   opnd[RHS_IDX] = ILI_OF(ILM_OPND(ilmp, 2));
3006   nme[LHS_IDX] = NME_OF(ILM_OPND(ilmp, 1));
3007   dtype = dt_nme(nme[LHS_IDX]);
3008   if (!dtype) {
3009     dtype = get_dtype_from_ilm(ilmp);
3010   }
3011   set_assn(nme[0]);
3012   expected_sptr = SPTR_NULL;
3013 
3014   /* Don't use CSE for LHS */
3015   op1 = opnd[LHS_IDX];
3016   if (is_cse(op1)) {
3017     op1 = ILI_OPND(op1, 1);
3018     opnd[LHS_IDX] = op1;
3019   }
3020 
3021   if (ILM_OPND(ilmp, 3) == MO_SEQ_CST)
3022     opnd[MO_IDX] = ad_icon(5);
3023   else
3024     opnd[MO_IDX] = ad_icon(0);
3025 
3026   ldst_msz(dtype, &ld, &st, &msz);
3027   ops = get_ops(msz, 1);
3028   rhs = get_simple_update_operand(opnd, ilmp);
3029   if (rhs && can_use_rmw(dtype, GetAtomicOp(opnd))) {
3030     stc = atomic_encode_rmw(mem_size(DTY(dtype)),
3031                             SS_PROCESS, AORG_OPENMP, GetAtomicOp(opnd));
3032     loc_of(nme[LHS_IDX]);
3033     result = ad5ili(ops->atomicrmw, rhs, opnd[LHS_IDX], nme[LHS_IDX], stc, opnd[MO_IDX]);
3034     expected_sptr = mkatomictemp(dtype);
3035     result = ad4ili(st, result, mk_address(expected_sptr),
3036                     addnme(NT_VAR, expected_sptr, 0, (INT)0), msz);
3037     chk_block(result);
3038   } else {
3039     opnd[TMP_SPTR_IDX] = 0;
3040     expected_val = get_complex_update_operand(opnd, ilmp, nme, dtype);
3041     if (expected_val == 0) {
3042         error(S_0155_OP1_OP2, ERR_Severe, gbl.lineno,
3043               "Invalid atomic update statement.", CNULL);
3044     }
3045     expected_sptr = GetSPTRVal(opnd);
3046     ASSNP(expected_sptr, 1);
3047 
3048     if (!expected_val && !expected_sptr)
3049       return;
3050 
3051     wr_block();
3052     cr_block();
3053     _exp_mp_atomic_update(dtype, opnd, nme);
3054   }
3055 }
3056 
3057 #ifndef DT_VOID_NONE
3058 #define DT_VOID_NONE DT_NONE
3059 #endif
3060 
3061 void
exp_mp_atomic_capture(ILM * ilmp)3062 exp_mp_atomic_capture(ILM *ilmp)
3063 {
3064   int expected_val;
3065   SPTR expected_sptr;
3066   int load, desired_val, cseload;
3067   int opnd[MAX_ATOMIC_ARGS];
3068   int nme[MAX_ATOMIC_ARGS];
3069   int cnt, stc, result, rhs, ilm_opc, op1;
3070   ILI_OP ld;
3071   ILI_OP st;
3072   MSZ msz;
3073   const ILM *op_ilmp;
3074   const OPCODES* ops;
3075 
3076   typedef enum CPT_IDX {
3077     FIRST = 0,
3078     SECOND = 1,
3079   } CPT_IDX;
3080 
3081   static struct cpt_struct {
3082     int cnt;
3083     int lhs[2];
3084     int rhs[2];
3085     int nme[2];
3086     DTYPE dtype[2];
3087     int mem_order[2];
3088     int isupdate[2];
3089     bool error;
3090     SPTR tmp_sptr;
3091   } cpt;
3092 
3093   if (cpt.cnt == 0 || cpt.cnt > 1) {
3094     cpt.cnt = 0;
3095     memset(&cpt, 0, sizeof(cpt));
3096   }
3097   cnt = cpt.cnt;
3098   cpt.lhs[cnt] = ILI_OF(ILM_OPND(ilmp, 1));
3099   cpt.rhs[cnt] = ILI_OF(ILM_OPND(ilmp, 2));
3100   cpt.nme[cnt] = nme[LHS_IDX] = NME_OF(ILM_OPND(ilmp, 1));
3101   cpt.dtype[cnt] = dt_nme(nme[LHS_IDX]);
3102   if (!cpt.dtype[cnt]) {
3103     cpt.dtype[cnt] = get_dtype_from_ilm(ilmp);
3104   }
3105   cpt.mem_order[cnt] = ILM_OPND(ilmp, 3);
3106 
3107   /* Don't use CSE for LHS */
3108   op1 = cpt.lhs[cnt];
3109   if (is_cse(op1)) {
3110     op1 = ILI_OPND(op1, 1);
3111     cpt.lhs[cnt] = op1;
3112   }
3113 
3114   opnd[LHS_IDX] = cpt.lhs[cnt];
3115   opnd[RHS_IDX] = cpt.rhs[cnt];
3116   if (cpt.mem_order[cnt] == MO_SEQ_CST)
3117     opnd[MO_IDX] = ad_icon(5);
3118   else
3119     opnd[MO_IDX] = ad_icon(0);
3120   set_assn(nme[LHS_IDX]);
3121 
3122 
3123   rhs = get_simple_update_operand(opnd, ilmp);
3124   if (rhs && can_use_rmw(cpt.dtype[cnt], GetAtomicOp(opnd))) {
3125     stc = atomic_encode_rmw(mem_size(DTY(cpt.dtype[cnt])),
3126                             SS_PROCESS, AORG_OPENMP,
3127                             GetAtomicOp(opnd));
3128     loc_of(nme[LHS_IDX]);
3129     ldst_msz(cpt.dtype[cnt], &ld, &st, &msz);
3130     ops = get_ops(msz, 1);
3131     result = ad5ili(ops->atomicrmw, rhs, opnd[LHS_IDX], nme[LHS_IDX], stc, opnd[MO_IDX]);
3132     cpt.tmp_sptr = expected_sptr = mkatomictemp(cpt.dtype[cnt]);
3133     result = ad4ili(st, result, mk_address(expected_sptr),
3134                     addnme(NT_VAR, expected_sptr, 0, 0), msz);
3135     chk_block(result);
3136     cpt.isupdate[cnt] = result;
3137     ASSNP(expected_sptr, 1);
3138     opnd[TMP_SPTR_IDX] = cpt.tmp_sptr;
3139     if (cnt == FIRST) {
3140       cpt.cnt++;
3141       return;
3142     }
3143   } else {
3144     opnd[TMP_SPTR_IDX] = cpt.tmp_sptr;
3145     expected_val = get_complex_update_operand(opnd, ilmp, nme, cpt.dtype[cnt]);
3146     cpt.isupdate[cnt] = expected_val;
3147     if (expected_val && !GetSPTRVal(opnd)) {
3148       cpt.error = true;
3149       goto capture_end;
3150     }
3151     if (expected_val) {
3152       _exp_mp_atomic_update(cpt.dtype[cnt], opnd, nme);
3153       cpt.tmp_sptr = GetSPTRVal(opnd);
3154     } else if (cnt == FIRST) {
3155       /* assume this is a capture, don't do anything just yet */
3156       goto capture_end;
3157     }
3158   }
3159 
3160   if (cpt.isupdate[FIRST] && cpt.isupdate[SECOND]) {
3161     cpt.error = true;
3162   }
3163 
3164   if (cnt == SECOND) {
3165     if (cpt.isupdate[SECOND]) { /* 1: v = x 2: x = x + 1 */
3166       /* assert: cpt.rhs[FIRST] == cpt.lhs[SECOND] */
3167       if (find_ili(cpt.rhs[FIRST], cpt.lhs[SECOND])) {
3168         ldst_msz(cpt.dtype[SECOND], &ld, &st, &msz);
3169         load = ad3ili(ld, cpt.lhs[SECOND], cpt.nme[SECOND], msz);
3170       } else {
3171         load = load_op_match_lhs(cpt.lhs[SECOND], cpt.rhs[FIRST]);
3172         if (load == 0) {
3173           cpt.error = true;
3174           goto capture_end;
3175         }
3176       }
3177 
3178       cpt.tmp_sptr = GetSPTRVal(opnd);
3179       if (cpt.tmp_sptr <= NOSYM) {
3180         cpt.error = true;
3181         goto capture_end;
3182       }
3183       /* replace ili of load:x with a load of tmp */
3184       ldst_msz(DTYPEG(cpt.tmp_sptr), &ld, &st, &msz);
3185       expected_val = ad3ili(ld, mk_address(cpt.tmp_sptr),
3186                           addnme(NT_VAR, cpt.tmp_sptr, 0, (INT)0), msz);
3187       expected_val = rewr_ili(cpt.rhs[FIRST], load, expected_val);
3188       rewr_cln_ili();
3189 
3190       /* assign value to v */
3191       ldst_msz(cpt.dtype[FIRST], &ld, &st, &msz);
3192       result = ad4ili(st, expected_val, cpt.lhs[FIRST], cpt.nme[FIRST], msz);
3193       chk_block(result);
3194     } else if (cpt.isupdate[FIRST]) { /* 1: x = x +1; 2: v = x */
3195       /* assert: cpt.rhs[FIRST] == cpt.lhs[SECOND] */
3196       /* replace a load of x with new ili and store to x */
3197       cpt.tmp_sptr = GetSPTRVal(opnd);
3198       if (cpt.tmp_sptr <= NOSYM) {
3199         cpt.error = true;
3200         goto capture_end;
3201       }
3202 
3203       ldst_msz(DTYPEG(cpt.tmp_sptr), &ld, &st, &msz);
3204       expected_val = ad3ili(ld, mk_address(cpt.tmp_sptr),
3205                           addnme(NT_VAR, cpt.tmp_sptr, 0, 0), msz);
3206 
3207       ldst_msz(cpt.dtype[FIRST], &ld, &st, &msz);
3208       load = ad3ili(ld, cpt.lhs[FIRST], cpt.nme[FIRST], msz);
3209 
3210       {
3211         if (!find_ili(cpt.rhs[SECOND], cpt.lhs[FIRST])) {
3212           if (load_op_match_lhs(cpt.lhs[FIRST], cpt.rhs[SECOND]) == 0) {
3213             cpt.error = true;
3214             goto capture_end;
3215           }
3216         }
3217         /* Grab rhs expression of update statement and
3218          * replace load of x with load of tmp
3219          * We will assign this expression to v.
3220          */
3221         expected_val = rewr_ili(cpt.rhs[FIRST], load, expected_val);
3222         rewr_cln_ili();
3223 
3224         /* Replace a load of x in v = x; with expected_val.
3225          * The only reason we do if there is a type conversion when
3226          * assigning x to v.
3227          */
3228         expected_val = rewr_ili(cpt.rhs[SECOND], load, expected_val);
3229       }
3230 
3231       rewr_cln_ili();
3232 
3233       /* assign value to v */
3234       ldst_msz(cpt.dtype[SECOND], &ld, &st, &msz);
3235       result = ad4ili(st, expected_val, cpt.lhs[SECOND], cpt.nme[SECOND], msz);
3236       chk_block(result);
3237     } else {
3238       /* 1: v = x, 2: x = expr */
3239       /* assert: cpt.rhs[FIRST] == cpt.lhs[SECOND] */
3240 
3241      if (find_ili(cpt.rhs[FIRST], cpt.lhs[SECOND])) {
3242        ldst_msz(cpt.dtype[SECOND], &ld, &st, &msz);
3243        load = ad3ili(ld, cpt.lhs[SECOND], cpt.nme[SECOND], msz);
3244      } else {
3245        load = load_op_match_lhs(cpt.lhs[SECOND], cpt.rhs[FIRST]);
3246        if (load == 0) {
3247          cpt.error = true;
3248          goto capture_end;
3249        }
3250      }
3251 
3252       opnd[TMP_SPTR_IDX] = mkatomictemp(cpt.dtype[SECOND]);
3253       _exp_mp_atomic_update(cpt.dtype[cnt], opnd, nme);
3254 
3255       ldst_msz(cpt.dtype[SECOND], &ld, &st, &msz);
3256       expected_val = ad3ili(ld, mk_address(GetSPTRVal(opnd)),
3257                             addnme(NT_VAR, GetSPTRVal(opnd), 0, 0), msz);
3258       if (cpt.dtype[FIRST] != cpt.dtype[SECOND]) {
3259         /* possible conversion */
3260         expected_val = rewr_ili(cpt.rhs[FIRST], load, expected_val);
3261         rewr_cln_ili();
3262       }
3263       ldst_msz(cpt.dtype[FIRST], &ld, &st, &msz);
3264       result = ad4ili(st, expected_val, cpt.lhs[FIRST], cpt.nme[FIRST], msz);
3265       chk_block(result);
3266 
3267     }
3268   }
3269 
3270 capture_end:
3271   if (cpt.error) {
3272     error(S_0155_OP1_OP2, ERR_Severe, gbl.lineno, "Invalid atomic capture statement(s).", CNULL);
3273   }
3274   cpt.cnt++;
3275   return;
3276 }
3277