1 /*
2 * Copyright (c) 2015-2019, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *
16 */
17
18 /** \file
19 * \brief OpenMP/OpenACC/C++11 atomics expander routines; all targets including
20 * LLVM
21 */
22
23 #include "expatomics.h"
24 #include "exputil.h"
25 #include "error.h"
26 #include "dtypeutl.h"
27 #include "regutil.h"
28 #include "machreg.h"
29 #include "ilmtp.h"
30 #include "ilm.h"
31 #include "ili.h"
32 #define EXPANDER_DECLARE_INTERNAL
33 #include "expand.h"
34 #include "machar.h"
35 #include "ccffinfo.h"
36 #include "pd.h"
37 #include "symfun.h"
38
/* File-scope state for the atomic expander.  Several of these values are
 * read and written from other files through the get_xxx()/set_xxx()
 * accessor functions defined later in this file. */
static int atomic_capture_created;
static int atomic_capture_update_first;
static int atomic_store_created;
static int is_in_atomic;
static int is_in_atomic_read;
static int is_in_atomic_write;
static int is_in_atomic_capture;

static int capture_read_ili;
static int capture_update_ili;
/* Conversion opcode (IL_FIX, IL_DFIX, IL_SNGL, ...) that
 * get_atomic_function_ex() dispatches on; 0 means no conversion is pending. */
static int atomic_typecast_operand;
// this is for the non-commutable operators.
// If it is non-zero, AtomicOp.ili_operand is the
// 1st operand of the atomic binary operator.
static int is_atomic_operand1 = 0;
54
55 static int cmplx_atomic_opcodes[] = {IL_SCMPLXADD, IL_SCMPLXSUB};
56 static int num_cmplx_opcodes = sizeof(cmplx_atomic_opcodes) / sizeof(int);
57
58 static int float_atomic_opcodes[] = {IL_FADD, IL_FSUB, IL_FDIV,
59 IL_FMUL, IL_FMAX, IL_FMIN};
60 static int num_float_opcodes = sizeof(float_atomic_opcodes) / sizeof(int);
61
62 static int double_atomic_opcodes[] = {IL_DADD, IL_DSUB, IL_DDIV,
63 IL_DMUL, IL_DMAX, IL_DMIN};
64 static int num_double_opcodes = sizeof(double_atomic_opcodes) / sizeof(int);
65
66 // FIX: integer*4 <-- real*4
67 // UFIX: unsigned integer*4 <-- real*4
68 // DFIX: integer*4 <-- real*8
69 // DFIXU: unsigned integer*4 <-- real*8
70 static int int_atomic_opcodes[] = {
71 IL_IADD, IL_ISUB, IL_LEQV, IL_XOR, IL_IMUL, IL_UIMUL,
72 IL_AND, IL_OR, IL_UIADD, IL_UISUB, IL_UIMUL, IL_ULSHIFT,
73 IL_URSHIFT, IL_LSHIFT, IL_RSHIFT, IL_IDIV, IL_UIDIV, IL_IMAX,
74 IL_IMIN, IL_NOT, IL_INEG, IL_LD};
75 static int num_int_opcodes = sizeof(int_atomic_opcodes) / sizeof(int);
76
77 // FIXK: integer*8 <-- real*4
78 // FIXUK: unsigned integer*8 <-- real*4
79 // DFIXK: integer*8 <-- real*8
80 // DFIXUK: unsigned integer*8 <-- real*8
81 static int long_atomic_opcodes[] = {IL_KADD, IL_KSUB, IL_KXOR, IL_KMUL,
82 IL_UKMUL, IL_KAND, IL_KOR, IL_UKADD,
83 IL_UKSUB, IL_KDIV, IL_UKDIV
84 ,
85 IL_KMAX, IL_KMIN
86 };
87
88 typedef enum MP_ATOMIC_IDX {
89 LHS_IDX = 0,
90 RHS_IDX,
91 MO_IDX,
92 AOP_IDX,
93 UNUSED1,
94 TMP_SPTR_IDX
95 } MP_ATOMIC_IDX;
96
97 static int num_long_opcodes = sizeof(long_atomic_opcodes) / sizeof(int);
98
/* Description of the atomic operation currently being expanded.  It is
 * filled in by the get_atomic_*_opcode() routines and consumed by the
 * create_atomic_*_seq() routines in this file. */
static struct {
  int atomic_operand; /* load ILI for an atomic read, the IL_FREEIR ILI for
                       * an update that is only a FREEIR, 0 for a write */
  int ldst_point;     /* address ILI of the location loaded or stored */
  int ldst_nme;       /* NME operand that goes with ldst_point */
  int ili_operand;    /* value ILI being stored (write case); 0 for a read */
} AtomicOp;
105
/* Typed accessors for an int array indexed by MP_ATOMIC_IDX.  C++ builds get
 * inline functions that cast the slot to its enum/SPTR type; C builds get
 * equivalent macros. */
#ifdef __cplusplus

/* Return the AOP_IDX slot of `array` as an ATOMIC_RMW_OP. */
inline static ATOMIC_RMW_OP GetAtomicOp(int *array) {
  return static_cast<ATOMIC_RMW_OP>(array[AOP_IDX]);
}

/* Return the TMP_SPTR_IDX slot of `array` as an SPTR. */
inline static SPTR GetSPTRVal(int *array) {
  return static_cast<SPTR>(array[TMP_SPTR_IDX]);
}

#else // ! C++

/* The argument is parenthesized so that pointer expressions such as `p + 1`
 * are indexed as a whole instead of binding the subscript to the last term. */
#define GetAtomicOp(A) ((A)[AOP_IDX])
#define GetSPTRVal(A) ((A)[TMP_SPTR_IDX])

#endif // C++
122
123 int
get_atomic_function_ex(ILI_OP opcode)124 get_atomic_function_ex(ILI_OP opcode)
125 {
126 // the last two/three(if "r" is the last letter) letters of the function name
127 // i: integer 32bit
128 // f: float, single precision 32bit
129 // d: double,double precision 64bit
130 // u: unsigned integer 32bit
131 // k: integer 64bit
132 // l: unsigned 64bit
133 // r: reverse, non-commutable operator
134 // 32bit integer
135 if (atomic_typecast_operand == IL_FIX) {
136 switch (opcode) {
137 case IL_FADD:
138 return mk_prototype("atomicaddif", "pure", DT_INT, 2, DT_CPTR, DT_FLOAT);
139 case IL_FSUB:
140 if (is_atomic_operand1)
141 return mk_prototype("atomicsubifr", "pure", DT_INT, 2, DT_CPTR, DT_FLOAT);
142 else
143 return mk_prototype("atomicsubif", "pure", DT_INT, 2, DT_CPTR, DT_FLOAT);
144 case IL_FMUL:
145 return mk_prototype("atomicmulif", "pure", DT_INT, 2, DT_CPTR, DT_FLOAT);
146 case IL_FDIV:
147 if (is_atomic_operand1)
148 return mk_prototype("atomicdivifr", "pure", DT_INT, 2, DT_CPTR, DT_FLOAT);
149 else
150 return mk_prototype("atomicdivif", "pure", DT_INT, 2, DT_CPTR, DT_FLOAT);
151 case IL_FMAX:
152 return mk_prototype("atomicmaxif", "pure", DT_INT, 2, DT_CPTR, DT_FLOAT);
153 case IL_FMIN:
154 return mk_prototype("atomicminif", "pure", DT_INT, 2, DT_CPTR, DT_FLOAT);
155 default:;
156 }
157 } else if (atomic_typecast_operand == IL_UFIX) {
158 switch (opcode) {
159 case IL_FADD:
160 return mk_prototype("atomicadduf", "pure", DT_UINT, 2, DT_CPTR, DT_FLOAT);
161 case IL_FSUB:
162 if (is_atomic_operand1)
163 return mk_prototype("atomicsubufr", "pure", DT_UINT, 2, DT_CPTR, DT_FLOAT);
164 else
165 return mk_prototype("atomicsubuf", "pure", DT_UINT, 2, DT_CPTR, DT_FLOAT);
166 case IL_FMUL:
167 return mk_prototype("atomicmuluf", "pure", DT_UINT, 2, DT_CPTR, DT_FLOAT);
168 case IL_FDIV:
169 if (is_atomic_operand1)
170 return mk_prototype("atomicdivufr", "pure", DT_UINT, 2, DT_CPTR, DT_FLOAT);
171 else
172 return mk_prototype("atomicdivuf", "pure", DT_UINT, 2, DT_CPTR, DT_FLOAT);
173 case IL_FMAX:
174 return mk_prototype("atomicmaxuf", "pure", DT_UINT, 2, DT_CPTR, DT_FLOAT);
175 case IL_FMIN:
176 return mk_prototype("atomicminuf", "pure", DT_UINT, 2, DT_CPTR, DT_FLOAT);
177 default:;
178 }
179 } else if (atomic_typecast_operand == IL_DFIX) {
180 switch (opcode) {
181 case IL_DADD:
182 return mk_prototype("atomicaddid", "pure", DT_INT, 2, DT_CPTR, DT_DBLE);
183 case IL_DSUB:
184 if (is_atomic_operand1)
185 return mk_prototype("atomicsubidr", "pure", DT_INT, 2, DT_CPTR, DT_DBLE);
186 else
187 return mk_prototype("atomicsubid", "pure", DT_INT, 2, DT_CPTR, DT_DBLE);
188 case IL_DMUL:
189 return mk_prototype("atomicmulid", "pure", DT_INT, 2, DT_CPTR, DT_DBLE);
190 case IL_DDIV:
191 if (is_atomic_operand1)
192 return mk_prototype("atomicdividr", "pure", DT_INT, 2, DT_CPTR, DT_DBLE);
193 else
194 return mk_prototype("atomicdivid", "pure", DT_INT, 2, DT_CPTR, DT_DBLE);
195 case IL_DMAX:
196 return mk_prototype("atomicmaxid", "pure", DT_INT, 2, DT_CPTR, DT_DBLE);
197 case IL_DMIN:
198 return mk_prototype("atomicminid", "pure", DT_INT, 2, DT_CPTR, DT_DBLE);
199 default:;
200 }
201 } else if (atomic_typecast_operand == IL_DFIXU) {
202 switch (opcode) {
203 case IL_DADD:
204 return mk_prototype("atomicaddud", "pure", DT_UINT, 2, DT_CPTR, DT_DBLE);
205 case IL_DSUB:
206 if (is_atomic_operand1)
207 return mk_prototype("atomicsubudr", "pure", DT_UINT, 2, DT_CPTR, DT_DBLE);
208 else
209 return mk_prototype("atomicsubud", "pure", DT_UINT, 2, DT_CPTR, DT_DBLE);
210 case IL_DMUL:
211 return mk_prototype("atomicmulud", "pure", DT_UINT, 2, DT_CPTR, DT_DBLE);
212 case IL_DDIV:
213 if (is_atomic_operand1)
214 return mk_prototype("atomicdivudr", "pure", DT_UINT, 2, DT_CPTR, DT_DBLE);
215 else
216 return mk_prototype("atomicdivud", "pure", DT_UINT, 2, DT_CPTR, DT_DBLE);
217 case IL_DMAX:
218 return mk_prototype("atomicmaxud", "pure", DT_UINT, 2, DT_CPTR, DT_DBLE);
219 case IL_DMIN:
220 return mk_prototype("atomicminud", "pure", DT_UINT, 2, DT_CPTR, DT_DBLE);
221 default:;
222 }
223 }
224 // 64bits integer
225 else if (atomic_typecast_operand == IL_FIXK) {
226 switch (opcode) {
227 case IL_FADD:
228 return mk_prototype("atomicaddkf", "pure", DT_INT8, 2, DT_CPTR, DT_FLOAT);
229 case IL_FSUB:
230 if (is_atomic_operand1)
231 return mk_prototype("atomicsubkfr", "pure", DT_INT8, 2, DT_CPTR, DT_FLOAT);
232 else
233 return mk_prototype("atomicsubkf", "pure", DT_INT8, 2, DT_CPTR, DT_FLOAT);
234 case IL_FMUL:
235 return mk_prototype("atomicmulkf", "pure", DT_INT8, 2, DT_CPTR, DT_FLOAT);
236 case IL_FDIV:
237 if (is_atomic_operand1)
238 return mk_prototype("atomicdivkfr", "pure", DT_INT8, 2, DT_CPTR, DT_FLOAT);
239 else
240 return mk_prototype("atomicdivkf", "pure", DT_INT8, 2, DT_CPTR, DT_FLOAT);
241 case IL_FMAX:
242 return mk_prototype("atomicmaxkf", "pure", DT_INT8, 2, DT_CPTR, DT_FLOAT);
243 case IL_FMIN:
244 return mk_prototype("atomicminkf", "pure", DT_INT8, 2, DT_CPTR, DT_FLOAT);
245 default:;
246 }
247 } else if (atomic_typecast_operand == IL_FIXUK) {
248 switch (opcode) {
249 case IL_FADD:
250 return mk_prototype("atomicaddlf", "pure", DT_UINT8, 2, DT_CPTR, DT_FLOAT);
251 case IL_FSUB:
252 if (is_atomic_operand1)
253 return mk_prototype("atomicsublfr", "pure", DT_UINT8, 2, DT_CPTR, DT_FLOAT);
254 else
255 return mk_prototype("atomicsublf", "pure", DT_UINT8, 2, DT_CPTR, DT_FLOAT);
256 case IL_FMUL:
257 return mk_prototype("atomicmullf", "pure", DT_UINT8, 2, DT_CPTR, DT_FLOAT);
258 case IL_FDIV:
259 if (is_atomic_operand1)
260 return mk_prototype("atomicdivlfr", "pure", DT_UINT8, 2, DT_CPTR, DT_FLOAT);
261 else
262 return mk_prototype("atomicdivlf", "pure", DT_UINT8, 2, DT_CPTR, DT_FLOAT);
263 case IL_FMAX:
264 return mk_prototype("atomicmaxlf", "pure", DT_UINT8, 2, DT_CPTR, DT_FLOAT);
265 case IL_FMIN:
266 return mk_prototype("atomicminlf", "pure", DT_UINT8, 2, DT_CPTR, DT_FLOAT);
267 default:;
268 }
269 } else if (atomic_typecast_operand == IL_DFIXK) {
270 switch (opcode) {
271 case IL_DADD:
272 return mk_prototype("atomicaddkd", "pure", DT_INT8, 2, DT_CPTR, DT_DBLE);
273 case IL_DSUB:
274 if (is_atomic_operand1)
275 return mk_prototype("atomicsubkdr", "pure", DT_INT8, 2, DT_CPTR, DT_DBLE);
276 else
277 return mk_prototype("atomicsubkd", "pure", DT_INT8, 2, DT_CPTR, DT_DBLE);
278 case IL_DMUL:
279 return mk_prototype("atomicmulkd", "pure", DT_INT8, 2, DT_CPTR, DT_DBLE);
280 case IL_DDIV:
281 if (is_atomic_operand1)
282 return mk_prototype("atomicdivkdr", "pure", DT_INT8, 2, DT_CPTR, DT_DBLE);
283 else
284 return mk_prototype("atomicdivkd", "pure", DT_INT8, 2, DT_CPTR, DT_DBLE);
285 case IL_DMAX:
286 return mk_prototype("atomicmaxkd", "pure", DT_INT8, 2, DT_CPTR, DT_DBLE);
287 case IL_DMIN:
288 return mk_prototype("atomicminkd", "pure", DT_INT8, 2, DT_CPTR, DT_DBLE);
289 default:;
290 }
291 } else if (atomic_typecast_operand == IL_DFIXUK) {
292 switch (opcode) {
293 case IL_DADD:
294 return mk_prototype("atomicaddld", "pure", DT_UINT8, 2, DT_CPTR, DT_DBLE);
295 case IL_DSUB:
296 if (is_atomic_operand1)
297 return mk_prototype("atomicsubldr", "pure", DT_UINT8, 2, DT_CPTR, DT_DBLE);
298 else
299 return mk_prototype("atomicsubld", "pure", DT_UINT8, 2, DT_CPTR, DT_DBLE);
300 case IL_DMUL:
301 return mk_prototype("atomicmulld", "pure", DT_UINT8, 2, DT_CPTR, DT_DBLE);
302 case IL_DDIV:
303 if (is_atomic_operand1)
304 return mk_prototype("atomicdivldr", "pure", DT_UINT8, 2, DT_CPTR, DT_DBLE);
305 else
306 return mk_prototype("atomicdivld", "pure", DT_UINT8, 2, DT_CPTR, DT_DBLE);
307 case IL_DMAX:
308 return mk_prototype("atomicmaxld", "pure", DT_UINT8, 2, DT_CPTR, DT_DBLE);
309 case IL_DMIN:
310 return mk_prototype("atomicminld", "pure", DT_UINT8, 2, DT_CPTR, DT_DBLE);
311 default:;
312 }
313 } else if (atomic_typecast_operand == IL_SNGL) {
314 switch (opcode) {
315 case IL_DADD:
316 return mk_prototype("atomicaddfd", "pure", DT_FLOAT, 2, DT_CPTR, DT_DBLE);
317 case IL_DSUB:
318 if (is_atomic_operand1)
319 return mk_prototype("atomicsubfdr", "pure", DT_FLOAT, 2, DT_CPTR, DT_DBLE);
320 else
321 return mk_prototype("atomicsubfd", "pure", DT_FLOAT, 2, DT_CPTR, DT_DBLE);
322 case IL_DMUL:
323 return mk_prototype("atomicmulfd", "pure", DT_FLOAT, 2, DT_CPTR, DT_DBLE);
324 case IL_DDIV:
325 if (is_atomic_operand1)
326 return mk_prototype("atomicdivfdr", "pure", DT_FLOAT, 2, DT_CPTR, DT_DBLE);
327 else
328 return mk_prototype("atomicdivfd", "pure", DT_FLOAT, 2, DT_CPTR, DT_DBLE);
329 case IL_DMAX:
330 return mk_prototype("atomicmaxfd", "pure", DT_FLOAT, 2, DT_CPTR, DT_DBLE);
331 case IL_DMIN:
332 return mk_prototype("atomicminfd", "pure", DT_FLOAT, 2, DT_CPTR, DT_DBLE);
333 default:;
334 }
335 }
336
337 error(S_0155_OP1_OP2, ERR_Severe, gbl.lineno, "Invalid atomic operation.", CNULL);
338 return 0;
339 }
340
341 int
get_atomic_function(ILI_OP opcode)342 get_atomic_function(ILI_OP opcode)
343 {
344 switch (opcode) {
345 /*
346 * Update:
347 */
348 case IL_IMUL:
349 return mk_prototype("atomicmuli", "pure", DT_INT, 2, DT_CPTR, DT_INT);
350 case IL_UIMUL:
351 return mk_prototype("atomicmulu", "pure", DT_UINT, 2, DT_CPTR, DT_UINT);
352 case IL_KMUL:
353 return mk_prototype("atomicmulil", "pure", DT_INT8, 2, DT_CPTR, DT_INT8);
354 case IL_UKMUL:
355 return mk_prototype("atomicmulul", "pure", DT_UINT8, 2, DT_CPTR, DT_UINT8);
356 case IL_FMUL:
357 return mk_prototype("atomicmulf", "pure", DT_FLOAT, 2, DT_CPTR, DT_FLOAT);
358 case IL_DMUL:
359 return mk_prototype("atomicmuld", "pure", DT_DBLE, 2, DT_CPTR, DT_DBLE);
360 case IL_IMAX:
361 return mk_prototype("atomicmaxi", "pure", DT_INT, 2, DT_CPTR, DT_INT);
362 case IL_IMIN:
363 return mk_prototype("atomicmini", "pure", DT_INT, 2, DT_CPTR, DT_INT);
364 case IL_KMAX:
365 return mk_prototype("atomicmaxil", "pure", DT_INT8, 2, DT_CPTR, DT_INT8);
366 case IL_KMIN:
367 return mk_prototype("atomicminil", "pure", DT_INT8, 2, DT_CPTR, DT_INT8);
368 case IL_NOT:
369 return mk_prototype("atomicnoti", "pure", DT_INT, 2, DT_CPTR, DT_INT);
370 case IL_XOR:
371 return mk_prototype("atomicxori", "pure", DT_INT, 2, DT_CPTR, DT_INT);
372 case IL_LEQV:
373 return mk_prototype("atomicleqvi", "pure", DT_INT, 2, DT_CPTR, DT_INT);
374 case IL_KXOR:
375 return mk_prototype("atomicxorll", "pure", DT_INT8, 2, DT_CPTR, DT_INT8);
376 case IL_AND:
377 return mk_prototype("atomicandi", "pure", DT_INT, 2, DT_CPTR, DT_INT);
378 case IL_KAND:
379 return mk_prototype("atomicandll", "pure", DT_INT8, 2, DT_CPTR, DT_INT8);
380 case IL_OR:
381 return mk_prototype("atomicori", "pure", DT_INT, 2, DT_CPTR, DT_INT);
382 case IL_KOR:
383 return mk_prototype("atomicorll", "pure", DT_INT8, 2, DT_CPTR, DT_INT8);
384 case IL_IADD:
385 return mk_prototype("atomicaddi", "pure", DT_INT, 2, DT_CPTR, DT_INT);
386 case IL_KADD:
387 return mk_prototype("atomicaddil", "pure", DT_INT8, 2, DT_CPTR, DT_INT8);
388 case IL_ISUB:
389 return mk_prototype("atomicsubi", "pure", DT_INT, 2, DT_CPTR, DT_INT);
390 case IL_KSUB:
391 return mk_prototype("atomicsubil", "pure", DT_INT8, 2, DT_CPTR, DT_INT8);
392 case IL_UIADD:
393 return mk_prototype("atomicaddu", "pure", DT_UINT, 2, DT_CPTR, DT_UINT);
394 case IL_UKADD:
395 return mk_prototype("atomicaddul", "pure", DT_UINT8, 2, DT_CPTR, DT_UINT8);
396 case IL_UISUB:
397 return mk_prototype("atomicsubu", "pure", DT_UINT, 2, DT_CPTR, DT_UINT);
398 case IL_UKSUB:
399 return mk_prototype("atomicsubull", "pure", DT_UINT8, 2, DT_CPTR, DT_UINT8);
400 case IL_INEG:
401 return mk_prototype("atomicnegi", "pure", DT_INT, 2, DT_CPTR, DT_INT);
402 case IL_ULSHIFT:
403 return mk_prototype("atomiclshiftu", "pure", DT_UINT, 2, DT_CPTR, DT_UINT);
404 case IL_URSHIFT:
405 return mk_prototype("atomicrshiftu", "pure", DT_UINT, 2, DT_CPTR, DT_UINT);
406 case IL_LSHIFT:
407 return mk_prototype("atomiclshifti", "pure", DT_INT, 2, DT_CPTR, DT_INT);
408 case IL_RSHIFT:
409 return mk_prototype("atomicrshifti", "pure", DT_INT, 2, DT_CPTR, DT_INT);
410 case IL_FADD:
411 return mk_prototype("atomicaddf", "pure", DT_FLOAT, 2, DT_CPTR, DT_FLOAT);
412 case IL_FSUB:
413 return mk_prototype("atomicsubf", "pure", DT_FLOAT, 2, DT_CPTR, DT_FLOAT);
414 case IL_FMAX:
415 return mk_prototype("atomicmaxf", "pure", DT_FLOAT, 2, DT_CPTR, DT_FLOAT);
416 case IL_FMIN:
417 return mk_prototype("atomicminf", "pure", DT_FLOAT, 2, DT_CPTR, DT_FLOAT);
418 case IL_DADD:
419 return mk_prototype("atomicaddd", "pure", DT_DBLE, 2, DT_CPTR, DT_DBLE);
420 case IL_DSUB:
421 return mk_prototype("atomicsubd", "pure", DT_DBLE, 2, DT_CPTR, DT_DBLE);
422 case IL_DMAX:
423 return mk_prototype("atomicmaxd", "pure", DT_DBLE, 2, DT_CPTR, DT_DBLE);
424 case IL_DMIN:
425 return mk_prototype("atomicmind", "pure", DT_DBLE, 2, DT_CPTR, DT_DBLE);
426 case IL_IDIV:
427 return mk_prototype("atomicdivi", "pure", DT_INT, 2, DT_CPTR, DT_INT);
428 case IL_KDIV:
429 return mk_prototype("atomicdivil", "pure", DT_INT8, 2, DT_CPTR, DT_INT8);
430 case IL_UIDIV:
431 return mk_prototype("atomicdivu", "pure", DT_UINT, 2, DT_CPTR, DT_UINT);
432 case IL_UKDIV:
433 return mk_prototype("atomicdivul", "pure", DT_UINT8, 2, DT_CPTR, DT_UINT8);
434 case IL_FDIV:
435 return mk_prototype("atomicdivf", "pure", DT_FLOAT, 2, DT_CPTR, DT_FLOAT);
436 case IL_DDIV:
437 return mk_prototype("atomicdivd", "pure", DT_DBLE, 2, DT_CPTR, DT_DBLE);
438 /*
439 * Read:
440 */
441 #define DT_VOID_NONE DT_NONE
442 case IL_LD:
443 return mk_prototype("atomicloadi", "pure", DT_VOID_NONE, 2, DT_CPTR, DT_CPTR);
444 case IL_LDSP:
445 return mk_prototype("atomicloadf", "pure", DT_VOID_NONE, 2, DT_CPTR, DT_CPTR);
446 case IL_LDDP:
447 return mk_prototype("atomicloadd", "pure", DT_VOID_NONE, 2, DT_CPTR, DT_CPTR);
448 case IL_LDKR:
449 return mk_prototype("atomicloadl", "pure", DT_VOID_NONE, 2, DT_CPTR, DT_CPTR);
450 /*
451 * Write:
452 */
453 case IL_ST:
454 return mk_prototype("atomicexchi", "pure", DT_INT, 2, DT_CPTR, DT_INT);
455 case IL_STSP:
456 return mk_prototype("atomicexchf", "pure", DT_FLOAT, 2, DT_CPTR, DT_FLOAT);
457 case IL_STDP:
458 return mk_prototype("atomicexchd", "pure", DT_DBLE, 2, DT_CPTR, DT_DBLE);
459 case IL_STKR:
460 return mk_prototype("atomicexchul", "pure", DT_INT8, 2, DT_CPTR, DT_INT8);
461 case IL_SCMPLXADD:
462 return mk_prototype("atomicaddcmplx", "pure", DT_VOID_NONE, 3, DT_CPTR, DT_FLOAT, DT_FLOAT);
463 case IL_SCMPLXSUB:
464 return mk_prototype("atomicsubcmplx", "pure", DT_VOID_NONE, 3, DT_CPTR, DT_FLOAT, DT_FLOAT);
465 default:
466 interr("Unsupported atomic opcode: ", opcode, ERR_Severe);
467 return 0;
468 }
469 }
470
/* Return the file-scope capture_read_ili value (presumably the ILI of the
 * read statement of an atomic capture -- confirm against callers). */
int
get_capture_read_ili(void)
{
  return capture_read_ili;
}

/* Set the file-scope capture_read_ili value to x. */
void
set_capture_read_ili(int x)
{
  capture_read_ili = x;
}
482
/* Return the file-scope capture_update_ili value (presumably the ILI of the
 * update statement of an atomic capture -- confirm against callers). */
int
get_capture_update_ili(void)
{
  return capture_update_ili;
}

/* Set the file-scope capture_update_ili value to x. */
void
set_capture_update_ili(int x)
{
  capture_update_ili = x;
}
494
/* Return the file-scope is_in_atomic flag. */
int
get_is_in_atomic(void)
{
  return is_in_atomic;
}

/* Set the file-scope is_in_atomic flag to x. */
void
set_is_in_atomic(int x)
{
  is_in_atomic = x;
}
506
/* Return the file-scope is_in_atomic_read flag. */
int
get_is_in_atomic_read(void)
{
  return is_in_atomic_read;
}

/* Set the file-scope is_in_atomic_read flag to x. */
void
set_is_in_atomic_read(int x)
{
  is_in_atomic_read = x;
}
518
/* Return the file-scope is_in_atomic_write flag. */
int
get_is_in_atomic_write(void)
{
  return is_in_atomic_write;
}

/* Set the file-scope is_in_atomic_write flag to x. */
void
set_is_in_atomic_write(int x)
{
  is_in_atomic_write = x;
}
530
/* Return the file-scope is_in_atomic_capture flag. */
int
get_is_in_atomic_capture(void)
{
  return is_in_atomic_capture;
}

/* Set the file-scope is_in_atomic_capture flag to x. */
void
set_is_in_atomic_capture(int x)
{
  is_in_atomic_capture = x;
}
542
/* Return the file-scope atomic_capture_created flag. */
int
get_atomic_capture_created(void)
{
  return atomic_capture_created;
}

/* Set the file-scope atomic_capture_created flag to x. */
void
set_atomic_capture_created(int x)
{
  atomic_capture_created = x;
}
554
/* Return the file-scope atomic_store_created flag. */
int
get_atomic_store_created(void)
{
  return atomic_store_created;
}

/* Set the file-scope atomic_store_created flag to x. */
void
set_atomic_store_created(int x)
{
  atomic_store_created = x;
}
566
567 int
get_atomic_write_opcode(int current_ili)568 get_atomic_write_opcode(int current_ili)
569 {
570 int ili = current_ili;
571 int store_opcode;
572 int store_value;
573 int store_pt, store_nme;
574
575 store_opcode = ILI_OPC(ili);
576 store_value = ILI_OPND(ili, 1);
577 store_pt = ILI_OPND(ili, 2);
578 store_nme = ILI_OPND(ili, 3);
579
580 if (store_opcode != IL_ST && store_opcode != IL_STDP &&
581 store_opcode != IL_STSP && store_opcode != IL_STKR) {
582 /* Rely on the caller to issue an error if necessary */
583 return 0;
584 }
585
586 AtomicOp.atomic_operand = 0;
587 AtomicOp.ldst_point = store_pt;
588 AtomicOp.ldst_nme = store_nme;
589 AtomicOp.ili_operand = store_value;
590
591 return store_opcode;
592 }
593
594 /* This function is used to set the address token flag which
595 * will be used in the later accelerator code generation
596 * For example, if the atomic operates on scalar variable
597 * this flag may indicate an optimization to place this variable
598 * in accelerator's shared memory */
set_store_pt_addrtkn_flg(int store_pt)599 static void set_store_pt_addrtkn_flg(int store_pt)
600 {
601 int store_symbol;
602 if (ILI_OPC(store_pt) == IL_ACON) {
603 store_symbol = ILI_OPND(store_pt, 1);
604 store_symbol = CONVAL1G(store_symbol);
605 ADDRTKNP(store_symbol, 1);
606 } else if (ILI_OPC(store_pt) == IL_AADD || ILI_OPC(store_pt) == IL_ASUB) {
607 int acon_ili;
608 acon_ili = ILI_OPND(store_pt, 1);
609 if (ILI_OPC(acon_ili) != IL_ACON) {
610 while (ILI_OPC(acon_ili) == IL_AADD || ILI_OPC(acon_ili) == IL_ASUB) {
611 acon_ili = ILI_OPND(acon_ili, 1);
612 }
613 /* If the base is not a constant (perhaps it is a compiler temp)
614 ** then don't try and mark.
615 **/
616 if (ILI_OPC(acon_ili) == IL_ACON) {
617 store_symbol = ILI_OPND(acon_ili, 1);
618 store_symbol = CONVAL1G(store_symbol);
619 ADDRTKNP(store_symbol, 1);
620 }
621 } else {
622 store_symbol = ILI_OPND(acon_ili, 1);
623 store_symbol = CONVAL1G(store_symbol);
624 ADDRTKNP(store_symbol, 1);
625 }
626 }
627 }
628
629 int
create_atomic_capture_seq(int update_ili,int read_ili,int capture_first)630 create_atomic_capture_seq(int update_ili, int read_ili, int capture_first)
631 {
632 int function;
633 ILI_OP intarg_opcode, floatarg_opcode, doublearg_opcode, longarg_opcode;
634 int ld_opcode;
635 ILI_OP st_opcode, arg_opcode;
636 int store_pt, store_nme, arg, garg;
637 int store_symbol;
638 int argreg = 0;
639 int update_operand;
640 int load_pt1, load_pt2;
641 int op1, op2, opc;
642 int update_op;
643 ILI_OP return_op;
644 int result;
645 int result_arg;
646 int msize;
647 int allow_capture_last = 1;
648 int arg_dt = 0;
649
650 #if defined(TARGET_X8664)
651 intarg_opcode = IL_DAIR;
652 floatarg_opcode = IL_DASP;
653 doublearg_opcode = IL_DADP;
654 longarg_opcode = IL_DAKR;
655 #else
656 intarg_opcode = IL_ARGIR;
657 floatarg_opcode = IL_ARGSP;
658 doublearg_opcode = IL_ARGDP;
659 longarg_opcode = IL_ARGKR;
660 #endif
661
662 st_opcode = ILI_OPC(read_ili);
663 if (st_opcode != ILI_OPC(update_ili)) {
664 /* This is not a legal atomic capture--data type mismatch */
665 interr("Atomic Capture: Mismatched storage operations.", 0, ERR_Severe);
666 }
667
668 switch (st_opcode) {
669 case IL_ST:
670 arg_opcode = intarg_opcode;
671 ld_opcode = IL_LD;
672 return_op = IL_DFRIR;
673 result_arg = IR_RETVAL;
674 arg_dt = DT_INT;
675 #if defined(TARGET_X8664)
676 argreg = ARG_IR(1);
677 #else
678 #endif
679 break;
680 case IL_STDP:
681 arg_opcode = doublearg_opcode;
682 ld_opcode = IL_LDDP;
683 arg_dt = DT_DBLE;
684 #if defined(TARGET_X8664)
685 argreg = ARG_XR(0);
686 return_op = IL_DFRDP;
687 result_arg = FR_RETVAL;
688 #else
689 return_op = IL_DFRDP;
690 result_arg = FR_RETVAL;
691 #endif
692 break;
693 case IL_STSP:
694 arg_opcode = floatarg_opcode;
695 ld_opcode = IL_LDSP;
696 arg_dt = DT_FLOAT;
697 #if defined(TARGET_X8664)
698 argreg = ARG_XR(0);
699 return_op = IL_DFRSP;
700 result_arg = FR_RETVAL;
701 #else
702 return_op = IL_DFRSP;
703 result_arg = FR_RETVAL;
704 #endif
705 break;
706 case IL_STKR:
707 arg_opcode = longarg_opcode;
708 ld_opcode = IL_LDKR;
709 arg_dt = DT_INT8;
710 #if defined(TARGET_X8664)
711 return_op = IL_DFRKR;
712 result_arg = KR_RETVAL;
713 argreg = ARG_IR(1);
714 #else
715 return_op = IL_DFRKR;
716 result_arg = KR_RETVAL;
717 #endif
718 break;
719 default:
720 interr("Create: Unexpected atomic store opcode", st_opcode, ERR_Severe);
721 break;
722 }
723
724 op1 = ILI_OPND(update_ili, 1);
725 op2 = ILI_OPND(update_ili, 2);
726 store_nme = ILI_OPND(update_ili, 3);
727
728 update_op = op1;
729 function = get_atomic_function(ILI_OPC(op1));
730
731 load_pt1 = load_pt2 = -1;
732
733 if (ILI_OPC(ILI_OPND(update_op, 1)) == IL_CSEIR) {
734 /* Look through the CSEIR to the "real" load.
735 * If the read_ili is done off of a CSE, make sure
736 * it is the same.
737 */
738 if (ILI_OPC(ILI_OPND(read_ili, 1)) == IL_CSEIR) {
739 if (ILI_OPND(read_ili, 1) != ILI_OPND(update_op, 1)) {
740 interr("Mismatched CSE (1).\n", 0, ERR_unused);
741 } else {
742 allow_capture_last = 0;
743 }
744 }
745 load_pt1 = ILI_OPND(ILI_OPND(update_op, 1), 1);
746 } else if (ILI_OPC(ILI_OPND(update_op, 1)) == ld_opcode) {
747 load_pt1 = ILI_OPND(update_op, 1);
748 }
749
750 if (ILI_OPC(ILI_OPND(update_op, 2)) == IL_CSEIR) {
751 /* Look through the CSEIR to the "real" load.
752 * If the read_ili is done off of a CSE, make sure
753 * it is the same.
754 */
755 if (ILI_OPC(ILI_OPND(read_ili, 1)) == IL_CSEIR) {
756 if (ILI_OPND(read_ili, 1) != ILI_OPND(update_op, 2)) {
757 interr("Mismatched CSE (2).\n", 0, ERR_unused);
758 } else {
759 allow_capture_last = 0;
760 }
761 }
762 load_pt2 = ILI_OPND(ILI_OPND(update_op, 2), 1);
763 } else if (ILI_OPC(ILI_OPND(update_op, 2)) == ld_opcode) {
764 load_pt2 = ILI_OPND(update_op, 2);
765 }
766
767 if (load_pt1 == -1 && load_pt2 == -1) {
768 interr("Can't find matching load operation in atomic capture.", 0, ERR_Severe);
769 return 0;
770 }
771
772 store_pt = op2;
773 if (ILI_OPC(store_pt) == IL_CSEAR) {
774 store_pt = ILI_OPND(store_pt, 1);
775 }
776 set_store_pt_addrtkn_flg(store_pt);
777
778 /* Determine which operand from update_op comes from the load,
779 * and which operand comes from the "updating" part.
780 */
781 if (ILI_OPND(load_pt1, 1) == store_pt) {
782 /* The first argument for the update_op was the load/store point.
783 * Use the second as the update_operand.
784 */
785 update_operand = ILI_OPND(update_op, 2);
786 } else if (ILI_OPND(load_pt2, 1) == store_pt) {
787 /* The second argument for the update_op was the load/store point.
788 * Use the first as the update_operand.
789 */
790 update_operand = ILI_OPND(update_op, 1);
791 } else {
792 interr("Can't find load operation in atomic capture.", 0, ERR_Severe);
793 return 0;
794 }
795
796 arg = ad1ili(IL_NULL, 0);
797 #if defined(TARGET_X8664)
798 arg = ad3ili(arg_opcode, update_operand, argreg, arg);
799 arg = ad3ili(IL_DAAR, store_pt, ARG_IR(0), arg);
800 #else
801 arg = ad3ili(arg_opcode, update_operand, arg, arg);
802 arg = ad3ili(IL_ARGAR, store_pt, arg, 0);
803 #endif
804 garg = ad1ili(IL_NULL, 0);
805 garg = ad4ili(IL_GARG, update_operand, garg, arg_dt, 0);
806 garg = ad4ili(IL_GARG, store_pt, garg, DT_CPTR, store_nme);
807 arg = ad2ili(IL_JSR, function, arg);
808 garg = ad3ili(IL_GJSR, function, garg, 0);
809 ILI_ALT(arg) = garg;
810 arg = ad2ili(return_op, arg, result_arg);
811 if (!capture_first && allow_capture_last) {
812 arg = ad2ili(ILI_OPC(op1), arg, update_operand);
813 }
814 /* Replicate the store for the original read_ili, except the
815 * value that is being stored.
816 */
817 result = ad4ili(st_opcode, arg, ILI_OPND(read_ili, 2), ILI_OPND(read_ili, 3),
818 ILI_OPND(read_ili, 4));
819 return result;
820 }
821
822 int
create_atomic_write_seq(int store_ili)823 create_atomic_write_seq(int store_ili)
824 {
825 int arg, garg;
826 int function;
827 int store_pt, store_nme;
828 ILI_OP arg_opcode;
829 ILI_OP intarg_opcode, floatarg_opcode, doublearg_opcode, longarg_opcode;
830 int argreg;
831 int arg_dt = 0;
832
833 #if defined(TARGET_X8664)
834 intarg_opcode = IL_DAIR;
835 floatarg_opcode = IL_DASP;
836 doublearg_opcode = IL_DADP;
837 longarg_opcode = IL_DAKR;
838 #else
839 intarg_opcode = IL_ARGIR;
840 floatarg_opcode = IL_ARGSP;
841 doublearg_opcode = IL_ARGDP;
842 longarg_opcode = IL_ARGKR;
843 #endif
844
845 switch (ILI_OPC(store_ili)) {
846 case IL_ST:
847 arg_dt = DT_INT;
848 arg_opcode = intarg_opcode;
849 #if defined(TARGET_X8664)
850 argreg = ARG_IR(1);
851 #endif
852 break;
853 case IL_STDP:
854 arg_dt = DT_DBLE;
855 arg_opcode = doublearg_opcode;
856 #if defined(TARGET_X8664)
857 argreg = ARG_XR(0);
858 #endif
859 break;
860 case IL_STSP:
861 arg_dt = DT_FLOAT;
862 arg_opcode = floatarg_opcode;
863 #if defined(TARGET_X8664)
864 argreg = ARG_XR(0);
865 #endif
866 break;
867 case IL_STKR:
868 arg_dt = DT_INT8;
869 arg_opcode = longarg_opcode;
870 #if defined(TARGET_X8664)
871 argreg = ARG_IR(1);
872 #endif
873 break;
874 default:
875 interr("Create: Unexpected atomic store opcode", ILI_OPC(store_ili), ERR_Severe);
876 break;
877 }
878
879 /* Create a call to:
880 * atomicexch*(store_pt, load_val)
881 * which stores val atomically into store_pt.
882 */
883
884 store_pt = ILI_OPND(store_ili, 2);
885 store_nme = ILI_OPND(store_ili, 3);
886 set_store_pt_addrtkn_flg(store_pt);
887 function = get_atomic_function(ILI_OPC(store_ili));
888 arg = ad1ili(IL_NULL, 0);
889 garg = ad1ili(IL_NULL, 0);
890 #if defined(TARGET_X8664)
891 arg = ad3ili(arg_opcode, AtomicOp.ili_operand, argreg, arg);
892 arg = ad3ili(IL_DAAR, store_pt, ARG_IR(0), arg);
893 #else
894 arg = ad3ili(arg_opcode, AtomicOp.ili_operand, arg, arg);
895 arg = ad3ili(IL_ARGAR, store_pt, arg, 0);
896 #endif
897 garg = ad4ili(IL_GARG, AtomicOp.ili_operand, garg, arg_dt, 0);
898 garg = ad4ili(IL_GARG, store_pt, garg, DT_CPTR, store_nme);
899 arg = ad2ili(IL_JSR, function, arg);
900 garg = ad3ili(IL_GJSR, function, garg, 0);
901 ILI_ALT(arg) = garg;
902 return arg;
903 }
904
905 int
get_atomic_read_opcode(int current_ili)906 get_atomic_read_opcode(int current_ili)
907 {
908 int ili = current_ili;
909 int load_opcode, store_opcode;
910 int ld_op;
911 int load_pt, load_nme, store_pt;
912
913 store_opcode = ILI_OPC(ili);
914 ld_op = ILI_OPND(ili, 1);
915 store_pt = ILI_OPND(ili, 2);
916
917 load_opcode = ILI_OPC(ld_op);
918
919 if (load_opcode == IL_CSEIR) {
920 /* Look through the CSEIR opcode */
921 ld_op = ILI_OPND(ILI_OPND(ili, 1), 1);
922 load_opcode = ILI_OPC(ld_op);
923 }
924
925 if (load_opcode != IL_LD && load_opcode != IL_LDDP &&
926 load_opcode != IL_LDSP && load_opcode != IL_LDKR) {
927 /* Rely on the caller to issue an error if necessary */
928 return 0;
929 }
930
931 load_pt = ILI_OPND(ld_op, 1);
932 load_nme = ILI_OPND(ld_op, 2);
933 AtomicOp.atomic_operand = ld_op;
934 AtomicOp.ldst_point = load_pt;
935 AtomicOp.ldst_nme = load_nme;
936 AtomicOp.ili_operand = 0;
937
938 return store_opcode;
939 }
940
941 int
create_atomic_read_seq(int store_ili)942 create_atomic_read_seq(int store_ili)
943 {
944 int arg, garg;
945 int function;
946 int store_pt, store_nme;
947
948 /* Create a call to:
949 * atomicload*(store_pt, load_pt)
950 * which loads (atomically from load_pt), and stores (non-atomically)
951 * into store_pt.
952 */
953 store_pt = ILI_OPND(store_ili, 2);
954 store_nme = ILI_OPND(store_ili, 3);
955 set_store_pt_addrtkn_flg(store_pt);
956 function = get_atomic_function(ILI_OPC(AtomicOp.atomic_operand));
957 arg = ad1ili(IL_NULL, 0);
958 #if defined(TARGET_X8664)
959 arg = ad3ili(IL_DAAR, AtomicOp.ldst_point, ARG_IR(1), arg);
960 arg = ad3ili(IL_DAAR, store_pt, ARG_IR(0), arg);
961 #else
962 arg = ad3ili(IL_ARGAR, AtomicOp.ldst_point, arg, 0);
963 arg = ad3ili(IL_ARGAR, store_pt, arg, 0);
964 #endif
965 garg = ad1ili(IL_NULL, 0);
966 garg = ad4ili(IL_GARG, AtomicOp.ldst_point, garg, DT_CPTR, AtomicOp.ldst_nme);
967 garg = ad4ili(IL_GARG, store_pt, garg, DT_CPTR, store_nme);
968 arg = ad2ili(IL_JSR, function, arg);
969 garg = ad3ili(IL_GJSR, function, garg, 0);
970 ILI_ALT(arg) = garg;
971 return arg;
972 }
973
974 /*setup the atomic operands and opcode
975 when high-precision to low-precision conversion happens
976 */
977 static void
set_atomic_typecast_h2l(int tcast_ili)978 set_atomic_typecast_h2l(int tcast_ili)
979 {
980 atomic_typecast_operand = tcast_ili;
981 }
982
983 static void
reset_atomic_typecast_h2l()984 reset_atomic_typecast_h2l()
985 {
986 atomic_typecast_operand = 0;
987 is_atomic_operand1 = 0;
988 }
989
990 static int
is_atomic_typcast_h2l()991 is_atomic_typcast_h2l()
992 {
993 return atomic_typecast_operand != 0;
994 }
995
996 ILI_OP
get_atomic_update_opcode(int current_ili)997 get_atomic_update_opcode(int current_ili)
998 {
999 int ili = current_ili;
1000 int bin_op, op1, op2, store_pt, store_nme, load_pt1, load_pt2;
1001 int opc;
1002 ILI_OP store_opcode, load_opcode;
1003
1004 load_pt1 = 0;
1005 load_pt2 = 0;
1006
1007 store_opcode = ILI_OPC(ili);
1008
1009 if (store_opcode == IL_FREEIR) {
1010 AtomicOp.atomic_operand = ili;
1011 AtomicOp.ldst_point = 0;
1012 AtomicOp.ldst_nme = 0;
1013 AtomicOp.ili_operand = 0;
1014 return IL_FREEIR;
1015 }
1016
1017 if (store_opcode != IL_ST && store_opcode != IL_STDP &&
1018 store_opcode != IL_STSP && store_opcode != IL_STKR &&
1019 store_opcode != IL_STSCMPLX) {
1020 if(store_opcode == IL_STDCMPLX)
1021 error(S_0155_OP1_OP2, ERR_Severe, gbl.lineno, "Double precision complex data type are not supported in atomic region within accelerator region.", CNULL);
1022 else
1023 interr("Error: Detected unexpected atomic store opcode.", store_opcode, ERR_Severe);
1024 return IL_NONE;
1025 }
1026
1027 bin_op = ILI_OPND(ili, 1);
1028 store_pt = ILI_OPND(ili, 2);
1029 store_nme = ILI_OPND(ili, 3);
1030
1031 if (ILI_OPC(store_pt) == IL_CSEAR) {
1032 store_pt = ILI_OPND(store_pt, 1);
1033 }
1034 // check the high precision to low precision type cast first
1035 // a demo example of double to unsigned long long translation
1036 // 106 LDKR 48^ 12~ <array[0]> i8
1037 // 107 DAKR 106^ kr( 5) 1^
1038 // 108 QJSR 212~<__mth_i_dfloatuk> 107^
1039 // 109 DFRDP 108^ dp( 1)
1040 // 110 DFLOATUK 106^ 109^-alt
1041 // 111 DADD 105^ 110^
1042 // 112 DADP 111^ dp( 1) 1^
1043 // 113 QJSR 213~<__mth_i_dfixuk> 112^
1044 // 114 DFRKR 113^ kr( 1)
1045 // 115 STKR 114^ 48^ 12~ <array[0]> i8
1046 // The following if statement takes care of the ili 115 ~ ili 111
1047 // There is another if statement (in this function) takes care of
1048 // ili 110 ~ ili 106
1049 if (store_opcode == IL_STKR && ILI_OPC(bin_op) == IL_DFRKR &&
1050 ILI_OPC(ILI_OPND(bin_op, 1)) == IL_QJSR &&
1051 (strcmp(SYMNAME(ILI_OPND(ILI_OPND(bin_op, 1), 1)), "__mth_i_fixuk") ==
1052 0 ||
1053 strcmp(SYMNAME(ILI_OPND(ILI_OPND(bin_op, 1), 1)), "__mth_i_fixukx") ==
1054 0)) {
1055 set_atomic_typecast_h2l(IL_FIXUK);
1056 bin_op = ILI_OPND(ILI_OPND(ILI_OPND(bin_op, 1), 2), 1);
1057 store_opcode = IL_STSP;
1058 } else if (store_opcode == IL_STKR && ILI_OPC(bin_op) == IL_DFRKR &&
1059 ILI_OPC(ILI_OPND(bin_op, 1)) == IL_QJSR &&
1060 (strcmp(SYMNAME(ILI_OPND(ILI_OPND(bin_op, 1), 1)),
1061 "__mth_i_dfixuk") == 0 ||
1062 strcmp(SYMNAME(ILI_OPND(ILI_OPND(bin_op, 1), 1)),
1063 "__mth_i_dfixukx") == 0)) {
1064 set_atomic_typecast_h2l(IL_DFIXUK);
1065 bin_op = ILI_OPND(ILI_OPND(ILI_OPND(bin_op, 1), 2), 1);
1066 store_opcode = IL_STDP;
1067 } else if (store_opcode == IL_STKR && ILI_OPC(bin_op) == IL_DFRKR &&
1068 ILI_OPC(ILI_OPND(bin_op, 1)) == IL_QJSR &&
1069 strcmp(SYMNAME(ILI_OPND(ILI_OPND(bin_op, 1), 1)),
1070 "__mth_i_kfixx") == 0) {
1071 set_atomic_typecast_h2l(IL_FIXK);
1072 bin_op = ILI_OPND(ILI_OPND(ILI_OPND(bin_op, 1), 2), 1);
1073 store_opcode = IL_STSP;
1074 } else if (store_opcode == IL_STKR && ILI_OPC(bin_op) == IL_DFRKR &&
1075 ILI_OPC(ILI_OPND(bin_op, 1)) == IL_QJSR &&
1076 strcmp(SYMNAME(ILI_OPND(ILI_OPND(bin_op, 1), 1)),
1077 "__mth_i_dkfixx") == 0) {
1078 set_atomic_typecast_h2l(IL_DFIXK);
1079 bin_op = ILI_OPND(ILI_OPND(ILI_OPND(bin_op, 1), 2), 1);
1080 store_opcode = IL_STDP;
1081 } else if (ILI_OPC(bin_op) == IL_FIX || ILI_OPC(bin_op) == IL_UFIX ||
1082 ILI_OPC(bin_op) == IL_FIXK || ILI_OPC(bin_op) == IL_FIXUK) {
1083 set_atomic_typecast_h2l(ILI_OPC(bin_op));
1084 bin_op = ILI_OPND(bin_op, 1);
1085 store_opcode = IL_STSP;
1086 } else if (ILI_OPC(bin_op) == IL_DFIX || ILI_OPC(bin_op) == IL_DFIXU ||
1087 ILI_OPC(bin_op) == IL_DFIXK || ILI_OPC(bin_op) == IL_DFIXUK ||
1088 ILI_OPC(bin_op) == IL_SNGL) {
1089 set_atomic_typecast_h2l(ILI_OPC(bin_op));
1090 bin_op = ILI_OPND(bin_op, 1);
1091 store_opcode = IL_STDP;
1092 } else if (ILI_OPC(bin_op) == IL_DFRIR &&
1093 strcmp(SYMNAME(ILI_OPND(ILI_OPND(bin_op, 1), 1)),
1094 "__mth_i_fixux") == 0) {
1095 // float 32bit to unsigned 32bit on 32bit machine x86 machine
1096 set_atomic_typecast_h2l(IL_UFIX);
1097 bin_op = ILI_OPND(ILI_OPND(ILI_OPND(bin_op, 1), 2), 1);
1098 store_opcode = IL_STSP;
1099 } else if (ILI_OPC(bin_op) == IL_DFRIR &&
1100 strcmp(SYMNAME(ILI_OPND(ILI_OPND(bin_op, 1), 1)),
1101 "__mth_i_dfixux") == 0) {
1102 // float 32bit to unsigned 32bit on 32bit machine x86 machine
1103 set_atomic_typecast_h2l(IL_DFIXU);
1104 bin_op = ILI_OPND(ILI_OPND(ILI_OPND(bin_op, 1), 2), 1);
1105 store_opcode = IL_STDP;
1106 }
1107 // check if it is translated from float/double to unsigned
1108 else if (ILI_OPC(bin_op) == IL_KIMV) {
1109 bin_op = ILI_OPND(bin_op, 1);
1110 if (ILI_OPC(bin_op) == IL_FIXK) {
1111 set_atomic_typecast_h2l(IL_UFIX);
1112 store_opcode = IL_STSP;
1113 } else if (ILI_OPC(bin_op) == IL_DFIXK) {
1114 set_atomic_typecast_h2l(IL_DFIXU);
1115 store_opcode = IL_STDP;
1116 }
1117 bin_op = ILI_OPND(bin_op, 1);
1118 }
1119
1120 if (store_opcode == IL_ST) {
1121 /* Look through the int opcodes */
1122 for (opc = 0; opc < num_int_opcodes; opc++) {
1123 if (int_atomic_opcodes[opc] == ILI_OPC(bin_op))
1124 break;
1125 }
1126 if (opc == num_int_opcodes) {
1127 return IL_NONE;
1128 }
1129 AtomicOp.atomic_operand = bin_op;
1130 AtomicOp.ldst_point = store_pt;
1131 AtomicOp.ldst_nme = store_nme;
1132 load_opcode = IL_LD;
1133 } else if (store_opcode == IL_STSP) {
1134 /* Look through the float opcodes */
1135 for (opc = 0; opc < num_float_opcodes; opc++) {
1136 if (float_atomic_opcodes[opc] == ILI_OPC(bin_op))
1137 break;
1138 }
1139 if (opc == num_float_opcodes) {
1140 return IL_NONE;
1141 }
1142 AtomicOp.atomic_operand = bin_op;
1143 AtomicOp.ldst_point = store_pt;
1144 AtomicOp.ldst_nme = store_nme;
1145 if (atomic_typecast_operand == IL_FIX || atomic_typecast_operand == IL_UFIX)
1146 load_opcode = IL_LD;
1147 else if (atomic_typecast_operand == IL_FIXK ||
1148 atomic_typecast_operand == IL_FIXUK)
1149 load_opcode = IL_LDKR;
1150 else
1151 load_opcode = IL_LDSP;
1152 } else if (store_opcode == IL_STDP) {
1153 /* Look through the double opcodes */
1154 for (opc = 0; opc < num_double_opcodes; opc++) {
1155 if (double_atomic_opcodes[opc] == ILI_OPC(bin_op))
1156 break;
1157 }
1158 if (opc == num_double_opcodes) {
1159 return IL_NONE;
1160 }
1161 AtomicOp.atomic_operand = bin_op;
1162 AtomicOp.ldst_point = store_pt;
1163 AtomicOp.ldst_nme = store_nme;
1164 if (atomic_typecast_operand == IL_DFIX ||
1165 atomic_typecast_operand == IL_DFIXU)
1166 load_opcode = IL_LD;
1167 else if (atomic_typecast_operand == IL_DFIXK ||
1168 atomic_typecast_operand == IL_DFIXUK)
1169 load_opcode = IL_LDKR;
1170 else if (atomic_typecast_operand == IL_SNGL)
1171 load_opcode = IL_LDSP;
1172 else
1173 load_opcode = IL_LDDP;
1174 } else if (store_opcode == IL_STKR) {
1175 for (opc = 0; opc < num_long_opcodes; opc++) {
1176 if (long_atomic_opcodes[opc] == ILI_OPC(bin_op))
1177 break;
1178 }
1179 if (opc == num_long_opcodes) {
1180 return IL_NONE;
1181 }
1182 AtomicOp.atomic_operand = bin_op;
1183 AtomicOp.ldst_point = store_pt;
1184 AtomicOp.ldst_nme = store_nme;
1185 load_opcode = IL_LDKR;
1186 } else if(store_opcode == IL_STSCMPLX) {
1187 for (opc = 0; opc < num_cmplx_opcodes; opc++) {
1188 if (cmplx_atomic_opcodes[opc] == ILI_OPC(bin_op))
1189 break;
1190 }
1191 if (opc == num_cmplx_opcodes) {
1192 return IL_NONE;
1193 }
1194 AtomicOp.atomic_operand = bin_op;
1195 AtomicOp.ldst_point = store_pt;
1196 AtomicOp.ldst_nme = store_nme;
1197 load_opcode = IL_LDSCMPLX;
1198 }
1199
1200 op1 = ILI_OPND(bin_op, 1);
1201 if (ILI_OPC(op1) == IL_FLOAT || ILI_OPC(op1) == IL_DFLOAT ||
1202 ILI_OPC(op1) == IL_FLOATU || ILI_OPC(op1) == IL_DFLOATU ||
1203 ILI_OPC(op1) == IL_FLOATUK || ILI_OPC(op1) == IL_DFLOATUK ||
1204 ILI_OPC(op1) == IL_FLOATK || ILI_OPC(op1) == IL_DFLOATK ||
1205 ILI_OPC(op1) == IL_DBLE)
1206 op1 = ILI_OPND(op1, 1);
1207 // if the conversion is from unsigned integer to float/double
1208 // Locate the real LD
1209 if (ILI_OPC(op1) == IL_UIKMV)
1210 op1 = ILI_OPND(op1, 1);
1211 if ((ILI_OPC(op1) == IL_DFRSP || ILI_OPC(op1) == IL_DFRDP
1212 #ifdef IL_DFRDPX87
1213 || ILI_OPC(op1) == IL_DFRDPX87
1214 #endif
1215 #ifdef IL_DFRSPX87
1216 || ILI_OPC(op1) == IL_DFRSPX87
1217 #endif
1218 ) &&
1219 ILI_OPC(ILI_OPND(op1, 1)) == IL_QJSR &&
1220 (strcmp(SYMNAME(ILI_OPND(ILI_OPND(op1, 1), 1)), "__mth_i_dfloatuk") ==
1221 0 ||
1222 strcmp(SYMNAME(ILI_OPND(ILI_OPND(op1, 1), 1)), "__mth_i_floatuk") == 0 ||
1223 strcmp(SYMNAME(ILI_OPND(ILI_OPND(op1, 1), 1)), "__mth_i_floatux") == 0 ||
1224 strcmp(SYMNAME(ILI_OPND(ILI_OPND(op1, 1), 1)), "__mth_i_dfloatux") ==
1225 0 ||
1226 strcmp(SYMNAME(ILI_OPND(ILI_OPND(op1, 1), 1)), "__mth_i_dfloatk") == 0 ||
1227 strcmp(SYMNAME(ILI_OPND(ILI_OPND(op1, 1), 1)), "__mth_i_floatk") == 0)) {
1228 op1 = ILI_OPND(ILI_OPND(ILI_OPND(op1, 1), 2), 1);
1229 }
1230
1231 op2 = ILI_OPND(bin_op, 2);
1232 if (ILI_OPC(op2) == IL_FLOAT || ILI_OPC(op2) == IL_DFLOAT ||
1233 ILI_OPC(op2) == IL_FLOATU || ILI_OPC(op2) == IL_DFLOATU ||
1234 ILI_OPC(op2) == IL_FLOATUK || ILI_OPC(op2) == IL_DFLOATUK ||
1235 ILI_OPC(op2) == IL_FLOATK || ILI_OPC(op2) == IL_DFLOATK ||
1236 ILI_OPC(op2) == IL_DBLE)
1237 op2 = ILI_OPND(op2, 1);
1238 // if the conversion is from unsigned integer to float/double
1239 // Locate the real LD
1240 if (ILI_OPC(op2) == IL_UIKMV)
1241 op2 = ILI_OPND(op2, 1);
1242 if ((ILI_OPC(op2) == IL_DFRSP || ILI_OPC(op2) == IL_DFRDP
1243 #ifdef IL_DFRDPX87
1244 || ILI_OPC(op2) == IL_DFRDPX87
1245 #endif
1246 #ifdef IL_DFRSPX87
1247 || ILI_OPC(op2) == IL_DFRSPX87
1248 #endif
1249 ) &&
1250 ILI_OPC(ILI_OPND(op2, 1)) == IL_QJSR &&
1251 (strcmp(SYMNAME(ILI_OPND(ILI_OPND(op2, 1), 1)), "__mth_i_dfloatuk") ==
1252 0 ||
1253 strcmp(SYMNAME(ILI_OPND(ILI_OPND(op2, 1), 1)), "__mth_i_floatuk") == 0 ||
1254 strcmp(SYMNAME(ILI_OPND(ILI_OPND(op2, 1), 1)), "__mth_i_floatux") == 0 ||
1255 strcmp(SYMNAME(ILI_OPND(ILI_OPND(op2, 1), 1)), "__mth_i_dfloatux") ==
1256 0 ||
1257 strcmp(SYMNAME(ILI_OPND(ILI_OPND(op2, 1), 1)), "__mth_i_dfloatk") == 0 ||
1258 strcmp(SYMNAME(ILI_OPND(ILI_OPND(op2, 1), 1)), "__mth_i_floatk") == 0)) {
1259 op2 = ILI_OPND(ILI_OPND(ILI_OPND(op2, 1), 2), 1);
1260 }
1261
1262 if (ILI_OPC(op1) == IL_CSEIR) {
1263 /* Look through the CSEIR to the "real" load */
1264 op1 = ILI_OPND(op1, 1);
1265 }
1266
1267 if (ILI_OPC(op1) == load_opcode) {
1268 load_pt1 = ILI_OPND(op1, 1);
1269 }
1270
1271 if (ILI_OPC(op2) == load_opcode) {
1272 load_pt2 = ILI_OPND(op2, 1);
1273 }
1274
1275 if (load_pt1 == 0 && load_pt2 == 0) {
1276 /* This is an error */
1277 return IL_NONE;
1278 }
1279
1280 /*
1281 * If the operation operand 1 is the same as the store point, then we need the
1282 * second operand for the modifier for the atomic operation.
1283 */
1284 if (load_pt1 == store_pt) {
1285 AtomicOp.ili_operand = op2;
1286 return ILI_OPC(bin_op);
1287 }
1288
1289 /*
1290 * If the operation operand 2 is the same as the store point, then we need the
1291 * first operand for the modifier for the atomic operation.
1292 */
1293 if (load_pt2 == store_pt) {
1294 AtomicOp.ili_operand = op1;
1295 is_atomic_operand1 = 1;
1296 return ILI_OPC(bin_op);
1297 }
1298
1299 /* This is also an error */
1300 AtomicOp.atomic_operand = 0;
1301 return IL_NONE;
1302 }
1303
1304 int
create_atomic_seq(int store_ili)1305 create_atomic_seq(int store_ili)
1306 {
1307 int ili = store_ili;
1308 int arg, garg;
1309 int function;
1310 int store_symbol;
1311 int atomic_mod, op2, const_val, load_op, store_op, store_pt, load_pt,
1312 store_nme;
1313 int realilix, imagilix;
1314 ILI_OP intarg_opcode, floatarg_opcode, doublearg_opcode, longarg_opcode, scmplx_opcode;
1315 ILI_OP arg_opcode;
1316 int is_add;
1317 int arg_dt = 0;
1318
1319 #if defined(TARGET_X8664)
1320 intarg_opcode = IL_DAIR;
1321 floatarg_opcode = IL_DASP;
1322 doublearg_opcode = IL_DADP;
1323 longarg_opcode = IL_DAKR;
1324 scmplx_opcode = IL_DASP;
1325 #else
1326 intarg_opcode = IL_ARGIR;
1327 floatarg_opcode = IL_ARGSP;
1328 doublearg_opcode = IL_ARGDP;
1329 longarg_opcode = IL_ARGKR;
1330 /* As now we only support single precision complex */
1331 scmplx_opcode = IL_ARGSP;
1332 #endif
1333
1334 atomic_mod = AtomicOp.ili_operand;
1335 store_pt = ILI_OPND(ili, 2);
1336 store_nme = ILI_OPND(ili, 3);
1337
1338 if (ILI_OPC(store_pt) == IL_CSEAR) {
1339 store_pt = ILI_OPND(store_pt, 1);
1340 } else if (ILI_OPC(store_pt) == IL_ACON) {
1341 store_symbol = ILI_OPND(store_pt, 1);
1342 store_symbol = CONVAL1G(store_symbol);
1343 ADDRTKNP(store_symbol, 1);
1344 } else if (ILI_OPC(store_pt) == IL_AADD || ILI_OPC(store_pt) == IL_ASUB) {
1345 int acon_ili;
1346 acon_ili = ILI_OPND(store_pt, 1);
1347 if (ILI_OPC(acon_ili) != IL_ACON) {
1348 while (ILI_OPC(acon_ili) == IL_AADD || ILI_OPC(acon_ili) == IL_ASUB) {
1349 acon_ili = ILI_OPND(acon_ili, 1);
1350 }
1351
1352 /* If the base is not a constant (perhaps it is a compiler temp)
1353 * then don't try and mark.
1354 */
1355 if (ILI_OPC(acon_ili) == IL_ACON) {
1356 store_symbol = ILI_OPND(acon_ili, 1);
1357 store_symbol = CONVAL1G(store_symbol);
1358 ADDRTKNP(store_symbol, 1);
1359 }
1360 } else {
1361 store_symbol = ILI_OPND(acon_ili, 1);
1362 store_symbol = CONVAL1G(store_symbol);
1363 ADDRTKNP(store_symbol, 1);
1364 }
1365 }
1366
1367 arg = ad1ili(IL_NULL, 0);
1368 garg = ad1ili(IL_NULL, 0);
1369 if (is_atomic_typcast_h2l())
1370 function = get_atomic_function_ex(ILI_OPC(AtomicOp.atomic_operand));
1371 else
1372 function = get_atomic_function(ILI_OPC(AtomicOp.atomic_operand));
1373
1374 switch (ILI_OPC(store_ili)) {
1375 case IL_ST:
1376 if (atomic_typecast_operand == IL_DFIX ||
1377 atomic_typecast_operand == IL_DFIXU) {
1378 arg_opcode = doublearg_opcode;
1379 arg_dt = DT_DBLE;
1380 }
1381 else if (atomic_typecast_operand == IL_FIX ||
1382 atomic_typecast_operand == IL_UFIX) {
1383 arg_opcode = floatarg_opcode;
1384 arg_dt = DT_FLOAT;
1385 }
1386 else {
1387 arg_opcode = intarg_opcode;
1388 arg_dt = DT_INT;
1389 }
1390 break;
1391 case IL_STDP:
1392 arg_opcode = doublearg_opcode;
1393 arg_dt = DT_DBLE;
1394 break;
1395 case IL_STSP:
1396 if (atomic_typecast_operand == IL_SNGL) {
1397 arg_opcode = doublearg_opcode;
1398 arg_dt = DT_DBLE;
1399 }
1400 else {
1401 arg_opcode = floatarg_opcode;
1402 arg_dt = DT_FLOAT;
1403 }
1404 break;
1405 case IL_STKR:
1406 if (atomic_typecast_operand == IL_DFIXK ||
1407 atomic_typecast_operand == IL_DFIXUK) {
1408 arg_opcode = doublearg_opcode;
1409 arg_dt = DT_DBLE;
1410 }
1411 else if (atomic_typecast_operand == IL_FIXK ||
1412 atomic_typecast_operand == IL_FIXUK) {
1413 arg_opcode = floatarg_opcode;
1414 arg_dt = DT_FLOAT;
1415 }
1416 else {
1417 arg_opcode = longarg_opcode;
1418 arg_dt = DT_INT8;
1419 }
1420 break;
1421 case IL_STSCMPLX:
1422 arg_opcode = scmplx_opcode;
1423 arg_dt = DT_FLOAT;
1424 break;
1425 default:
1426 interr("Create: Unexpected atomic store opcode", ILI_OPC(store_ili), ERR_Severe);
1427 break;
1428 }
1429 #if defined(TARGET_X8664)
1430 if(ILI_OPC(store_ili) == IL_STSCMPLX) {
1431 /* split the real and img parts */
1432 /* real part */
1433 realilix = ad1ili(IL_SCMPLX2REAL, atomic_mod);
1434 /* imag part */
1435 imagilix = ad1ili(IL_SCMPLX2IMAG, atomic_mod);
1436 /* call the ili utlity function to gen param list */
1437 initcallargs(3);
1438 addcallarg(store_pt, 0, DT_CPTR);
1439 addcallarg(realilix, 0, arg_dt);
1440 addcallarg(imagilix, 0, arg_dt);
1441 /* create argument list */
1442 arg = gencallargs();
1443 }
1444 else {
1445 initcallargs(2);
1446 addcallarg(store_pt, 0, DT_CPTR);
1447 addcallarg(atomic_mod, 0, arg_dt);
1448 arg = gencallargs();
1449 }
1450 #else
1451 if(ILI_OPC(store_ili) == IL_STSCMPLX) {
1452 /* split the real and img parts */
1453 /* real part */
1454 realilix = ad1ili(IL_SCMPLX2REAL, atomic_mod);
1455 /* imag part */
1456 imagilix = ad1ili(IL_SCMPLX2IMAG, atomic_mod);
1457 /* create argument list */
1458 arg = ad2ili(arg_opcode, imagilix, arg);
1459 arg = ad2ili(arg_opcode, realilix, arg);
1460 }
1461 else {
1462 arg = ad2ili(arg_opcode, atomic_mod, arg);
1463 }
1464 arg = ad3ili(IL_ARGAR, store_pt, arg, 0);
1465 #endif
1466 arg = ad2ili(IL_JSR, function, arg);
1467 if(ILI_OPC(store_ili) == IL_STSCMPLX) {
1468 /* split the real and img parts */
1469 /* real part */
1470 realilix = ad1ili(IL_SCMPLX2REAL, atomic_mod);
1471 /* imag part */
1472 imagilix = ad1ili(IL_SCMPLX2IMAG, atomic_mod);
1473 /* create argument list */
1474 garg = ad4ili(IL_GARG, imagilix, garg, arg_dt, 0);
1475 garg = ad4ili(IL_GARG, realilix, garg, arg_dt, 0);
1476 }
1477 else {
1478 garg = ad4ili(IL_GARG, atomic_mod, garg, arg_dt, 0);
1479 }
1480 garg = ad4ili(IL_GARG, store_pt, garg, DT_CPTR, store_nme);
1481 garg = ad3ili(IL_GJSR, function, garg, 0);
1482 ILI_ALT(arg) = garg;
1483 return arg;
1484 }
1485
1486 bool
exp_end_atomic(int store,int curilm)1487 exp_end_atomic(int store, int curilm)
1488 {
1489 if (is_in_atomic) {
1490 int atomic_opcode;
1491 atomic_opcode = get_atomic_update_opcode(store);
1492 if (atomic_opcode != 0) {
1493 if (get_atomic_store_created()) {
1494 error(S_0155_OP1_OP2, ERR_Severe, gbl.lineno, "Invalid atomic expression", CNULL);
1495 } else if (atomic_opcode != IL_FREEIR) {
1496 int atomic_seq;
1497 atomic_seq = create_atomic_seq(store);
1498 chk_block(atomic_seq);
1499 ILM_RESULT(curilm) = atomic_seq;
1500 ILM_BLOCK(curilm) = expb.curbih;
1501 set_atomic_store_created(1);
1502 reset_atomic_typecast_h2l();
1503 } else {
1504 /* Is there anything to do with FREEIR */
1505 }
1506 } else {
1507 error(S_0155_OP1_OP2, ERR_Severe, gbl.lineno, "Invalid atomic expression", CNULL);
1508 }
1509 return true;
1510 }
1511 if (is_in_atomic_read) {
1512 int atomic_opcode;
1513 atomic_opcode = get_atomic_read_opcode(store);
1514 if (atomic_opcode != 0) {
1515 if (get_atomic_store_created()) {
1516 error(S_0155_OP1_OP2, ERR_Severe, gbl.lineno, "Invalid atomic read expression", CNULL);
1517 } else if (atomic_opcode != IL_FREEIR) {
1518 int atomic_seq;
1519 atomic_seq = create_atomic_read_seq(store);
1520 chk_block(atomic_seq);
1521 ILM_RESULT(curilm) = atomic_seq;
1522 ILM_BLOCK(curilm) = expb.curbih;
1523 set_atomic_store_created(1);
1524 } else {
1525 error(S_0155_OP1_OP2, ERR_Severe, gbl.lineno, "Invalid atomic read expression", CNULL);
1526 }
1527 } else {
1528 error(S_0155_OP1_OP2, ERR_Severe, gbl.lineno, "Invalid atomic read expression", CNULL);
1529 }
1530 return true;
1531 }
1532 if (is_in_atomic_write) {
1533 int atomic_opcode;
1534 atomic_opcode = get_atomic_write_opcode(store);
1535 if (atomic_opcode != 0) {
1536 if (get_atomic_store_created()) {
1537 error(S_0155_OP1_OP2, ERR_Severe, gbl.lineno, "Invalid atomic write expression", CNULL);
1538 } else if (atomic_opcode != IL_FREEIR) {
1539 int atomic_seq;
1540 atomic_seq = create_atomic_write_seq(store);
1541 chk_block(atomic_seq);
1542 ILM_RESULT(curilm) = atomic_seq;
1543 ILM_BLOCK(curilm) = expb.curbih;
1544 set_atomic_store_created(1);
1545 } else {
1546 error(S_0155_OP1_OP2, ERR_Severe, gbl.lineno, "Invalid atomic write expression", CNULL);
1547 }
1548 } else {
1549 error(S_0155_OP1_OP2, ERR_Severe, gbl.lineno, "Invalid atomic write expression", CNULL);
1550 }
1551 return true;
1552 }
1553 if (is_in_atomic_capture) {
1554 int atomic_opcode;
1555 atomic_opcode = get_atomic_read_opcode(store);
1556 if (atomic_opcode != 0 && atomic_opcode != IL_FREEIR) {
1557 if (capture_read_ili != 0) {
1558 error(S_0155_OP1_OP2, ERR_Severe, gbl.lineno,
1559 "Invalid atomic capture block, multiple reads.", CNULL);
1560 } else {
1561 capture_read_ili = store;
1562 if (capture_update_ili != 0) {
1563 int atomic_seq;
1564 /* We have both parts of the capture, capture (write) is
1565 * not first. */
1566 atomic_seq = create_atomic_capture_seq(capture_update_ili,
1567 capture_read_ili, 0);
1568 chk_block(atomic_seq);
1569 ILM_RESULT(curilm) = atomic_seq;
1570 ILM_BLOCK(curilm) = expb.curbih;
1571 set_atomic_capture_created(1);
1572 }
1573 }
1574 }
1575
1576 atomic_opcode = get_atomic_update_opcode(store);
1577 if (atomic_opcode != 0) {
1578 if (capture_update_ili != 0 && atomic_opcode != IL_FREEIR) {
1579 error(S_0155_OP1_OP2, ERR_Severe, gbl.lineno,
1580 "Invalid atomic capture block, multiple updates.", CNULL);
1581 } else if (atomic_opcode != IL_FREEIR) {
1582 capture_update_ili = store;
1583 if (capture_read_ili != 0) {
1584 /* We have both parts of the capture, capture (write) is
1585 * first. */
1586 int atomic_seq;
1587 atomic_seq = create_atomic_capture_seq(capture_update_ili,
1588 capture_read_ili, 1);
1589 chk_block(atomic_seq);
1590 ILM_RESULT(curilm) = atomic_seq;
1591 ILM_BLOCK(curilm) = expb.curbih;
1592 set_atomic_capture_created(1);
1593 }
1594 } else {
1595 /* Set the result of the FREEIR ILM to the regular
1596 * storage element (basically, a fallthrough).
1597 */
1598 chk_block(store);
1599 ILM_RESULT(curilm) = store;
1600 ILM_BLOCK(curilm) = expb.curbih;
1601 }
1602 }
1603 return true;
1604 }
1605 return false;
1606 }
1607
1608
/* Set a TARGET_*_ATOMICS macro that specifies the intrinsics/run-time library
 * to target: TARGET_LLVM_ATOMICS when TARGET_OSX is defined,
 * TARGET_GNU_ATOMICS otherwise.  Exactly one of the two is defined; the other
 * is left undefined and therefore evaluates to 0 in the #if tests below. */
#if defined(TARGET_OSX)
#define TARGET_LLVM_ATOMICS 1
#else
#define TARGET_GNU_ATOMICS 1
#endif
1616
/** Categorization of atomic intrinsics that abstracts out details.
    Each class corresponds to a general code-generation schema. */
typedef enum ATOMIC_OP_CATEGORY {
  AOC_LOAD,             /* atomic_load intrinsics */
  AOC_STORE,            /* atomic_store intrinsics */
  AOC_EXCHANGE,         /* atomic_exchange intrinsics */
  AOC_COMPARE_EXCHANGE, /* atomic_compare_exchange intrinsics */
  AOC_FETCH_OP,         /* atomic_fetch_<op> intrinsics */
#if TARGET_GNU_ATOMICS
  /* The next three categories exist only for the GNU flavor. */
  AOC_OP_FETCH,         /* atomic_<op>_fetch intrinsics */
  AOC_TEST_AND_SET,     /* atomic_test_and_set */
  AOC_CLEAR,            /* atomic_clear */
#endif
  AOC_FENCE             /* atomic_thread_fence / atomic_signal_fence */
} ATOMIC_OP_CATEGORY;
1632
/* Macro for generating case labels for C++11 read-modify-write intrinsics.
 *
 * Expands to the labels s_add_t, s_sub_t, s_and_t, s_or_t and s_xor_t.  The
 * leading `case` and the trailing `:` are deliberately left out, so the macro
 * is written as
 *     case EACH_SUBOP(PD_atomic_fetch, 1):
 * which yields PD_atomic_fetch_add_1 ... PD_atomic_fetch_xor_1, while
 *     case EACH_SUBOP(PD_atomic, fetch_1):
 * yields PD_atomic_add_fetch_1 ... PD_atomic_xor_fetch_1.
 */
/* clang-format off */
#define EACH_SUBOP(s,t) \
    s##_add_##t: \
    case s##_sub_##t: \
    case s##_and_##t: \
    case s##_or_##t: \
    case s##_xor_##t
/* clang-format on */
1642
1643 /* Use PD_IS_ATOMIC to detect presence of atomic intrinsics */
1644 #ifdef PD_IS_ATOMIC
1645 /** Return true if pd is an atomic intrinsic with a size operand. */
1646 static bool
atomic_pd_has_size_operand(PD_KIND pd)1647 atomic_pd_has_size_operand(PD_KIND pd)
1648 {
1649 switch (pd) {
1650 #if TARGET_LLVM_ATOMICS
1651 case PD_atomic_load:
1652 case PD_atomic_store:
1653 case PD_atomic_exchange:
1654 case PD_atomic_compare_exchange:
1655 return true;
1656 #endif
1657 default:
1658 return false;
1659 }
1660 }
1661
/** Return MSZ for location atomically operated on by an atomic intrinsic.
    Return MSZ_UNDEF if intrinsic does not operate on a location.

    The size is taken from the _1/_2/_4/_8 suffix of the intrinsic and mapped
    to MSZ_SBYTE/MSZ_SHWORD/MSZ_SWORD/MSZ_SLWORD; test_and_set and clear are
    treated as byte-sized; the fences yield MSZ_UNDEF.

    \param pd predefined-function kind of the atomic intrinsic
 */
static MSZ
msz_from_atomic_pd(PD_KIND pd)
{
  switch (pd) {
  default:
    assert(0, "msz_from_atomic_pd: pd not atomic or not implemented", pd, ERR_Fatal);
    /* NOTE(review): there is no return after the assert, so if the assert
     * does not terminate the compilation, control falls through into the
     * byte-sized cases below and MSZ_SBYTE is returned -- confirm this is
     * intended. */

#if TARGET_GNU_ATOMICS
  case PD_atomic_load_1:
  case PD_atomic_store_1:
  case PD_atomic_exchange_1:
  case PD_atomic_compare_exchange_1:
  case EACH_SUBOP(PD_atomic, fetch_1): /* PD_atomic_<op>_fetch_1 */
  case PD_atomic_test_and_set:
  case PD_atomic_clear:
#endif
  case EACH_SUBOP(PD_atomic_fetch, 1): /* PD_atomic_fetch_<op>_1 */
    return MSZ_SBYTE;

#if TARGET_GNU_ATOMICS
  case PD_atomic_load_2:
  case PD_atomic_store_2:
  case PD_atomic_exchange_2:
  case PD_atomic_compare_exchange_2:
  case EACH_SUBOP(PD_atomic, fetch_2):
#endif
  case EACH_SUBOP(PD_atomic_fetch, 2):
    return MSZ_SHWORD;

#if TARGET_GNU_ATOMICS
  case PD_atomic_load_4:
  case PD_atomic_store_4:
  case PD_atomic_exchange_4:
  case PD_atomic_compare_exchange_4:
  case EACH_SUBOP(PD_atomic, fetch_4):
#endif
  case EACH_SUBOP(PD_atomic_fetch, 4):
    return MSZ_SWORD;

#if TARGET_GNU_ATOMICS
  case PD_atomic_load_8:
  case PD_atomic_store_8:
  case PD_atomic_exchange_8:
  case PD_atomic_compare_exchange_8:
  case EACH_SUBOP(PD_atomic, fetch_8):
#endif
  case EACH_SUBOP(PD_atomic_fetch, 8):
    return MSZ_SLWORD;

  /* Fences do not operate on a memory location. */
  case PD_atomic_thread_fence:
  case PD_atomic_signal_fence:
    return MSZ_UNDEF;
  }
}
1718
1719 #endif
1720
1721 static MSZ
msz_from_atomic_dtype(DTYPE dtype)1722 msz_from_atomic_dtype(DTYPE dtype)
1723 {
1724 switch(zsize_of(dtype)) {
1725 case 1:
1726 return MSZ_BYTE;
1727 case 2:
1728 return MSZ_SHWORD;
1729 case 4:
1730 return MSZ_SWORD;
1731 case 8:
1732 return MSZ_I8;
1733 default:
1734 return MSZ_UNDEF;
1735 }
1736 }
1737
1738 /** ILI operations of a given "link" kind. */
1739 typedef struct OPCODES {
1740 ILI_OP ld, st, atomicld, atomicst, atomicrmw, cmpxchg, cmpxchg_old;
1741 } OPCODES;
1742
1743 /** Get operations suitable for a given MSZ.
1744 The MSZ must correspond to an integer type. */
1745 static const OPCODES *
get_ops(MSZ msz,int is_openmp)1746 get_ops(MSZ msz, int is_openmp)
1747 {
1748 static const OPCODES ir_ops = {IL_LD, IL_ST, IL_ATOMICLDI,
1749 IL_ATOMICSTI, IL_ATOMICRMWI, IL_CMPXCHGI,
1750 IL_CMPXCHG_OLDI};
1751 static const OPCODES kr_ops = {IL_LDKR, IL_STKR, IL_ATOMICLDKR,
1752 IL_ATOMICSTKR, IL_ATOMICRMWKR, IL_CMPXCHGKR,
1753 IL_CMPXCHG_OLDKR};
1754 /* read-modify-write does not allow float/dble types */
1755 static const OPCODES sp_ops = {IL_LD, IL_ST, IL_ATOMICLDSP,
1756 IL_ATOMICSTSP, IL_ATOMICRMWI, IL_CMPXCHGI,
1757 IL_CMPXCHG_OLDI};
1758 static const OPCODES dp_ops = {IL_LDKR, IL_STKR, IL_ATOMICLDDP,
1759 IL_ATOMICSTDP, IL_ATOMICRMWKR, IL_CMPXCHGKR,
1760 IL_CMPXCHG_OLDKR};
1761 static const OPCODES ar_ops = {IL_LDKR, IL_STKR, IL_ATOMICLDA,
1762 IL_ATOMICSTA, IL_ATOMICRMWKR, IL_CMPXCHGKR,
1763 IL_CMPXCHG_OLDKR};
1764 switch (msz) {
1765 case MSZ_F8:
1766 if (is_openmp) {
1767 return &dp_ops;
1768 }
1769 case MSZ_PTR:
1770 if (is_openmp) {
1771 return &ar_ops;
1772 }
1773 case MSZ_SLWORD:
1774 case MSZ_ULWORD:
1775 case MSZ_I8:
1776 return &kr_ops;
1777 case MSZ_F4:
1778 if (is_openmp)
1779 return &sp_ops;
1780 default:
1781 return &ir_ops;
1782 }
1783 }
1784
1785 #if TARGET_LLVM_ATOMICS
1786 /** Given a size operand, return corresponding MSZ if operand is a constant.
1787 Otherwise return MSZ_UNDEF. */
1788 static MSZ
msz_from_size_argument(int ilix)1789 msz_from_size_argument(int ilix)
1790 {
1791 INT value;
1792
1793 /* See if ilix represents a small constant. */
1794 switch (ILI_OPC(ilix)) {
1795 case IL_KCON:
1796 /* Punt if any high-order bits are set. */
1797 value = CONVAL1G(ILI_OPND(ilix, 1));
1798 if (value != 0)
1799 return MSZ_UNDEF;
1800 /* drop through to read of low-order bits. */
1801 case IL_ICON:
1802 value = CONVAL2G(ILI_OPND(ilix, 1));
1803 break;
1804 default:;
1805 return MSZ_UNDEF;
1806 }
1807
1808 /* Return MSZ corresponding to the constant. */
1809 switch (value) {
1810 case 1:
1811 return MSZ_UBYTE;
1812 case 2:
1813 return MSZ_UHWORD;
1814 case 4:
1815 return MSZ_UWORD;
1816 case 8:
1817 return MSZ_ULWORD;
1818 default:;
1819 return MSZ_UNDEF;
1820 }
1821 }
1822
1823 /** \brief Remove "weak" parameter from atomic_compare_exchange.
1824
1825 Called when we cannot map the atomic_compare_exchange onto an ILI operation.
1826 Though present in the atomic_compare_exchange used in the OSX <atomic>
1827 header, the parameter is not present in LLVM's run-time library. */
1828 static void
remove_weak_parameter(ILM * ilmp)1829 remove_weak_parameter(ILM *ilmp)
1830 {
1831 DEBUG_ASSERT(ILM_OPC(ilmp) == IM_FAPPLY, "FAPPLY expected");
1832 /* Number of parmeters changes from 7 to 6. */
1833 DEBUG_ASSERT(ILM_OPND(ilmp, 1) == 7, "wrong number of parameters?");
1834 ILM_OPND(ilmp, 1) = 6;
1835 int callee_index = 3;
1836 /* Remove 5th parameter. */
1837 ILM_OPND(ilmp, callee_index + 5) = ILM_OPND(ilmp, callee_index + 6);
1838 ILM_OPND(ilmp, callee_index + 6) = ILM_OPND(ilmp, callee_index + 7);
1839 ILM_OPND(ilmp, callee_index + 7) = IM_NOP;
1840 }
1841 #endif
1842
1843 #ifdef PD_IS_ATOMIC
/** Given a PD_KIND, get its category.

    Which intrinsics are recognized depends on the flavor selected by
    TARGET_GNU_ATOMICS / TARGET_LLVM_ATOMICS: the GNU flavor uses the
    size-suffixed names (_1, _2, _4, _8) and additionally has the op_fetch,
    test_and_set and clear intrinsics; the LLVM flavor uses the unsuffixed
    load/store/exchange/compare_exchange names.

    \param pd predefined-function kind of the atomic intrinsic
 */
static ATOMIC_OP_CATEGORY
atomic_op_category_from_pd(PD_KIND pd)
{
  switch (pd) {
  default:
    interr("atomic_op_category_from_pd: pd not atomic or not implemented", pd,
           4);
    /* NOTE(review): no return follows; if interr returns, control falls
     * through into the load cases below and AOC_LOAD is returned. */

    /* load */
#if TARGET_GNU_ATOMICS
  case PD_atomic_load_1:
  case PD_atomic_load_2:
  case PD_atomic_load_4:
  case PD_atomic_load_8:
#endif
#if TARGET_LLVM_ATOMICS
  case PD_atomic_load:
#endif
    return AOC_LOAD;

    /* store */
#if TARGET_GNU_ATOMICS
  case PD_atomic_store_1:
  case PD_atomic_store_2:
  case PD_atomic_store_4:
  case PD_atomic_store_8:
#endif
#if TARGET_LLVM_ATOMICS
  case PD_atomic_store:
#endif
    return AOC_STORE;

    /* exchange */
#if TARGET_GNU_ATOMICS
  case PD_atomic_exchange_1:
  case PD_atomic_exchange_2:
  case PD_atomic_exchange_4:
  case PD_atomic_exchange_8:
#endif
#if TARGET_LLVM_ATOMICS
  case PD_atomic_exchange:
#endif
    return AOC_EXCHANGE;

    /* compare_exchange */
#if TARGET_GNU_ATOMICS
  case PD_atomic_compare_exchange_1:
  case PD_atomic_compare_exchange_2:
  case PD_atomic_compare_exchange_4:
  case PD_atomic_compare_exchange_8:
#endif
#if TARGET_LLVM_ATOMICS
  case PD_atomic_compare_exchange:
#endif
    return AOC_COMPARE_EXCHANGE;

    /* fetch_op: PD_atomic_fetch_<op>_<size> */
  case EACH_SUBOP(PD_atomic_fetch, 1):
  case EACH_SUBOP(PD_atomic_fetch, 2):
  case EACH_SUBOP(PD_atomic_fetch, 4):
  case EACH_SUBOP(PD_atomic_fetch, 8):
    return AOC_FETCH_OP;

#if TARGET_GNU_ATOMICS
    /* op_fetch: PD_atomic_<op>_fetch_<size> */
  case EACH_SUBOP(PD_atomic, fetch_1):
  case EACH_SUBOP(PD_atomic, fetch_2):
  case EACH_SUBOP(PD_atomic, fetch_4):
  case EACH_SUBOP(PD_atomic, fetch_8):
    return AOC_OP_FETCH;

    /* test and set */
  case PD_atomic_test_and_set:
    return AOC_TEST_AND_SET;

    /* clear */
  case PD_atomic_clear:
    return AOC_CLEAR;
#endif

    /* fence */
  case PD_atomic_thread_fence:
  case PD_atomic_signal_fence:
    return AOC_FENCE;
  }
}
1931
/** Return ATOMIC_RMW_OP for given predefined op that is either an atomic
    "op_fetch" or "fetch_op". Set *replay to the operation required to "replay"
    the operation.

    The replay opcode is the 32-bit integer form (IL_IADD, IL_ISUB, IL_AND,
    IL_OR, IL_XOR) for the _1, _2 and _4 intrinsics and the 64-bit form
    (IL_KADD, IL_KSUB, IL_KAND, IL_KOR, IL_KXOR) for the _8 intrinsics.

    \param pd     predefined-function kind of the atomic intrinsic
    \param replay out: ILI opcode that performs the same operation
 */
static ATOMIC_RMW_OP
atomic_rmw_op_from_pd(PD_KIND pd, ILI_OP *replay)
{
  switch (pd) {
  default:
    assert(0, "op_for_replay: pd not an atomic_op_fetch or not implemented", pd,
           4);
    /* NOTE(review): no return follows; if the assert does not terminate the
     * compilation, control falls through into the 32-bit add cases below. */

  /* add, 1/2/4 bytes */
  case PD_atomic_fetch_add_1:
  case PD_atomic_fetch_add_2:
  case PD_atomic_fetch_add_4:
#if TARGET_GNU_ATOMICS
  case PD_atomic_add_fetch_1:
  case PD_atomic_add_fetch_2:
  case PD_atomic_add_fetch_4:
#endif
    *replay = IL_IADD;
    return AOP_ADD;
  /* add, 8 bytes */
  case PD_atomic_fetch_add_8:
#if TARGET_GNU_ATOMICS
  case PD_atomic_add_fetch_8:
#endif
    *replay = IL_KADD;
    return AOP_ADD;

  /* sub, 1/2/4 bytes */
  case PD_atomic_fetch_sub_1:
  case PD_atomic_fetch_sub_2:
  case PD_atomic_fetch_sub_4:
#if TARGET_GNU_ATOMICS
  case PD_atomic_sub_fetch_1:
  case PD_atomic_sub_fetch_2:
  case PD_atomic_sub_fetch_4:
#endif
    *replay = IL_ISUB;
    return AOP_SUB;
  /* sub, 8 bytes */
  case PD_atomic_fetch_sub_8:
#if TARGET_GNU_ATOMICS
  case PD_atomic_sub_fetch_8:
#endif
    *replay = IL_KSUB;
    return AOP_SUB;

  /* and, 1/2/4 bytes */
  case PD_atomic_fetch_and_1:
  case PD_atomic_fetch_and_2:
  case PD_atomic_fetch_and_4:
#if TARGET_GNU_ATOMICS
  case PD_atomic_and_fetch_1:
  case PD_atomic_and_fetch_2:
  case PD_atomic_and_fetch_4:
#endif
    *replay = IL_AND;
    return AOP_AND;

  /* and, 8 bytes */
  case PD_atomic_fetch_and_8:
#if TARGET_GNU_ATOMICS
  case PD_atomic_and_fetch_8:
#endif
    *replay = IL_KAND;
    return AOP_AND;

  /* or, 1/2/4 bytes */
  case PD_atomic_fetch_or_1:
  case PD_atomic_fetch_or_2:
  case PD_atomic_fetch_or_4:
#if TARGET_GNU_ATOMICS
  case PD_atomic_or_fetch_1:
  case PD_atomic_or_fetch_2:
  case PD_atomic_or_fetch_4:
#endif
    *replay = IL_OR;
    return AOP_OR;
  /* or, 8 bytes */
  case PD_atomic_fetch_or_8:
#if TARGET_GNU_ATOMICS
  case PD_atomic_or_fetch_8:
#endif
    *replay = IL_KOR;
    return AOP_OR;

  /* xor, 1/2/4 bytes */
  case PD_atomic_fetch_xor_1:
  case PD_atomic_fetch_xor_2:
  case PD_atomic_fetch_xor_4:
#if TARGET_GNU_ATOMICS
  case PD_atomic_xor_fetch_1:
  case PD_atomic_xor_fetch_2:
  case PD_atomic_xor_fetch_4:
#endif
    *replay = IL_XOR;
    return AOP_XOR;
  /* xor, 8 bytes */
  case PD_atomic_fetch_xor_8:
#if TARGET_GNU_ATOMICS
  case PD_atomic_xor_fetch_8:
#endif
    *replay = IL_KXOR;
    return AOP_XOR;
  }
}
2030
2031 #endif
2032
/** Record that lets the value of an ILI expression be recovered later.
    It is filled in by auto_stash and read back by auto_retrieve. */
struct auto_temp {
  /** Either the ilix of the store that saved the value into a temporary, or
      the ilix of a constant (which needs no temporary). */
  int expr;
};
typedef struct auto_temp auto_temp;
2038
2039 /** \brief Generate ILI so that value of an ILI expression can be retrieved
2040 later.
2041
2042 \param temp pointer to object that remembers how to recover the value
2043 \param ilix ILI expression to be stashed/retrieved
2044 \param st_op IL_STx operation to be used to store value if necessary
2045 \param msz machine size of value to be stored.
2046 */
2047 static void
auto_stash(auto_temp * temp,int ilix,ILI_OP st_op,MSZ msz)2048 auto_stash(auto_temp *temp, int ilix, ILI_OP st_op, MSZ msz)
2049 {
2050 int nme, acon, store;
2051 SPTR sym;
2052 switch (ILI_OPC(ilix)) {
2053 case IL_ACON:
2054 case IL_ICON:
2055 /* Do not need a temporary */
2056 temp->expr = ilix;
2057 return;
2058 default:
2059 break;
2060 }
2061 sym = mkrtemp(ilix);
2062 acon = ad_acon(sym, (INT)0);
2063 nme = addnme(NT_VAR, sym, 0, (INT)0);
2064 store = ad4ili(st_op, ilix, acon, nme, msz);
2065 chk_block(store);
2066 temp->expr = store;
2067 }
2068
2069 /** \brief Generate ILI to retrieve previously stashed value.
2070
2071 \param temp pointer to object set by routine auto_stash
2072 */
2073 static int
auto_retrieve(auto_temp * temp)2074 auto_retrieve(auto_temp *temp)
2075 {
2076 switch (IL_TYPE(ILI_OPC(temp->expr))) {
2077 default:
2078 interr("auto_retrieve: unexpected IL_TYPE", IL_TYPE(temp->expr), ERR_Fatal);
2079 case ILTY_STORE:
2080 case ILTY_PSTORE:
2081 return ad_load(temp->expr);
2082 case ILTY_CONS:
2083 return temp->expr;
2084 }
2085 }
2086
2087 #if TARGET_GNU_ATOMICS
2088 #define MAX_ATOMIC_ARGS 6
2089 #define COMPARAND_INDEX 1
2090 #elif TARGET_LLVM_ATOMICS
2091 #define MAX_ATOMIC_ARGS 7
2092 #define COMPARAND_INDEX 2
2093 #else
2094 #error "expected TARGET_GNU_ATOMICS or TARGET_LLVM_ATOMICS"
2095 #endif
2096
2097 #ifdef PD_IS_ATOMIC
2098 /* \brief Expand a GNU or LLVM atomic intrinsic.
2099 Return true if intrinsic is expanded, false if intrinsic should be rendered
2100 as plain call. In the latter case, the ILM call may have had its parameters
2101 changed slightly.
2102
2103 \param pd - a PD_... value from pd.h for which PD_IS_ATOMIC is true.
2104 \param ilmp - pointer to call site for an atomic intrinsic */
2105 bool
exp_atomic_intrinsic(PD_KIND pd,ILM * ilmp,int curilm)2106 exp_atomic_intrinsic(PD_KIND pd, ILM *ilmp, int curilm)
2107 {
2108 int i, n;
2109 int opnd[MAX_ATOMIC_ARGS]; /* ILI "ptrs". */
2110 int nme[MAX_ATOMIC_ARGS];
2111 int callee_index, stc, result;
2112 const OPCODES *o;
2113 ILI_OP ili_op_for_replay;
2114 MSZ msz;
2115 ATOMIC_OP_CATEGORY aoc;
2116 DEBUG_ASSERT(ilmp->opc == IM_FAPPLY || ilmp->opc == IM_VAPPLY,
2117 "atomic ops cannot throw");
2118
2119 /* Get # of operands. */
2120 n = ILM_OPND(ilmp, 1);
2121 /* FIXME - do we need to check argument count and issue error message to
2122 user if there are the wrong number of arguments, or did the front-end
2123 already deal with that? */
2124 DEBUG_ASSERT(0 <= n && n <= MAX_ATOMIC_ARGS, "exp_atomic_intrinsic: bad ILM");
2125 callee_index = ilm_callee_index(ilmp->opc);
2126 for (i = 0; i < n; ++i) {
2127 int ilmx = ILM_OPND(ilmp, callee_index + 1 + i); /* locates ARG ilm */
2128 ILM *ilmpx = (ILM *)(ilmb.ilm_base + ilmx);
2129 ilmx = ILM_OPND(ilmpx, 2);
2130 nme[i] = NME_OF(ilmx);
2131 opnd[i] = ILI_OF(ilmx);
2132 }
2133
2134 /* Determine size of location operated on by the atomic op. */
2135 #if TARGET_LLVM_ATOMICS
2136 if (atomic_pd_has_size_operand(pd)) {
2137 msz = msz_from_size_argument(opnd[0]);
2138 if (msz == MSZ_UNDEF) {
2139 if (pd == PD_atomic_compare_exchange) {
2140 remove_weak_parameter(ilmp);
2141 }
2142 return false;
2143 }
2144 } else
2145 #endif
2146 {
2147 msz = msz_from_atomic_pd(pd);
2148 }
2149
2150 /* Get operations suitable for this msz. */
2151 o = msz != MSZ_UNDEF ? get_ops(msz, 0) : NULL;
2152
2153 aoc = atomic_op_category_from_pd(pd);
2154 switch (aoc) {
2155 default:
2156 assert(false, "exp_atomic_intrinsic: unimplemented op class", aoc, ERR_Fatal);
2157
2158 case AOC_LOAD:
2159 stc = atomic_encode(msz, SS_PROCESS, AORG_CPLUS);
2160 #if TARGET_GNU_ATOMICS
2161 result = ad4ili(o->atomicld, opnd[0], nme[0], stc, opnd[1]);
2162 #elif TARGET_LLVM_ATOMICS
2163 result = ad4ili(o->atomicld, opnd[1], nme[1], stc, opnd[3]);
2164 result = ad4ili(o->st, result, opnd[2], nme[2], msz);
2165 chk_block(result);
2166 #else
2167 #error "expected TARGET_GNU_ATOMICS or TARGET_LLVM_ATOMICS"
2168 #endif
2169 break;
2170
2171 case AOC_STORE:
2172 stc = atomic_encode(msz, SS_PROCESS, AORG_CPLUS);
2173 #if TARGET_GNU_ATOMICS
2174 result = ad5ili(o->atomicst, opnd[1], opnd[0], nme[0], stc, opnd[2]);
2175 chk_block(result);
2176 #elif TARGET_LLVM_ATOMICS
2177 result = ad3ili(o->ld, opnd[2], nme[2], msz);
2178 result = ad5ili(o->atomicst, result, opnd[1], nme[1], stc, opnd[3]);
2179 chk_block(result);
2180 #else
2181 #error "expected TARGET_GNU_ATOMICS or TARGET_LLVM_ATOMICS"
2182 #endif
2183 break;
2184
2185 case AOC_EXCHANGE:
2186 stc = atomic_encode_rmw(msz, SS_PROCESS, AORG_CPLUS, AOP_XCHG);
2187 #if TARGET_GNU_ATOMICS
2188 result = ad5ili(o->atomicrmw, opnd[1], opnd[0], nme[0], stc, opnd[2]);
2189 #elif TARGET_LLVM_ATOMICS
2190 result = ad3ili(o->ld, opnd[2], nme[2], msz);
2191 result = ad5ili(o->atomicrmw, result, opnd[1], nme[1], stc, opnd[4]);
2192 result = ad4ili(o->st, result, opnd[3], nme[3], msz);
2193 chk_block(result);
2194 #else
2195 #error "expected TARGET_GNU_ATOMICS or TARGET_LLVM_ATOMICS"
2196 #endif
2197 break;
2198
2199 case AOC_COMPARE_EXCHANGE: {
2200 int expected_ptr, comparand, cmpxchg, succ, oldval;
2201 int comparand_nme, label;
2202 int desired;
2203 auto_temp expected_ptr_save, oldval_save, succ_save;
2204 stc = atomic_encode(msz, SS_PROCESS, AORG_CPLUS);
2205
2206 /* Get the comparand ("expected") */
2207 comparand_nme = addnme(NT_IND, 0, nme[COMPARAND_INDEX], (INT)0);
2208 comparand = ad3ili(o->ld, opnd[COMPARAND_INDEX], comparand_nme, msz);
2209
2210 /* Save the expected_ptr */
2211 expected_ptr = ad_cse(opnd[COMPARAND_INDEX]);
2212 auto_stash(&expected_ptr_save, expected_ptr, IL_STA, MSZ_PTR);
2213
2214 /* Do the compare-exchange */
2215 #if TARGET_GNU_ATOMICS
2216 desired = opnd[2];
2217 cmpxchg = ad_cmpxchg(o->cmpxchg, desired, opnd[0], nme[0], stc, comparand,
2218 opnd[3], opnd[4], opnd[5]);
2219 #elif TARGET_LLVM_ATOMICS
2220 desired = ad3ili(o->ld, opnd[3], nme[3], msz);
2221 cmpxchg = ad_cmpxchg(o->cmpxchg, desired, opnd[1], nme[1], stc, comparand,
2222 opnd[4], opnd[5], opnd[6]);
2223 #else
2224 #error "expected TARGET_GNU_ATOMICS or TARGET_LLVM_ATOMICS"
2225 #endif
2226
2227 cmpxchg = ad_cse(cmpxchg);
2228
2229 /* Stash old value returned by cmpxchg */
2230 oldval = ad1ili(o->cmpxchg_old, cmpxchg);
2231 auto_stash(&oldval_save, oldval, o->st, msz);
2232
2233 /* Stash success flag returned by cmpxchg */
2234 succ = ad1ili(IL_CMPXCHG_SUCCESS, cmpxchg);
2235 succ = ad_cse(succ);
2236 auto_stash(&succ_save, succ, IL_ST, MSZ_SBYTE);
2237
2238 /* Branch on success. */
2239 label = getlab();
2240 chk_block(ad3ili(IL_ICJMPZ, succ, CC_NE, label));
2241
2242 /* Store old value into *expected_ptr. */
2243 expected_ptr = auto_retrieve(&expected_ptr_save);
2244 oldval = auto_retrieve(&oldval_save);
2245 chk_block(ad4ili(o->st, oldval, expected_ptr, nme[1], msz));
2246
2247 /* Emit label */
2248 wr_block();
2249 cr_block();
2250 BIH_LABEL(expb.curbih) = label;
2251 ILIBLKP(label, expb.curbih);
2252 RFCNTP(label, 1);
2253
2254 /* Result is value of flag. */
2255 result = auto_retrieve(&succ_save);
2256 } break;
2257
2258 case AOC_FETCH_OP:
2259 /* FIXME - should use IL_ATOMICRMWA if first operand is pointer to
2260 pointer. Use DTYPE to tell? Or is IL_ATOMICRMWA fundmentally a bad
2261 idea? */
2262 stc = atomic_encode_rmw(msz, SS_PROCESS, AORG_CPLUS,
2263 atomic_rmw_op_from_pd(pd, &ili_op_for_replay));
2264 result = ad5ili(o->atomicrmw, opnd[1], opnd[0], nme[0], stc, opnd[2]);
2265 break;
2266
2267 #if TARGET_GNU_ATOMICS
2268 case AOC_OP_FETCH:
2269 stc = atomic_encode_rmw(msz, SS_PROCESS, AORG_CPLUS,
2270 atomic_rmw_op_from_pd(pd, &ili_op_for_replay));
2271 /* Need to "replay" operation to get final result, so we need to use opnd[1]
2272 * twice. */
2273 opnd[1] = ad_cse(opnd[1]);
2274 result = ad5ili(o->atomicrmw, opnd[1], opnd[0], nme[0], stc, opnd[2]);
2275 result = ad2ili(ili_op_for_replay, result, opnd[1]);
2276 break;
2277
2278 case AOC_TEST_AND_SET:
2279 /* Treat as atomic exchange on a byte. */
2280 stc = atomic_encode_rmw(msz, SS_PROCESS, AORG_CPLUS, AOP_XCHG);
2281 result = ad5ili(IL_ATOMICRMWI, ad_icon(1), opnd[0], nme[0], stc, opnd[1]);
2282 break;
2283
2284 case AOC_CLEAR:
2285 /* Treat as atomic store of a zero byte. */
2286 stc = atomic_encode(msz, SS_PROCESS, AORG_CPLUS);
2287 result = ad5ili(IL_ATOMICSTI, ad_icon(0), opnd[0], nme[0], stc, opnd[1]);
2288 chk_block(result);
2289 break;
2290 #endif /* TARGET_GNU_ATOMICS */
2291
2292 case AOC_FENCE: {
2293 SYNC_SCOPE ss = pd == PD_atomic_signal_fence ? SS_SINGLETHREAD : SS_PROCESS;
2294 stc = atomic_encode(MSZ_UNDEF, ss, AORG_CPLUS);
2295 result = ad2ili(IL_FENCE, stc, opnd[0]);
2296 } break;
2297 }
2298 if (ilmp->opc == IM_VAPPLY) {
2299 /* result not used */
2300 switch (aoc) {
2301 case AOC_FENCE:
2302 case AOC_STORE:
2303 #if TARGET_GNU_ATOMICS
2304 case AOC_CLEAR:
2305 #endif
2306 #if TARGET_LLVM_ATOMICS
2307 case AOC_LOAD:
2308 case AOC_EXCHANGE:
2309 case AOC_COMPARE_EXCHANGE:
2310 #endif
2311 break;
2312 default:
2313 /* result was produced, but not used. */
2314 result = ad_free(result);
2315 chk_block(result);
2316 break;
2317 }
2318 } else {
2319 DEBUG_ASSERT(aoc != AOC_FENCE, "IM_VAPPLY expected for fence intrinsics");
2320 ILM_RESULT(curilm) = result;
2321 }
2322 return true;
2323 }
2324 #endif
2325
2326 static SPTR
mkatomictemp(DTYPE dtype)2327 mkatomictemp(DTYPE dtype)
2328 {
2329 static int cnt;
2330 SPTR tmp_sptr = getnewccsym('a', cnt++, ST_VAR);
2331 SCP(tmp_sptr, (gbl.outlined? SC_PRIVATE:SC_AUTO));
2332 ENCLFUNCP(tmp_sptr, GBL_CURRFUNC);
2333 DTYPEP(tmp_sptr, dtype);
2334 return tmp_sptr;
2335 }
2336
2337 static int
ll_make_atomic_load(int size_ili,int lhs,int rhs,int mem_order)2338 ll_make_atomic_load(int size_ili, int lhs, int rhs, int mem_order)
2339 {
2340 int result, altili;
2341 int func, arg;
2342 int garg[4];
2343 int args[4], arg_types[4] = {DT_UINT8, DT_CPTR, DT_CPTR, DT_INT};
2344
2345 func = mkfunc("__atomic_load");
2346 SCP(func, SC_EXTERN);
2347
2348 arg = ad1ili(IL_NULL, 0);
2349 arg = ad2ili(IL_ARGIR, mem_order, arg);
2350 arg = ad2ili(IL_ARGAR, rhs, arg);
2351 arg = ad2ili(IL_ARGAR, lhs, arg);
2352 arg = ad2ili(IL_ARGKR, ikmove(size_ili), arg);
2353 result = ad2ili(IL_JSR, func, arg);
2354
2355 return result;
2356 }
2357
2358
2359 static int
ll_make_atomic_store(int size_ili,int lhs,int rhs,int mem_order)2360 ll_make_atomic_store(int size_ili, int lhs, int rhs, int mem_order)
2361 {
2362 int result, altili;
2363 int func, arg;
2364 int size, stc;
2365 int garg[4];
2366 int args[4], arg_types[4] = {DT_UINT8, DT_CPTR, DT_CPTR, DT_INT};
2367
2368 func = mkfunc("__atomic_store");
2369 SCP(func, SC_EXTERN);
2370 arg = ad1ili(IL_NULL, 0);
2371 arg = ad2ili(IL_ARGIR, mem_order, arg);
2372 arg = ad2ili(IL_ARGAR, rhs, arg);
2373 arg = ad2ili(IL_ARGAR, lhs, arg);
2374 arg = ad2ili(IL_ARGKR, ikmove(size_ili), arg);
2375 result = ad2ili(IL_JSR, func, arg);
2376 return result;
2377
2378 return result;
2379 }
2380
2381 static int
ll_make_atomic_compare_xchg(int size_ili,int lhs,int expected,int desired,int success,int failure)2382 ll_make_atomic_compare_xchg(int size_ili, int lhs, int expected,
2383 int desired, int success, int failure)
2384 {
2385 int result, altili;
2386 int func, arg;
2387 int size, stc;
2388 int garg[6];
2389 int args[6], arg_types[6] = {DT_UINT8, DT_CPTR, DT_CPTR, DT_CPTR, DT_INT, DT_INT};
2390
2391 func = mkfunc("__atomic_compare_exchange");
2392 SCP(func, SC_EXTERN);
2393 arg = ad1ili(IL_NULL, 0);
2394 arg = ad2ili(IL_ARGIR, failure, arg);
2395 arg = ad2ili(IL_ARGIR, success, arg);
2396 arg = ad2ili(IL_ARGAR, desired, arg);
2397 arg = ad2ili(IL_ARGAR, expected, arg);
2398 arg = ad2ili(IL_ARGAR, lhs, arg);
2399 arg = ad2ili(IL_ARGKR, ikmove(size_ili), arg);
2400 result = ad2ili(IL_JSR, func, arg);
2401 result = ad2ili(IL_DFRIR, result, IR_RETVAL);
2402 return result;
2403
2404 return result;
2405 }
2406
2407 static int
ll_make_atomic_xchg(int lhs,int expected,int desired,int mem_order)2408 ll_make_atomic_xchg(int lhs, int expected, int desired, int mem_order)
2409 {
2410 int result, altili;
2411 int func, arg;
2412 int size, stc;
2413 int garg[4];
2414 int args[4], arg_types[4] = {DT_CPTR, DT_CPTR, DT_CPTR, DT_INT};
2415
2416 func = mkfunc("__atomic_exchange");
2417 SCP(func, SC_EXTERN);
2418 arg = ad1ili(IL_NULL, 0);
2419 arg = ad2ili(IL_ARGIR, mem_order, arg);
2420 arg = ad2ili(IL_ARGAR, desired, arg);
2421 arg = ad2ili(IL_ARGAR, expected, arg);
2422 arg = ad2ili(IL_ARGKR, lhs, arg);
2423 result = ad2ili(IL_JSR, func, arg);
2424 return result;
2425
2426 return result;
2427 }
2428
2429 static int
_exp_mp_atomic_read(int stc,DTYPE dtype,int * opnd,int * nme)2430 _exp_mp_atomic_read(int stc, DTYPE dtype, int* opnd, int* nme)
2431 {
2432 int result, size_ili;
2433 SPTR tmp_sptr;
2434 ILI_OP ld, st;
2435 MSZ msz;
2436 ISZ_T size;
2437
2438 #if use_kmpc_rte
2439 /* FIX ME */
2440 result = ll_make_kmpc_atomic_read(opnd, dtype);
2441 if (result) {
2442 ldst_msz(dtype, &ld, &st, &msz);
2443 result = ad4ili(st, result, opnd[LHS_IDX], nme[LHS_IDX], msz);
2444 iltb.callfg = 1;
2445 chk_block(result);
2446 }
2447 return result;
2448 #endif
2449
2450 size = zsize_of(dtype);
2451 if (dtype == DT_CMPLX ||
2452 dtype == DT_DCMPLX
2453 || (size !=1 && size != 2 && size != 4 && size != 8))
2454 {
2455 tmp_sptr = GetSPTRVal(opnd);
2456 if (tmp_sptr <= NOSYM) /* atomic capture may have set this already */
2457 tmp_sptr = mkatomictemp(dtype);
2458 size_ili = ad_icon(size);
2459 ADDRTKNP(tmp_sptr, 1);
2460 loc_of(nme[LHS_IDX]);
2461 result = ll_make_atomic_load(size_ili, opnd[LHS_IDX],
2462 mk_address(tmp_sptr), opnd[MO_IDX]);
2463 iltb.callfg = 1;
2464 chk_block(result);
2465 opnd[TMP_SPTR_IDX] = tmp_sptr;
2466 nme[TMP_SPTR_IDX] = addnme(NT_VAR, tmp_sptr, 0, (INT)0);
2467 ldst_msz(DTYPEG(tmp_sptr), &ld, &st, &msz);
2468 result = ad3ili(ld, mk_address(tmp_sptr), nme[TMP_SPTR_IDX], msz);
2469 return result;
2470 } else if (dtype != DT_NONE) {
2471 OPCODES const * ops;
2472 ldst_msz(dtype, &ld, &st, &msz);
2473 ops = get_ops(msz, 1);
2474 opnd[TMP_SPTR_IDX] = 0;
2475 result = ad4ili(ops->atomicld, opnd[LHS_IDX], nme[LHS_IDX], stc, opnd[MO_IDX]);
2476 return result;
2477 }
2478 return 0;
2479 }
2480
2481 /* Obtain dtype from sym from ILMs */
2482 static DTYPE
get_dtype_from_ilm(ILM * ilmp)2483 get_dtype_from_ilm(ILM *ilmp)
2484 {
2485 SPTR sym;
2486 DTYPE dtype;
2487 ILM *ilmp2;
2488
2489 ilmp2 = (ILM *)(ilmb.ilm_base+ILM_OPND(ilmp, 1));
2490 switch (ILM_OPC(ilmp2)) {
2491 case IM_BASE:
2492 sym = ILM_SymOPND(ilmp2, 1);
2493 dtype = DTYPEG(sym);
2494 break;
2495 case IM_PLD:
2496 case IM_MEMBER:
2497 sym = ILM_SymOPND(ilmp2, 2);
2498 dtype = DTYPEG(sym);
2499 break;
2500 case IM_ELEMENT:
2501 case IM_INLELEM:
2502 dtype = ILM_DTyOPND(ilmp2, 3);
2503 break;
2504 default:
2505 interr("get_dtype_from_ilm: unexpected ILM opc", ILM_OPND(ilmp2, 1), ERR_Severe);
2506 }
2507 return dtype;
2508 }
2509
2510 int
exp_mp_atomic_read(ILM * ilmp)2511 exp_mp_atomic_read(ILM *ilmp)
2512 {
2513 int stc;
2514 ILI_OP ld,st;
2515 MSZ msz;
2516 DTYPE dtype;
2517 int opnd[MAX_ATOMIC_ARGS];
2518 int nme[MAX_ATOMIC_ARGS];
2519
2520 nme[LHS_IDX] = NME_OF(ILM_OPND(ilmp, 1));
2521 dtype = dt_nme(nme[LHS_IDX]);
2522 if (!dtype) {
2523 dtype = get_dtype_from_ilm(ilmp);
2524 }
2525 ldst_msz(dtype, &ld, &st, &msz);
2526 stc = atomic_encode(msz, SS_PROCESS, AORG_OPENMP);
2527 opnd[LHS_IDX] = ILI_OF(ILM_OPND(ilmp, 1));
2528
2529 if (ILM_OPND(ilmp, 2) == MO_SEQ_CST)
2530 opnd[MO_IDX] = ad_icon(5);
2531 else
2532 opnd[MO_IDX] = ad_icon(0);
2533
2534 opnd[TMP_SPTR_IDX] = 0;
2535 return _exp_mp_atomic_read(stc, dtype, opnd, nme);
2536 }
2537
2538 static void
_exp_mp_atomic_write(int stc,DTYPE dtype,int * opnd,int * nme)2539 _exp_mp_atomic_write(int stc, DTYPE dtype, int* opnd, int* nme)
2540 {
2541 int rmw, result;
2542 ISZ_T size;
2543 int size_ili;
2544 OPCODES const * ops;
2545 SPTR tmp_sptr = NOSYM;
2546 ILI_OP ld, st;
2547 MSZ msz;
2548
2549 #if use_kmpc_rte
2550 /* FIXME */
2551 result = ll_make_kmpc_atomic_write(opnd, dtype);
2552 if (result) {
2553 iltb.callfg = 1;
2554 chk_block(result);
2555 }
2556 return;
2557 #endif
2558
2559 size = zsize_of(dtype);
2560 if (dtype == DT_CMPLX ||
2561 dtype == DT_DCMPLX
2562 || (size !=1 && size != 2 && size != 4 && size != 8))
2563 {
2564 tmp_sptr = mkatomictemp(dtype);
2565 nme[TMP_SPTR_IDX] = addnme(NT_VAR, tmp_sptr, 0, (INT)0);
2566
2567 ldst_msz(dtype, &ld, &st, &msz);
2568 result = ad4ili(st, opnd[RHS_IDX], mk_address(tmp_sptr),
2569 nme[TMP_SPTR_IDX], msz);
2570 chk_block(result);
2571 size_ili = ad_icon(size);
2572 ADDRTKNP(tmp_sptr, 1);
2573 loc_of(nme[LHS_IDX]);
2574 result = ll_make_atomic_store(size_ili, opnd[LHS_IDX],
2575 mk_address(tmp_sptr), opnd[MO_IDX]);
2576 iltb.callfg = 1;
2577 chk_block(result);
2578 return;
2579 } else if (dtype != DT_VOID_NONE) {
2580 ldst_msz(dtype, &ld, &st, &msz);
2581 ops = get_ops(msz, 1);
2582 result = ad5ili(ops->atomicst, opnd[RHS_IDX], opnd[LHS_IDX],
2583 nme[LHS_IDX], stc, opnd[MO_IDX]);
2584 chk_block(result);
2585 }
2586 return;
2587 }
2588
2589 void
exp_mp_atomic_write(ILM * ilmp)2590 exp_mp_atomic_write(ILM *ilmp)
2591 {
2592 int rmw, result;
2593 int size, stc;
2594 int size_ili;
2595 int opnd[MAX_ATOMIC_ARGS];
2596 int nme[MAX_ATOMIC_ARGS];
2597 SPTR tmp_sptr;
2598 DTYPE dtype;
2599
2600 nme[LHS_IDX] = NME_OF(ILM_OPND(ilmp, 1));
2601 dtype = dt_nme(nme[LHS_IDX]);
2602 if (!dtype) {
2603 dtype = get_dtype_from_ilm(ilmp);
2604 }
2605 stc = atomic_encode(mem_size(DTY(dtype)),
2606 SS_PROCESS, AORG_OPENMP);
2607
2608 opnd[LHS_IDX] = ILI_OF(ILM_OPND(ilmp, 1));
2609 opnd[RHS_IDX] = ILI_OF(ILM_OPND(ilmp, 2));
2610
2611 /* TODO: assert opnd[LHS_IDX] != opnd[RHS_IDX] */
2612 if (ILM_OPND(ilmp, 3) == MO_SEQ_CST)
2613 opnd[MO_IDX] = ad_icon(5);
2614 else
2615 opnd[MO_IDX] = ad_icon(0);
2616 _exp_mp_atomic_write(stc, dtype, opnd, nme);
2617
2618 }
2619
2620 static bool
can_use_rmw(DTYPE dtype,ATOMIC_RMW_OP aop)2621 can_use_rmw(DTYPE dtype, ATOMIC_RMW_OP aop)
2622 {
2623 if ((unsigned)aop > (unsigned)AOP_MAX_DEF)
2624 return false;
2625
2626 if (zsize_of(dtype) > 8)
2627 return false;
2628
2629 switch(dtype) {
2630 case DT_BLOG:
2631 case DT_SLOG:
2632 case DT_LOG:
2633 case DT_LOG8:
2634 case DT_BINT:
2635 case DT_SINT:
2636 case DT_INT:
2637 case DT_INT8:
2638 case DT_CPTR:
2639 return true;
2640 default:
2641 return false;
2642 }
2643 }
2644
2645 static bool
is_cse(int ilix)2646 is_cse(int ilix)
2647 {
2648 switch(ILI_OPC(ilix)) {
2649 case IL_CSEIR:
2650 case IL_CSESP:
2651 case IL_CSEDP:
2652 case IL_CSECS:
2653 case IL_CSECD:
2654 case IL_CSEAR:
2655 case IL_CSEKR:
2656 case IL_CSE:
2657 case IL_CSETB:
2658 return true;
2659 default:
2660 return false;
2661 }
2662 }
2663
2664
2665 static int
get_simple_update_operand(int * opnd,ILM * ilmp)2666 get_simple_update_operand(int* opnd, ILM* ilmp)
2667 {
2668 int lhs, rhs, acon;
2669 int op1, op2, expr;
2670 ILI_OP opc;
2671
2672 expr = 0;
2673 lhs = opnd[LHS_IDX];
2674 rhs = opnd[RHS_IDX];
2675 op1 = ILI_OPND(rhs, 1);
2676 op2 = ILI_OPND(rhs, 2);
2677 opc = ILI_OPC(rhs);
2678
2679 if (is_cse(op1)) {
2680 op1 = ILI_OPND(op1, 1);
2681 }
2682 if (IL_TYPE(ILI_OPC(op1)) == ILTY_LOAD) {
2683 acon = ILI_OPND(op1, 1);
2684 if (acon == lhs) {
2685 /* make sure that lhs is also not present in op2 as iliutil can also
2686 * change x = 2*x to x = x + x
2687 */
2688 if (find_ili(lhs, op2))
2689 return expr;
2690 expr = op2;
2691 goto check_opc;
2692 }
2693 }
2694
2695 if (is_cse(op2)) {
2696 op2 = ILI_OPND(op2, 1);
2697 }
2698 if (IL_TYPE(ILI_OPC(op2)) == ILTY_LOAD) {
2699 acon = ILI_OPND(op2, 1);
2700 if (acon == lhs) {
2701 if (find_ili(lhs, op2))
2702 return expr;
2703 expr = op1;
2704 goto check_opc;
2705 }
2706 }
2707
2708
2709 check_opc:
2710 /* check second operand */
2711 switch(opc) {
2712 case IL_IADD:
2713 case IL_UIADD:
2714 case IL_KADD:
2715 case IL_UKADD:
2716 opnd[AOP_IDX] = AOP_ADD;
2717 break;
2718 case IL_ISUB:
2719 case IL_UISUB:
2720 case IL_KSUB:
2721 case IL_UKSUB:
2722 opnd[AOP_IDX] = AOP_SUB;
2723 break;
2724 case IL_AND:
2725 case IL_KAND:
2726 opnd[AOP_IDX] = AOP_AND;
2727 break;
2728 case IL_OR:
2729 case IL_KOR:
2730 opnd[AOP_IDX] = AOP_OR;
2731 break;
2732 case IL_XOR:
2733 opnd[AOP_IDX]= AOP_XOR;
2734 break;
2735 case IL_IMIN:
2736 case IL_UIMIN:
2737 case IL_KMIN:
2738 case IL_UKMIN:
2739 opnd[AOP_IDX] = AOP_MIN;
2740 break;
2741 case IL_IMAX:
2742 case IL_UIMAX:
2743 case IL_KMAX:
2744 case IL_UKMAX:
2745 opnd[AOP_IDX] = AOP_MAX;
2746 break;
2747 default:
2748 return 0;
2749 }
2750
2751 return expr;
2752 }
2753
2754
2755 static bool
lhs_match_rhs(int lop,int rop)2756 lhs_match_rhs(int lop, int rop)
2757 {
2758 int j, noprs;
2759 ILI_OP opc = ILI_OPC(lop);
2760
2761 if (opc != ILI_OPC(rop))
2762 return false;
2763
2764 noprs = IL_OPRS(ILI_OPC(lop));
2765 for (j = 1; j <= noprs; ++j) {
2766 if (IL_ISLINK(opc, j)) {
2767 return lhs_match_rhs(ILI_OPND(lop, j), ILI_OPND(rop, j));
2768 }
2769 if (ILI_OPND(lop, j) != ILI_OPND(rop, j)) {
2770 return false;
2771 }
2772 }
2773 return true;
2774 }
2775
2776
2777
2778 /* if there is a call on lhs, make sure ili trees are exactly the same
2779 * except that the ili number may be different because we issue different
2780 * ili for calls.
2781 */
2782
2783 static void
_ilis_are_matched(int rhs,int lhs,int * res,int * load)2784 _ilis_are_matched(int rhs, int lhs, int* res, int* load)
2785 {
2786 int rop1, rop2, j;
2787 ILI_OP lopc;
2788 int noprs;
2789 int lop1, lop2, opc;
2790
2791 lop1 = ILI_OPND(lhs, 1);
2792 lop2 = ILI_OPND(lhs, 2);
2793 opc = ILI_OPC(lhs);
2794
2795 if (ILI_OPC(rhs) == opc) {
2796 rop1 = ILI_OPND(rhs, 1);
2797 rop2 = ILI_OPND(rhs, 2);
2798 if (rop1 == lop1) {
2799 if (lhs_match_rhs(lop2, rop2)) {
2800 if (*res) {
2801 /* multiple occurrences of lhs on rhs */
2802 error(S_0155_OP1_OP2, ERR_Severe, gbl.lineno, "Invalid atomic statement.", CNULL);
2803 }
2804 *res = rhs;
2805 return;
2806 }
2807 }
2808 }
2809
2810 lopc = ILI_OPC(rhs);
2811 noprs = IL_OPRS(lopc);
2812 for (j = 1; j <= noprs; ++j) {
2813 if (IL_ISLINK(lopc, j)) {
2814 int opnd = ILI_OPND(rhs, j);
2815
2816 _ilis_are_matched(opnd, lhs, res, load);
2817 if (*res && *load == 0) {
2818 if (IL_TYPE(ILI_OPC(rhs)) == ILTY_LOAD)
2819 *load = rhs;
2820 }
2821 }
2822 }
2823
2824 return;
2825 }
2826
2827 static int
load_op_match_lhs(int lhs,int rhs)2828 load_op_match_lhs(int lhs, int rhs)
2829 {
2830 int res, v, nxt, op1, op2, load;
2831 if (lhs == rhs)
2832 return 0;
2833
2834 res = 0;
2835 load = 0;
2836
2837 /* We check for calls and IL_AADD/ASUB are the only opc
2838 * we are looking because calls on lhs occurs when we are
2839 * in addressing mode,i.e., a[sub()...]=, a+sub()...=.
2840 * We assume that calls with same arguments will return
2841 * the same value.
2842 */
2843 if (ILI_OPC(lhs) == IL_AADD || ILI_OPC(lhs) == IL_ASUB) {
2844 op1 = ILI_OPND(lhs, 1);
2845 op2 = ILI_OPND(lhs, 2);
2846 if (find_ili(rhs, op1)) {
2847 ili_unvisit();
2848 _ilis_are_matched(rhs, lhs, &res, &load);
2849 } else
2850 ili_unvisit();
2851 }
2852
2853 return load;
2854
2855 }
2856
2857 static int
get_complex_update_operand(int * opnd,ILM * ilmp,int * nme,DTYPE dtype)2858 get_complex_update_operand(int* opnd, ILM* ilmp, int* nme, DTYPE dtype)
2859 {
2860 int lhs, rhs, ili, stc, load, op1, lop;
2861 int expected_val;
2862 ILI_OP ld, st;
2863 MSZ msz;
2864 SPTR tmp_sptr;
2865 OPCODES const* ops;
2866
2867 lhs = opnd[LHS_IDX];
2868 rhs = opnd[RHS_IDX];
2869 ldst_msz(dtype, &ld, &st, &msz);
2870 load = ad3ili(ld, lhs, nme[LHS_IDX], msz);
2871 if (!find_ili(rhs, load)) {
2872 ili_unvisit();
2873 if (find_ili(rhs, lhs)) {
2874 ili_unvisit(); /* illlegel update statement */
2875 return 0;
2876 } else {
2877 ili_unvisit();
2878 if ((lop = load_op_match_lhs(lhs, rhs)) == 0)
2879 return 0;
2880
2881 load = lop;
2882 }
2883 } else
2884 ili_unvisit();
2885
2886 stc = atomic_encode(msz, SS_PROCESS, AORG_OPENMP);
2887 expected_val = _exp_mp_atomic_read(stc, dtype, opnd, nme);
2888
2889 tmp_sptr = GetSPTRVal(opnd);
2890 if (!tmp_sptr) {
2891 opnd[TMP_SPTR_IDX] = tmp_sptr = mkatomictemp(dtype);
2892 nme[TMP_SPTR_IDX] = addnme(NT_VAR, tmp_sptr, 0, 0);
2893 ldst_msz(dtype, &ld, &st, &msz);
2894 ili = ad4ili(st, expected_val, mk_address(tmp_sptr),
2895 nme[TMP_SPTR_IDX], msz);
2896 chk_block(ili);
2897 }
2898 expected_val = ad3ili(ld, mk_address(tmp_sptr), nme[5], msz);
2899 opnd[RHS_IDX] = rewr_ili(rhs, load, expected_val);
2900 rewr_cln_ili();
2901 return expected_val;
2902 }
2903
2904 static void
_exp_mp_atomic_update(DTYPE dtype,int * opnd,int * nme)2905 _exp_mp_atomic_update(DTYPE dtype, int* opnd, int* nme)
2906 {
2907 int rmw, result, stc;
2908 int size_ili;
2909 SPTR label;
2910 int expected_val, desired_val, cmpxchg;
2911 ILI_OP ld, st;
2912 SPTR expected_sptr, desired_sptr;
2913 MSZ msz;
2914 OPCODES const * ops;
2915 ISZ_T size = zsize_of(dtype);
2916
2917 expected_sptr = GetSPTRVal(opnd);
2918 {
2919 label = getlab();
2920 BIH_LABEL(expb.curbih) = label;
2921 ILIBLKP(label, expb.curbih);
2922 desired_sptr = mkatomictemp(dtype);
2923 desired_val = opnd[RHS_IDX];
2924 ldst_msz(DTYPEG(desired_sptr), &ld, &st, &msz);
2925 result = ad4ili(st, desired_val, mk_address(desired_sptr),
2926 addnme(NT_VAR, desired_sptr, 0, (INT)0),
2927 msz);
2928 ASSNP(desired_sptr, 1);
2929 chk_block(result);
2930
2931 if (dtype == DT_CMPLX ||
2932 dtype == DT_DCMPLX
2933 || (size != 1 && size != 2 && size !=4 && size !=8))
2934 {
2935 size_ili = ad_icon(size);
2936 ADDRTKNP(expected_sptr, 1);
2937 ADDRTKNP(desired_sptr, 1);
2938 loc_of(nme[LHS_IDX]);
2939 result = ll_make_atomic_compare_xchg(size_ili, opnd[LHS_IDX],
2940 mk_address(expected_sptr),
2941 mk_address(desired_sptr),
2942 opnd[MO_IDX], ad_icon(0));
2943 iltb.callfg = 1;
2944 } else {
2945 /* need to load and store as integer and with exact same size
2946 original : float loadi MSZ_WORD
2947 double loadK MSZ_I8
2948 short loads MSZ_USWORD
2949 unsigned short loads MSZ_USWORD
2950 byte loadb MSZ_BYTE
2951 */
2952 msz = msz_from_atomic_dtype(dtype);
2953 ops = get_ops(msz, 1);
2954 ADDRTKNP(expected_sptr, 1);
2955 ADDRTKNP(desired_sptr, 1);
2956 desired_val = ad3ili(ops->ld,
2957 mk_address(desired_sptr),
2958 addnme(NT_VAR, desired_sptr, 0, (INT)0),
2959 msz);
2960 expected_val = ad3ili(ops->ld,
2961 mk_address(expected_sptr),
2962 addnme(NT_VAR, expected_sptr, 0, (INT)0),
2963 msz);
2964 stc = atomic_encode(msz, SS_PROCESS, AORG_OPENMP);
2965 loc_of(nme[LHS_IDX]);
2966 cmpxchg = ad_cmpxchg(ops->cmpxchg, desired_val, opnd[LHS_IDX], nme[LHS_IDX],
2967 stc, expected_val, ad_icon(0), opnd[MO_IDX],
2968 ad_icon(0));
2969
2970 cmpxchg = ad_cse(cmpxchg);
2971 expected_val = ad1ili(ops->cmpxchg_old, cmpxchg);
2972 result = ad4ili(ops->st, expected_val, mk_address(expected_sptr),
2973 addnme(NT_VAR, expected_sptr, 0, (INT)0),
2974 msz);
2975 chk_block(result);
2976
2977 result = ad1ili(IL_CMPXCHG_SUCCESS, cmpxchg);
2978 result = ad_cse(result);
2979 }
2980
2981 result = ad3ili(IL_ICJMPZ, result, CC_EQ, label);
2982 RFCNTI(label);
2983 chk_block(result);
2984 }
2985 }
2986
2987
2988 void
exp_mp_atomic_update(ILM * ilmp)2989 exp_mp_atomic_update(ILM *ilmp)
2990 {
2991 int rmw, result;
2992 int size, stc, rhs;
2993 int size_ili, label, op1;
2994 int expected_val, desired_val;
2995 int opnd[MAX_ATOMIC_ARGS];
2996 int nme[MAX_ATOMIC_ARGS];
2997 DTYPE dtype;
2998 ATOMIC_RMW_OP aop = (ATOMIC_RMW_OP) ILM_OPND(ilmp, 4); // ???
2999 ILI_OP opc, ld, st;
3000 SPTR expected_sptr, desired_sptr;
3001 MSZ msz;
3002 OPCODES const * ops;
3003
3004 opnd[LHS_IDX] = ILI_OF(ILM_OPND(ilmp, 1));
3005 opnd[RHS_IDX] = ILI_OF(ILM_OPND(ilmp, 2));
3006 nme[LHS_IDX] = NME_OF(ILM_OPND(ilmp, 1));
3007 dtype = dt_nme(nme[LHS_IDX]);
3008 if (!dtype) {
3009 dtype = get_dtype_from_ilm(ilmp);
3010 }
3011 set_assn(nme[0]);
3012 expected_sptr = SPTR_NULL;
3013
3014 /* Don't use CSE for LHS */
3015 op1 = opnd[LHS_IDX];
3016 if (is_cse(op1)) {
3017 op1 = ILI_OPND(op1, 1);
3018 opnd[LHS_IDX] = op1;
3019 }
3020
3021 if (ILM_OPND(ilmp, 3) == MO_SEQ_CST)
3022 opnd[MO_IDX] = ad_icon(5);
3023 else
3024 opnd[MO_IDX] = ad_icon(0);
3025
3026 ldst_msz(dtype, &ld, &st, &msz);
3027 ops = get_ops(msz, 1);
3028 rhs = get_simple_update_operand(opnd, ilmp);
3029 if (rhs && can_use_rmw(dtype, GetAtomicOp(opnd))) {
3030 stc = atomic_encode_rmw(mem_size(DTY(dtype)),
3031 SS_PROCESS, AORG_OPENMP, GetAtomicOp(opnd));
3032 loc_of(nme[LHS_IDX]);
3033 result = ad5ili(ops->atomicrmw, rhs, opnd[LHS_IDX], nme[LHS_IDX], stc, opnd[MO_IDX]);
3034 expected_sptr = mkatomictemp(dtype);
3035 result = ad4ili(st, result, mk_address(expected_sptr),
3036 addnme(NT_VAR, expected_sptr, 0, (INT)0), msz);
3037 chk_block(result);
3038 } else {
3039 opnd[TMP_SPTR_IDX] = 0;
3040 expected_val = get_complex_update_operand(opnd, ilmp, nme, dtype);
3041 if (expected_val == 0) {
3042 error(S_0155_OP1_OP2, ERR_Severe, gbl.lineno,
3043 "Invalid atomic update statement.", CNULL);
3044 }
3045 expected_sptr = GetSPTRVal(opnd);
3046 ASSNP(expected_sptr, 1);
3047
3048 if (!expected_val && !expected_sptr)
3049 return;
3050
3051 wr_block();
3052 cr_block();
3053 _exp_mp_atomic_update(dtype, opnd, nme);
3054 }
3055 }
3056
/* Fall back to DT_NONE when no DT_VOID_NONE macro has been provided. */
#if !defined(DT_VOID_NONE)
#define DT_VOID_NONE DT_NONE
#endif
3060
3061 void
exp_mp_atomic_capture(ILM * ilmp)3062 exp_mp_atomic_capture(ILM *ilmp)
3063 {
3064 int expected_val;
3065 SPTR expected_sptr;
3066 int load, desired_val, cseload;
3067 int opnd[MAX_ATOMIC_ARGS];
3068 int nme[MAX_ATOMIC_ARGS];
3069 int cnt, stc, result, rhs, ilm_opc, op1;
3070 ILI_OP ld;
3071 ILI_OP st;
3072 MSZ msz;
3073 const ILM *op_ilmp;
3074 const OPCODES* ops;
3075
3076 typedef enum CPT_IDX {
3077 FIRST = 0,
3078 SECOND = 1,
3079 } CPT_IDX;
3080
3081 static struct cpt_struct {
3082 int cnt;
3083 int lhs[2];
3084 int rhs[2];
3085 int nme[2];
3086 DTYPE dtype[2];
3087 int mem_order[2];
3088 int isupdate[2];
3089 bool error;
3090 SPTR tmp_sptr;
3091 } cpt;
3092
3093 if (cpt.cnt == 0 || cpt.cnt > 1) {
3094 cpt.cnt = 0;
3095 memset(&cpt, 0, sizeof(cpt));
3096 }
3097 cnt = cpt.cnt;
3098 cpt.lhs[cnt] = ILI_OF(ILM_OPND(ilmp, 1));
3099 cpt.rhs[cnt] = ILI_OF(ILM_OPND(ilmp, 2));
3100 cpt.nme[cnt] = nme[LHS_IDX] = NME_OF(ILM_OPND(ilmp, 1));
3101 cpt.dtype[cnt] = dt_nme(nme[LHS_IDX]);
3102 if (!cpt.dtype[cnt]) {
3103 cpt.dtype[cnt] = get_dtype_from_ilm(ilmp);
3104 }
3105 cpt.mem_order[cnt] = ILM_OPND(ilmp, 3);
3106
3107 /* Don't use CSE for LHS */
3108 op1 = cpt.lhs[cnt];
3109 if (is_cse(op1)) {
3110 op1 = ILI_OPND(op1, 1);
3111 cpt.lhs[cnt] = op1;
3112 }
3113
3114 opnd[LHS_IDX] = cpt.lhs[cnt];
3115 opnd[RHS_IDX] = cpt.rhs[cnt];
3116 if (cpt.mem_order[cnt] == MO_SEQ_CST)
3117 opnd[MO_IDX] = ad_icon(5);
3118 else
3119 opnd[MO_IDX] = ad_icon(0);
3120 set_assn(nme[LHS_IDX]);
3121
3122
3123 rhs = get_simple_update_operand(opnd, ilmp);
3124 if (rhs && can_use_rmw(cpt.dtype[cnt], GetAtomicOp(opnd))) {
3125 stc = atomic_encode_rmw(mem_size(DTY(cpt.dtype[cnt])),
3126 SS_PROCESS, AORG_OPENMP,
3127 GetAtomicOp(opnd));
3128 loc_of(nme[LHS_IDX]);
3129 ldst_msz(cpt.dtype[cnt], &ld, &st, &msz);
3130 ops = get_ops(msz, 1);
3131 result = ad5ili(ops->atomicrmw, rhs, opnd[LHS_IDX], nme[LHS_IDX], stc, opnd[MO_IDX]);
3132 cpt.tmp_sptr = expected_sptr = mkatomictemp(cpt.dtype[cnt]);
3133 result = ad4ili(st, result, mk_address(expected_sptr),
3134 addnme(NT_VAR, expected_sptr, 0, 0), msz);
3135 chk_block(result);
3136 cpt.isupdate[cnt] = result;
3137 ASSNP(expected_sptr, 1);
3138 opnd[TMP_SPTR_IDX] = cpt.tmp_sptr;
3139 if (cnt == FIRST) {
3140 cpt.cnt++;
3141 return;
3142 }
3143 } else {
3144 opnd[TMP_SPTR_IDX] = cpt.tmp_sptr;
3145 expected_val = get_complex_update_operand(opnd, ilmp, nme, cpt.dtype[cnt]);
3146 cpt.isupdate[cnt] = expected_val;
3147 if (expected_val && !GetSPTRVal(opnd)) {
3148 cpt.error = true;
3149 goto capture_end;
3150 }
3151 if (expected_val) {
3152 _exp_mp_atomic_update(cpt.dtype[cnt], opnd, nme);
3153 cpt.tmp_sptr = GetSPTRVal(opnd);
3154 } else if (cnt == FIRST) {
3155 /* assume this is a capture, don't do anything just yet */
3156 goto capture_end;
3157 }
3158 }
3159
3160 if (cpt.isupdate[FIRST] && cpt.isupdate[SECOND]) {
3161 cpt.error = true;
3162 }
3163
3164 if (cnt == SECOND) {
3165 if (cpt.isupdate[SECOND]) { /* 1: v = x 2: x = x + 1 */
3166 /* assert: cpt.rhs[FIRST] == cpt.lhs[SECOND] */
3167 if (find_ili(cpt.rhs[FIRST], cpt.lhs[SECOND])) {
3168 ldst_msz(cpt.dtype[SECOND], &ld, &st, &msz);
3169 load = ad3ili(ld, cpt.lhs[SECOND], cpt.nme[SECOND], msz);
3170 } else {
3171 load = load_op_match_lhs(cpt.lhs[SECOND], cpt.rhs[FIRST]);
3172 if (load == 0) {
3173 cpt.error = true;
3174 goto capture_end;
3175 }
3176 }
3177
3178 cpt.tmp_sptr = GetSPTRVal(opnd);
3179 if (cpt.tmp_sptr <= NOSYM) {
3180 cpt.error = true;
3181 goto capture_end;
3182 }
3183 /* replace ili of load:x with a load of tmp */
3184 ldst_msz(DTYPEG(cpt.tmp_sptr), &ld, &st, &msz);
3185 expected_val = ad3ili(ld, mk_address(cpt.tmp_sptr),
3186 addnme(NT_VAR, cpt.tmp_sptr, 0, (INT)0), msz);
3187 expected_val = rewr_ili(cpt.rhs[FIRST], load, expected_val);
3188 rewr_cln_ili();
3189
3190 /* assign value to v */
3191 ldst_msz(cpt.dtype[FIRST], &ld, &st, &msz);
3192 result = ad4ili(st, expected_val, cpt.lhs[FIRST], cpt.nme[FIRST], msz);
3193 chk_block(result);
3194 } else if (cpt.isupdate[FIRST]) { /* 1: x = x +1; 2: v = x */
3195 /* assert: cpt.rhs[FIRST] == cpt.lhs[SECOND] */
3196 /* replace a load of x with new ili and store to x */
3197 cpt.tmp_sptr = GetSPTRVal(opnd);
3198 if (cpt.tmp_sptr <= NOSYM) {
3199 cpt.error = true;
3200 goto capture_end;
3201 }
3202
3203 ldst_msz(DTYPEG(cpt.tmp_sptr), &ld, &st, &msz);
3204 expected_val = ad3ili(ld, mk_address(cpt.tmp_sptr),
3205 addnme(NT_VAR, cpt.tmp_sptr, 0, 0), msz);
3206
3207 ldst_msz(cpt.dtype[FIRST], &ld, &st, &msz);
3208 load = ad3ili(ld, cpt.lhs[FIRST], cpt.nme[FIRST], msz);
3209
3210 {
3211 if (!find_ili(cpt.rhs[SECOND], cpt.lhs[FIRST])) {
3212 if (load_op_match_lhs(cpt.lhs[FIRST], cpt.rhs[SECOND]) == 0) {
3213 cpt.error = true;
3214 goto capture_end;
3215 }
3216 }
3217 /* Grab rhs expression of update statement and
3218 * replace load of x with load of tmp
3219 * We will assign this expression to v.
3220 */
3221 expected_val = rewr_ili(cpt.rhs[FIRST], load, expected_val);
3222 rewr_cln_ili();
3223
3224 /* Replace a load of x in v = x; with expected_val.
3225 * The only reason we do if there is a type conversion when
3226 * assigning x to v.
3227 */
3228 expected_val = rewr_ili(cpt.rhs[SECOND], load, expected_val);
3229 }
3230
3231 rewr_cln_ili();
3232
3233 /* assign value to v */
3234 ldst_msz(cpt.dtype[SECOND], &ld, &st, &msz);
3235 result = ad4ili(st, expected_val, cpt.lhs[SECOND], cpt.nme[SECOND], msz);
3236 chk_block(result);
3237 } else {
3238 /* 1: v = x, 2: x = expr */
3239 /* assert: cpt.rhs[FIRST] == cpt.lhs[SECOND] */
3240
3241 if (find_ili(cpt.rhs[FIRST], cpt.lhs[SECOND])) {
3242 ldst_msz(cpt.dtype[SECOND], &ld, &st, &msz);
3243 load = ad3ili(ld, cpt.lhs[SECOND], cpt.nme[SECOND], msz);
3244 } else {
3245 load = load_op_match_lhs(cpt.lhs[SECOND], cpt.rhs[FIRST]);
3246 if (load == 0) {
3247 cpt.error = true;
3248 goto capture_end;
3249 }
3250 }
3251
3252 opnd[TMP_SPTR_IDX] = mkatomictemp(cpt.dtype[SECOND]);
3253 _exp_mp_atomic_update(cpt.dtype[cnt], opnd, nme);
3254
3255 ldst_msz(cpt.dtype[SECOND], &ld, &st, &msz);
3256 expected_val = ad3ili(ld, mk_address(GetSPTRVal(opnd)),
3257 addnme(NT_VAR, GetSPTRVal(opnd), 0, 0), msz);
3258 if (cpt.dtype[FIRST] != cpt.dtype[SECOND]) {
3259 /* possible conversion */
3260 expected_val = rewr_ili(cpt.rhs[FIRST], load, expected_val);
3261 rewr_cln_ili();
3262 }
3263 ldst_msz(cpt.dtype[FIRST], &ld, &st, &msz);
3264 result = ad4ili(st, expected_val, cpt.lhs[FIRST], cpt.nme[FIRST], msz);
3265 chk_block(result);
3266
3267 }
3268 }
3269
3270 capture_end:
3271 if (cpt.error) {
3272 error(S_0155_OP1_OP2, ERR_Severe, gbl.lineno, "Invalid atomic capture statement(s).", CNULL);
3273 }
3274 cpt.cnt++;
3275 return;
3276 }
3277