1 /*
2 * Copyright (c) 2013-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *
16 */
17
18 /**
19 \file
20 \brief optimization/peephole/inst simplification routines for LLVM Code
21 Generator
22 */
23
24 #include "llopt.h"
25 #include "dtypeutl.h"
26 #include "global.h"
27 #include "error.h"
28 #include "symtab.h"
29 #include "llutil.h"
30 #include "cgllvm.h"
31 #include "ili.h"
32 #include "mwd.h"
33 #include <stdlib.h>
34
35 #define DEC_UCOUNT(i) ((i)->tmps->use_count--)
36
37 static void
replace_by_call_to_llvm_instrinsic(INSTR_LIST * instr,char * fname,OPERAND * params)38 replace_by_call_to_llvm_instrinsic(INSTR_LIST *instr, char *fname,
39 OPERAND *params)
40 {
41 OPERAND *call_op;
42 char *intrinsic_name;
43 static char buf[MAXIDLEN];
44 LL_Type *return_ll_type = NULL;
45
46 DBGXTRACEIN1(DBGBIT(12, 0x20), 1, "ilix %d", instr->ilix);
47
48 intrinsic_name = (char *)getitem(LLVM_LONGTERM_AREA, strlen(fname) + 1);
49 strcpy(intrinsic_name, fname);
50 return_ll_type = instr->ll_type;
51 instr->i_name = I_PICALL;
52 instr->flags = CALL_INTRINSIC_FLAG;
53 call_op = make_operand();
54 call_op->ot_type = OT_CALL;
55 call_op->string = intrinsic_name;
56 call_op->ll_type = return_ll_type;
57 instr->operands = call_op;
58 call_op->next = params;
59 /* add global define of llvm.xxx to external function list, if needed */
60 sprintf(buf, "declare %s %s(", return_ll_type->str, intrinsic_name);
61 if (params) {
62 sprintf(buf, "%s%s", buf, params->ll_type->str);
63 params = params->next;
64 }
65 while (params) {
66 sprintf(buf, "%s, %s", buf, params->ll_type->str);
67 params = params->next;
68 }
69 strcat(buf, ")");
70 update_external_function_declarations(intrinsic_name, buf, EXF_INTRINSIC);
71
72 DBGXTRACEOUT1(DBGBIT(12, 0x20), 1, " %s", buf)
73 }
74
75 static void
update_param_use_count(OPERAND * params)76 update_param_use_count(OPERAND *params)
77 {
78 while (params) {
79 if (params->ot_type == OT_TMP) {
80 params->tmps->use_count++;
81 }
82 params = params->next;
83 }
84 }
85
86 static void
replace_by_fma_intrinsic(INSTR_LIST * instr,OPERAND * op,INSTR_LIST * mul_instr)87 replace_by_fma_intrinsic(INSTR_LIST *instr, OPERAND *op, INSTR_LIST *mul_instr)
88 {
89 OPERAND *params;
90 char *intrinsic_name = NULL;
91
92 switch (instr->ll_type->data_type) {
93 case LL_FLOAT:
94 if (instr->i_name == I_FADD)
95 intrinsic_name = "@llvm.pgi.arm.vmla.f32";
96 else if (instr->i_name == I_FSUB)
97 intrinsic_name = "@llvm.pgi.arm.vmls.f32";
98 break;
99 case LL_DOUBLE:
100 if (instr->i_name == I_FADD)
101 intrinsic_name = "@llvm.pgi.arm.vmla.f64";
102 else if (instr->i_name == I_FSUB)
103 intrinsic_name = "@llvm.pgi.arm.vmls.f64";
104 break;
105 default:
106 break;
107 }
108 if (intrinsic_name) {
109 params = gen_copy_op(op);
110 params->next = gen_copy_list_op(mul_instr->operands);
111 update_param_use_count(params);
112 replace_by_call_to_llvm_instrinsic(instr, intrinsic_name, params);
113 DEC_UCOUNT(mul_instr);
114 }
115 }
116
117 static INSTR_LIST *
is_from_instr(int i_name,OPERAND * op)118 is_from_instr(int i_name, OPERAND *op)
119 {
120 if (op->ot_type == OT_TMP) {
121 INSTR_LIST *idef;
122 idef = op->tmps->info.idef;
123 if (idef && (idef->i_name == i_name))
124 return idef;
125 }
126 return NULL;
127 }
128
129 static void
optimize_instruction(INSTR_LIST * instr)130 optimize_instruction(INSTR_LIST *instr)
131 {
132 INSTR_LIST *op_instr;
133
134 if (instr->tmps && instr->tmps->use_count == 0)
135 return;
136 switch (instr->i_name) {
137 default:
138 break;
139 case I_FADD:
140 if ((op_instr = is_from_instr(I_FMUL, instr->operands)))
141 replace_by_fma_intrinsic(instr, instr->operands, op_instr);
142 else if ((op_instr = is_from_instr(I_FMUL, instr->operands->next)))
143 replace_by_fma_intrinsic(instr, instr->operands->next, op_instr);
144 break;
145 case I_FSUB:
146 if ((op_instr = is_from_instr(I_FMUL, instr->operands->next)))
147 replace_by_fma_intrinsic(instr, instr->operands->next, op_instr);
148 break;
149 }
150 }
151
152 void
optimize_block(INSTR_LIST * last_block_instr)153 optimize_block(INSTR_LIST *last_block_instr)
154 {
155 #ifdef TARGET_LLVM_ARM
156 INSTR_LIST *instr, *last_instr;
157
158 last_instr = NULL;
159 for (instr = last_block_instr; instr; instr = instr->prev) {
160 instr->flags |= INST_VISITED;
161
162 if (last_instr == NULL && instr->i_name == I_NONE)
163 last_instr = instr;
164 if (instr->flags & STARTEBB) {
165 if (last_instr != NULL)
166 break;
167 }
168 }
169
170 for (instr = last_block_instr; instr; instr = instr->prev) {
171 optimize_instruction(instr);
172
173 if (last_instr == NULL && instr->i_name == I_NONE)
174 last_instr = instr;
175 if (instr->flags & STARTEBB) {
176 if (last_instr != NULL)
177 break;
178 }
179 }
180
181 for (instr = last_block_instr; instr; instr = instr->prev) {
182 instr->flags &= ~INST_VISITED;
183
184 if (last_instr == NULL && instr->i_name == I_NONE)
185 last_instr = instr;
186 if (instr->flags & STARTEBB) {
187 if (last_instr != NULL)
188 break;
189 }
190 }
191 #endif
192 }
193
194 /**
195 \brief Determine if \p cand has the form <tt>1.0 / y</tt>
196 */
197 static bool
is_recip(OPERAND * cand)198 is_recip(OPERAND *cand)
199 {
200 if (cand && cand->tmps) {
201 INSTR_LIST *il = cand->tmps->info.idef;
202 const int divIli = il ? il->ilix : 0;
203 OPERAND *ilOp = divIli ? il->operands : NULL;
204 if (ilOp && (cand->tmps->use_count == 1) && (il->i_name == I_FDIV) &&
205 (ilOp->ot_type == OT_CONSTSPTR)) {
206 const int sptr = ilOp->val.sptr;
207 switch (ILI_OPC(ILI_OPND(divIli, 1))) {
208 case IL_FCON:
209 return sptr == stb.flt1;
210 case IL_DCON:
211 return sptr == stb.dbl1;
212 #ifdef LONG_DOUBLE_FLOAT128
213 case IL_FLOAT128CON:
214 return sptr == stb.float128_1;
215 #endif
216 default:
217 break;
218 }
219 }
220 }
221 return false;
222 }
223
224 /**
225 \brief Helper function
226 \param x This is the \c x operand in a <tt>(/ x)</tt> insn [precondition]
227 \param recip The <tt>(/ 1.0 y)</tt> term for splicing
228
229 Peephole rewrite of the bridge IR. The C compiler will DCE the unused div
230 operation. The C++ compiler will not, but instead leans on LLVM to DCE the
231 <tt>(/ 1.0 undef)</tt> operation.
232 */
233 static void
fixup_recip_div(OPERAND * x,OPERAND * recip)234 fixup_recip_div(OPERAND *x, OPERAND *recip)
235 {
236 INSTR_LIST *il = recip->tmps->info.idef; // il <- (/ 1.0 y)
237 OPERAND *undef = make_undef_op(il->operands->next->ll_type);
238 x->next = il->operands->next; // (/ x) ==> (/ x y)
239 il->operands->next = undef; // (/ 1.0 y) ==> (/ 1.0 undef)
240 recip->tmps->use_count--;
241 }
242
243 /**
244 \brief Translate a fp mul to a fp div ILI opcode
245 \param opc The opcode to translate
246 \return The DIV form if \c opc is a FP MUL, otherwise \c opc itself
247
248 NB: Used to overwrite the opcode in the ILI. Any subsequent passes (FMA) that
249 examine the ILI must not conclude that this is still a multiply operation.
250 */
251 static ILI_OP
convert_mul_to_div(ILI_OP opc)252 convert_mul_to_div(ILI_OP opc)
253 {
254 switch (opc) {
255 case IL_FMUL:
256 return IL_FDIV;
257 case IL_DMUL:
258 return IL_DDIV;
259 #ifdef LONG_DOUBLE_FLOAT128
260 case IL_FLOAT128MUL:
261 return IL_FLOAT128DIV;
262 #endif
263 default:
264 break;
265 }
266 return opc;
267 }
268
269 /**
270 \brief Translate <tt>x * 1.0 / y</tt> to <tt>x / y</tt>.
271 \param mul A FP multiply instruction
272
273 Preconditions: \p mul is a well-formed I_FMUL, has a positive use count
274 */
275 void
maybe_undo_recip_div(INSTR_LIST * mul)276 maybe_undo_recip_div(INSTR_LIST *mul)
277 {
278 OPERAND *lop = mul->operands;
279 OPERAND *rop = lop->next;
280
281 if (is_recip(lop)) {
282 // case: (1.0 / y) * x
283 mul->i_name = I_FDIV;
284 ILI_OPCP(mul->ilix, convert_mul_to_div(ILI_OPC(mul->ilix)));
285 mul->operands = rop; // x
286 fixup_recip_div(rop, lop);
287 } else if (is_recip(rop)) {
288 // case: x * (1.0 / y)
289 mul->i_name = I_FDIV;
290 ILI_OPCP(mul->ilix, convert_mul_to_div(ILI_OPC(mul->ilix)));
291 fixup_recip_div(lop, rop);
292 } else {
293 // mul not recognized as a mult-by-recip form
294 // ok, do nothing
295 }
296 }
297
298 /* ---------------------------------------------------------------------- */
299 //
300 // Widening transform:
301 //
302 // Rewrite the ILIs such that address arithmetic is done in the i64 domain
303 // rather than mostly in the i32 domain and then extended late to i64. It is
304 // understood that the behavior at two's complement i32's boundaries is not
305 // semantically identical, and we don't make wraparound guarantees here.
306
307 /**
308 \brief Return a wide integer dtype, either signed or unsigned
309 */
310 INLINE static DTYPE
getWideDType(bool isUnsigned)311 getWideDType(bool isUnsigned)
312 {
313 return isUnsigned ? DT_UINT8 : DT_INT8;
314 }
315
316 /**
317 \brief Create a new temp that is a wide integer type
318 \param dty The wide integer dtype
319 */
320 static SPTR
getNewWideSym(DTYPE dty)321 getNewWideSym(DTYPE dty)
322 {
323 static int bump;
324 SPTR wideSym = getccsym('w', ++bump, ST_VAR);
325
326 SCP(wideSym, SC_AUTO);
327 SCOPEP(wideSym, 3);
328 DTYPEP(wideSym, dty);
329 return wideSym;
330 }
331
332 /**
333 \brief Push a cast of \p opc down the tree \p ilix
334 */
335 static int
widenPushdown(ILI_OP opc,int ilix)336 widenPushdown(ILI_OP opc, int ilix)
337 {
338 int l, r, r3, r4;
339 ILI_OP newOp;
340
341 switch (ILI_OPC(ilix)) {
342 case IL_KIMV:
343 return ILI_OPND(ilix, 1);
344 case IL_ICJMP:
345 l = widenPushdown(opc, ILI_OPND(ilix, 1));
346 r = widenPushdown(opc, ILI_OPND(ilix, 2));
347 r3 = ILI_OPND(ilix, 3);
348 r4 = ILI_OPND(ilix, 4);
349 return ad4ili(IL_KCJMP, l, r, r3, r4);
350 case IL_UICJMP:
351 l = widenPushdown(opc, ILI_OPND(ilix, 1));
352 r = widenPushdown(opc, ILI_OPND(ilix, 2));
353 r3 = ILI_OPND(ilix, 3);
354 r4 = ILI_OPND(ilix, 4);
355 return ad4ili(IL_UKCJMP, l, r, r3, r4);
356 case IL_IKMV:
357 case IL_UIKMV:
358 return widenPushdown(opc, ILI_OPND(ilix, 1));
359 case IL_LDKR:
360 case IL_KMUL:
361 case IL_KADD:
362 case IL_KSUB:
363 case IL_KCON:
364 return ilix;
365 default:
366 return ad1ili(opc, ilix);
367 case IL_IADD:
368 case IL_UIADD:
369 newOp = IL_KADD;
370 break;
371 case IL_ISUB:
372 case IL_UISUB:
373 newOp = IL_KSUB;
374 break;
375 case IL_IMUL:
376 case IL_UIMUL:
377 newOp = IL_KMUL;
378 break;
379 case IL_IMAX:
380 newOp = IL_KMAX;
381 break;
382 case IL_IMIN:
383 newOp = IL_KMIN;
384 break;
385 }
386 l = widenPushdown(opc, ILI_OPND(ilix, 1));
387 r = widenPushdown(opc, ILI_OPND(ilix, 2));
388 return ad2ili(newOp, l, r);
389 }
390
391 /**
392 \brief Perform widening on address arithmetic
393 \param ilix The root of the tree to be widened
394
395 The root of the tree will already be wide because of large arrays. However,
396 we want to force any sign- or zero-extension operations down towards the
397 leaves of the tree and promote the arithmetic operations to 64 bits.
398 */
399 static bool
widenAddressArithmetic(int ilix)400 widenAddressArithmetic(int ilix)
401 {
402 int i;
403 bool rv = false;
404 const ILI_OP opc = ILI_OPC(ilix);
405 const int noprs = ilis[opc].oprs;
406
407 for (i = 1; i <= noprs; ++i)
408 if (IL_ISLINK(opc, i)) {
409 const int opnd = ILI_OPND(ilix, i);
410 ILI_OP opx = ILI_OPC(opnd);
411 if ((opx == IL_IKMV) || (opx == IL_UIKMV)) {
412 int x = widenPushdown(opx, ILI_OPND(opnd, 1));
413 ILI_OPND(ilix, i) = x;
414 rv = true;
415 } else {
416 rv |= widenAddressArithmetic(ILI_OPND(ilix, i));
417 }
418 }
419 return rv;
420 }
421
422 /**
423 \brief Find address arithmetic and widen it
424 \param ilix The root of the ILI tree to be examined
425
426 We traverse the tree and look for any address arithmetic. If any is found,
427 call widenAddressArithmetic().
428 */
429 static bool
widenAnyAddressing(int ilix)430 widenAnyAddressing(int ilix)
431 {
432 bool rv = false;
433 const ILI_OP opc = ILI_OPC(ilix);
434 const int noprs = ilis[opc].oprs;
435
436 if (opc == IL_KAMV) {
437 rv = widenAddressArithmetic(ILI_OPND(ilix, 1));
438 } else {
439 int i;
440 for (i = 1; i <= noprs; ++i)
441 if (IL_ISLINK(opc, i))
442 rv |= widenAnyAddressing(ILI_OPND(ilix, i));
443 }
444 return rv;
445 }
446
447 /**
448 \brief Add widen targets in \p ilix to the var map.
449 */
450 static void
widenAddNarrowVars(int ilix,hashset_t widenVar_set)451 widenAddNarrowVars(int ilix, hashset_t widenVar_set)
452 {
453 int i;
454 const ILI_OP opc = ILI_OPC(ilix);
455 const int noprs = ilis[opc].oprs;
456
457 for (i = 1; i <= noprs; ++i)
458 if (IL_ISLINK(opc, i)) {
459 const int opnd = ILI_OPND(ilix, i);
460 if (((opc == IL_IKMV) || (opc == IL_UIKMV))) {
461 if (IL_TYPE(ILI_OPC(opnd)) == ILTY_LOAD)
462 hashset_replace(widenVar_set, INT2HKEY(opnd));
463 } else {
464 widenAddNarrowVars(opnd, widenVar_set);
465 }
466 }
467 }
468
469 /**
470 \brief Test if this load is from a private variable
471 */
472 static bool
widenAconIsPrivate(int ilix)473 widenAconIsPrivate(int ilix)
474 {
475 SPTR sym;
476
477 assert(ILI_OPC(ilix) == IL_ACON, "ilix must be ACON", ilix, ERR_Fatal);
478 sym = ILI_SymOPND(ilix, 1);
479 if (DTY(DTYPEG(sym)) == TY_PTR)
480 sym = SymConval1(sym);
481
482 if (DT_ISINT(DTYPEG(sym)))
483 return (SCG(sym) == SC_PRIVATE);
484 return false;
485 }
486
487 /**
488 \brief Add a new wider store
489
490 Generate a new tree and insert it after \p ilt.
491 */
492 INLINE static int
widenInsertWideStore(int ilt,int lhsIli,int rhsIli,int nme)493 widenInsertWideStore(int ilt, int lhsIli, int rhsIli, int nme)
494 {
495 const int newIli = ad4ili(IL_STKR, rhsIli, lhsIli, nme, MSZ_I8);
496 addilt(ilt, newIli);
497 return newIli;
498 }
499
500 INLINE static void
widenProcessDirectLoad(int ldIli,hashmap_t map)501 widenProcessDirectLoad(int ldIli, hashmap_t map)
502 {
503 const DTYPE dty = getWideDType(false); // FIXME
504 const SPTR wideVar = getNewWideSym(dty);
505 const int nme = ILI_OPND(ldIli, 2);
506 const DTYPE wty = get_type(2, TY_PTR, dty);
507 const int wideAddr = ad1ili(IL_ACON, get_acon3(wideVar, 0, wty));
508 const int wideLoad = ad1ili(IL_KIMV, ad3ili(IL_LDKR, wideAddr, nme, MSZ_I8));
509 hash_data_t data = INT2HKEY(wideLoad);
510 hashmap_replace(map, INT2HKEY(ldIli), &data);
511 }
512
513 INLINE static bool
hasExactlyOneStore(int aconIli,int * cilt)514 hasExactlyOneStore(int aconIli, int *cilt)
515 {
516 int bih, ilt;
517 unsigned count = 0;
518
519 for (bih = BIH_NEXT(0); bih; bih = BIH_NEXT(bih)) {
520 // make sure aconSym is written to once
521 for (ilt = BIH_ILTFIRST(bih); ilt; ilt = ILT_NEXT(ilt)) {
522 const int ilix = ILT_ILIP(ilt);
523 if (ILT_DELETE(ilt))
524 continue;
525 if ((IL_TYPE(ILI_OPC(ilix)) == ILTY_STORE) &&
526 (ILI_OPND(ilix, 2) == aconIli)) {
527 ++count;
528 *cilt = ilt;
529 }
530 }
531 }
532 return (count == 1);
533 }
534
535 INLINE static void
widenProcessIndirectLoad(int ldIli,int aconIli,hashmap_t map)536 widenProcessIndirectLoad(int ldIli, int aconIli, hashmap_t map)
537 {
538 int cilt;
539 if (hasExactlyOneStore(aconIli, &cilt)) {
540 int st;
541 const DTYPE dty = getWideDType(false); // FIXME
542 const SPTR wideVar = getNewWideSym(dty);
543 const int nme = ILI_OPND(ldIli, 2);
544 const DTYPE tyw = get_type(2, TY_PTR, dty);
545 const int wideAddr = ad1ili(IL_ACON, get_acon3(wideVar, 0, tyw));
546 int wideLoad = ad1ili(IL_KIMV, ad3ili(IL_LDKR, wideAddr, nme, MSZ_I8));
547 hash_data_t data = INT2HKEY(wideLoad);
548 hashmap_replace(map, INT2HKEY(ldIli), &data);
549 wideLoad = ad1ili(IL_IKMV, ldIli);
550 st = widenInsertWideStore(cilt, wideAddr, wideLoad, nme);
551 data = 0;
552 hashmap_replace(map, INT2HKEY(st), &data);
553 }
554 }
555
556 /**
557 \brief Create a wide load from the given narrow load
558 \param key The narrow load (as a key)
559 \param map A hashmap to add the (narrow -> wide) mapping
560 */
561 static void
widenCreateWideLocal(hash_key_t key,void * map)562 widenCreateWideLocal(hash_key_t key, void *map)
563 {
564 const int loadIli = HKEY2INT(key);
565 hashmap_t newMap = (hashmap_t)map;
566 const ILI_OP ldOpc = ILI_OPC(loadIli);
567
568 assert(ldOpc == IL_LD, "load map does not contain load", ldOpc, ERR_Fatal);
569
570 // Does loadIli match indirect form?
571 if ((ILI_OPC(ILI_OPND(loadIli, 1)) == IL_LDA) &&
572 (ILI_OPC(ILI_OPND(ILI_OPND(loadIli, 1), 1)) == IL_ACON)) {
573 const int aconIli = ILI_OPND(ILI_OPND(loadIli, 1), 1);
574 if (widenAconIsPrivate(aconIli))
575 widenProcessIndirectLoad(loadIli, aconIli, newMap);
576 } else if (ILI_OPC(ILI_OPND(loadIli, 1)) == IL_ACON) {
577 const int aconIli = ILI_OPND(loadIli, 1);
578 if (widenAconIsPrivate(aconIli))
579 widenProcessDirectLoad(loadIli, newMap);
580 }
581 }
582
583 INLINE static void
widenApplyFree(int ilix,hashmap_t map)584 widenApplyFree(int ilix, hashmap_t map)
585 {
586 const ILI_OP opc = ILI_OPC(ilix);
587 if (opc == IL_FREEIR) {
588 const int argIli = ILI_OPND(ilix, 1);
589 hash_data_t data;
590 if (hashmap_lookup(map, INT2HKEY(argIli), &data)) {
591 const int newIli = HKEY2INT(data);
592 ILI_OPCP(ilix, IL_FREEKR);
593 ILI_OPND(ilix, 1) = ILI_OPND(newIli, 1);
594 }
595 }
596 }
597
598 static void
widenApplyVarMap(int ilix,hashmap_t map)599 widenApplyVarMap(int ilix, hashmap_t map)
600 {
601 hash_data_t data;
602 int i;
603 const ILI_OP opc = ILI_OPC(ilix);
604 const int noprs = ilis[opc].oprs;
605
606 if (hashmap_lookup(map, INT2HKEY(ilix), &data))
607 if (!data)
608 return;
609
610 for (i = 1; i <= noprs; ++i)
611 if (IL_ISLINK(opc, i)) {
612 const int opnd = ILI_OPND(ilix, i);
613 const ILI_OP opx = ILI_OPC(opnd);
614 if ((opx == IL_IKMV) || (opx == IL_UIKMV)) {
615 const int opnd2 = ILI_OPND(opnd, 1);
616 if (hashmap_lookup(map, INT2HKEY(opnd2), &data)) {
617 const int newIli = HKEY2INT(data);
618 ILI_OPND(ilix, i) = ILI_OPND(newIli, 1);
619 } else {
620 widenApplyVarMap(opnd2, map);
621 }
622 } else if (opx == IL_CSEIR) {
623 // ignore
624 } else if (hashmap_lookup(map, INT2HKEY(opnd), &data)) {
625 const int newIli = HKEY2INT(data);
626 ILI_OPND(ilix, i) = newIli;
627 } else {
628 widenApplyVarMap(opnd, map);
629 }
630 }
631 if (ILI_ALT(ilix))
632 widenApplyVarMap(ILI_ALT(ilix), map);
633 }
634
635 static void
widenApplyCse(int ilix,hashmap_t map)636 widenApplyCse(int ilix, hashmap_t map)
637 {
638 hash_data_t data;
639 int i;
640 const ILI_OP opc = ILI_OPC(ilix);
641 const int noprs = ilis[opc].oprs;
642
643 if (hashmap_lookup(map, INT2HKEY(ilix), &data))
644 if (!data)
645 return;
646
647 for (i = 1; i <= noprs; ++i)
648 if (IL_ISLINK(opc, i)) {
649 const int opnd = ILI_OPND(ilix, i);
650 const ILI_OP opx = ILI_OPC(opnd);
651 if ((opx == IL_IKMV) || (opx == IL_UIKMV)) {
652 const int opnd2 = ILI_OPND(opnd, 1);
653 if ((ILI_OPC(opnd2) == IL_CSEIR) &&
654 hashmap_lookup(map, INT2HKEY(ILI_OPND(opnd2, 1)), &data)) {
655 const int replIlix = ILI_OPND(HKEY2INT(data), 1);
656 ILI_OPND(ilix, i) = ad1ili(IL_CSEKR, replIlix);
657 }
658 } else {
659 widenApplyCse(opnd, map);
660 }
661 }
662 }
663
664 #if DEBUG
665 static void
dumpWidenVars(hash_key_t key,hash_data_t data,void * _)666 dumpWidenVars(hash_key_t key, hash_data_t data, void *_)
667 {
668 FILE *f = gbl.dbgfil ? gbl.dbgfil : stderr;
669 fprintf(f, "{\n\tkey:\n");
670 dilitre(HKEY2INT(key));
671 fprintf(f, "\tdata:\n");
672 if (data)
673 dilitre(HKEY2INT(data));
674 fprintf(f, "}\n");
675 }
676 #endif
677
678 /**
679 \brief If <tt>{key -> data}</tt> maps to an ACON, place ACON in \p map_
680 \param key A hashmap key
681 \param data A hashmap value
682 \param map_ A hashmap to be populated
683 Called by hashmap_iterate().
684 */
685 static void
gatherLocals(hash_key_t key,hash_data_t data,void * map_)686 gatherLocals(hash_key_t key, hash_data_t data, void *map_)
687 {
688 const int ldIli = HKEY2INT(key);
689 hashmap_t map = (hashmap_t)map_;
690
691 if (!data)
692 return;
693 if (ILI_OPC(ILI_OPND(ldIli, 1)) == IL_ACON)
694 hashmap_replace(map, INT2HKEY(ILI_OPND(ldIli, 1)), &data);
695 }
696
697 INLINE static void
widenApplyStore(int ilix,hashmap_t map)698 widenApplyStore(int ilix, hashmap_t map)
699 {
700 if (IL_TYPE(ILI_OPC(ilix)) == ILTY_STORE) {
701 const int stAcon = ILI_OPND(ilix, 2);
702 hash_data_t data;
703 if (hashmap_lookup(map, INT2HKEY(stAcon), &data)) {
704 ILI_OPND(ilix, 4) = MSZ_I8;
705 ILI_OPND(ilix, 2) = ILI_OPND(ILI_OPND(HKEY2INT(data), 1), 1);
706 ILI_OPND(ilix, 1) = ad1ili(IL_IKMV, ILI_OPND(ilix, 1));
707 }
708 }
709 }
710
711 /**
712 \brief Does the current procedure have blocks marked no dep check?
713 */
714 bool
funcHasNoDepChk(void)715 funcHasNoDepChk(void)
716 {
717 int bih = BIH_NEXT(0);
718 for (; bih; bih = BIH_NEXT(bih))
719 if (BIH_NODEPCHK(bih))
720 return true;
721 return false;
722 }
723
724 static void
widenElimAddrTaken(int ilix,hashset_t zt)725 widenElimAddrTaken(int ilix, hashset_t zt)
726 {
727 int i;
728 const ILI_OP opc = ILI_OPC(ilix);
729 const int noprs = ilis[opc].oprs;
730
731 if ((IL_TYPE(opc) == ILTY_STORE) &&
732 (ILI_OPC(ILI_OPND(ilix, 2)) == IL_ACON)) {
733 widenElimAddrTaken(ILI_OPND(ilix, 1), zt);
734 return;
735 }
736 if ((opc == IL_ACON) && hashset_lookup(zt, INT2HKEY(ilix))) {
737 hashset_erase(zt, INT2HKEY(ilix));
738 return;
739 }
740 for (i = 1; i <= noprs; ++i)
741 if (IL_ISLINK(opc, i)) {
742 const int opnd = ILI_OPND(ilix, i);
743 const ILI_OP opx = ILI_OPC(opnd);
744 if ((IL_TYPE(opx) == ILTY_LOAD) &&
745 (ILI_OPC(ILI_OPND(opnd, 1)) == IL_ACON))
746 continue;
747 widenElimAddrTaken(ILI_OPND(ilix, i), zt);
748 }
749 if (ILI_ALT(ilix))
750 widenElimAddrTaken(ILI_ALT(ilix), zt);
751 }
752
753 static void
widenGatherAcon(hash_key_t key,void * zet_)754 widenGatherAcon(hash_key_t key, void *zet_)
755 {
756 hashset_t zet = (hashset_t)zet_;
757 const int ilix = HKEY2INT(key);
758 const int acon = ILI_OPND(ilix, 1);
759 assert(IL_TYPE(ILI_OPC(ilix)) == ILTY_LOAD, "expected load", 0, ERR_Fatal);
760 hashset_replace(zet, INT2HKEY(acon));
761 }
762
763 INLINE static void
hashset_swap(hashset_t * s1,hashset_t * s2)764 hashset_swap(hashset_t *s1, hashset_t *s2)
765 {
766 hashset_t swap = *s1;
767 *s1 = *s2;
768 *s2 = swap;
769 }
770
771 typedef struct { hashset_t inSet; hashset_t aconSet; } AconSets;
772
773 /**
774 \brief Populate \c inSet with \p key if acon is in \c aconSet
775 */
776 static void
pruneAcon(hash_key_t key,void * p)777 pruneAcon(hash_key_t key, void *p)
778 {
779 AconSets *sp = (AconSets*) p;
780 const int acon = ILI_OPND(HKEY2INT(key), 1);
781 if (hashset_lookup(sp->aconSet, INT2HKEY(acon)))
782 hashset_replace(sp->inSet, key);
783 }
784
785 /**
786 \brief Prune \p loadSt to only those loads that reference acon in \p aconSt
787 \param loadSt The set of loads to be pruned
788 \param aconSt The set of legal ACON nodes
789 */
790 INLINE static void
widenPruneAcon(hashset_t * loadSt,hashset_t aconSt)791 widenPruneAcon(hashset_t *loadSt, hashset_t aconSt)
792 {
793 hashset_t newSet = hashset_alloc(hash_functions_direct);
794 AconSets aconSets = {newSet, aconSt};
795 hashset_iterate(*loadSt, pruneAcon, (void*)&aconSets);
796 hashset_swap(loadSt, &newSet);
797 hashset_free(newSet);
798 }
799
800 /**
801 \brief Widen address arithmetic
802 */
803 void
widenAddressArith(void)804 widenAddressArith(void)
805 {
806 int bih, ilt;
807 hashset_t widenVar_set;
808
809 // paranoia check: only process functions with nodepchk flag
810 if (!funcHasNoDepChk())
811 return;
812
813 #if DEBUG
814 if (DBGBIT(12, 0x40))
815 dumpblocks("before widen");
816 #endif
817
818 widenVar_set = hashset_alloc(hash_functions_direct);
819 for (bih = BIH_NEXT(0); bih; bih = BIH_NEXT(bih))
820 for (ilt = BIH_ILTFIRST(bih); ilt; ilt = ILT_NEXT(ilt)) {
821 const int ilix = ILT_ILIP(ilt);
822 if (ILT_DELETE(ilt))
823 continue;
824 if (ILI_OPC(ilix) == IL_ICJMP) {
825 const int x = widenPushdown(IL_IKMV, ilix);
826 ILT_ILIP(ilt) = x;
827 widenAddNarrowVars(x, widenVar_set);
828 } else if (ILI_OPC(ilix) == IL_UICJMP) {
829 const int x = widenPushdown(IL_UIKMV, ilix);
830 ILT_ILIP(ilt) = x;
831 widenAddNarrowVars(x, widenVar_set);
832 } else {
833 const bool found = widenAnyAddressing(ilix);
834 if (found)
835 widenAddNarrowVars(ilix, widenVar_set);
836 }
837 }
838
839 if (hashset_size(widenVar_set)) {
840 hashset_t zet = hashset_alloc(hash_functions_direct);
841 hashset_iterate(widenVar_set, widenGatherAcon, (void*)zet);
842 for (bih = BIH_NEXT(0); bih; bih = BIH_NEXT(bih))
843 for (ilt = BIH_ILTFIRST(bih); ilt; ilt = ILT_NEXT(ilt))
844 widenElimAddrTaken(ILT_ILIP(ilt), zet);
845 widenPruneAcon(&widenVar_set, zet);
846 hashset_free(zet);
847 }
848
849 if (hashset_size(widenVar_set)) {
850 hashmap_t newMap = hashmap_alloc(hash_functions_direct);
851 hashmap_t aconMap = hashmap_alloc(hash_functions_direct);
852 hashset_iterate(widenVar_set, widenCreateWideLocal, newMap);
853 hashmap_iterate(newMap, gatherLocals, aconMap);
854 #if DEBUG
855 if (DBGBIT(12, 0x40)) {
856 hashmap_iterate(newMap, dumpWidenVars, NULL);
857 }
858 #endif
859 for (bih = BIH_NEXT(0); bih; bih = BIH_NEXT(bih))
860 for (ilt = BIH_ILTFIRST(bih); ilt; ilt = ILT_NEXT(ilt)) {
861 const int ilix = ILT_ILIP(ilt);
862 widenApplyFree(ilix, newMap);
863 widenApplyVarMap(ilix, newMap);
864 widenApplyStore(ilix, aconMap);
865 widenApplyCse(ilix, newMap);
866 }
867 hashmap_free(aconMap);
868 hashmap_free(newMap);
869 }
870
871 #if DEBUG
872 if (DBGBIT(12, 0x40))
873 dumpblocks("after widen");
874 #endif
875
876 hashset_free(widenVar_set);
877 }
878
879 /* ---------------------------------------------------------------------- */
880 //
881 // Replace load-load sequences in "omp simd" loops transform:
882 //
883 // We want to cache the result of the first load (a pointer) in a temp and
884 // have the second load use the temp in the body of the loop. This is to
885 // workaround the issue that the outliner creates a bag of <tt>void*</tt>
886 // for all the enclosing routine's variables and introduces false aliasing.
887
888 /**
889 \brief Does block \p bih branch to block \p target?
890 */
891 bool
block_branches_to(int bih,int target)892 block_branches_to(int bih, int target)
893 {
894 if (bih != 0) {
895 const int ilt = BIH_ILTLAST(bih);
896 const int ili = ILT_ILIP(ilt);
897 const ILI_OP op = ILI_OPC(ili);
898 if (IL_TYPE(op) == ILTY_BRANCH) {
899 const int lab = ILI_OPND(ili, ilis[op].oprs);
900 const int targ = ILIBLKG(lab);
901 return (targ == target);
902 }
903 }
904 return false;
905 }
906
907 INLINE static void
rlleMakeSet(hash_key_t key,hash_data_t data,void * set_)908 rlleMakeSet(hash_key_t key, hash_data_t data, void *set_)
909 {
910 hashset_t zet = (hashset_t)set_;
911 const int isGood = HKEY2INT(data);
912 if (isGood)
913 hashset_insert(zet, key);
914 }
915
916 /**
917 \brief Search the omp simd block for load-load patterns
918 \param ilix the root of the ILI tree
919 \param zt set of candidates (<tt>ACON</tt>)
920 \param mp <tt>{ACON -> 0}</tt> (output map)
921 */
922 static void
rlleFindLL(int ilix,hashset_t zt,hashmap_t mp)923 rlleFindLL(int ilix, hashset_t zt, hashmap_t mp)
924 {
925 int i;
926 const ILI_OP opc = ILI_OPC(ilix);
927 const int noprs = ilis[opc].oprs;
928 hash_data_t data = INT2HKEY(0);
929
930 for (i = 1; i <= noprs; ++i)
931 if (IL_ISLINK(opc, i)) {
932 const int opnd = ILI_OPND(ilix, i);
933 const ILI_OP opx = ILI_OPC(opnd);
934 if (IL_TYPE(opx) == ILTY_LOAD) {
935 const int opnd2 = ILI_OPND(opnd, 1);
936 if ((IL_TYPE(ILI_OPC(opnd2)) == ILTY_LOAD) &&
937 hashset_lookup(zt, INT2HKEY(ILI_OPND(opnd2, 1)))) {
938 const int opnd3 = ILI_OPND(opnd2, 1);
939 hashmap_replace(mp, INT2HKEY(opnd3), &data);
940 } else {
941 rlleFindLL(opnd, zt, mp);
942 }
943 } else {
944 rlleFindLL(opnd, zt, mp);
945 }
946 }
947 }
948
949 /**
950 \brief Was an initializer for this load pair created?
951 If a candidate's initializer cannot be found, the map will be 0.
952 */
953 INLINE static bool
rlleInitializerFound(hash_data_t data)954 rlleInitializerFound(hash_data_t data)
955 {
956 return data != INT2HKEY(0);
957 }
958
959 /**
960 \brief Rewrite the ILIs with substitutions for load-load patterns
961 */
962 static void
rlleReplace(int ilix,hashmap_t map)963 rlleReplace(int ilix, hashmap_t map)
964 {
965 int i;
966 const ILI_OP opc = ILI_OPC(ilix);
967 const int noprs = ilis[opc].oprs;
968
969 for (i = 1; i <= noprs; ++i)
970 if (IL_ISLINK(opc, i)) {
971 const int opnd = ILI_OPND(ilix, i);
972 const ILI_OP opx = ILI_OPC(opnd);
973 // look for (LDA (LDA (ACON _))) where ACON is in map
974 if (IL_TYPE(opx) == ILTY_LOAD) {
975 const int opnd2 = ILI_OPND(opnd, 1);
976 hash_data_t data;
977 if ((IL_TYPE(ILI_OPC(opnd2)) == ILTY_LOAD) &&
978 hashmap_lookup(map, INT2HKEY(ILI_OPND(opnd2, 1)), &data) &&
979 rlleInitializerFound(data)) {
980 // rewrite to (LDA (ACON' _))
981 ILI_OPND(opnd, 1) = HKEY2INT(data);
982 } else {
983 rlleReplace(opnd2, map);
984 }
985 } else {
986 rlleReplace(opnd, map);
987 }
988 }
989 }
990
991 /**
992 \brief Eliminate load-load operations from the uplevel structure
993 \pre The current procedure is an outlined function with no dep check blocks
994 */
995 void
redundantLdLdElim(void)996 redundantLdLdElim(void)
997 {
998 int bih, ilt;
999 hashmap_t candMap = hashmap_alloc(hash_functions_direct);
1000
1001 // scan forward to find candidates
1002 // construct map of private variables written exactly once
1003 for (bih = BIH_NEXT(0); bih; bih = BIH_NEXT(bih)) {
1004 for (ilt = BIH_ILTFIRST(bih); ilt; ilt = ILT_NEXT(ilt)) {
1005 const int ilix = ILT_ILIP(ilt);
1006 if (ILT_DELETE(ilt))
1007 continue;
1008 if ((IL_TYPE(ILI_OPC(ilix)) == ILTY_STORE) &&
1009 (ILI_OPC(ILI_OPND(ilix, 2)) == IL_ACON) &&
1010 widenAconIsPrivate(ILI_OPND(ilix, 2))) {
1011 const int acon = ILI_OPND(ilix, 2);
1012 hash_data_t data;
1013 if (hashmap_lookup(candMap, INT2HKEY(acon), &data)) {
1014 data = INT2HKEY(0);
1015 hashmap_replace(candMap, INT2HKEY(acon), &data);
1016 } else {
1017 data = INT2HKEY(1);
1018 hashmap_insert(candMap, INT2HKEY(acon), data);
1019 }
1020 }
1021 }
1022 }
1023 if (hashmap_size(candMap)) {
1024 // find candidates that appear in omp simd block, create new map
1025 bool inNoDep = false;
1026 hashset_t zet = hashset_alloc(hash_functions_direct);
1027 hashmap_iterate(candMap, rlleMakeSet, (void*)zet);
1028 hashmap_clear(candMap);
1029 for (bih = BIH_NEXT(0); bih; bih = BIH_NEXT(bih)) {
1030 if (!inNoDep)
1031 inNoDep = BIH_NODEPCHK(bih);
1032 if (inNoDep)
1033 for (ilt = BIH_ILTFIRST(bih); ilt; ilt = ILT_NEXT(ilt)) {
1034 const int ilix = ILT_ILIP(ilt);
1035 rlleFindLL(ilix, zet, candMap);
1036 }
1037 if (!(inNoDep && block_branches_to(BIH_NEXT(bih), bih)))
1038 inNoDep = false;
1039 }
1040 hashset_free(zet);
1041 }
1042
1043 // if we have no candidates, we're done
1044 if (hashmap_size(candMap) == 0) {
1045 hashmap_free(candMap);
1046 return;
1047 }
1048
1049 // scan the header blocks and create temps, updating map
1050 for (bih = BIH_NEXT(0); bih; bih = BIH_NEXT(bih)) {
1051 if (BIH_NODEPCHK(bih))
1052 break;
1053 for (ilt = BIH_ILTFIRST(bih); ilt; ilt = ILT_NEXT(ilt)) {
1054 const int ilix = ILT_ILIP(ilt);
1055 hash_data_t data;
1056 if (IL_TYPE(ILI_OPC(ilix)) == ILTY_STORE) {
1057 if (hashmap_lookup(candMap, INT2HKEY(ILI_OPND(ilix, 2)), &data)) {
1058 const int acon = ILI_OPND(ilix, 2);
1059 const int nme = ILI_OPND(ilix, 3);
1060 const int lda = ad3ili(IL_LDA, acon, nme, MSZ_PTR);
1061 const int loada = ad3ili(IL_LDA, lda, nme, MSZ_PTR);
1062 const DTYPE dty = DT_CPTR;
1063 const SPTR wideVar = getNewWideSym(dty);
1064 const int wAddr = ad1ili(IL_ACON, get_acon3(wideVar, 0, dty));
1065 const int stv = ad4ili(IL_STA, loada, wAddr, nme, MSZ_PTR);
1066 assert(!data, "data should be null", HKEY2INT(data), ERR_Fatal);
1067 data = INT2HKEY(wAddr);
1068 hashmap_replace(candMap, INT2HKEY(acon), &data);
1069 addilt(ilt, stv);
1070 ilt = ILT_NEXT(ilt);
1071 }
1072 }
1073 }
1074 }
1075
1076 // scan the function and replace load-load sequences with temps, if available
1077 for (; bih; bih = BIH_NEXT(bih))
1078 for (ilt = BIH_ILTFIRST(bih); ilt; ilt = ILT_NEXT(ilt))
1079 rlleReplace(ILT_ILIP(ilt), candMap);
1080
1081 hashmap_free(candMap);
1082 }
1083