1 /*
2  * Copyright (c) 2013-2018, NVIDIA CORPORATION.  All rights reserved.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  *
16  */
17 
18 /**
19    \file
20    \brief optimization/peephole/inst simplification routines for LLVM Code
21    Generator
22  */
23 
24 #include "llopt.h"
25 #include "dtypeutl.h"
26 #include "global.h"
27 #include "error.h"
28 #include "symtab.h"
29 #include "llutil.h"
30 #include "cgllvm.h"
31 #include "ili.h"
32 #include "mwd.h"
33 #include <stdlib.h>
34 
35 #define DEC_UCOUNT(i) ((i)->tmps->use_count--)
36 
37 static void
replace_by_call_to_llvm_instrinsic(INSTR_LIST * instr,char * fname,OPERAND * params)38 replace_by_call_to_llvm_instrinsic(INSTR_LIST *instr, char *fname,
39                                    OPERAND *params)
40 {
41   OPERAND *call_op;
42   char *intrinsic_name;
43   static char buf[MAXIDLEN];
44   LL_Type *return_ll_type = NULL;
45 
46   DBGXTRACEIN1(DBGBIT(12, 0x20), 1, "ilix %d", instr->ilix);
47 
48   intrinsic_name = (char *)getitem(LLVM_LONGTERM_AREA, strlen(fname) + 1);
49   strcpy(intrinsic_name, fname);
50   return_ll_type = instr->ll_type;
51   instr->i_name = I_PICALL;
52   instr->flags = CALL_INTRINSIC_FLAG;
53   call_op = make_operand();
54   call_op->ot_type = OT_CALL;
55   call_op->string = intrinsic_name;
56   call_op->ll_type = return_ll_type;
57   instr->operands = call_op;
58   call_op->next = params;
59   /* add global define of llvm.xxx to external function list, if needed */
60   sprintf(buf, "declare %s %s(", return_ll_type->str, intrinsic_name);
61   if (params) {
62     sprintf(buf, "%s%s", buf, params->ll_type->str);
63     params = params->next;
64   }
65   while (params) {
66     sprintf(buf, "%s, %s", buf, params->ll_type->str);
67     params = params->next;
68   }
69   strcat(buf, ")");
70   update_external_function_declarations(intrinsic_name, buf, EXF_INTRINSIC);
71 
72   DBGXTRACEOUT1(DBGBIT(12, 0x20), 1, " %s", buf)
73 }
74 
75 static void
update_param_use_count(OPERAND * params)76 update_param_use_count(OPERAND *params)
77 {
78   while (params) {
79     if (params->ot_type == OT_TMP) {
80       params->tmps->use_count++;
81     }
82     params = params->next;
83   }
84 }
85 
86 static void
replace_by_fma_intrinsic(INSTR_LIST * instr,OPERAND * op,INSTR_LIST * mul_instr)87 replace_by_fma_intrinsic(INSTR_LIST *instr, OPERAND *op, INSTR_LIST *mul_instr)
88 {
89   OPERAND *params;
90   char *intrinsic_name = NULL;
91 
92   switch (instr->ll_type->data_type) {
93   case LL_FLOAT:
94     if (instr->i_name == I_FADD)
95       intrinsic_name = "@llvm.pgi.arm.vmla.f32";
96     else if (instr->i_name == I_FSUB)
97       intrinsic_name = "@llvm.pgi.arm.vmls.f32";
98     break;
99   case LL_DOUBLE:
100     if (instr->i_name == I_FADD)
101       intrinsic_name = "@llvm.pgi.arm.vmla.f64";
102     else if (instr->i_name == I_FSUB)
103       intrinsic_name = "@llvm.pgi.arm.vmls.f64";
104     break;
105   default:
106     break;
107   }
108   if (intrinsic_name) {
109     params = gen_copy_op(op);
110     params->next = gen_copy_list_op(mul_instr->operands);
111     update_param_use_count(params);
112     replace_by_call_to_llvm_instrinsic(instr, intrinsic_name, params);
113     DEC_UCOUNT(mul_instr);
114   }
115 }
116 
117 static INSTR_LIST *
is_from_instr(int i_name,OPERAND * op)118 is_from_instr(int i_name, OPERAND *op)
119 {
120   if (op->ot_type == OT_TMP) {
121     INSTR_LIST *idef;
122     idef = op->tmps->info.idef;
123     if (idef && (idef->i_name == i_name))
124       return idef;
125   }
126   return NULL;
127 }
128 
129 static void
optimize_instruction(INSTR_LIST * instr)130 optimize_instruction(INSTR_LIST *instr)
131 {
132   INSTR_LIST *op_instr;
133 
134   if (instr->tmps && instr->tmps->use_count == 0)
135     return;
136   switch (instr->i_name) {
137   default:
138     break;
139   case I_FADD:
140     if ((op_instr = is_from_instr(I_FMUL, instr->operands)))
141       replace_by_fma_intrinsic(instr, instr->operands, op_instr);
142     else if ((op_instr = is_from_instr(I_FMUL, instr->operands->next)))
143       replace_by_fma_intrinsic(instr, instr->operands->next, op_instr);
144     break;
145   case I_FSUB:
146     if ((op_instr = is_from_instr(I_FMUL, instr->operands->next)))
147       replace_by_fma_intrinsic(instr, instr->operands->next, op_instr);
148     break;
149   }
150 }
151 
152 void
optimize_block(INSTR_LIST * last_block_instr)153 optimize_block(INSTR_LIST *last_block_instr)
154 {
155 #ifdef TARGET_LLVM_ARM
156   INSTR_LIST *instr, *last_instr;
157 
158   last_instr = NULL;
159   for (instr = last_block_instr; instr; instr = instr->prev) {
160     instr->flags |= INST_VISITED;
161 
162     if (last_instr == NULL && instr->i_name == I_NONE)
163       last_instr = instr;
164     if (instr->flags & STARTEBB) {
165       if (last_instr != NULL)
166         break;
167     }
168   }
169 
170   for (instr = last_block_instr; instr; instr = instr->prev) {
171     optimize_instruction(instr);
172 
173     if (last_instr == NULL && instr->i_name == I_NONE)
174       last_instr = instr;
175     if (instr->flags & STARTEBB) {
176       if (last_instr != NULL)
177         break;
178     }
179   }
180 
181   for (instr = last_block_instr; instr; instr = instr->prev) {
182     instr->flags &= ~INST_VISITED;
183 
184     if (last_instr == NULL && instr->i_name == I_NONE)
185       last_instr = instr;
186     if (instr->flags & STARTEBB) {
187       if (last_instr != NULL)
188         break;
189     }
190   }
191 #endif
192 }
193 
194 /**
195    \brief Determine if \p cand has the form <tt>1.0 / y</tt>
196  */
197 static bool
is_recip(OPERAND * cand)198 is_recip(OPERAND *cand)
199 {
200   if (cand && cand->tmps) {
201     INSTR_LIST *il = cand->tmps->info.idef;
202     const int divIli = il ? il->ilix : 0;
203     OPERAND *ilOp = divIli ? il->operands : NULL;
204     if (ilOp && (cand->tmps->use_count == 1) && (il->i_name == I_FDIV) &&
205         (ilOp->ot_type == OT_CONSTSPTR)) {
206       const int sptr = ilOp->val.sptr;
207       switch (ILI_OPC(ILI_OPND(divIli, 1))) {
208       case IL_FCON:
209         return sptr == stb.flt1;
210       case IL_DCON:
211         return sptr == stb.dbl1;
212 #ifdef LONG_DOUBLE_FLOAT128
213       case IL_FLOAT128CON:
214         return sptr == stb.float128_1;
215 #endif
216       default:
217         break;
218       }
219     }
220   }
221   return false;
222 }
223 
224 /**
225    \brief Helper function
226    \param x  This is the \c x operand in a <tt>(/ x)</tt> insn [precondition]
227    \param recip  The <tt>(/ 1.0 y)</tt> term for splicing
228 
229    Peephole rewrite of the bridge IR. The C compiler will DCE the unused div
230    operation. The C++ compiler will not, but instead leans on LLVM to DCE the
231    <tt>(/ 1.0 undef)</tt> operation.
232  */
233 static void
fixup_recip_div(OPERAND * x,OPERAND * recip)234 fixup_recip_div(OPERAND *x, OPERAND *recip)
235 {
236   INSTR_LIST *il = recip->tmps->info.idef; // il <- (/ 1.0 y)
237   OPERAND *undef = make_undef_op(il->operands->next->ll_type);
238   x->next = il->operands->next; // (/ x) ==> (/ x y)
239   il->operands->next = undef;   // (/ 1.0 y) ==> (/ 1.0 undef)
240   recip->tmps->use_count--;
241 }
242 
243 /**
244    \brief Translate a fp mul to a fp div ILI opcode
245    \param opc  The opcode to translate
246    \return The DIV form if \c opc is a FP MUL, otherwise \c opc itself
247 
248    NB: Used to overwrite the opcode in the ILI. Any subsequent passes (FMA) that
249    examine the ILI must not conclude that this is still a multiply operation.
250  */
251 static ILI_OP
convert_mul_to_div(ILI_OP opc)252 convert_mul_to_div(ILI_OP opc)
253 {
254   switch (opc) {
255   case IL_FMUL:
256     return IL_FDIV;
257   case IL_DMUL:
258     return IL_DDIV;
259 #ifdef LONG_DOUBLE_FLOAT128
260   case IL_FLOAT128MUL:
261     return IL_FLOAT128DIV;
262 #endif
263   default:
264     break;
265   }
266   return opc;
267 }
268 
269 /**
270    \brief Translate <tt>x * 1.0 / y</tt> to <tt>x / y</tt>.
271    \param mul  A FP multiply instruction
272 
273    Preconditions: \p mul is a well-formed I_FMUL, has a positive use count
274  */
275 void
maybe_undo_recip_div(INSTR_LIST * mul)276 maybe_undo_recip_div(INSTR_LIST *mul)
277 {
278   OPERAND *lop = mul->operands;
279   OPERAND *rop = lop->next;
280 
281   if (is_recip(lop)) {
282     // case: (1.0 / y) * x
283     mul->i_name = I_FDIV;
284     ILI_OPCP(mul->ilix, convert_mul_to_div(ILI_OPC(mul->ilix)));
285     mul->operands = rop; // x
286     fixup_recip_div(rop, lop);
287   } else if (is_recip(rop)) {
288     // case: x * (1.0 / y)
289     mul->i_name = I_FDIV;
290     ILI_OPCP(mul->ilix, convert_mul_to_div(ILI_OPC(mul->ilix)));
291     fixup_recip_div(lop, rop);
292   } else {
293     // mul not recognized as a mult-by-recip form
294     // ok, do nothing
295   }
296 }
297 
298 /* ---------------------------------------------------------------------- */
299 //
300 // Widening transform:
301 //
302 // Rewrite the ILIs such that address arithmetic is done in the i64 domain
303 // rather than mostly in the i32 domain and then extended late to i64. It is
304 // understood that the behavior at two's complement i32's boundaries is not
305 // semantically identical, and we don't make wraparound guarantees here.
306 
307 /**
308    \brief Return a wide integer dtype, either signed or unsigned
309  */
310 INLINE static DTYPE
getWideDType(bool isUnsigned)311 getWideDType(bool isUnsigned)
312 {
313   return isUnsigned ? DT_UINT8 : DT_INT8;
314 }
315 
316 /**
317    \brief Create a new temp that is a wide integer type
318    \param dty  The wide integer dtype
319  */
320 static SPTR
getNewWideSym(DTYPE dty)321 getNewWideSym(DTYPE dty)
322 {
323   static int bump;
324   SPTR wideSym = getccsym('w', ++bump, ST_VAR);
325 
326   SCP(wideSym, SC_AUTO);
327   SCOPEP(wideSym, 3);
328   DTYPEP(wideSym, dty);
329   return wideSym;
330 }
331 
332 /**
333    \brief Push a cast of \p opc down the tree \p ilix
334  */
335 static int
widenPushdown(ILI_OP opc,int ilix)336 widenPushdown(ILI_OP opc, int ilix)
337 {
338   int l, r, r3, r4;
339   ILI_OP newOp;
340 
341   switch (ILI_OPC(ilix)) {
342   case IL_KIMV:
343     return ILI_OPND(ilix, 1);
344   case IL_ICJMP:
345     l = widenPushdown(opc, ILI_OPND(ilix, 1));
346     r = widenPushdown(opc, ILI_OPND(ilix, 2));
347     r3 = ILI_OPND(ilix, 3);
348     r4 = ILI_OPND(ilix, 4);
349     return ad4ili(IL_KCJMP, l, r, r3, r4);
350   case IL_UICJMP:
351     l = widenPushdown(opc, ILI_OPND(ilix, 1));
352     r = widenPushdown(opc, ILI_OPND(ilix, 2));
353     r3 = ILI_OPND(ilix, 3);
354     r4 = ILI_OPND(ilix, 4);
355     return ad4ili(IL_UKCJMP, l, r, r3, r4);
356   case IL_IKMV:
357   case IL_UIKMV:
358     return widenPushdown(opc, ILI_OPND(ilix, 1));
359   case IL_LDKR:
360   case IL_KMUL:
361   case IL_KADD:
362   case IL_KSUB:
363   case IL_KCON:
364     return ilix;
365   default:
366     return ad1ili(opc, ilix);
367   case IL_IADD:
368   case IL_UIADD:
369     newOp = IL_KADD;
370     break;
371   case IL_ISUB:
372   case IL_UISUB:
373     newOp = IL_KSUB;
374     break;
375   case IL_IMUL:
376   case IL_UIMUL:
377     newOp = IL_KMUL;
378     break;
379   case IL_IMAX:
380     newOp = IL_KMAX;
381     break;
382   case IL_IMIN:
383     newOp = IL_KMIN;
384     break;
385   }
386   l = widenPushdown(opc, ILI_OPND(ilix, 1));
387   r = widenPushdown(opc, ILI_OPND(ilix, 2));
388   return ad2ili(newOp, l, r);
389 }
390 
391 /**
392    \brief Perform widening on address arithmetic
393    \param ilix  The root of the tree to be widened
394 
395    The root of the tree will already be wide because of large arrays.  However,
396    we want to force any sign- or zero-extension operations down towards the
397    leaves of the tree and promote the arithmetic operations to 64 bits.
398  */
399 static bool
widenAddressArithmetic(int ilix)400 widenAddressArithmetic(int ilix)
401 {
402   int i;
403   bool rv = false;
404   const ILI_OP opc = ILI_OPC(ilix);
405   const int noprs = ilis[opc].oprs;
406 
407   for (i = 1; i <= noprs; ++i)
408     if (IL_ISLINK(opc, i)) {
409       const int opnd = ILI_OPND(ilix, i);
410       ILI_OP opx = ILI_OPC(opnd);
411       if ((opx == IL_IKMV) || (opx == IL_UIKMV)) {
412         int x = widenPushdown(opx, ILI_OPND(opnd, 1));
413         ILI_OPND(ilix, i) = x;
414         rv = true;
415       } else {
416         rv |= widenAddressArithmetic(ILI_OPND(ilix, i));
417       }
418     }
419   return rv;
420 }
421 
422 /**
423    \brief Find address arithmetic and widen it
424    \param ilix  The root of the ILI tree to be examined
425 
426    We traverse the tree and look for any address arithmetic. If any is found,
427    call widenAddressArithmetic().
428  */
429 static bool
widenAnyAddressing(int ilix)430 widenAnyAddressing(int ilix)
431 {
432   bool rv = false;
433   const ILI_OP opc = ILI_OPC(ilix);
434   const int noprs = ilis[opc].oprs;
435 
436   if (opc == IL_KAMV) {
437     rv = widenAddressArithmetic(ILI_OPND(ilix, 1));
438   } else {
439     int i;
440     for (i = 1; i <= noprs; ++i)
441       if (IL_ISLINK(opc, i))
442         rv |= widenAnyAddressing(ILI_OPND(ilix, i));
443   }
444   return rv;
445 }
446 
447 /**
448    \brief Add widen targets in \p ilix to the var map.
449  */
450 static void
widenAddNarrowVars(int ilix,hashset_t widenVar_set)451 widenAddNarrowVars(int ilix, hashset_t widenVar_set)
452 {
453   int i;
454   const ILI_OP opc = ILI_OPC(ilix);
455   const int noprs = ilis[opc].oprs;
456 
457   for (i = 1; i <= noprs; ++i)
458     if (IL_ISLINK(opc, i)) {
459       const int opnd = ILI_OPND(ilix, i);
460       if (((opc == IL_IKMV) || (opc == IL_UIKMV))) {
461         if (IL_TYPE(ILI_OPC(opnd)) == ILTY_LOAD)
462           hashset_replace(widenVar_set, INT2HKEY(opnd));
463       } else {
464         widenAddNarrowVars(opnd, widenVar_set);
465       }
466     }
467 }
468 
469 /**
470    \brief Test if this load is from a private variable
471  */
472 static bool
widenAconIsPrivate(int ilix)473 widenAconIsPrivate(int ilix)
474 {
475   SPTR sym;
476 
477   assert(ILI_OPC(ilix) == IL_ACON, "ilix must be ACON", ilix, ERR_Fatal);
478   sym = ILI_SymOPND(ilix, 1);
479   if (DTY(DTYPEG(sym)) == TY_PTR)
480     sym = SymConval1(sym);
481 
482   if (DT_ISINT(DTYPEG(sym)))
483     return (SCG(sym) == SC_PRIVATE);
484   return false;
485 }
486 
487 /**
488    \brief Add a new wider store
489 
490    Generate a new tree and insert it after \p ilt.
491  */
492 INLINE static int
widenInsertWideStore(int ilt,int lhsIli,int rhsIli,int nme)493 widenInsertWideStore(int ilt, int lhsIli, int rhsIli, int nme)
494 {
495   const int newIli = ad4ili(IL_STKR, rhsIli, lhsIli, nme, MSZ_I8);
496   addilt(ilt, newIli);
497   return newIli;
498 }
499 
500 INLINE static void
widenProcessDirectLoad(int ldIli,hashmap_t map)501 widenProcessDirectLoad(int ldIli, hashmap_t map)
502 {
503   const DTYPE dty = getWideDType(false); // FIXME
504   const SPTR wideVar = getNewWideSym(dty);
505   const int nme = ILI_OPND(ldIli, 2);
506   const DTYPE wty = get_type(2, TY_PTR, dty);
507   const int wideAddr = ad1ili(IL_ACON, get_acon3(wideVar, 0, wty));
508   const int wideLoad = ad1ili(IL_KIMV, ad3ili(IL_LDKR, wideAddr, nme, MSZ_I8));
509   hash_data_t data = INT2HKEY(wideLoad);
510   hashmap_replace(map, INT2HKEY(ldIli), &data);
511 }
512 
513 INLINE static bool
hasExactlyOneStore(int aconIli,int * cilt)514 hasExactlyOneStore(int aconIli, int *cilt)
515 {
516   int bih, ilt;
517   unsigned count = 0;
518 
519   for (bih = BIH_NEXT(0); bih; bih = BIH_NEXT(bih)) {
520     // make sure aconSym is written to once
521     for (ilt = BIH_ILTFIRST(bih); ilt; ilt = ILT_NEXT(ilt)) {
522       const int ilix = ILT_ILIP(ilt);
523       if (ILT_DELETE(ilt))
524         continue;
525       if ((IL_TYPE(ILI_OPC(ilix)) == ILTY_STORE) &&
526           (ILI_OPND(ilix, 2) == aconIli)) {
527         ++count;
528         *cilt = ilt;
529       }
530     }
531   }
532   return (count == 1);
533 }
534 
535 INLINE static void
widenProcessIndirectLoad(int ldIli,int aconIli,hashmap_t map)536 widenProcessIndirectLoad(int ldIli, int aconIli, hashmap_t map)
537 {
538   int cilt;
539   if (hasExactlyOneStore(aconIli, &cilt)) {
540     int st;
541     const DTYPE dty = getWideDType(false); // FIXME
542     const SPTR wideVar = getNewWideSym(dty);
543     const int nme = ILI_OPND(ldIli, 2);
544     const DTYPE tyw = get_type(2, TY_PTR, dty);
545     const int wideAddr = ad1ili(IL_ACON, get_acon3(wideVar, 0, tyw));
546     int wideLoad = ad1ili(IL_KIMV, ad3ili(IL_LDKR, wideAddr, nme, MSZ_I8));
547     hash_data_t data = INT2HKEY(wideLoad);
548     hashmap_replace(map, INT2HKEY(ldIli), &data);
549     wideLoad = ad1ili(IL_IKMV, ldIli);
550     st = widenInsertWideStore(cilt, wideAddr, wideLoad, nme);
551     data = 0;
552     hashmap_replace(map, INT2HKEY(st), &data);
553   }
554 }
555 
556 /**
557    \brief Create a wide load from the given narrow load
558    \param key  The narrow load (as a key)
559    \param map  A hashmap to add the (narrow -> wide) mapping
560  */
561 static void
widenCreateWideLocal(hash_key_t key,void * map)562 widenCreateWideLocal(hash_key_t key, void *map)
563 {
564   const int loadIli = HKEY2INT(key);
565   hashmap_t newMap = (hashmap_t)map;
566   const ILI_OP ldOpc = ILI_OPC(loadIli);
567 
568   assert(ldOpc == IL_LD, "load map does not contain load", ldOpc, ERR_Fatal);
569 
570   // Does loadIli match indirect form?
571   if ((ILI_OPC(ILI_OPND(loadIli, 1)) == IL_LDA) &&
572       (ILI_OPC(ILI_OPND(ILI_OPND(loadIli, 1), 1)) == IL_ACON)) {
573     const int aconIli = ILI_OPND(ILI_OPND(loadIli, 1), 1);
574     if (widenAconIsPrivate(aconIli))
575       widenProcessIndirectLoad(loadIli, aconIli, newMap);
576   } else if (ILI_OPC(ILI_OPND(loadIli, 1)) == IL_ACON) {
577     const int aconIli = ILI_OPND(loadIli, 1);
578     if (widenAconIsPrivate(aconIli))
579       widenProcessDirectLoad(loadIli, newMap);
580   }
581 }
582 
583 INLINE static void
widenApplyFree(int ilix,hashmap_t map)584 widenApplyFree(int ilix, hashmap_t map)
585 {
586   const ILI_OP opc = ILI_OPC(ilix);
587   if (opc == IL_FREEIR) {
588     const int argIli = ILI_OPND(ilix, 1);
589     hash_data_t data;
590     if (hashmap_lookup(map, INT2HKEY(argIli), &data)) {
591       const int newIli = HKEY2INT(data);
592       ILI_OPCP(ilix, IL_FREEKR);
593       ILI_OPND(ilix, 1) = ILI_OPND(newIli, 1);
594     }
595   }
596 }
597 
598 static void
widenApplyVarMap(int ilix,hashmap_t map)599 widenApplyVarMap(int ilix, hashmap_t map)
600 {
601   hash_data_t data;
602   int i;
603   const ILI_OP opc = ILI_OPC(ilix);
604   const int noprs = ilis[opc].oprs;
605 
606   if (hashmap_lookup(map, INT2HKEY(ilix), &data))
607     if (!data)
608       return;
609 
610   for (i = 1; i <= noprs; ++i)
611     if (IL_ISLINK(opc, i)) {
612       const int opnd = ILI_OPND(ilix, i);
613       const ILI_OP opx = ILI_OPC(opnd);
614       if ((opx == IL_IKMV) || (opx == IL_UIKMV)) {
615         const int opnd2 = ILI_OPND(opnd, 1);
616         if (hashmap_lookup(map, INT2HKEY(opnd2), &data)) {
617           const int newIli = HKEY2INT(data);
618           ILI_OPND(ilix, i) = ILI_OPND(newIli, 1);
619         } else {
620           widenApplyVarMap(opnd2, map);
621         }
622       } else if (opx == IL_CSEIR) {
623         // ignore
624       } else if (hashmap_lookup(map, INT2HKEY(opnd), &data)) {
625         const int newIli = HKEY2INT(data);
626         ILI_OPND(ilix, i) = newIli;
627       } else {
628         widenApplyVarMap(opnd, map);
629       }
630     }
631   if (ILI_ALT(ilix))
632     widenApplyVarMap(ILI_ALT(ilix), map);
633 }
634 
635 static void
widenApplyCse(int ilix,hashmap_t map)636 widenApplyCse(int ilix, hashmap_t map)
637 {
638   hash_data_t data;
639   int i;
640   const ILI_OP opc = ILI_OPC(ilix);
641   const int noprs = ilis[opc].oprs;
642 
643   if (hashmap_lookup(map, INT2HKEY(ilix), &data))
644     if (!data)
645       return;
646 
647   for (i = 1; i <= noprs; ++i)
648     if (IL_ISLINK(opc, i)) {
649       const int opnd = ILI_OPND(ilix, i);
650       const ILI_OP opx = ILI_OPC(opnd);
651       if ((opx == IL_IKMV) || (opx == IL_UIKMV)) {
652         const int opnd2 = ILI_OPND(opnd, 1);
653         if ((ILI_OPC(opnd2) == IL_CSEIR) &&
654             hashmap_lookup(map, INT2HKEY(ILI_OPND(opnd2, 1)), &data)) {
655           const int replIlix = ILI_OPND(HKEY2INT(data), 1);
656           ILI_OPND(ilix, i) = ad1ili(IL_CSEKR, replIlix);
657         }
658       } else {
659         widenApplyCse(opnd, map);
660       }
661     }
662 }
663 
664 #if DEBUG
665 static void
dumpWidenVars(hash_key_t key,hash_data_t data,void * _)666 dumpWidenVars(hash_key_t key, hash_data_t data, void *_)
667 {
668   FILE *f = gbl.dbgfil ? gbl.dbgfil : stderr;
669   fprintf(f, "{\n\tkey:\n");
670   dilitre(HKEY2INT(key));
671   fprintf(f, "\tdata:\n");
672   if (data)
673     dilitre(HKEY2INT(data));
674   fprintf(f, "}\n");
675 }
676 #endif
677 
678 /**
679    \brief If <tt>{key -> data}</tt> maps to an ACON, place ACON in \p map_
680    \param key  A hashmap key
681    \param data A hashmap value
682    \param map_ A hashmap to be populated
683    Called by hashmap_iterate().
684  */
685 static void
gatherLocals(hash_key_t key,hash_data_t data,void * map_)686 gatherLocals(hash_key_t key, hash_data_t data, void *map_)
687 {
688   const int ldIli = HKEY2INT(key);
689   hashmap_t map = (hashmap_t)map_;
690 
691   if (!data)
692     return;
693   if (ILI_OPC(ILI_OPND(ldIli, 1)) == IL_ACON)
694     hashmap_replace(map, INT2HKEY(ILI_OPND(ldIli, 1)), &data);
695 }
696 
697 INLINE static void
widenApplyStore(int ilix,hashmap_t map)698 widenApplyStore(int ilix, hashmap_t map)
699 {
700   if (IL_TYPE(ILI_OPC(ilix)) == ILTY_STORE) {
701     const int stAcon = ILI_OPND(ilix, 2);
702     hash_data_t data;
703     if (hashmap_lookup(map, INT2HKEY(stAcon), &data)) {
704       ILI_OPND(ilix, 4) = MSZ_I8;
705       ILI_OPND(ilix, 2) = ILI_OPND(ILI_OPND(HKEY2INT(data), 1), 1);
706       ILI_OPND(ilix, 1) = ad1ili(IL_IKMV, ILI_OPND(ilix, 1));
707     }
708   }
709 }
710 
711 /**
712    \brief Does the current procedure have blocks marked no dep check?
713  */
714 bool
funcHasNoDepChk(void)715 funcHasNoDepChk(void)
716 {
717   int bih = BIH_NEXT(0);
718   for (; bih; bih = BIH_NEXT(bih))
719     if (BIH_NODEPCHK(bih))
720       return true;
721   return false;
722 }
723 
724 static void
widenElimAddrTaken(int ilix,hashset_t zt)725 widenElimAddrTaken(int ilix, hashset_t zt)
726 {
727   int i;
728   const ILI_OP opc = ILI_OPC(ilix);
729   const int noprs = ilis[opc].oprs;
730 
731   if ((IL_TYPE(opc) == ILTY_STORE) &&
732       (ILI_OPC(ILI_OPND(ilix, 2)) == IL_ACON)) {
733     widenElimAddrTaken(ILI_OPND(ilix, 1), zt);
734     return;
735   }
736   if ((opc == IL_ACON) && hashset_lookup(zt, INT2HKEY(ilix))) {
737     hashset_erase(zt, INT2HKEY(ilix));
738     return;
739   }
740   for (i = 1; i <= noprs; ++i)
741     if (IL_ISLINK(opc, i)) {
742       const int opnd = ILI_OPND(ilix, i);
743       const ILI_OP opx = ILI_OPC(opnd);
744       if ((IL_TYPE(opx) == ILTY_LOAD) &&
745           (ILI_OPC(ILI_OPND(opnd, 1)) == IL_ACON))
746         continue;
747       widenElimAddrTaken(ILI_OPND(ilix, i), zt);
748     }
749   if (ILI_ALT(ilix))
750     widenElimAddrTaken(ILI_ALT(ilix), zt);
751 }
752 
753 static void
widenGatherAcon(hash_key_t key,void * zet_)754 widenGatherAcon(hash_key_t key, void *zet_)
755 {
756   hashset_t zet = (hashset_t)zet_;
757   const int ilix = HKEY2INT(key);
758   const int acon = ILI_OPND(ilix, 1);
759   assert(IL_TYPE(ILI_OPC(ilix)) == ILTY_LOAD, "expected load", 0, ERR_Fatal);
760   hashset_replace(zet, INT2HKEY(acon));
761 }
762 
763 INLINE static void
hashset_swap(hashset_t * s1,hashset_t * s2)764 hashset_swap(hashset_t *s1, hashset_t *s2)
765 {
766   hashset_t swap = *s1;
767   *s1 = *s2;
768   *s2 = swap;
769 }
770 
771 typedef struct { hashset_t inSet; hashset_t aconSet; } AconSets;
772 
773 /**
774    \brief Populate \c inSet with \p key if acon is in \c aconSet
775  */
776 static void
pruneAcon(hash_key_t key,void * p)777 pruneAcon(hash_key_t key, void *p)
778 {
779   AconSets *sp = (AconSets*) p;
780   const int acon = ILI_OPND(HKEY2INT(key), 1);
781   if (hashset_lookup(sp->aconSet, INT2HKEY(acon)))
782     hashset_replace(sp->inSet, key);
783 }
784 
785 /**
786    \brief Prune \p loadSt to only those loads that reference acon in \p aconSt
787    \param loadSt   The set of loads to be pruned
788    \param aconSt   The set of legal ACON nodes
789  */
790 INLINE static void
widenPruneAcon(hashset_t * loadSt,hashset_t aconSt)791 widenPruneAcon(hashset_t *loadSt, hashset_t aconSt)
792 {
793   hashset_t newSet = hashset_alloc(hash_functions_direct);
794   AconSets aconSets = {newSet, aconSt};
795   hashset_iterate(*loadSt, pruneAcon, (void*)&aconSets);
796   hashset_swap(loadSt, &newSet);
797   hashset_free(newSet);
798 }
799 
800 /**
801    \brief Widen address arithmetic
802  */
803 void
widenAddressArith(void)804 widenAddressArith(void)
805 {
806   int bih, ilt;
807   hashset_t widenVar_set;
808 
809   // paranoia check: only process functions with nodepchk flag
810   if (!funcHasNoDepChk())
811     return;
812 
813 #if DEBUG
814   if (DBGBIT(12, 0x40))
815     dumpblocks("before widen");
816 #endif
817 
818   widenVar_set = hashset_alloc(hash_functions_direct);
819   for (bih = BIH_NEXT(0); bih; bih = BIH_NEXT(bih))
820     for (ilt = BIH_ILTFIRST(bih); ilt; ilt = ILT_NEXT(ilt)) {
821       const int ilix = ILT_ILIP(ilt);
822       if (ILT_DELETE(ilt))
823         continue;
824       if (ILI_OPC(ilix) == IL_ICJMP) {
825         const int x = widenPushdown(IL_IKMV, ilix);
826         ILT_ILIP(ilt) = x;
827         widenAddNarrowVars(x, widenVar_set);
828       } else if (ILI_OPC(ilix) == IL_UICJMP) {
829         const int x = widenPushdown(IL_UIKMV, ilix);
830         ILT_ILIP(ilt) = x;
831         widenAddNarrowVars(x, widenVar_set);
832       } else {
833         const bool found = widenAnyAddressing(ilix);
834         if (found)
835           widenAddNarrowVars(ilix, widenVar_set);
836       }
837     }
838 
839   if (hashset_size(widenVar_set)) {
840     hashset_t zet = hashset_alloc(hash_functions_direct);
841     hashset_iterate(widenVar_set, widenGatherAcon, (void*)zet);
842     for (bih = BIH_NEXT(0); bih; bih = BIH_NEXT(bih))
843       for (ilt = BIH_ILTFIRST(bih); ilt; ilt = ILT_NEXT(ilt))
844         widenElimAddrTaken(ILT_ILIP(ilt), zet);
845     widenPruneAcon(&widenVar_set, zet);
846     hashset_free(zet);
847   }
848 
849   if (hashset_size(widenVar_set)) {
850     hashmap_t newMap = hashmap_alloc(hash_functions_direct);
851     hashmap_t aconMap = hashmap_alloc(hash_functions_direct);
852     hashset_iterate(widenVar_set, widenCreateWideLocal, newMap);
853     hashmap_iterate(newMap, gatherLocals, aconMap);
854 #if DEBUG
855     if (DBGBIT(12, 0x40)) {
856       hashmap_iterate(newMap, dumpWidenVars, NULL);
857     }
858 #endif
859     for (bih = BIH_NEXT(0); bih; bih = BIH_NEXT(bih))
860       for (ilt = BIH_ILTFIRST(bih); ilt; ilt = ILT_NEXT(ilt)) {
861         const int ilix = ILT_ILIP(ilt);
862         widenApplyFree(ilix, newMap);
863         widenApplyVarMap(ilix, newMap);
864         widenApplyStore(ilix, aconMap);
865         widenApplyCse(ilix, newMap);
866       }
867     hashmap_free(aconMap);
868     hashmap_free(newMap);
869   }
870 
871 #if DEBUG
872   if (DBGBIT(12, 0x40))
873     dumpblocks("after widen");
874 #endif
875 
876   hashset_free(widenVar_set);
877 }
878 
879 /* ---------------------------------------------------------------------- */
880 //
881 // Replace load-load sequences in "omp simd" loops transform:
882 //
883 // We want to cache the result of the first load (a pointer) in a temp and
884 // have the second load use the temp in the body of the loop.  This is to
885 // workaround the issue that the outliner creates a bag of <tt>void*</tt>
886 // for all the enclosing routine's variables and introduces false aliasing.
887 
888 /**
889    \brief Does block \p bih branch to block \p target?
890  */
891 bool
block_branches_to(int bih,int target)892 block_branches_to(int bih, int target)
893 {
894   if (bih != 0) {
895     const int ilt = BIH_ILTLAST(bih);
896     const int ili = ILT_ILIP(ilt);
897     const ILI_OP op = ILI_OPC(ili);
898     if (IL_TYPE(op) == ILTY_BRANCH) {
899       const int lab = ILI_OPND(ili, ilis[op].oprs);
900       const int targ = ILIBLKG(lab);
901       return (targ == target);
902     }
903   }
904   return false;
905 }
906 
907 INLINE static void
rlleMakeSet(hash_key_t key,hash_data_t data,void * set_)908 rlleMakeSet(hash_key_t key, hash_data_t data, void *set_)
909 {
910   hashset_t zet = (hashset_t)set_;
911   const int isGood = HKEY2INT(data);
912   if (isGood)
913     hashset_insert(zet, key);
914 }
915 
916 /**
917    \brief Search the omp simd block for load-load patterns
918    \param ilix  the root of the ILI tree
919    \param zt    set of candidates (<tt>ACON</tt>)
920    \param mp    <tt>{ACON -> 0}</tt> (output map)
921  */
922 static void
rlleFindLL(int ilix,hashset_t zt,hashmap_t mp)923 rlleFindLL(int ilix, hashset_t zt, hashmap_t mp)
924 {
925   int i;
926   const ILI_OP opc = ILI_OPC(ilix);
927   const int noprs = ilis[opc].oprs;
928   hash_data_t data = INT2HKEY(0);
929 
930   for (i = 1; i <= noprs; ++i)
931     if (IL_ISLINK(opc, i)) {
932       const int opnd = ILI_OPND(ilix, i);
933       const ILI_OP opx = ILI_OPC(opnd);
934       if (IL_TYPE(opx) == ILTY_LOAD) {
935         const int opnd2 = ILI_OPND(opnd, 1);
936         if ((IL_TYPE(ILI_OPC(opnd2)) == ILTY_LOAD) &&
937             hashset_lookup(zt, INT2HKEY(ILI_OPND(opnd2, 1)))) {
938           const int opnd3 = ILI_OPND(opnd2, 1);
939           hashmap_replace(mp, INT2HKEY(opnd3), &data);
940         } else {
941           rlleFindLL(opnd, zt, mp);
942         }
943       } else {
944         rlleFindLL(opnd, zt, mp);
945       }
946     }
947 }
948 
949 /**
950    \brief Was an initializer for this load pair created?
951    If a candidate's initializer cannot be found, the map will be 0.
952  */
953 INLINE static bool
rlleInitializerFound(hash_data_t data)954 rlleInitializerFound(hash_data_t data)
955 {
956   return data != INT2HKEY(0);
957 }
958 
959 /**
960    \brief Rewrite the ILIs with substitutions for load-load patterns
961  */
962 static void
rlleReplace(int ilix,hashmap_t map)963 rlleReplace(int ilix, hashmap_t map)
964 {
965   int i;
966   const ILI_OP opc = ILI_OPC(ilix);
967   const int noprs = ilis[opc].oprs;
968 
969   for (i = 1; i <= noprs; ++i)
970     if (IL_ISLINK(opc, i)) {
971       const int opnd = ILI_OPND(ilix, i);
972       const ILI_OP opx = ILI_OPC(opnd);
973       // look for (LDA (LDA (ACON _))) where ACON is in map
974       if (IL_TYPE(opx) == ILTY_LOAD) {
975         const int opnd2 = ILI_OPND(opnd, 1);
976         hash_data_t data;
977         if ((IL_TYPE(ILI_OPC(opnd2)) == ILTY_LOAD) &&
978             hashmap_lookup(map, INT2HKEY(ILI_OPND(opnd2, 1)), &data) &&
979             rlleInitializerFound(data)) {
980           // rewrite to (LDA (ACON' _))
981           ILI_OPND(opnd, 1) = HKEY2INT(data);
982         } else {
983           rlleReplace(opnd2, map);
984         }
985       } else {
986         rlleReplace(opnd, map);
987       }
988     }
989 }
990 
991 /**
992    \brief Eliminate load-load operations from the uplevel structure
993    \pre The current procedure is an outlined function with no dep check blocks
994  */
995 void
redundantLdLdElim(void)996 redundantLdLdElim(void)
997 {
998   int bih, ilt;
999   hashmap_t candMap = hashmap_alloc(hash_functions_direct);
1000 
1001   // scan forward to find candidates
1002   //  construct map of private variables written exactly once
1003   for (bih = BIH_NEXT(0); bih; bih = BIH_NEXT(bih)) {
1004     for (ilt = BIH_ILTFIRST(bih); ilt; ilt = ILT_NEXT(ilt)) {
1005       const int ilix = ILT_ILIP(ilt);
1006       if (ILT_DELETE(ilt))
1007         continue;
1008       if ((IL_TYPE(ILI_OPC(ilix)) == ILTY_STORE) &&
1009           (ILI_OPC(ILI_OPND(ilix, 2)) == IL_ACON) &&
1010           widenAconIsPrivate(ILI_OPND(ilix, 2))) {
1011         const int acon = ILI_OPND(ilix, 2);
1012         hash_data_t data;
1013         if (hashmap_lookup(candMap, INT2HKEY(acon), &data)) {
1014           data = INT2HKEY(0);
1015           hashmap_replace(candMap, INT2HKEY(acon), &data);
1016         } else {
1017           data = INT2HKEY(1);
1018           hashmap_insert(candMap, INT2HKEY(acon), data);
1019         }
1020       }
1021     }
1022   }
1023   if (hashmap_size(candMap)) {
1024     // find candidates that appear in omp simd block, create new map
1025     bool inNoDep = false;
1026     hashset_t zet = hashset_alloc(hash_functions_direct);
1027     hashmap_iterate(candMap, rlleMakeSet, (void*)zet);
1028     hashmap_clear(candMap);
1029     for (bih = BIH_NEXT(0); bih; bih = BIH_NEXT(bih)) {
1030       if (!inNoDep)
1031         inNoDep = BIH_NODEPCHK(bih);
1032       if (inNoDep)
1033         for (ilt = BIH_ILTFIRST(bih); ilt; ilt = ILT_NEXT(ilt)) {
1034           const int ilix = ILT_ILIP(ilt);
1035           rlleFindLL(ilix, zet, candMap);
1036         }
1037       if (!(inNoDep && block_branches_to(BIH_NEXT(bih), bih)))
1038         inNoDep = false;
1039     }
1040     hashset_free(zet);
1041   }
1042 
1043   // if we have no candidates, we're done
1044   if (hashmap_size(candMap) == 0) {
1045     hashmap_free(candMap);
1046     return;
1047   }
1048 
1049   // scan the header blocks and create temps, updating map
1050   for (bih = BIH_NEXT(0); bih; bih = BIH_NEXT(bih)) {
1051     if (BIH_NODEPCHK(bih))
1052       break;
1053     for (ilt = BIH_ILTFIRST(bih); ilt; ilt = ILT_NEXT(ilt)) {
1054       const int ilix = ILT_ILIP(ilt);
1055       hash_data_t data;
1056       if (IL_TYPE(ILI_OPC(ilix)) == ILTY_STORE) {
1057         if (hashmap_lookup(candMap, INT2HKEY(ILI_OPND(ilix, 2)), &data)) {
1058           const int acon = ILI_OPND(ilix, 2);
1059           const int nme = ILI_OPND(ilix, 3);
1060           const int lda = ad3ili(IL_LDA, acon, nme, MSZ_PTR);
1061           const int loada = ad3ili(IL_LDA, lda, nme, MSZ_PTR);
1062           const DTYPE dty = DT_CPTR;
1063           const SPTR wideVar = getNewWideSym(dty);
1064           const int wAddr = ad1ili(IL_ACON, get_acon3(wideVar, 0, dty));
1065           const int stv = ad4ili(IL_STA, loada, wAddr, nme, MSZ_PTR);
1066           assert(!data, "data should be null", HKEY2INT(data), ERR_Fatal);
1067           data = INT2HKEY(wAddr);
1068           hashmap_replace(candMap, INT2HKEY(acon), &data);
1069           addilt(ilt, stv);
1070           ilt = ILT_NEXT(ilt);
1071         }
1072       }
1073     }
1074   }
1075 
1076   // scan the function and replace load-load sequences with temps, if available
1077   for (; bih; bih = BIH_NEXT(bih))
1078     for (ilt = BIH_ILTFIRST(bih); ilt; ilt = ILT_NEXT(ilt))
1079       rlleReplace(ILT_ILIP(ilt), candMap);
1080 
1081   hashmap_free(candMap);
1082 }
1083