1 /*========================== begin_copyright_notice ============================
2 
3 Copyright (C) 2017-2021 Intel Corporation
4 
5 SPDX-License-Identifier: MIT
6 
7 ============================= end_copyright_notice ===========================*/
8 
9 #include <vector>
10 #include "Mem_Manager.h"
11 #include "FlowGraph.h"
12 #include "BuildIR.h"
13 #include "SpillCode.h"
14 
15 #ifdef _DEBUG
16 #define _DEBUG_SPILL
17 #endif
18 
19 using namespace vISA;
20 
21 void splice(G4_BB* bb, INST_LIST_ITER iter, INST_LIST& instList, unsigned int CISAOff);
22 
23 //
24 // create a declare to hold the spill value of var
25 //
createNewSpillLocDeclare(G4_Declare * dcl)26 G4_Declare* SpillManager::createNewSpillLocDeclare(G4_Declare* dcl)
27 {
28 
29     if (dcl->getRegFile() == G4_FLAG)
30     {
31         MUST_BE_TRUE(dcl->getElemType() == Type_UW || dcl->getElemType() == Type_W, "flag reg's type should be UW");
32         MUST_BE_TRUE(dcl->getNumElems() <= builder.getNumFlagRegisters(), "Flag reg Spill size exceeds limit");
33     }
34     else
35     {
36         // if we are dealing with type other than UW, e.g., B, then we need to
37         // take care different data type reg moves of spill code. For now, just
38         // assume data types of addr reg are UW
39         //
40         G4_Type type = dcl->getElemType();
41         MUST_BE_TRUE(type == Type_UW ||
42                      type == Type_W ||
43                      type == Type_UD ||
44                      type == Type_D, "addr reg's type should be UW or UD");
45         MUST_BE_TRUE(dcl->getNumElems() <= getNumAddrRegisters(), "Addr reg Spill size exceeds 16 bytes");
46     }
47 
48     G4_Declare* sp = dcl->getSpilledDeclare();
49     if (sp == NULL) // not yet created
50     {
51         sp = builder.createAddrFlagSpillLoc(dcl);
52         gra.setBBId(sp, bbId);
53     }
54 
55     return sp;
56 }
57 
58 //
59 // replicate dcl for temporary use (loading value from SPILL location)
60 //
createNewTempAddrDeclare(G4_Declare * dcl)61 G4_Declare* SpillManager::createNewTempAddrDeclare(G4_Declare* dcl)
62 {
63     const char* name = builder.getNameString(builder.mem, 16, "Temp_ADDR_%d", tempDclId++);
64 
65     MUST_BE_TRUE(dcl->getElemType() == Type_UW || dcl->getElemType() == Type_W, "addr reg's type should be UW");
66     MUST_BE_TRUE(dcl->getNumRows() == 1, "Temp_ADDR should be only 1 row");
67     MUST_BE_TRUE(dcl->getNumElems() <= getNumAddrRegisters(), "Temp_ADDR exceeds 16 bytes");
68     G4_Declare* sp = builder.createDeclareNoLookup(name,
69         G4_ADDRESS,
70         dcl->getNumElems(),
71         1, // 1 row
72         Type_UW);
73     gra.setBBId(sp, bbId);
74     // Live range of new temp addrs is short so that there is no point spilling them.
75     // indicate this is for newly created addr temp so that RA won't spill it
76     // in subsequent RA allocation
77     gra.addAddrFlagSpillDcl(sp);
78 
79     return sp;
80 }
81 
createNewTempFlagDeclare(G4_Declare * dcl)82 G4_Declare* SpillManager::createNewTempFlagDeclare(G4_Declare* dcl)
83 {
84     const char* name = builder.getNameString(builder.mem, 32, "Temp_FSPILL_%d", tempDclId++);
85 
86     assert(dcl->getRegFile() == G4_FLAG && "dcl should be a flag");
87     G4_Declare* sp = builder.createFlag(dcl->getNumberFlagElements(), name);
88     gra.setBBId(sp, bbId);
89     sp->copyAlign(dcl);
90     gra.copyAlignment(sp, dcl);
91     gra.addAddrFlagSpillDcl(sp);
92 
93     return sp;
94 }
95 
96 //
97 // replicate dcl for temporary use (loading value from SPILL location)
98 //
createNewTempAddrDeclare(G4_Declare * dcl,uint16_t num_reg)99 G4_Declare* SpillManager::createNewTempAddrDeclare(G4_Declare* dcl, uint16_t num_reg)
100 {
101     const char* name = builder.getNameString(builder.mem, 16, "Temp_ADDR_%d", tempDclId++);
102 
103     G4_Type type = dcl->getElemType();
104     MUST_BE_TRUE(type == Type_UW ||
105                  type == Type_W ||
106                  type == Type_UD ||
107                  type == Type_D, "addr reg's type should be UW or UD");
108     MUST_BE_TRUE(dcl->getNumRows() == 1, "Temp_ADDR should be only 1 row");
109     MUST_BE_TRUE(dcl->getNumElems() <= getNumAddrRegisters(), "Temp_ADDR exceeds 16 bytes");
110     G4_Declare* sp = builder.createDeclareNoLookup(name,
111                                             G4_ADDRESS,
112                                             num_reg,
113                                             1, // 1 row
114                                             type);
115     gra.setBBId(sp, bbId);
116     // Live range of new temp addrs is short so that there is no point spilling them.
117     // indicate this is for newly created addr temp so that RA won't spill it
118     // in subsequent RA allocation
119     gra.addAddrFlagSpillDcl(sp);
120 
121     return sp;
122 }
123 
124 //
125 // generate a reg to reg mov inst for addr/flag spill
126 // mov  dst(dRegOff,dSubRegOff)<1>  src(sRegOff,sSubRegOff)<nRegs;nRegs,1>
127 //
genRegMov(G4_BB * bb,INST_LIST_ITER it,G4_VarBase * src,unsigned short sSubRegOff,G4_VarBase * dst,unsigned nRegs,bool useNoMask=true)128 void SpillManager::genRegMov(G4_BB* bb,
129                              INST_LIST_ITER it,
130                              G4_VarBase*    src,
131                              unsigned short sSubRegOff,
132                              G4_VarBase*    dst,
133                              unsigned       nRegs,
134                              bool           useNoMask = true)
135 {
136     builder.instList.clear();
137 
138     uint16_t dSubRegOff = 0;
139     for (uint16_t i = 16; i != 0 && nRegs != 0; i >>= 1)  // 16, 8, 4, 2, 1
140     {
141         if (nRegs >= i)
142         {
143             //
144             // create loc(0,locOff)<nRegs;nRegs,1> operand
145             //
146             /*
147                 Flag registers should always be scalar regions
148             */
149             G4_Type type = Type_W;
150             const RegionDesc* srcRgn = NULL;
151             G4_ExecSize execSize {i};
152             if (src->isFlag() || dst->isFlag())
153             {
154 
155                 type = Type_UW;
156                 if (i == 2)
157                 {
158                     type = Type_UD;
159                     execSize = g4::SIMD1;
160                 }
161                 else if (i > 2)
162                 {
163                     ASSERT_USER(false, "unsupported flag width");
164                 }
165 
166                 srcRgn = builder.getRegionScalar();
167             }
168             else
169             {
170                 srcRgn = (i== 1) ? builder.getRegionScalar() : builder.getRegionStride1();
171             }
172 
173             G4_SrcRegRegion* s = builder.createSrc(
174                 src,
175                 0,
176                 sSubRegOff,
177                 srcRgn,
178                 type);
179             //
180             // create a0.aOff<1>
181             //
182             G4_DstRegRegion* d = builder.createDst(dst, 0, dSubRegOff, 1, type);
183 
184             if (execSize != kernel.getSimdSize())
185             {
186                 // NoMask must be used in this case
187                 useNoMask = true;
188             }
189             // mov (nRegs)  a0.aOff<1>  loc(0,locOff)<4;4,1>
190             builder.createMov(execSize, d, s,
191                 useNoMask ? InstOpt_WriteEnable : InstOpt_NoOpt, true);
192 
193             sSubRegOff += i;
194             dSubRegOff += i;
195 
196             nRegs -= i;
197         }
198     }
199     MUST_BE_TRUE(nRegs == 0, ERROR_SPILLCODE);
200 
201     //
202     // insert newly created insts from builder to instList
203     //
204     splice(bb, it, builder.instList, currCISAOffset);
205 }
206 //
207 // check if dst is spilled & insert spill code
208 //
replaceSpilledDst(G4_BB * bb,INST_LIST_ITER it,G4_INST * inst,G4_Operand ** operands_analyzed,G4_Declare ** declares_created)209 void SpillManager::replaceSpilledDst(G4_BB* bb,
210                                      INST_LIST_ITER it, // where new insts will be inserted
211                                      G4_INST*       inst,
212                                      G4_Operand ** operands_analyzed,
213                                      G4_Declare ** declares_created)
214 {
215     G4_DstRegRegion* dst = inst->getDst();
216     if (dst == NULL)
217         return;
218 
219     if (dst->getBase()->isRegAllocPartaker() &&
220         dst->getBase()->asRegVar()->getDeclare()->getSpilledDeclare() != NULL) // spilled base
221     {
222         // create a dst region with spill loc
223         // original dst region  V100_uw(0,0)<1>:uw ==>
224         // new dst region SP_uw(0,0)<1>:uw
225         G4_Declare* spDcl = dst->getBase()->asRegVar()->getDeclare()->getSpilledDeclare();
226         if (dst->getRegAccess() == Direct)
227         {
228 
229             G4_DstRegRegion rgn(*dst, spDcl->getRegVar()); // using spDcl as new base
230             if (rgn.getHorzStride() == UNDEFINED_SHORT &&
231                 dst->isFlag())
232             {
233                 // Flag as destination has undefined hstride
234                 // For replacing it with spill range, make hstride 1
235                 rgn.setHorzStride(1);
236             }
237             G4_DstRegRegion* d = builder.createDstRegRegion(rgn);
238             inst->setDest(d);
239         }
240         else if (dst->getRegAccess() == IndirGRF)
241         {
242             // add (1)  r[V100_uw(0,0),0]<1>:f V124_f(0,0)<0;1,0>:f  1
243             // indirect access' base must be addr reg so we need to create a temp addr live range
244             // to load value from V100's spill loc
245             // e.g.,   mov (1) T_uw(0,0)<1>:uw SPILL_LOC_V100_uw(0,0)<0;1,0>:uw
246             //         add (1)  r[T_uw(0,0),0]<1>:f V124_f(0,0)<0;1,0>:f   1
247             //
248             // create declare for temp addr live range
249             //
250             G4_Declare* tmpDcl = NULL;
251             bool match_found = false;
252 
253             for (unsigned int j = 0; j < G4_MAX_SRCS; j++)
254             {
255                 G4_SrcRegRegion* analyzed_src = static_cast<G4_SrcRegRegion*>(operands_analyzed[j]);
256                 if (analyzed_src != NULL &&
257                     analyzed_src->getBase()->asRegVar()->getDeclare() == dst->getBase()->asRegVar()->getDeclare() &&
258                     analyzed_src->getSubRegOff() == dst->getSubRegOff() &&
259                     !analyzed_src->getRegion()->isRegionWH())
260                 {
261                     tmpDcl = declares_created[j];
262                     match_found = true;
263                 }
264             }
265 
266             if (!match_found)
267             {
268                 tmpDcl = createNewTempAddrDeclare(spDcl);
269                 //
270                 // generate mov Tmp(0,0)<1>  SPILL_LOC_V100(0,0)
271                 //
272                 genRegMov(bb, it,
273                     spDcl->getRegVar(), 0,
274                     tmpDcl->getRegVar(),
275                     tmpDcl->getNumElems());
276             }
277 
278             G4_DstRegRegion rgn(*dst, tmpDcl->getRegVar()); // using tmpDcl as new base
279             G4_DstRegRegion* d = match_found ? builder.createDstWithNewSubRegOff(&rgn, 0) : builder.createDstRegRegion(rgn);
280             inst->setDest(d);
281 
282             if (!match_found)
283             {
284                 pointsToAnalysis.insertAndMergeFilledAddr(dst->getBase()->asRegVar(), tmpDcl->getRegVar());
285             }
286         }
287         else
288             MUST_BE_TRUE(false, "Unknown reg access");
289     }
290 }
291 //
292 // check if src is spilled and insert spill code to load spilled value
293 //
replaceSpilledSrc(G4_BB * bb,INST_LIST_ITER it,G4_INST * inst,unsigned i,G4_Operand ** operands_analyzed,G4_Declare ** declares_created)294 void SpillManager::replaceSpilledSrc(G4_BB* bb,
295                                      INST_LIST_ITER it, // where new insts will be inserted
296                                      G4_INST*       inst,
297                                      unsigned       i,
298                                      G4_Operand ** operands_analyzed,
299                                      G4_Declare ** declares_created)
300 {
301     G4_Operand* src = inst->getSrc(i);
302     if (src == NULL)
303         return;
304     //
305     // go ahead replace src (addr reg) with spill GRF
306     //
307 
308     if (src->isSrcRegRegion() &&
309         src->asSrcRegRegion()->getBase()->isRegAllocPartaker() &&
310         src->asSrcRegRegion()->getBase()->asRegVar()->getDeclare()->getSpilledDeclare() != NULL) // spilled base
311     {
312         // create a src region with spill loc
313         // original src region  V100_uw(0,0)<0;1,0>:uw
314         // new src region SP_uw(0,0)<0;1,0>:uw
315         G4_SrcRegRegion* ss = src->asSrcRegRegion();
316         G4_Declare* spDcl = ss->getBase()->asRegVar()->getDeclare()->getSpilledDeclare();
317         if (ss->getRegAccess() == Direct)
318         {
319             G4_SrcRegRegion* s;
320             if (inst->isSplitSend() && i == 3)
321             {
322                 G4_Declare* tmpDcl = createNewTempAddrDeclare(spDcl, 1);
323                 tmpDcl->setSubRegAlign(Four_Word);
324                 gra.setSubRegAlign(tmpDcl, Four_Word);
325                 // (W) mov (1) tmpDcl<1>:ud spDcl<0;1,0>:ud
326                 auto movSrc = builder.createSrcRegRegion(spDcl, builder.getRegionScalar());
327                 auto movDst = builder.createDstRegRegion(tmpDcl, 1);
328                 G4_INST* movInst = builder.createMov(g4::SIMD1, movDst, movSrc, InstOpt_WriteEnable, false);
329                 bb->insertBefore(it, movInst);
330 
331                 s = builder.createSrc(
332                     tmpDcl->getRegVar(),
333                     0,
334                     0,
335                     ss->getRegion(),
336                     spDcl->getElemType());
337                 inst->setSrc(s, i);
338             }
339             else
340             {
341                 s = builder.createSrcWithNewBase(ss, spDcl->getRegVar()); // using spDcl as new base
342             }
343             inst->setSrc(s,i);
344         }
345         else if (ss->getRegAccess() == IndirGRF)
346         {
347             // add (2)  V124_f(0,0)<1>:f  r[V100_uw(0,0),0]<4;2,2>:f 1
348             // indirect access' base must be addr reg so we need to create a temp addr live range
349             // to load value from V100's spill loc
350             // e.g.,   mov (1) T(0,0)<1>:uw SPILL_LOC_V100_uw(0,0)<0;1,0>:uw
351             //         add (2)  V124_f(0,0)<1>:f  r[T(0,0),0]<4;2,2>:f 1
352             //
353             // create declare for temp addr live range
354             //
355 
356             uint16_t num_reg = 1;
357             //if access is VxH copy number of addresses based on execution size of instruction
358             if (ss->getRegion()->isRegionWH())
359             {
360                 num_reg = inst->getExecSize();
361             }
362 
363             G4_Declare* tmpDcl = NULL;
364             bool match_found = false;
365 
366             for (unsigned int j = 0; j < i; j++)
367             {
368                 G4_SrcRegRegion* analyzed_src = (G4_SrcRegRegion*)operands_analyzed[j];
369                 if (analyzed_src != NULL &&
370                     analyzed_src->getBase()->asRegVar()->getDeclare() == ss->getBase()->asRegVar()->getDeclare() &&
371                     analyzed_src->getSubRegOff() == ss->getSubRegOff())
372                 {
373                     tmpDcl = declares_created[j];
374                     match_found = true;
375                 }
376             }
377 
378             if (!match_found)
379             {
380                 tmpDcl = createNewTempAddrDeclare(spDcl, num_reg);
381                 operands_analyzed[i] = ss;
382                 declares_created[i] = tmpDcl;
383 
384                 //
385                 // generate mov Tmp(0,0)<1>  SPILL_LOC_V100(0,0)
386                 //
387                 genRegMov(bb, it,
388                     spDcl->getRegVar(), ss->getSubRegOff(),
389                     tmpDcl->getRegVar(),
390                     tmpDcl->getNumElems(), builder.getPlatform() >= GENX_ICLLP ? false : true);
391             }
392 
393             // create new src from the temp address variable, with offset 0
394             auto s = builder.createIndirectSrc(ss->getModifier(), tmpDcl->getRegVar(), ss->getRegOff(), 0,
395                 ss->getRegion(), ss->getType(), ss->getAddrImm());
396             inst->setSrc(s, i);
397             if (!match_found)
398             {
399                 pointsToAnalysis.insertAndMergeFilledAddr(ss->getBase()->asRegVar(), tmpDcl->getRegVar());
400             }
401         }
402         else
403             MUST_BE_TRUE(false, "Unknown reg access");
404     }
405 }
406 
407 //
408 // check if predicate is spilled & insert fill code
409 //
replaceSpilledPredicate(G4_BB * bb,INST_LIST_ITER it,G4_INST * inst)410 void SpillManager::replaceSpilledPredicate(G4_BB* bb,
411                                            INST_LIST_ITER it, // where new insts will be inserted
412                                            G4_INST*       inst)
413 {
414     G4_Predicate* predicate = inst->getPredicate();
415     if (predicate == NULL)
416         return;
417 
418     G4_VarBase *flagReg = predicate->getBase();
419     if (flagReg->asRegVar()->isRegAllocPartaker())
420     {
421         G4_Declare* flagDcl = flagReg->asRegVar()->getDeclare();
422         G4_Declare* spDcl = flagDcl->getSpilledDeclare();
423         if (spDcl != NULL)
424         {
425             G4_Declare* tmpDcl = createNewTempFlagDeclare(flagDcl);
426             genRegMov(bb, it,
427                       spDcl->getRegVar(), 0,
428                       tmpDcl->getRegVar(),
429                       tmpDcl->getNumElems());
430             G4_Predicate *new_pred = builder.createPredicate(predicate->getState(), tmpDcl->getRegVar(), 0, predicate->getControl());
431             inst->setPredicate(new_pred);
432             ++numFlagSpillLoad;
433         }
434     }
435 }
436 //
437 // check if flag dst is spilled and insert spill code
438 //
replaceSpilledFlagDst(G4_BB * bb,INST_LIST_ITER it,G4_INST * inst)439 void SpillManager::replaceSpilledFlagDst(G4_BB*         bb,
440                                          INST_LIST_ITER it, // where new insts will be inserted
441                                          G4_INST*       inst)
442 {
443     G4_CondMod* mod = inst->getCondMod();
444     if (mod == NULL)
445         return;
446 
447     G4_VarBase *flagReg = mod->getBase();
448     if (flagReg != NULL && flagReg->asRegVar()->isRegAllocPartaker())
449     {
450         G4_Declare* flagDcl = flagReg->asRegVar()->getDeclare();
451         G4_Declare* spDcl = flagDcl->getSpilledDeclare();
452         if (spDcl != NULL)
453         {
454             G4_Declare* tmpDcl;
455             G4_Predicate* predicate = inst->getPredicate();
456 
457             if (predicate != NULL)
458             {
459                 G4_VarBase *flagReg = predicate->getBase();
460                 tmpDcl = flagReg->asRegVar()->getDeclare();
461             }
462             else
463             {
464                 tmpDcl = createNewTempFlagDeclare(flagDcl);
465             }
466 
467             // Need to pre-load the spill GRF if the inst isn't going to write the full
468             // spilled GRF variable.
469             if (flagDcl->getNumberFlagElements() > inst->getExecSize() ||
470                 (!bb->isAllLaneActive() && !inst->isWriteEnableInst()))
471             {
472                 genRegMov(bb, it,
473                     spDcl->getRegVar(), 0,
474                     tmpDcl->getRegVar(),
475                     tmpDcl->getNumElems());
476                 ++numFlagSpillLoad;
477             }
478 
479             G4_CondMod *newCondMod = builder.createCondMod(mod->getMod(), tmpDcl->getRegVar(), 0);
480 
481             inst->setCondMod(newCondMod);
482 
483             genRegMov(bb, ++it,
484                       tmpDcl->getRegVar(), 0,
485                       spDcl->getRegVar(),
486                       tmpDcl->getNumElems());
487             ++numFlagSpillStore;
488         }
489     }
490 }
491 
492 //
493 // go over all declares and allocate spill locations
494 //
createSpillLocations(const G4_Kernel & kernel)495 void SpillManager::createSpillLocations(const G4_Kernel& kernel)
496 {
497     // set spill flag to indicate which vars are spilled
498     for (const LiveRange* lr : spilledLRs)
499     {
500         G4_Declare* dcl = lr->getVar()->getDeclare();
501         dcl->setSpillFlag();
502         MUST_BE_TRUE(lr->getPhyReg() == NULL, "Spilled Live Range shouldn't have physical reg");
503         MUST_BE_TRUE(lr->getSpillCost() < MAXSPILLCOST, "ERROR: spill live range with infinite spill cost");
504         // create spill loc for holding spilled addr regs
505         createNewSpillLocDeclare(dcl);
506     }
507     // take care of alias declares
508     for (G4_Declare* dcl : kernel.Declares)
509     {
510         if (!dcl->getRegVar()->isRegAllocPartaker()) // skip non reg alloc candidate
511             continue;
512 
513         if (dcl->getAliasDeclare() != NULL &&    // dcl is not a representative declare
514             dcl->getAliasDeclare()->isSpilled()) // dcl's representative decl is spilled
515         {
516             G4_Declare* sp = createNewSpillLocDeclare(dcl);
517             // when doing RA multiple times (due to spill code), we don't want to set alias
518             // information more than once
519             if (sp->getAliasDeclare() == NULL)
520             {
521                 sp->setAliasDeclare(dcl->getAliasDeclare()->getSpilledDeclare(), dcl->getAliasOffset());
522             }
523         }
524     }
525 }
526 
isSpillCandidateForLifetimeOpRemoval(G4_INST * inst)527 bool isSpillCandidateForLifetimeOpRemoval(G4_INST* inst)
528 {
529     if (inst->isPseudoKill())
530     {
531         return inst->getDst()->isSpilled();
532     }
533     else if (inst->isLifeTimeEnd())
534     {
535         return inst->getSrc(0)->asSrcRegRegion()->isSpilled();
536     }
537 
538     return false;
539 }
540 
insertSpillCode()541 void SpillManager::insertSpillCode()
542 {
543     //
544     // create spill locations
545     //
546     createSpillLocations(kernel);
547 
548     for (G4_BB* bb : kernel.fg)
549     {
550         bbId = bb->getId();
551         //
552         // handle spill code for the current BB
553         //
554 
555         // In one iteration remove all spilled lifetime.start/end
556         // ops.
557         bb->erase(
558             std::remove_if(bb->begin(), bb->end(), isSpillCandidateForLifetimeOpRemoval),
559             bb->end());
560 
561         for (INST_LIST_ITER inst_it = bb->begin(); inst_it != bb->end();)
562         {
563             G4_INST* inst = *inst_it;
564 
565             currCISAOffset = inst->getCISAOff();
566 
567             G4_Operand * operands_analyzed[G4_MAX_SRCS] = {NULL, NULL, NULL};
568             G4_Declare * declares_created[G4_MAX_SRCS] = {NULL, NULL, NULL};
569             // insert spill inst for spilled srcs
570             for (unsigned i = 0; i < G4_MAX_SRCS; i++)
571             {
572                 replaceSpilledSrc(bb, inst_it, inst, i, operands_analyzed, declares_created);
573             }
574             // insert spill inst for spilled dst
575             replaceSpilledDst(bb, inst_it, inst, operands_analyzed, declares_created);
576 
577             //
578             // Process predicate
579             //
580             G4_Predicate* predicate = inst->getPredicate();
581             if (predicate != NULL) {
582                 replaceSpilledPredicate(bb, inst_it, inst);
583             }
584 
585             //
586             // Process condMod
587             //
588             G4_CondMod* mod = inst->getCondMod();
589             if (mod != NULL &&
590                 mod->getBase() != NULL) {
591                 replaceSpilledFlagDst(bb, inst_it, inst);
592             }
593             inst_it++;
594         }
595         bbId = UINT_MAX;
596     }
597 
598 }
599