1 /*========================== begin_copyright_notice ============================
2
3 Copyright (C) 2017-2021 Intel Corporation
4
5 SPDX-License-Identifier: MIT
6
7 ============================= end_copyright_notice ===========================*/
8
9 #include <vector>
10 #include "Mem_Manager.h"
11 #include "FlowGraph.h"
12 #include "BuildIR.h"
13 #include "SpillCode.h"
14
15 #ifdef _DEBUG
16 #define _DEBUG_SPILL
17 #endif
18
19 using namespace vISA;
20
21 void splice(G4_BB* bb, INST_LIST_ITER iter, INST_LIST& instList, unsigned int CISAOff);
22
23 //
24 // create a declare to hold the spill value of var
25 //
createNewSpillLocDeclare(G4_Declare * dcl)26 G4_Declare* SpillManager::createNewSpillLocDeclare(G4_Declare* dcl)
27 {
28
29 if (dcl->getRegFile() == G4_FLAG)
30 {
31 MUST_BE_TRUE(dcl->getElemType() == Type_UW || dcl->getElemType() == Type_W, "flag reg's type should be UW");
32 MUST_BE_TRUE(dcl->getNumElems() <= builder.getNumFlagRegisters(), "Flag reg Spill size exceeds limit");
33 }
34 else
35 {
36 // if we are dealing with type other than UW, e.g., B, then we need to
37 // take care different data type reg moves of spill code. For now, just
38 // assume data types of addr reg are UW
39 //
40 G4_Type type = dcl->getElemType();
41 MUST_BE_TRUE(type == Type_UW ||
42 type == Type_W ||
43 type == Type_UD ||
44 type == Type_D, "addr reg's type should be UW or UD");
45 MUST_BE_TRUE(dcl->getNumElems() <= getNumAddrRegisters(), "Addr reg Spill size exceeds 16 bytes");
46 }
47
48 G4_Declare* sp = dcl->getSpilledDeclare();
49 if (sp == NULL) // not yet created
50 {
51 sp = builder.createAddrFlagSpillLoc(dcl);
52 gra.setBBId(sp, bbId);
53 }
54
55 return sp;
56 }
57
58 //
59 // replicate dcl for temporary use (loading value from SPILL location)
60 //
createNewTempAddrDeclare(G4_Declare * dcl)61 G4_Declare* SpillManager::createNewTempAddrDeclare(G4_Declare* dcl)
62 {
63 const char* name = builder.getNameString(builder.mem, 16, "Temp_ADDR_%d", tempDclId++);
64
65 MUST_BE_TRUE(dcl->getElemType() == Type_UW || dcl->getElemType() == Type_W, "addr reg's type should be UW");
66 MUST_BE_TRUE(dcl->getNumRows() == 1, "Temp_ADDR should be only 1 row");
67 MUST_BE_TRUE(dcl->getNumElems() <= getNumAddrRegisters(), "Temp_ADDR exceeds 16 bytes");
68 G4_Declare* sp = builder.createDeclareNoLookup(name,
69 G4_ADDRESS,
70 dcl->getNumElems(),
71 1, // 1 row
72 Type_UW);
73 gra.setBBId(sp, bbId);
74 // Live range of new temp addrs is short so that there is no point spilling them.
75 // indicate this is for newly created addr temp so that RA won't spill it
76 // in subsequent RA allocation
77 gra.addAddrFlagSpillDcl(sp);
78
79 return sp;
80 }
81
createNewTempFlagDeclare(G4_Declare * dcl)82 G4_Declare* SpillManager::createNewTempFlagDeclare(G4_Declare* dcl)
83 {
84 const char* name = builder.getNameString(builder.mem, 32, "Temp_FSPILL_%d", tempDclId++);
85
86 assert(dcl->getRegFile() == G4_FLAG && "dcl should be a flag");
87 G4_Declare* sp = builder.createFlag(dcl->getNumberFlagElements(), name);
88 gra.setBBId(sp, bbId);
89 sp->copyAlign(dcl);
90 gra.copyAlignment(sp, dcl);
91 gra.addAddrFlagSpillDcl(sp);
92
93 return sp;
94 }
95
96 //
97 // replicate dcl for temporary use (loading value from SPILL location)
98 //
createNewTempAddrDeclare(G4_Declare * dcl,uint16_t num_reg)99 G4_Declare* SpillManager::createNewTempAddrDeclare(G4_Declare* dcl, uint16_t num_reg)
100 {
101 const char* name = builder.getNameString(builder.mem, 16, "Temp_ADDR_%d", tempDclId++);
102
103 G4_Type type = dcl->getElemType();
104 MUST_BE_TRUE(type == Type_UW ||
105 type == Type_W ||
106 type == Type_UD ||
107 type == Type_D, "addr reg's type should be UW or UD");
108 MUST_BE_TRUE(dcl->getNumRows() == 1, "Temp_ADDR should be only 1 row");
109 MUST_BE_TRUE(dcl->getNumElems() <= getNumAddrRegisters(), "Temp_ADDR exceeds 16 bytes");
110 G4_Declare* sp = builder.createDeclareNoLookup(name,
111 G4_ADDRESS,
112 num_reg,
113 1, // 1 row
114 type);
115 gra.setBBId(sp, bbId);
116 // Live range of new temp addrs is short so that there is no point spilling them.
117 // indicate this is for newly created addr temp so that RA won't spill it
118 // in subsequent RA allocation
119 gra.addAddrFlagSpillDcl(sp);
120
121 return sp;
122 }
123
124 //
125 // generate a reg to reg mov inst for addr/flag spill
126 // mov dst(dRegOff,dSubRegOff)<1> src(sRegOff,sSubRegOff)<nRegs;nRegs,1>
127 //
genRegMov(G4_BB * bb,INST_LIST_ITER it,G4_VarBase * src,unsigned short sSubRegOff,G4_VarBase * dst,unsigned nRegs,bool useNoMask=true)128 void SpillManager::genRegMov(G4_BB* bb,
129 INST_LIST_ITER it,
130 G4_VarBase* src,
131 unsigned short sSubRegOff,
132 G4_VarBase* dst,
133 unsigned nRegs,
134 bool useNoMask = true)
135 {
136 builder.instList.clear();
137
138 uint16_t dSubRegOff = 0;
139 for (uint16_t i = 16; i != 0 && nRegs != 0; i >>= 1) // 16, 8, 4, 2, 1
140 {
141 if (nRegs >= i)
142 {
143 //
144 // create loc(0,locOff)<nRegs;nRegs,1> operand
145 //
146 /*
147 Flag registers should always be scalar regions
148 */
149 G4_Type type = Type_W;
150 const RegionDesc* srcRgn = NULL;
151 G4_ExecSize execSize {i};
152 if (src->isFlag() || dst->isFlag())
153 {
154
155 type = Type_UW;
156 if (i == 2)
157 {
158 type = Type_UD;
159 execSize = g4::SIMD1;
160 }
161 else if (i > 2)
162 {
163 ASSERT_USER(false, "unsupported flag width");
164 }
165
166 srcRgn = builder.getRegionScalar();
167 }
168 else
169 {
170 srcRgn = (i== 1) ? builder.getRegionScalar() : builder.getRegionStride1();
171 }
172
173 G4_SrcRegRegion* s = builder.createSrc(
174 src,
175 0,
176 sSubRegOff,
177 srcRgn,
178 type);
179 //
180 // create a0.aOff<1>
181 //
182 G4_DstRegRegion* d = builder.createDst(dst, 0, dSubRegOff, 1, type);
183
184 if (execSize != kernel.getSimdSize())
185 {
186 // NoMask must be used in this case
187 useNoMask = true;
188 }
189 // mov (nRegs) a0.aOff<1> loc(0,locOff)<4;4,1>
190 builder.createMov(execSize, d, s,
191 useNoMask ? InstOpt_WriteEnable : InstOpt_NoOpt, true);
192
193 sSubRegOff += i;
194 dSubRegOff += i;
195
196 nRegs -= i;
197 }
198 }
199 MUST_BE_TRUE(nRegs == 0, ERROR_SPILLCODE);
200
201 //
202 // insert newly created insts from builder to instList
203 //
204 splice(bb, it, builder.instList, currCISAOffset);
205 }
206 //
207 // check if dst is spilled & insert spill code
208 //
replaceSpilledDst(G4_BB * bb,INST_LIST_ITER it,G4_INST * inst,G4_Operand ** operands_analyzed,G4_Declare ** declares_created)209 void SpillManager::replaceSpilledDst(G4_BB* bb,
210 INST_LIST_ITER it, // where new insts will be inserted
211 G4_INST* inst,
212 G4_Operand ** operands_analyzed,
213 G4_Declare ** declares_created)
214 {
215 G4_DstRegRegion* dst = inst->getDst();
216 if (dst == NULL)
217 return;
218
219 if (dst->getBase()->isRegAllocPartaker() &&
220 dst->getBase()->asRegVar()->getDeclare()->getSpilledDeclare() != NULL) // spilled base
221 {
222 // create a dst region with spill loc
223 // original dst region V100_uw(0,0)<1>:uw ==>
224 // new dst region SP_uw(0,0)<1>:uw
225 G4_Declare* spDcl = dst->getBase()->asRegVar()->getDeclare()->getSpilledDeclare();
226 if (dst->getRegAccess() == Direct)
227 {
228
229 G4_DstRegRegion rgn(*dst, spDcl->getRegVar()); // using spDcl as new base
230 if (rgn.getHorzStride() == UNDEFINED_SHORT &&
231 dst->isFlag())
232 {
233 // Flag as destination has undefined hstride
234 // For replacing it with spill range, make hstride 1
235 rgn.setHorzStride(1);
236 }
237 G4_DstRegRegion* d = builder.createDstRegRegion(rgn);
238 inst->setDest(d);
239 }
240 else if (dst->getRegAccess() == IndirGRF)
241 {
242 // add (1) r[V100_uw(0,0),0]<1>:f V124_f(0,0)<0;1,0>:f 1
243 // indirect access' base must be addr reg so we need to create a temp addr live range
244 // to load value from V100's spill loc
245 // e.g., mov (1) T_uw(0,0)<1>:uw SPILL_LOC_V100_uw(0,0)<0;1,0>:uw
246 // add (1) r[T_uw(0,0),0]<1>:f V124_f(0,0)<0;1,0>:f 1
247 //
248 // create declare for temp addr live range
249 //
250 G4_Declare* tmpDcl = NULL;
251 bool match_found = false;
252
253 for (unsigned int j = 0; j < G4_MAX_SRCS; j++)
254 {
255 G4_SrcRegRegion* analyzed_src = static_cast<G4_SrcRegRegion*>(operands_analyzed[j]);
256 if (analyzed_src != NULL &&
257 analyzed_src->getBase()->asRegVar()->getDeclare() == dst->getBase()->asRegVar()->getDeclare() &&
258 analyzed_src->getSubRegOff() == dst->getSubRegOff() &&
259 !analyzed_src->getRegion()->isRegionWH())
260 {
261 tmpDcl = declares_created[j];
262 match_found = true;
263 }
264 }
265
266 if (!match_found)
267 {
268 tmpDcl = createNewTempAddrDeclare(spDcl);
269 //
270 // generate mov Tmp(0,0)<1> SPILL_LOC_V100(0,0)
271 //
272 genRegMov(bb, it,
273 spDcl->getRegVar(), 0,
274 tmpDcl->getRegVar(),
275 tmpDcl->getNumElems());
276 }
277
278 G4_DstRegRegion rgn(*dst, tmpDcl->getRegVar()); // using tmpDcl as new base
279 G4_DstRegRegion* d = match_found ? builder.createDstWithNewSubRegOff(&rgn, 0) : builder.createDstRegRegion(rgn);
280 inst->setDest(d);
281
282 if (!match_found)
283 {
284 pointsToAnalysis.insertAndMergeFilledAddr(dst->getBase()->asRegVar(), tmpDcl->getRegVar());
285 }
286 }
287 else
288 MUST_BE_TRUE(false, "Unknown reg access");
289 }
290 }
291 //
292 // check if src is spilled and insert spill code to load spilled value
293 //
replaceSpilledSrc(G4_BB * bb,INST_LIST_ITER it,G4_INST * inst,unsigned i,G4_Operand ** operands_analyzed,G4_Declare ** declares_created)294 void SpillManager::replaceSpilledSrc(G4_BB* bb,
295 INST_LIST_ITER it, // where new insts will be inserted
296 G4_INST* inst,
297 unsigned i,
298 G4_Operand ** operands_analyzed,
299 G4_Declare ** declares_created)
300 {
301 G4_Operand* src = inst->getSrc(i);
302 if (src == NULL)
303 return;
304 //
305 // go ahead replace src (addr reg) with spill GRF
306 //
307
308 if (src->isSrcRegRegion() &&
309 src->asSrcRegRegion()->getBase()->isRegAllocPartaker() &&
310 src->asSrcRegRegion()->getBase()->asRegVar()->getDeclare()->getSpilledDeclare() != NULL) // spilled base
311 {
312 // create a src region with spill loc
313 // original src region V100_uw(0,0)<0;1,0>:uw
314 // new src region SP_uw(0,0)<0;1,0>:uw
315 G4_SrcRegRegion* ss = src->asSrcRegRegion();
316 G4_Declare* spDcl = ss->getBase()->asRegVar()->getDeclare()->getSpilledDeclare();
317 if (ss->getRegAccess() == Direct)
318 {
319 G4_SrcRegRegion* s;
320 if (inst->isSplitSend() && i == 3)
321 {
322 G4_Declare* tmpDcl = createNewTempAddrDeclare(spDcl, 1);
323 tmpDcl->setSubRegAlign(Four_Word);
324 gra.setSubRegAlign(tmpDcl, Four_Word);
325 // (W) mov (1) tmpDcl<1>:ud spDcl<0;1,0>:ud
326 auto movSrc = builder.createSrcRegRegion(spDcl, builder.getRegionScalar());
327 auto movDst = builder.createDstRegRegion(tmpDcl, 1);
328 G4_INST* movInst = builder.createMov(g4::SIMD1, movDst, movSrc, InstOpt_WriteEnable, false);
329 bb->insertBefore(it, movInst);
330
331 s = builder.createSrc(
332 tmpDcl->getRegVar(),
333 0,
334 0,
335 ss->getRegion(),
336 spDcl->getElemType());
337 inst->setSrc(s, i);
338 }
339 else
340 {
341 s = builder.createSrcWithNewBase(ss, spDcl->getRegVar()); // using spDcl as new base
342 }
343 inst->setSrc(s,i);
344 }
345 else if (ss->getRegAccess() == IndirGRF)
346 {
347 // add (2) V124_f(0,0)<1>:f r[V100_uw(0,0),0]<4;2,2>:f 1
348 // indirect access' base must be addr reg so we need to create a temp addr live range
349 // to load value from V100's spill loc
350 // e.g., mov (1) T(0,0)<1>:uw SPILL_LOC_V100_uw(0,0)<0;1,0>:uw
351 // add (2) V124_f(0,0)<1>:f r[T(0,0),0]<4;2,2>:f 1
352 //
353 // create declare for temp addr live range
354 //
355
356 uint16_t num_reg = 1;
357 //if access is VxH copy number of addresses based on execution size of instruction
358 if (ss->getRegion()->isRegionWH())
359 {
360 num_reg = inst->getExecSize();
361 }
362
363 G4_Declare* tmpDcl = NULL;
364 bool match_found = false;
365
366 for (unsigned int j = 0; j < i; j++)
367 {
368 G4_SrcRegRegion* analyzed_src = (G4_SrcRegRegion*)operands_analyzed[j];
369 if (analyzed_src != NULL &&
370 analyzed_src->getBase()->asRegVar()->getDeclare() == ss->getBase()->asRegVar()->getDeclare() &&
371 analyzed_src->getSubRegOff() == ss->getSubRegOff())
372 {
373 tmpDcl = declares_created[j];
374 match_found = true;
375 }
376 }
377
378 if (!match_found)
379 {
380 tmpDcl = createNewTempAddrDeclare(spDcl, num_reg);
381 operands_analyzed[i] = ss;
382 declares_created[i] = tmpDcl;
383
384 //
385 // generate mov Tmp(0,0)<1> SPILL_LOC_V100(0,0)
386 //
387 genRegMov(bb, it,
388 spDcl->getRegVar(), ss->getSubRegOff(),
389 tmpDcl->getRegVar(),
390 tmpDcl->getNumElems(), builder.getPlatform() >= GENX_ICLLP ? false : true);
391 }
392
393 // create new src from the temp address variable, with offset 0
394 auto s = builder.createIndirectSrc(ss->getModifier(), tmpDcl->getRegVar(), ss->getRegOff(), 0,
395 ss->getRegion(), ss->getType(), ss->getAddrImm());
396 inst->setSrc(s, i);
397 if (!match_found)
398 {
399 pointsToAnalysis.insertAndMergeFilledAddr(ss->getBase()->asRegVar(), tmpDcl->getRegVar());
400 }
401 }
402 else
403 MUST_BE_TRUE(false, "Unknown reg access");
404 }
405 }
406
407 //
408 // check if predicate is spilled & insert fill code
409 //
replaceSpilledPredicate(G4_BB * bb,INST_LIST_ITER it,G4_INST * inst)410 void SpillManager::replaceSpilledPredicate(G4_BB* bb,
411 INST_LIST_ITER it, // where new insts will be inserted
412 G4_INST* inst)
413 {
414 G4_Predicate* predicate = inst->getPredicate();
415 if (predicate == NULL)
416 return;
417
418 G4_VarBase *flagReg = predicate->getBase();
419 if (flagReg->asRegVar()->isRegAllocPartaker())
420 {
421 G4_Declare* flagDcl = flagReg->asRegVar()->getDeclare();
422 G4_Declare* spDcl = flagDcl->getSpilledDeclare();
423 if (spDcl != NULL)
424 {
425 G4_Declare* tmpDcl = createNewTempFlagDeclare(flagDcl);
426 genRegMov(bb, it,
427 spDcl->getRegVar(), 0,
428 tmpDcl->getRegVar(),
429 tmpDcl->getNumElems());
430 G4_Predicate *new_pred = builder.createPredicate(predicate->getState(), tmpDcl->getRegVar(), 0, predicate->getControl());
431 inst->setPredicate(new_pred);
432 ++numFlagSpillLoad;
433 }
434 }
435 }
436 //
437 // check if flag dst is spilled and insert spill code
438 //
replaceSpilledFlagDst(G4_BB * bb,INST_LIST_ITER it,G4_INST * inst)439 void SpillManager::replaceSpilledFlagDst(G4_BB* bb,
440 INST_LIST_ITER it, // where new insts will be inserted
441 G4_INST* inst)
442 {
443 G4_CondMod* mod = inst->getCondMod();
444 if (mod == NULL)
445 return;
446
447 G4_VarBase *flagReg = mod->getBase();
448 if (flagReg != NULL && flagReg->asRegVar()->isRegAllocPartaker())
449 {
450 G4_Declare* flagDcl = flagReg->asRegVar()->getDeclare();
451 G4_Declare* spDcl = flagDcl->getSpilledDeclare();
452 if (spDcl != NULL)
453 {
454 G4_Declare* tmpDcl;
455 G4_Predicate* predicate = inst->getPredicate();
456
457 if (predicate != NULL)
458 {
459 G4_VarBase *flagReg = predicate->getBase();
460 tmpDcl = flagReg->asRegVar()->getDeclare();
461 }
462 else
463 {
464 tmpDcl = createNewTempFlagDeclare(flagDcl);
465 }
466
467 // Need to pre-load the spill GRF if the inst isn't going to write the full
468 // spilled GRF variable.
469 if (flagDcl->getNumberFlagElements() > inst->getExecSize() ||
470 (!bb->isAllLaneActive() && !inst->isWriteEnableInst()))
471 {
472 genRegMov(bb, it,
473 spDcl->getRegVar(), 0,
474 tmpDcl->getRegVar(),
475 tmpDcl->getNumElems());
476 ++numFlagSpillLoad;
477 }
478
479 G4_CondMod *newCondMod = builder.createCondMod(mod->getMod(), tmpDcl->getRegVar(), 0);
480
481 inst->setCondMod(newCondMod);
482
483 genRegMov(bb, ++it,
484 tmpDcl->getRegVar(), 0,
485 spDcl->getRegVar(),
486 tmpDcl->getNumElems());
487 ++numFlagSpillStore;
488 }
489 }
490 }
491
492 //
493 // go over all declares and allocate spill locations
494 //
createSpillLocations(const G4_Kernel & kernel)495 void SpillManager::createSpillLocations(const G4_Kernel& kernel)
496 {
497 // set spill flag to indicate which vars are spilled
498 for (const LiveRange* lr : spilledLRs)
499 {
500 G4_Declare* dcl = lr->getVar()->getDeclare();
501 dcl->setSpillFlag();
502 MUST_BE_TRUE(lr->getPhyReg() == NULL, "Spilled Live Range shouldn't have physical reg");
503 MUST_BE_TRUE(lr->getSpillCost() < MAXSPILLCOST, "ERROR: spill live range with infinite spill cost");
504 // create spill loc for holding spilled addr regs
505 createNewSpillLocDeclare(dcl);
506 }
507 // take care of alias declares
508 for (G4_Declare* dcl : kernel.Declares)
509 {
510 if (!dcl->getRegVar()->isRegAllocPartaker()) // skip non reg alloc candidate
511 continue;
512
513 if (dcl->getAliasDeclare() != NULL && // dcl is not a representative declare
514 dcl->getAliasDeclare()->isSpilled()) // dcl's representative decl is spilled
515 {
516 G4_Declare* sp = createNewSpillLocDeclare(dcl);
517 // when doing RA multiple times (due to spill code), we don't want to set alias
518 // information more than once
519 if (sp->getAliasDeclare() == NULL)
520 {
521 sp->setAliasDeclare(dcl->getAliasDeclare()->getSpilledDeclare(), dcl->getAliasOffset());
522 }
523 }
524 }
525 }
526
isSpillCandidateForLifetimeOpRemoval(G4_INST * inst)527 bool isSpillCandidateForLifetimeOpRemoval(G4_INST* inst)
528 {
529 if (inst->isPseudoKill())
530 {
531 return inst->getDst()->isSpilled();
532 }
533 else if (inst->isLifeTimeEnd())
534 {
535 return inst->getSrc(0)->asSrcRegRegion()->isSpilled();
536 }
537
538 return false;
539 }
540
insertSpillCode()541 void SpillManager::insertSpillCode()
542 {
543 //
544 // create spill locations
545 //
546 createSpillLocations(kernel);
547
548 for (G4_BB* bb : kernel.fg)
549 {
550 bbId = bb->getId();
551 //
552 // handle spill code for the current BB
553 //
554
555 // In one iteration remove all spilled lifetime.start/end
556 // ops.
557 bb->erase(
558 std::remove_if(bb->begin(), bb->end(), isSpillCandidateForLifetimeOpRemoval),
559 bb->end());
560
561 for (INST_LIST_ITER inst_it = bb->begin(); inst_it != bb->end();)
562 {
563 G4_INST* inst = *inst_it;
564
565 currCISAOffset = inst->getCISAOff();
566
567 G4_Operand * operands_analyzed[G4_MAX_SRCS] = {NULL, NULL, NULL};
568 G4_Declare * declares_created[G4_MAX_SRCS] = {NULL, NULL, NULL};
569 // insert spill inst for spilled srcs
570 for (unsigned i = 0; i < G4_MAX_SRCS; i++)
571 {
572 replaceSpilledSrc(bb, inst_it, inst, i, operands_analyzed, declares_created);
573 }
574 // insert spill inst for spilled dst
575 replaceSpilledDst(bb, inst_it, inst, operands_analyzed, declares_created);
576
577 //
578 // Process predicate
579 //
580 G4_Predicate* predicate = inst->getPredicate();
581 if (predicate != NULL) {
582 replaceSpilledPredicate(bb, inst_it, inst);
583 }
584
585 //
586 // Process condMod
587 //
588 G4_CondMod* mod = inst->getCondMod();
589 if (mod != NULL &&
590 mod->getBase() != NULL) {
591 replaceSpilledFlagDst(bb, inst_it, inst);
592 }
593 inst_it++;
594 }
595 bbId = UINT_MAX;
596 }
597
598 }
599