1 /*========================== begin_copyright_notice ============================
2
3 Copyright (C) 2017-2021 Intel Corporation
4
5 SPDX-License-Identifier: MIT
6
7 ============================= end_copyright_notice ===========================*/
8
9 #include "DebugInfo.h"
10 #include "Common_ISA.h"
11 #include "G4_IR.hpp"
12 #include "FlowGraph.h"
13 #include "BuildIR.h"
14 #include "Common_ISA_framework.h"
15 #include "VISAKernel.h"
16 #include "BitSet.h"
17
18 #include <map>
19
20 using namespace vISA;
21
22 using std::fclose;
23 using std::fopen;
24 using std::fwrite;
25 using std::FILE;
26
27 uint32_t getBinInstSize(G4_INST* inst);
28
// Interprets the low 31 bits of `data` as a two's-complement signed integer
// and returns it sign-extended to 32 bits.
//
// Bit 31 of the input is discarded (callers in this file use it as a separate
// flag); bit 30 is the sign bit of the 31-bit payload.
int32_t get32BitSignedIntFrom31BitSignedInt(uint32_t data)
{
    const uint32_t payloadMask = 0x7FFFFFFFu; // bits 0..30
    const uint32_t payloadSignBit = 0x40000000u; // bit 30

    const uint32_t payload = data & payloadMask;

    // Sign-extend with xor/subtract. Both operands fit in a non-negative
    // int32_t, so this avoids shifting negative values and never overflows.
    return static_cast<int32_t>(payload ^ payloadSignBit) -
        static_cast<int32_t>(payloadSignBit);
}
43
ddName()44 void DbgDecoder::ddName()
45 {
46 uint16_t nameLen;
47 auto retval = fread(&nameLen, sizeof(uint16_t), 1, dbgFile);
48 if (!retval)
49 return;
50
51 auto name = (char*)malloc(nameLen + 1);
52 retval = fread(name, sizeof(uint8_t), nameLen, dbgFile);
53 if (!retval)
54 {
55 free(name);
56 return;
57 }
58
59 name[nameLen] = 0;
60
61 std::cout << name;
62
63 free(name);
64 }
65
66 template<class T>
ddLiveInterval()67 void DbgDecoder::ddLiveInterval()
68 {
69 // Dump live-interval info
70 uint16_t numLiveIntervals;
71 T start, end;
72 auto retval = fread(&numLiveIntervals, sizeof(uint16_t), 1, dbgFile);
73 if (!retval)
74 return;
75
76 std::cout << "\tLive intervals: \n";
77 for (uint16_t i = 0; i < numLiveIntervals; i++)
78 {
79 retval = fread(&start, sizeof(T), 1, dbgFile);
80 if (!retval)
81 return;
82
83 retval = fread(&end, sizeof(T), 1, dbgFile);
84 if (!retval)
85 return;
86
87 std::cout << "(" << start << ", " << end << ") @ ";
88
89 uint8_t virtualType;
90 retval = fread(&virtualType, sizeof(uint8_t), 1, dbgFile);
91 if (!retval)
92 return;
93
94 if (virtualType == VARMAP_VREG_FILE_ADDRESS)
95 {
96 std::cout << "\t";
97 }
98 else if (virtualType == VARMAP_VREG_FILE_FLAG)
99 {
100 std::cout << "\t";
101 }
102 else if (virtualType == VARMAP_VREG_FILE_GRF)
103 {
104 std::cout << "\t";
105 }
106 else
107 {
108 MUST_BE_TRUE(false, "Unknown virtual type found");
109 }
110
111 uint8_t physicalType;
112 retval = fread(&physicalType, sizeof(uint8_t), 1, dbgFile);
113 if (!retval)
114 return;
115
116 if (physicalType == VARMAP_PREG_FILE_ADDRESS)
117 {
118 std::cout << "a";
119 }
120 else if (physicalType == VARMAP_PREG_FILE_FLAG)
121 {
122 std::cout << "f";
123 }
124 else if (physicalType == VARMAP_PREG_FILE_GRF)
125 {
126 std::cout << "r";
127 }
128 else if (physicalType == VARMAP_PREG_FILE_MEMORY)
129 {
130 std::cout << "Spilled";
131 }
132 else
133 {
134 MUST_BE_TRUE(false, "Unknown physical type found");
135 }
136
137 if (physicalType == VARMAP_PREG_FILE_MEMORY)
138 {
139 uint32_t memoryOffset;
140 bool isAbsoluteOffset = false;
141 retval = fread(&memoryOffset, sizeof(uint32_t), 1, dbgFile);
142 if (!retval)
143 return;
144
145 if (memoryOffset & 0x80000000)
146 {
147 isAbsoluteOffset = true;
148 }
149
150 std::cout << " (offset = " << get32BitSignedIntFrom31BitSignedInt(memoryOffset) << " bytes)" <<
151 (isAbsoluteOffset ? " (absolute offset)" : " (off be_fp)") <<
152 "\n";
153 }
154 else
155 {
156 uint16_t regNum, subRegNum;
157 retval = fread(®Num, sizeof(uint16_t), 1, dbgFile);
158 if (!retval)
159 return;
160
161 retval = fread(&subRegNum, sizeof(uint16_t), 1, dbgFile);
162 if (!retval)
163 return;
164
165 std::cout << regNum << "." << subRegNum;
166
167 if (physicalType == VARMAP_PREG_FILE_GRF)
168 {
169 std::cout << ":ub";
170 }
171
172 std::cout << "\n";
173
174 }
175 }
176 std::cout << "\n";
177 }
178
ddCalleeCallerSave(uint32_t relocOffset)179 void DbgDecoder::ddCalleeCallerSave(uint32_t relocOffset)
180 {
181 uint16_t num;
182
183 if (feof(dbgFile))
184 {
185 return;
186 }
187
188 auto retval = fread(&num, sizeof(uint16_t), 1, dbgFile);
189 if (!retval)
190 return;
191
192 for (uint32_t i = 0; i < num; i++)
193 {
194 uint32_t genOffset;
195 retval = fread(&genOffset, sizeof(uint32_t), 1, dbgFile);
196 if (!retval)
197 return;
198
199 std::cout << "Gen ISA offset: " << genOffset << "\n";
200
201 uint16_t numElems;
202 retval = fread(&numElems, sizeof(uint16_t), 1, dbgFile);
203 if (!retval)
204 return;
205
206 for (uint32_t j = 0; j < numElems; j++)
207 {
208 uint16_t srcReg, numBytes;
209 retval = fread(&srcReg, sizeof(uint16_t), 1, dbgFile);
210 if (!retval)
211 return;
212
213 retval = fread(&numBytes, sizeof(uint16_t), 1, dbgFile);
214 if (!retval)
215 return;
216
217 uint8_t subReg = srcReg%numEltPerGRF<Type_UB>();
218 MUST_BE_TRUE(subReg == 0, "Not expecting non-zero sub-reg in callee/caller save");
219 std::cout << "\tr" << (srcReg) / numEltPerGRF<Type_UB>() << "." <<
220 (uint32_t)subReg << ":ub (" << numBytes << " bytes) -> ";
221
222 uint8_t dstInReg;
223 retval = fread(&dstInReg, sizeof(uint8_t), 1, dbgFile);
224 if (!retval)
225 return;
226
227 if (dstInReg)
228 {
229 uint16_t reg, subreg;
230 retval = fread(®, sizeof(uint16_t), 1, dbgFile);
231 if (!retval)
232 return;
233
234 retval = fread(&subreg, sizeof(uint16_t), 1, dbgFile);
235 if (!retval)
236 return;
237
238 std::cout << "r" << reg << "." << subreg << ":ub" << "\n";
239 }
240 else
241 {
242 uint32_t memOffset;
243 retval = fread(&memOffset, sizeof(uint32_t), 1, dbgFile);
244 if (!retval)
245 return;
246
247 if (memOffset & 0x80000000)
248 {
249 std::cout << get32BitSignedIntFrom31BitSignedInt(memOffset);
250 }
251 else
252 {
253 std::cout << "BE_FP + " << memOffset;
254 }
255
256 std::cout << " bytes" << "\n";
257 }
258 }
259 }
260 }
261
ddDbg()262 int DbgDecoder::ddDbg()
263 {
264 dbgFile = fopen(filename, "rb");
265
266 if (!dbgFile)
267 {
268 std::cerr << "Error opening and creating debug file: " << filename << "\n";
269 ASSERT_USER(false, "Unable to wrie debug file to disk.");
270 return -1;
271 }
272
273 uint32_t magic;
274 auto retval = fread(&magic, sizeof(uint32_t), 1, dbgFile);
275 if (!retval)
276 return -1;
277
278 std::cout << "=== Start of Debug Dump ===" << "\n";
279 std::cout << "Magic: " << "0x" << std::hex << magic << std::dec << "\n";
280 if (magic != DEBUG_MAGIC_NUMBER)
281 {
282 std::cout << "************ Magic expected = " << "0x" << std::hex << DEBUG_MAGIC_NUMBER << std::dec << " *************" << "\n";
283
284 fclose(dbgFile);
285
286 return -1;
287 }
288
289 uint16_t numCompiledObjects;
290 retval = fread(&numCompiledObjects, sizeof(uint16_t), 1, dbgFile);
291 if (!retval)
292 return -1;
293
294 std::cout << "Number of compiled objects: " << numCompiledObjects << "\n\n";
295
296 for (unsigned int i = 0; i < numCompiledObjects; i++)
297 {
298 std::cout << "Current compiled object index: " << i << "\n";
299
300 std::cout << "Kernel name: ";
301 ddName();
302 std::cout << "\n";
303
304 uint32_t reloc_offset;
305 retval = fread(&reloc_offset, sizeof(uint32_t), 1, dbgFile);
306 if (!retval)
307 return -1;
308
309 if (reloc_offset == 0)
310 {
311 std::cout << "(kernel)\n";
312 }
313 else
314 {
315 std::cout << "(function binary @ gen offset " << reloc_offset << " bytes)" << "\n";
316 }
317
318 uint32_t numElementsCISAOffsetMap;
319 retval = fread(&numElementsCISAOffsetMap, sizeof(uint32_t), 1, dbgFile);
320 if (!retval)
321 return -1;
322
323 std::cout << "CISA byte offset -> Gen byte offset mapping\n";
324
325 for (unsigned int j = 0; j < numElementsCISAOffsetMap; j++)
326 {
327 uint32_t cisaOffset, genOffset;
328 retval = fread(&cisaOffset, sizeof(uint32_t), 1, dbgFile);
329 if (!retval)
330 return -1;
331
332 retval = fread(&genOffset, sizeof(uint32_t), 1, dbgFile);
333 if (!retval)
334 return -1;
335
336 std::cout << cisaOffset << "\t" << genOffset << "\n";
337 }
338
339 std::cout << "\n";
340
341 uint32_t numElementsCISAIndexMap;
342 retval = fread(&numElementsCISAIndexMap, sizeof(uint32_t), 1, dbgFile);
343 if (!retval)
344 return -1;
345
346 std::cout << "CISA index -> Gen byte offset mapping\n";
347
348 for (unsigned int j = 0; j < numElementsCISAIndexMap; j++)
349 {
350 uint32_t cisaIndex, genOffset;
351 retval = fread(&cisaIndex, sizeof(uint32_t), 1, dbgFile);
352 if (!retval)
353 return -1;
354
355 retval = fread(&genOffset, sizeof(uint32_t), 1, dbgFile);
356 if (!retval)
357 return -1;
358
359 std::cout << cisaIndex << "\t" << genOffset << "\n";
360 }
361
362 std::cout << "\n";
363
364 uint32_t numElementsVarMap;
365 retval = fread(&numElementsVarMap, sizeof(uint32_t), 1, dbgFile);
366 if (!retval)
367 return -1;
368
369 std::cout << "Virtual Register -> Physical Register mapping\n";
370
371 for (unsigned int j = 0; j < numElementsVarMap; j++)
372 {
373 ddName();
374
375 ddLiveInterval<uint16_t>();
376 }
377 std::cout << "\n\n";
378
379 // Read sub-info
380 uint16_t numSubs;
381 retval = fread(&numSubs, sizeof(uint16_t), 1, dbgFile);
382 if (!retval)
383 return -1;
384
385 std::cout << "Number of subroutines: " << numSubs << "\n";
386
387 for (unsigned int j = 0; j < numSubs; j++)
388 {
389 std::cout << "Subroutine name: ";
390 ddName();
391 std::cout << "\n";
392 uint32_t startoffset = 0, endOffset = 0;
393 retval = fread(&startoffset, sizeof(uint32_t), 1, dbgFile);
394 if (!retval)
395 return -1;
396
397 retval = fread(&endOffset, sizeof(uint32_t), 1, dbgFile);
398 if (!retval)
399 return -1;
400
401 std::cout << "Start VISA: " << startoffset << ", end VISA: " << endOffset << "\n";
402 std::cout << "Retval: \n";
403 ddLiveInterval<uint16_t>();
404 }
405
406 std::cout << "\n";
407 uint16_t frameSize;
408 retval = fread(&frameSize, sizeof(uint16_t), 1, dbgFile);
409 if (!retval)
410 return -1;
411
412 std::cout << "Frame size: " << frameSize << " bytes\n";
413
414 uint8_t scratch;
415 retval = fread(&scratch, sizeof(uint8_t), 1, dbgFile);
416 if (!retval)
417 return -1;
418
419 if (scratch)
420 {
421 std::cout << "BE_FP: \n";
422 ddLiveInterval<uint32_t>();
423 }
424 else
425 {
426 std::cout << "BE_FP not found";
427 }
428
429 std::cout << "\n";
430
431 retval = fread(&scratch, sizeof(uint8_t), 1, dbgFile);
432 if (!retval)
433 return -1;
434
435 if (scratch)
436 {
437 std::cout << "Caller BE_FP saved at:\n";
438 ddLiveInterval<uint32_t>();
439 }
440 else
441 {
442 std::cout << "Caller BE_FP not saved";
443 }
444
445 std::cout << "\n";
446
447 retval = fread(&scratch, sizeof(uint8_t), 1, dbgFile);
448 if (!retval)
449 return -1;
450
451 if (scratch)
452 {
453 std::cout << "Return addr saved at:\n";
454 ddLiveInterval<uint32_t>();
455 }
456 else
457 {
458 std::cout << "Return addr not stored";
459 }
460
461 std::cout << "\n";
462
463 std::cout << "Callee save:\n";
464 ddCalleeCallerSave(reloc_offset);
465 std::cout << "\n";
466
467 std::cout << "Caller save:\n";
468 ddCalleeCallerSave(reloc_offset);
469 std::cout << "\n";
470 }
471
472 std::cout << "=== End of Debug Dump ===\n";
473
474 fclose(dbgFile);
475
476 return 0;
477 }
478
decodeAndDumpDebugInfo(char * filename)479 DEBUG_RELEASE_INTERNAL_DLL_EXPORT_ONLY int decodeAndDumpDebugInfo(char* filename)
480 {
481 DbgDecoder dd(filename);
482 return dd.ddDbg();
483 }
484
getGRF(G4_Declare * dcl,unsigned int & regNum,unsigned int & subRegNumInBytes)485 void getGRF(G4_Declare* dcl, unsigned int& regNum, unsigned int& subRegNumInBytes)
486 {
487 if (dcl->getRegVar()->getPhyReg() != NULL)
488 {
489 regNum = dcl->getRegVar()->getPhyReg()->asGreg()->getRegNum();
490 subRegNumInBytes = dcl->getRegVar()->getPhyRegOff() * dcl->getElemSize();
491 }
492 else
493 {
494 regNum = 65535;
495 subRegNumInBytes = 65535;
496 }
497 }
498
isMissingVISAId(unsigned int id)499 bool KernelDebugInfo::isMissingVISAId(unsigned int id)
500 {
501 if (!missingVISAIdsComputed)
502 {
503 computeMissingVISAIds();
504 }
505
506 return (missingVISAIds.find(id) != missingVISAIds.end());
507 }
508
markStackCallFuncDcls(G4_Kernel & function)509 void vISA::KernelDebugInfo::markStackCallFuncDcls(G4_Kernel& function)
510 {
511 // Store all dcls that appear in stack call functions. This is to allow
512 // debug info module to differentiate between dcls from kernel and stack call
513 // function. Stitching operation transfers all callee dcls to kernel, so
514 // kernel.Declares is a superset of kernel, stack call dcls.
515 for (auto dcl : function.Declares)
516 {
517 stackCallDcls.insert(dcl);
518 }
519 }
520
computeMissingVISAIds()521 void KernelDebugInfo::computeMissingVISAIds()
522 {
523 unsigned int maxCISAId = 0;
524
525 for (auto bb : getKernel().fg)
526 {
527 for (auto inst : *bb)
528 {
529 if (inst->getCISAOff() != UNMAPPABLE_VISA_INDEX &&
530 (unsigned int)inst->getCISAOff() > maxCISAId)
531 {
532 maxCISAId = inst->getCISAOff();
533 }
534 }
535 }
536
537 std::vector<bool> seenVISAIds(maxCISAId+1, false);
538
539 for (auto bb : getKernel().fg)
540 {
541 for (auto inst : *bb)
542 {
543 if (inst->getCISAOff() != UNMAPPABLE_VISA_INDEX)
544 {
545 seenVISAIds[inst->getCISAOff()] = true;
546 }
547 }
548 }
549
550 for (unsigned int i = 0, size = seenVISAIds.size(); i < size; i++)
551 {
552 if (!seenVISAIds[i])
553 {
554 missingVISAIds.insert(i);
555 }
556 }
557
558 missingVISAIdsComputed = true;
559 }
560
updateMapping(std::list<G4_BB * > & stackCallEntryBBs)561 void KernelDebugInfo::updateMapping(std::list<G4_BB*>& stackCallEntryBBs)
562 {
563 reset();
564
565 generateByteOffsetMapping(stackCallEntryBBs);
566 emitRegisterMapping();
567 generateCISAByteOffsetFromOffset();
568 generateGenISAToVISAIndex();
569 }
570
generateGenISAToVISAIndex()571 void KernelDebugInfo::generateGenISAToVISAIndex()
572 {
573 // Generate list of Gen ISA offset -> VISA index
574 // This is used to emit debug_ranges section in IGC.
575 // Inserting entries per Gen ISA offset guarantees
576 // all instructions will be present in the vector.
577 for (auto bb : kernel->fg)
578 {
579 for (auto inst : *bb)
580 {
581 if (inst->getGenOffset() == -1)
582 continue;
583 genISAOffsetToVISAIndex.push_back(IDX_VDbgGen2CisaIndex{(unsigned int)inst->getGenOffset(), (unsigned int)inst->getCISAOff()});
584 }
585 }
586 }
587
setVISAKernel(VISAKernelImpl * k)588 void KernelDebugInfo::setVISAKernel(VISAKernelImpl* k)
589 {
590 visaKernel = k;
591 kernel = k->getKernel();
592 }
593
generateCISAByteOffsetFromOffset()594 void KernelDebugInfo::generateCISAByteOffsetFromOffset()
595 {
596 // Using map1 and map2, generate map3
597 for (decltype(mapCISAIndexGenOffset)::iterator it = mapCISAIndexGenOffset.begin();
598 it != mapCISAIndexGenOffset.end();
599 it++)
600 {
601 // Read each entry in CISA Index->Gen Offset then map CISA Index to CISA Offset.
602 // Push back results.
603 unsigned int cisaIndex = (*it).CisaIndex;
604 unsigned int genOffset = (*it).GenOffset;
605
606 std::map<unsigned int, unsigned int>::iterator map_it = mapCISAOffset.find(cisaIndex);
607
608 if (map_it != mapCISAOffset.end())
609 {
610 unsigned int cisaOffset = mapCISAOffset.find(cisaIndex)->second;
611 mapCISAOffsetGenOffset.push_back(IDX_VDbgCisaByte2Gen{cisaOffset, genOffset});
612 }
613 }
614 }
615
generateByteOffsetMapping(std::list<G4_BB * > & stackCallEntryBBs)616 void KernelDebugInfo::generateByteOffsetMapping(std::list<G4_BB*>& stackCallEntryBBs)
617 {
618 // When compiling stack call functions, all stack call functions
619 // invoked are stitched to kernel being compiled. So G4_BBs of
620 // all stack call functions are appended to G4_Kernel's BB list.
621 // We need a way to differentiate between BBs of kernel and those
622 // of functions to emit out correct debug info. So a list is
623 // passed - stackCallEntryBBs that holds entryBBs of all stack
624 // call functions part of this compilation unit.
625
626 bool done = false;
627 unsigned int maxVISAIndex = 0;
628 uint64_t maxGenIsaOffset = 0;
629 // Now traverse CFG, create pair of CISA byte offset, gen binary offset and push to vector
630 for (BB_LIST_ITER bb_it = kernel->fg.begin(), bbEnd = kernel->fg.end(); bb_it != bbEnd; bb_it++)
631 {
632 G4_BB* bb = (*bb_it);
633
634 int isaPrevByteOffset = -1;
635
636 if (kernel->fg.builder->getIsKernel())
637 {
638 auto entryBBend = stackCallEntryBBs.end();
639 for (auto entryBBIt = stackCallEntryBBs.begin();
640 entryBBIt != entryBBend;
641 entryBBIt++)
642 {
643 if (bb == (*entryBBIt))
644 {
645 // Since we are traversing BBs in layout
646 // order, we will parse all kernel BBs
647 // first and as soon as we reach entryBB
648 // of first stack call function, we stop
649 // processing.
650 done = true;
651 break;
652 }
653 }
654 }
655
656 if (done == true)
657 {
658 break;
659 }
660
661 for (INST_LIST_ITER inst_it = bb->begin(), bbEnd = bb->end();
662 inst_it != bbEnd;
663 inst_it++)
664 {
665 G4_INST* inst = (*inst_it);
666
667 if (inst->getGenOffset() != UNDEFINED_GEN_OFFSET)
668 {
669 int cisaByteIndex = inst->getCISAOff();
670 maxGenIsaOffset = (uint64_t)inst->getGenOffset() +
671 (inst->isCompactedInst() ? 8 : 16);
672 if (cisaByteIndex == -1)
673 {
674 continue;
675 }
676
677 maxVISAIndex = std::max(maxVISAIndex, (unsigned int)cisaByteIndex);
678
679 if (isaPrevByteOffset != cisaByteIndex)
680 {
681 isaPrevByteOffset = cisaByteIndex;
682
683 // mapping holds pair of CISA bytecode index and gen Offset
684 // Use VISAKernelImpl's member mapCISAOffset to convert
685 // CISA bytecode index to CISA bytecode byte offset
686 mapCISAIndexGenOffset.push_back(IDX_VDbgCisaIndex2Gen{(unsigned)cisaByteIndex, (unsigned)inst->getGenOffset()});
687 }
688 }
689 }
690 }
691
692 // Insert out-of-sequence entry in to VISA index->Gen offset map
693 mapCISAIndexGenOffset.push_back(IDX_VDbgCisaIndex2Gen{++maxVISAIndex, (unsigned int)maxGenIsaOffset});
694 }
695
emitRegisterMapping()696 void KernelDebugInfo::emitRegisterMapping()
697 {
698 // Emit out mapping between
699 // virtual variables -> physical registers
700 // In case a variable has been spilled to memory,
701 // emit out memory offset.
702 // For address/flag registers, spill location is
703 // GRF registers. Only general variables, ie GRF
704 // candidates can be spilled to memory.
705
706 for (DECLARE_LIST_ITER dcl_it = getKernel().Declares.begin();
707 dcl_it != getKernel().Declares.end();
708 dcl_it++)
709 {
710 G4_Declare* dcl = (*dcl_it);
711 if (getKernel().fg.isPseudoDcl(dcl) ||
712 (dcl->getRegVar()->getPhyReg() &&
713 dcl->getRegVar()->getPhyReg()->isAreg() &&
714 !dcl->getRegVar()->getPhyReg()->isFlag() &&
715 !dcl->getRegVar()->getPhyReg()->isA0()))
716 {
717 // These pseudo nodes may or may not get
718 // an allocation depending on register
719 // pressure across fcall. There is no
720 // need to look at allocation results
721 // for these as far as debug info goes.
722 continue;
723 }
724
725 if (!getKernel().fg.getIsStackCallFunc())
726 {
727 // Skip iterating over dcls of callee stack call function.
728 auto it = stackCallDcls.find(dcl);
729 if (it != stackCallDcls.end())
730 continue;
731 }
732
733 VarnameMap* varMap = (VarnameMap*)getKernel().fg.builder->mem.alloc(sizeof(struct VarnameMap));
734 varMap->dcl = dcl;
735
736 if ((dcl->getRegFile() == G4_GRF || dcl->getRegFile() == G4_INPUT) &&
737 dcl->getRegVar()->isNullReg() == false)
738 {
739 // GRF candidate can be either in GRF or
740 // spilled to memory
741 bool isSpilled = dcl->isSpilled() && (dcl->getRegVar()->getPhyReg() == NULL);
742 varMap->virtualType = VARMAP_VREG_FILE_GRF;
743
744 if (!isSpilled)
745 {
746 unsigned int regNum, subRegNumInBytes;
747 getGRF(dcl, regNum, subRegNumInBytes);
748 varMap->physicalType = VARMAP_PREG_FILE_GRF;
749 varMap->Mapping.Register.regNum = static_cast<uint16_t>(regNum);
750 varMap->Mapping.Register.subRegNum = static_cast<uint16_t>(subRegNumInBytes);
751 varsMap.push_back(varMap);
752 }
753 else
754 {
755 unsigned int spillOffset = 0;
756 while (dcl->getAliasDeclare() != NULL)
757 {
758 spillOffset += dcl->getAliasOffset();
759 dcl = dcl->getAliasDeclare();
760 }
761 spillOffset += dcl->getRegVar()->getDisp();
762 varMap->physicalType = VARMAP_PREG_FILE_MEMORY;
763 if (getKernel().fg.getHasStackCalls() == false)
764 {
765 varMap->Mapping.Memory.isAbs = 1;
766 }
767 else
768 {
769 varMap->Mapping.Memory.isAbs = 0;
770 }
771 varMap->Mapping.Memory.memoryOffset = (int32_t)spillOffset;
772 varsMap.push_back(varMap);
773 }
774 }
775 else if (dcl->getRegFile() == G4_ADDRESS)
776 {
777 bool isSpilled = dcl->isSpilled() && (dcl->getRegVar()->getPhyReg() == NULL);
778 varMap->virtualType = VARMAP_VREG_FILE_ADDRESS;
779
780 if (!isSpilled)
781 {
782 unsigned int subRegNum;
783 subRegNum = dcl->getRegVar()->getPhyRegOff();
784 varMap->physicalType = VARMAP_PREG_FILE_ADDRESS;
785 varMap->Mapping.Register.regNum = 0;
786 varMap->Mapping.Register.subRegNum = static_cast<uint16_t>(subRegNum);
787 varsMap.push_back(varMap);
788 }
789 else
790 {
791 // Spilled to GRF
792 if (!dcl->getSpilledDeclare()->isSpilled())
793 {
794 unsigned int regNum, subRegNumInBytes;
795 getGRF(dcl->getSpilledDeclare(), regNum, subRegNumInBytes);
796 varMap->physicalType = VARMAP_PREG_FILE_GRF;
797 varMap->Mapping.Register.regNum = static_cast<uint16_t>(regNum);
798 varMap->Mapping.Register.subRegNum = static_cast<uint16_t>(subRegNumInBytes);
799 varsMap.push_back(varMap);
800 }
801 else
802 {
803 unsigned int spillOffset = 0;
804 //G4_Declare* origDcl = dcl;
805 while (dcl->getAliasDeclare() != NULL)
806 {
807 spillOffset += dcl->getAliasOffset();
808 dcl = dcl->getAliasDeclare();
809 }
810 spillOffset += dcl->getSpilledDeclare()->getRegVar()->getDisp();
811 varMap->physicalType = VARMAP_PREG_FILE_MEMORY;
812 if (getKernel().fg.getHasStackCalls() == false)
813 {
814 varMap->Mapping.Memory.isAbs = 1;
815 }
816 else
817 {
818 varMap->Mapping.Memory.isAbs = 0;
819 }
820 varMap->Mapping.Memory.memoryOffset = (int32_t)spillOffset;
821 varsMap.push_back(varMap);
822 }
823 }
824 }
825 else if (dcl->getRegFile() == G4_FLAG)
826 {
827 bool isSpilled = dcl->isSpilled() && (dcl->getRegVar()->getPhyReg() == NULL);
828 varMap->virtualType = VARMAP_VREG_FILE_FLAG;
829
830 if (!isSpilled)
831 {
832 unsigned int regNum, subRegNum;
833 regNum = dcl->getRegVar()->getPhyReg()->asAreg()->getFlagNum();
834 subRegNum = dcl->getRegVar()->getPhyRegOff();
835 varMap->physicalType = VARMAP_PREG_FILE_FLAG;
836 varMap->Mapping.Register.regNum = static_cast<uint16_t>(regNum);
837 varMap->Mapping.Register.subRegNum = static_cast<uint16_t>(subRegNum);
838 varsMap.push_back(varMap);
839 }
840 else
841 {
842 // Spilled to GRF
843 if (!dcl->getSpilledDeclare()->isSpilled())
844 {
845 unsigned int regNum, subRegNumInBytes;
846 getGRF(dcl->getSpilledDeclare(), regNum, subRegNumInBytes);
847 varMap->physicalType = VARMAP_PREG_FILE_GRF;
848 varMap->Mapping.Register.regNum = static_cast<uint16_t>(regNum);
849 varMap->Mapping.Register.subRegNum = static_cast<uint16_t>(subRegNumInBytes);
850 varsMap.push_back(varMap);
851 }
852 else
853 {
854 unsigned int spillOffset = 0;
855 //G4_Declare* origDcl = dcl;
856 while (dcl->getAliasDeclare() != NULL)
857 {
858 spillOffset += dcl->getAliasOffset();
859 dcl = dcl->getAliasDeclare();
860 }
861 spillOffset += dcl->getSpilledDeclare()->getRegVar()->getDisp();
862 varMap->physicalType = VARMAP_PREG_FILE_MEMORY;
863 if (getKernel().fg.getHasStackCalls() == false)
864 {
865 varMap->Mapping.Memory.isAbs = 1;
866 }
867 else
868 {
869 varMap->Mapping.Memory.isAbs = 0;
870 }
871 varMap->Mapping.Memory.memoryOffset = (int32_t)spillOffset;
872 varsMap.push_back(varMap);
873 }
874 }
875 }
876 }
877 }
878
// Writes the `size` bytes starting at `ptr` to the stream `f` as a single
// record. The result of the write is not checked.
void insertData(const void* ptr, unsigned size, FILE* f)
{
    const unsigned recordCount = 1;
    fwrite(ptr, size, recordCount, f);
}
883
// Appends the `size` bytes starting at `ptr` to the end of `vec`, preserving
// their order.
void insertData(const void* ptr, unsigned size, std::vector<unsigned char>& vec)
{
    const unsigned char* firstByte = static_cast<const unsigned char*>(ptr);
    vec.insert(vec.end(), firstByte, firstByte + size);
}
891
populateMapDclName(VISAKernelImpl * kernel,std::map<G4_Declare *,std::pair<const char *,unsigned int>> & mapDclName)892 unsigned int populateMapDclName(VISAKernelImpl* kernel, std::map<G4_Declare*, std::pair<const char*, unsigned int>>& mapDclName)
893 {
894 std::list<CISA_GEN_VAR*> dclList;
895 for (uint32_t ctr = 0; ctr < kernel->getGenVarCount(); ctr++)
896 {
897 // Pre-defined gen vars are included in this list,
898 // but we dont want to emit them to debug info.
899 if (kernel->getGenVar((unsigned int)ctr)->index >= kernel->getNumPredVars())
900 {
901 dclList.push_back(kernel->getGenVar((unsigned int)ctr));
902 }
903 }
904
905 for (uint32_t ctr = 0; ctr < kernel->getAddrVarCount(); ctr++)
906 {
907 dclList.push_back(kernel->getAddrVar((unsigned int)ctr));
908 }
909
910 for (uint32_t ctr = 0; ctr < kernel->getPredVarCount(); ctr++)
911 {
912 dclList.push_back(kernel->getPredVar((unsigned int)ctr));
913 }
914
915 for (uint32_t ctr = 0; ctr < kernel->getSurfaceVarCount(); ctr++)
916 {
917 dclList.push_back(kernel->getSurfaceVar((unsigned int)ctr));
918 }
919
920 for (uint32_t ctr = 0; ctr < kernel->getSamplerVarCount(); ctr++)
921 {
922 dclList.push_back(kernel->getSamplerVar((unsigned int)ctr));
923 }
924
925 auto start = dclList.begin();
926 auto end = dclList.end();
927
928 for (auto it = start;
929 it != end;
930 it++)
931 {
932 CISA_GEN_VAR* var = (*it);
933
934 if (var->type == GENERAL_VAR)
935 {
936 mapDclName.insert(std::make_pair(var->genVar.dcl, std::make_pair("V", var->index)));
937 }
938 else if (var->type == ADDRESS_VAR)
939 {
940 mapDclName.insert(std::make_pair(var->addrVar.dcl, std::make_pair("A", var->index)));
941 }
942 else if (var->type == PREDICATE_VAR)
943 {
944 mapDclName.insert(std::make_pair(var->predVar.dcl, std::make_pair("P", var->index)));
945 }
946 else if (var->type == SURFACE_VAR)
947 {
948 mapDclName.insert(std::make_pair(var->stateVar.dcl, std::make_pair("T", var->index)));
949 }
950 else if (var->type == SAMPLER_VAR)
951 {
952 mapDclName.insert(std::make_pair(var->stateVar.dcl, std::make_pair("S", var->index)));
953 }
954 }
955
956 return (uint32_t) dclList.size();
957 }
958
getVarIndex(G4_Declare * dcl)959 uint32_t KernelDebugInfo::getVarIndex(G4_Declare* dcl)
960 {
961 uint32_t retval = 0xffffffff;
962 for (uint32_t i = 0, size = varsMap.size(); i < size; i++)
963 {
964 if (dcl == varsMap[i]->dcl)
965 {
966 retval = i;
967 break;
968 }
969 }
970 return retval;
971 }
972
// Emits `name` to `t` as a uint16_t character count followed by the
// characters themselves (no terminator).
// The count is strlen(name) converted to uint16_t.
template<class T>
void emitDataName(const char* name, T& t)
{
    const uint16_t numChars = (uint16_t)strlen(name);

    // Length prefix first, then the characters.
    insertData(&numChars, sizeof(numChars), t);
    insertData(name, (uint32_t) (sizeof(uint8_t) * numChars), t);
}
982
// Emits the four bytes of `data` to `t` in the host's in-memory byte order.
template<class T>
void emitDataUInt32(uint32_t data, T& t)
{
    insertData(&data, sizeof(data), t);
}
988
// Emits the two bytes of `data` to `t` in the host's in-memory byte order.
template<class T>
void emitDataUInt16(uint16_t data, T& t)
{
    insertData(&data, sizeof(data), t);
}
994
// Emits the single byte `data` to `t`.
template<class T>
void emitDataUInt8(uint8_t data, T& t)
{
    insertData(&data, sizeof(data), t);
}
1000
1001 template<class T>
emitDataVarLiveInterval(VISAKernelImpl * visaKernel,LiveIntervalInfo * lrInfo,uint32_t i,uint16_t size,T & t)1002 void emitDataVarLiveInterval(VISAKernelImpl* visaKernel, LiveIntervalInfo* lrInfo, uint32_t i, uint16_t size, T& t)
1003 {
1004 // given lrs and saverestore, prepare assembled list of ranges to write out
1005 KernelDebugInfo* dbgInfo = visaKernel->getKernel()->getKernelDebugInfo();
1006
1007 // start cisa index, end cisa index
1008 std::vector<std::pair<uint32_t, uint32_t>> lrs;
1009 if (lrInfo)
1010 {
1011 lrInfo->getLiveIntervals(lrs);
1012 }
1013 uint16_t numLRs = (uint16_t)lrs.size();
1014 std::sort(lrs.begin(), lrs.end(), [](std::pair<uint32_t, uint32_t>& a, std::pair<uint32_t, uint32_t>& b) { return a.first < b.first; });
1015 emitDataUInt16(numLRs, t);
1016 for (auto& it : lrs)
1017 {
1018 const uint32_t start = (uint32_t)it.first;
1019 const uint32_t end = (uint32_t)it.second;
1020
1021 if (size == 2)
1022 {
1023 emitDataUInt16((uint16_t)start, t);
1024 emitDataUInt16((uint16_t)end, t);
1025 }
1026 else
1027 {
1028 emitDataUInt32(start, t);
1029 emitDataUInt32(end, t);
1030 }
1031
1032 auto& varsMap = dbgInfo->getVarsMap();
1033 const unsigned char virtualType = varsMap[i]->virtualType;
1034 // Write virtual register type
1035 emitDataUInt8((uint8_t)virtualType, t);
1036
1037 const unsigned char physicalType = varsMap[i]->physicalType;
1038 // Write physical register type
1039 emitDataUInt8((uint8_t)physicalType, t);
1040
1041 // If physical register assigned then write register number and
1042 // sub-register number. Else write memory spill offset.
1043 if (physicalType == VARMAP_PREG_FILE_MEMORY)
1044 {
1045 unsigned int memOffset = (unsigned int)varsMap[i]->Mapping.Memory.memoryOffset;
1046 if (visaKernel->getKernel()->fg.getHasStackCalls() == false)
1047 {
1048 memOffset |= 0x80000000;
1049 }
1050 // Emit memory offset
1051 emitDataUInt32((uint32_t)memOffset, t);
1052 }
1053 else
1054 {
1055 const unsigned int regNum = varsMap[i]->Mapping.Register.regNum;
1056 const unsigned int subRegNum = varsMap[i]->Mapping.Register.subRegNum;
1057
1058 // Emit register number
1059 emitDataUInt16((uint16_t)regNum, t);
1060
1061 // Emit sub-register number
1062 emitDataUInt16((uint16_t)subRegNum, t);
1063 }
1064 }
1065 }
1066
1067 template<class T>
emitFrameDescriptorOffsetLiveInterval(LiveIntervalInfo * lrInfo,StackCall::FrameDescriptorOfsets memOffset,T & t)1068 void emitFrameDescriptorOffsetLiveInterval(LiveIntervalInfo* lrInfo, StackCall::FrameDescriptorOfsets memOffset, T& t)
1069 {
1070 // Used to emit fields of Frame Descriptor
1071 // location = [start, end) @ BE_FP+offset
1072 std::vector<std::pair<uint32_t, uint32_t>> lrs;
1073 if (lrInfo)
1074 lrInfo->getLiveIntervals(lrs);
1075 else
1076 return;
1077
1078 uint32_t start = 0, end = 0;
1079 if (lrs.size() > 0)
1080 {
1081 start = lrs.front().first;
1082 end = lrs.back().second;
1083 }
1084
1085 std::sort(lrs.begin(), lrs.end(), [](std::pair<uint32_t, uint32_t>& a, std::pair<uint32_t, uint32_t>& b) { return a.first < b.first; });
1086
1087 emitDataUInt16(1, t);
1088
1089 emitDataUInt32(start, t);
1090 emitDataUInt32(end, t);
1091
1092 emitDataUInt8((uint8_t)VARMAP_PREG_FILE_GRF, t);
1093
1094 emitDataUInt8((uint8_t)VARMAP_PREG_FILE_MEMORY, t);
1095
1096 emitDataUInt32((uint32_t)memOffset, t);
1097 }
1098
populateUniqueSubs(G4_Kernel * kernel,std::unordered_map<G4_BB *,bool> & uniqueSubs)1099 void populateUniqueSubs(G4_Kernel* kernel, std::unordered_map<G4_BB*, bool>& uniqueSubs)
1100 {
1101 // Traverse kernel and populate all unique subs.
1102 // Iterating over all BBs of kernel visits all
1103 // subroutine call sites.
1104 auto isStackObj = kernel->fg.getHasStackCalls() || kernel->fg.getIsStackCallFunc();
1105 for (auto bb : kernel->fg)
1106 {
1107 if (&bb->getParent() != &kernel->fg)
1108 continue;
1109
1110 if (bb->isEndWithCall())
1111 {
1112 if (!isStackObj || // definitely a subroutine since kernel has no stack calls
1113 (isStackObj && // a subroutine iff call dst != pre-defined reg as per ABI
1114 bb->back()->getDst()->getTopDcl()->getRegVar()->getPhyReg()->asGreg()->getRegNum() != kernel->getFPSPGRF()))
1115 {
1116 // This is a subroutine call
1117 uniqueSubs[bb->Succs.front()] = false;
1118 }
1119 }
1120 }
1121 }
1122
1123 template<class T>
emitDataSubroutines(VISAKernelImpl * visaKernel,T & t)1124 void emitDataSubroutines(VISAKernelImpl* visaKernel, T& t)
1125 {
1126 auto kernel = visaKernel->getKernel();
1127 // map<Label, Written to t>
1128 std::unordered_map<G4_BB*, bool> uniqueSubs;
1129
1130 populateUniqueSubs(kernel, uniqueSubs);
1131
1132 emitDataUInt16((uint16_t) uniqueSubs.size(), t);
1133
1134 kernel->fg.setPhysicalPredSucc();
1135 for (auto bb : kernel->fg)
1136 {
1137 G4_INST* firstInst = nullptr;
1138 G4_INST* lastInst = nullptr;
1139 unsigned int start = 0, end = 0;
1140 G4_Declare* retval = nullptr;
1141 G4_Label* subLabel = nullptr;
1142
1143 if (bb->isEndWithCall())
1144 {
1145 auto subInfo = uniqueSubs.find(bb->Succs.front());
1146 if (subInfo != uniqueSubs.end() &&
1147 subInfo->second == false)
1148 {
1149 subInfo->second = true;
1150 G4_BB* calleeBB = bb->Succs.front();
1151 while (firstInst == NULL && calleeBB != NULL)
1152 {
1153 if (calleeBB->size() > 0)
1154 {
1155 firstInst = calleeBB->front();
1156 start = firstInst->getCISAOff();
1157 subLabel = firstInst->getSrc(0)->asLabel();
1158 }
1159 }
1160
1161 calleeBB = bb->BBAfterCall()->Preds.front();
1162 while (lastInst == NULL && calleeBB != NULL)
1163 {
1164 if (calleeBB->size() > 0)
1165 {
1166 lastInst = calleeBB->back();
1167 end = lastInst->getCISAOff();
1168 MUST_BE_TRUE(lastInst->isReturn(), "Expecting to see G4_return as last inst in sub-routine");
1169 retval = lastInst->getSrc(0)->asSrcRegRegion()->getBase()->asRegVar()->getDeclare()->getRootDeclare();
1170 }
1171
1172 calleeBB = calleeBB->Preds.front();
1173 }
1174 emitDataName(subLabel->getLabel(), t);
1175 emitDataUInt32(start, t);
1176 emitDataUInt32(end, t);
1177
1178 if (kernel->getKernelDebugInfo()->getLiveIntervalInfo(retval, false) != NULL)
1179 {
1180 auto lv = kernel->getKernelDebugInfo()->getLiveIntervalInfo(retval, false);
1181 uint32_t idx = kernel->getKernelDebugInfo()->getVarIndex(retval);
1182 emitDataVarLiveInterval(visaKernel, lv, idx, sizeof(uint16_t), t);
1183 }
1184 else
1185 {
1186 emitDataUInt16(0, t);
1187 }
1188 }
1189 }
1190 }
1191 }
1192
1193 template<class T>
emitDataPhyRegSaveInfoPerIP(VISAKernelImpl * visaKernel,SaveRestoreManager & mgr,T & t)1194 void emitDataPhyRegSaveInfoPerIP(VISAKernelImpl* visaKernel, SaveRestoreManager& mgr, T& t)
1195 {
1196 auto& srInfo = mgr.getSRInfo();
1197 auto relocOffset = visaKernel->getKernel()->getKernelDebugInfo()->getRelocOffset();
1198
1199 for (auto sr : srInfo)
1200 {
1201 if (sr.getInst()->getGenOffset() == UNDEFINED_GEN_OFFSET)
1202 {
1203 continue;
1204 }
1205
1206 emitDataUInt32((uint32_t)sr.getInst()->getGenOffset() +
1207 getBinInstSize(sr.getInst()) - relocOffset, t);
1208 emitDataUInt16((uint16_t) sr.saveRestoreMap.size(), t);
1209 for (auto mapIt : sr.saveRestoreMap)
1210 {
1211 emitDataUInt16((uint16_t)mapIt.first * numEltPerGRF<Type_UB>(), t);
1212 emitDataUInt16((uint16_t)numEltPerGRF<Type_UB>(), t);
1213
1214 if (mapIt.second.first == SaveRestoreInfo::RegOrMem::Reg)
1215 {
1216 emitDataUInt8((uint8_t)1, t);
1217 emitDataUInt16((uint16_t)mapIt.second.second.regNum, t);
1218 emitDataUInt16((uint16_t)0, t);
1219 }
1220 else if (mapIt.second.first == SaveRestoreInfo::RegOrMem::MemOffBEFP)
1221 {
1222 SaveRestoreInfo::RegMap tmp;
1223 emitDataUInt8((uint8_t)0, t);
1224 tmp = mapIt.second.second;
1225 tmp.isAbs = 0;
1226 uint32_t data = mapIt.second.second.memOff;
1227 emitDataUInt32(data, t);
1228 }
1229 else if (mapIt.second.first == SaveRestoreInfo::RegOrMem::MemAbs)
1230 {
1231 SaveRestoreInfo::RegMap tmp;
1232 emitDataUInt8((uint8_t)0, t);
1233 tmp = mapIt.second.second;
1234 tmp.isAbs = 1;
1235 uint32_t data = mapIt.second.second.memOff;
1236 emitDataUInt32(data, t);
1237 }
1238 }
1239 }
1240 }
1241
sieveInstructions(CallerOrCallee c)1242 void SaveRestoreManager::sieveInstructions(CallerOrCallee c)
1243 {
1244 // Remove entries that are not caller/callee
1245 // save/restore.
1246 for (auto& sr : srInfo)
1247 {
1248 for (auto entryIt = sr.saveRestoreMap.begin();
1249 entryIt != sr.saveRestoreMap.end();
1250 )
1251 {
1252 auto entry = (*entryIt);
1253
1254 bool removeEntry = true;
1255 if (c == CallerOrCallee::Caller)
1256 {
1257 // r1 - r60
1258 // Remove temp movs emitted for send header
1259 // creation since they are not technically
1260 // caller save
1261 if (entry.first < visaKernel->getKernel()->calleeSaveStart() &&
1262 entry.first >= 0 &&
1263 entry.second.first == SaveRestoreInfo::RegOrMem::MemOffBEFP)
1264 {
1265 removeEntry = false;
1266 }
1267 }
1268 else if (c == CallerOrCallee::Callee)
1269 {
1270 if (entry.first >= visaKernel->getKernel()->calleeSaveStart() &&
1271 entry.second.first == SaveRestoreInfo::RegOrMem::MemOffBEFP)
1272 {
1273 removeEntry = false;
1274 }
1275 }
1276
1277 if (removeEntry)
1278 {
1279 entryIt = sr.saveRestoreMap.erase(entryIt);
1280 continue;
1281 }
1282
1283 entryIt++;
1284 }
1285 }
1286
1287 #if _DEBUG
1288 // Ensure ordering of elements is correct, ie ascending in key value
1289 for (auto& sr : srInfo)
1290 {
1291 uint32_t prev = 0;
1292 for (auto& item : sr.saveRestoreMap)
1293 {
1294 MUST_BE_TRUE(item.first >= prev, "Unexpected ordering in container");
1295 prev = item.first;
1296 }
1297 }
1298 #endif
1299
1300 // Code below is to remove empty and duplicate entries
1301 // from both caller and callee save code.
1302 bool foundFirstNonEmpty = false;
1303 bool onSecond = false;
1304 SaveRestoreInfo prev;
1305
1306 for (auto srIt = srInfo.begin();
1307 srIt != srInfo.end();
1308 )
1309 {
1310 auto& sr = (*srIt);
1311
1312 if (!foundFirstNonEmpty)
1313 {
1314 if (sr.saveRestoreMap.size() == 0)
1315 {
1316 srIt = srInfo.erase(srIt);
1317 continue;
1318 }
1319 else
1320 {
1321 foundFirstNonEmpty = true;
1322 }
1323 }
1324
1325 if (onSecond)
1326 {
1327 // If this one and previous one are same, eliminate this entry
1328 if (sr.isEqual(prev))
1329 {
1330 srIt = srInfo.erase(srIt);
1331 continue;
1332 }
1333 }
1334
1335 prev = (*srIt);
1336 onSecond = true;
1337 srIt++;
1338 }
1339 }
1340
1341 template<class T>
emitDataCallerSave(VISAKernelImpl * visaKernel,T & t)1342 void emitDataCallerSave(VISAKernelImpl* visaKernel, T& t)
1343 {
1344 auto kernel = visaKernel->getKernel();
1345
1346 uint16_t numCallerSaveEntries = 0;
1347 // Compute total caller save entries to emit
1348 for (auto bbs : kernel->fg)
1349 {
1350 if (bbs->size() > 0 &&
1351 kernel->getKernelDebugInfo()->isFcallWithSaveRestore(bbs))
1352 {
1353 auto& callerSaveInsts = kernel->getKernelDebugInfo()->getCallerSaveInsts(bbs);
1354 auto& callerRestoreInsts = kernel->getKernelDebugInfo()->getCallerRestoreInsts(bbs);
1355
1356 SaveRestoreManager mgr(visaKernel);
1357 for (auto callerSave : callerSaveInsts)
1358 {
1359 mgr.addInst(callerSave);
1360 }
1361
1362 for (auto callerRestore : callerRestoreInsts)
1363 {
1364 mgr.addInst(callerRestore);
1365 }
1366
1367 auto& srInfo = mgr.getSRInfo();
1368
1369 mgr.sieveInstructions(SaveRestoreManager::CallerOrCallee::Caller);
1370
1371 numCallerSaveEntries += (uint16_t) srInfo.size();
1372 for (auto sr : srInfo)
1373 {
1374 if (sr.getInst()->getGenOffset() == UNDEFINED_GEN_OFFSET)
1375 {
1376 numCallerSaveEntries--;
1377 }
1378 }
1379 }
1380 }
1381
1382 emitDataUInt16(numCallerSaveEntries, t);
1383
1384 if (numCallerSaveEntries > 0)
1385 {
1386 for (auto bbs : kernel->fg)
1387 {
1388 if (bbs->size() > 0 &&
1389 kernel->getKernelDebugInfo()->isFcallWithSaveRestore(bbs))
1390 {
1391 auto& callerSaveInsts = kernel->getKernelDebugInfo()->getCallerSaveInsts(bbs);
1392 auto& callerRestoreInsts = kernel->getKernelDebugInfo()->getCallerRestoreInsts(bbs);
1393
1394 SaveRestoreManager mgr(visaKernel);
1395 for (auto callerSave : callerSaveInsts)
1396 {
1397 mgr.addInst(callerSave);
1398 }
1399
1400 for (auto callerRestore : callerRestoreInsts)
1401 {
1402 mgr.addInst(callerRestore);
1403 }
1404
1405 mgr.sieveInstructions(SaveRestoreManager::CallerOrCallee::Caller);
1406
1407 emitDataPhyRegSaveInfoPerIP(visaKernel, mgr, t);
1408 }
1409 }
1410 }
1411 }
1412
1413 template<class T>
emitDataCalleeSave(VISAKernelImpl * visaKernel,T & t)1414 void emitDataCalleeSave(VISAKernelImpl* visaKernel, T& t)
1415 {
1416 G4_Kernel* kernel = visaKernel->getKernel();
1417
1418 SaveRestoreManager mgr(visaKernel);
1419 for (auto calleeSave : kernel->getKernelDebugInfo()->getCalleeSaveInsts())
1420 {
1421 mgr.addInst(calleeSave);
1422 }
1423
1424 for (auto calleeRestore : kernel->getKernelDebugInfo()->getCalleeRestoreInsts())
1425 {
1426 mgr.addInst(calleeRestore);
1427 }
1428
1429 uint16_t numCalleeSaveEntries = 0;
1430 auto& srInfo = mgr.getSRInfo();
1431
1432 mgr.sieveInstructions(SaveRestoreManager::CallerOrCallee::Callee);
1433
1434 numCalleeSaveEntries += (uint16_t) srInfo.size();
1435 for (auto sr : srInfo)
1436 {
1437 if (sr.getInst()->getGenOffset() == UNDEFINED_GEN_OFFSET)
1438 {
1439 numCalleeSaveEntries--;
1440 }
1441 }
1442
1443 emitDataUInt16(numCalleeSaveEntries, t);
1444
1445 emitDataPhyRegSaveInfoPerIP(visaKernel, mgr, t);
1446 }
1447
1448 template<class T>
emitDataCallFrameInfo(VISAKernelImpl * visaKernel,T & t)1449 void emitDataCallFrameInfo(VISAKernelImpl* visaKernel, T& t)
1450 {
1451 // Compute both be fp of current frame and previous frame
1452 auto kernel = visaKernel->getKernel();
1453
1454 auto frameSize = kernel->getKernelDebugInfo()->getFrameSize();
1455 emitDataUInt16((uint16_t)frameSize, t);
1456
1457 auto befpDcl = kernel->getKernelDebugInfo()->getBEFP();
1458 if (befpDcl)
1459 {
1460 auto befpLIInfo = kernel->getKernelDebugInfo()->getLiveIntervalInfo(befpDcl, false);
1461 if (befpLIInfo)
1462 {
1463 emitDataUInt8((uint8_t)1, t);
1464 uint32_t idx = kernel->getKernelDebugInfo()->getVarIndex(kernel->fg.framePtrDcl);
1465 emitDataVarLiveInterval(visaKernel, befpLIInfo, idx, sizeof(uint32_t), t);
1466 }
1467 else
1468 {
1469 emitDataUInt8((uint8_t)0, t);
1470 }
1471 }
1472 else
1473 {
1474 emitDataUInt8((uint8_t)0, t);
1475 }
1476
1477 auto callerfpdcl = kernel->getKernelDebugInfo()->getCallerBEFP();
1478 if (callerfpdcl)
1479 {
1480 auto callerfpLIInfo = kernel->getKernelDebugInfo()->getLiveIntervalInfo(callerfpdcl, false);
1481 if (callerfpLIInfo)
1482 {
1483 emitDataUInt8((uint8_t)1, t);
1484 // Caller's be_fp is stored in frame descriptor
1485 emitFrameDescriptorOffsetLiveInterval(callerfpLIInfo, StackCall::FrameDescriptorOfsets::BE_FP, t);
1486 }
1487 else
1488 {
1489 emitDataUInt8((uint8_t)0, t);
1490 }
1491 }
1492 else
1493 {
1494 emitDataUInt8((uint8_t)0, t);
1495 }
1496
1497 auto fretVar = kernel->getKernelDebugInfo()->getFretVar();
1498 if (fretVar)
1499 {
1500 auto fretVarLIInfo = kernel->getKernelDebugInfo()->getLiveIntervalInfo(fretVar, false);
1501 if (fretVarLIInfo)
1502 {
1503 emitDataUInt8((uint8_t)1, t);
1504 emitFrameDescriptorOffsetLiveInterval(fretVarLIInfo, StackCall::FrameDescriptorOfsets::Ret_IP, t);
1505 }
1506 else
1507 {
1508 emitDataUInt8((uint8_t)0, t);
1509 }
1510 }
1511 else
1512 {
1513 emitDataUInt8((uint8_t)0, t);
1514 }
1515
1516 emitDataCalleeSave(visaKernel, t);
1517
1518 emitDataCallerSave(visaKernel, t);
1519 }
1520
1521 // compilationUnits has 1 kernel and stack call functions
1522 // referenced by it. In case stack call functions dont
1523 // exist in input, it only has a kernel.
1524 template<class T>
emitData(std::list<VISAKernelImpl * > & compilationUnits,T t)1525 void emitData(std::list<VISAKernelImpl*>& compilationUnits, T t)
1526 {
1527 const unsigned int magic = DEBUG_MAGIC_NUMBER;
1528 const unsigned int numKernels = (uint32_t) compilationUnits.size();
1529 // Magic
1530 emitDataUInt32((uint32_t)magic, t);
1531 // Num Kernels
1532 emitDataUInt16((uint16_t)numKernels, t);
1533
1534 auto cunitsItEnd = compilationUnits.end();
1535 for (auto cunitsIt = compilationUnits.begin();
1536 cunitsIt != cunitsItEnd;
1537 cunitsIt++)
1538 {
1539 VISAKernelImpl* curKernel = (*cunitsIt);
1540
1541 emitDataName(curKernel->getName(), t);
1542
1543 uint32_t reloc_offset = 0;
1544 if (curKernel->getIsKernel())
1545 {
1546 emitDataUInt32((uint32_t)reloc_offset, t);
1547 }
1548 else
1549 {
1550 reloc_offset = curKernel->getKernel()->getKernelDebugInfo()->getRelocOffset();
1551 emitDataUInt32((uint32_t)reloc_offset, t);
1552 }
1553
1554 // Emit CISA Offset:Gen Offset mapping
1555 const unsigned int numElementsCISAOffsetMap = (uint32_t) curKernel->getKernel()->getKernelDebugInfo()->getMapCISAOffsetGenOffset().size();
1556 // Num elements
1557 emitDataUInt32((uint32_t)numElementsCISAOffsetMap, t);
1558
1559 // Emit out actual CISA Offset:Gen Offset mapping elements
1560 for (unsigned int i = 0; i < numElementsCISAOffsetMap; i++)
1561 {
1562 const auto & CisaOffset2Gen = curKernel->getKernel()->getKernelDebugInfo()->getMapCISAOffsetGenOffset()[i];
1563 const unsigned int cisaOffset = CisaOffset2Gen.CisaByteOffset;
1564 const unsigned int genOffset = CisaOffset2Gen.GenOffset - (unsigned int)reloc_offset;
1565
1566 // Write cisa offset and gen offset
1567 emitDataUInt32((uint32_t)cisaOffset, t);
1568 emitDataUInt32((uint32_t)genOffset, t);
1569 }
1570
1571 // Emit CISA index:Gen Offset mapping
1572 const unsigned int numElementsCISAIndexMap = (uint32_t) curKernel->getKernel()->getKernelDebugInfo()->getMapCISAIndexGenOffset().size();
1573 // Num elements
1574 emitDataUInt32((uint32_t)numElementsCISAIndexMap, t);
1575
1576 // Emit out actual CISA index:Gen Offset mapping
1577 for (unsigned int i = 0; i < numElementsCISAIndexMap; i++)
1578 {
1579 const auto &CisaIndex2Gen = curKernel->getKernel()->getKernelDebugInfo()->getMapCISAIndexGenOffset()[i];
1580 const unsigned int cisaIndex = CisaIndex2Gen.CisaIndex;
1581 const unsigned int genOffset = CisaIndex2Gen.GenOffset - (unsigned int)reloc_offset;
1582
1583 // Write cisa index and gen offset
1584 emitDataUInt32((uint32_t)cisaIndex, t);
1585 emitDataUInt32((uint32_t)genOffset, t);
1586 }
1587
1588 // All variables present in varMap need not be present in
1589 // mapDclName. Only those variables seen when constructing
1590 // symbol table will be added to mapDclName. So compute
1591 // number of elements that will be written out.
1592 unsigned int numItems = 0;
1593 std::map<G4_Declare*, std::pair<const char*, unsigned int>> mapDclName;
1594 // Compute items to write to debug info.
1595 // Sum variables of all types present in symbol table
1596 // created at build time, and subtract number of pre-
1597 // defined variables.
1598 populateMapDclName(curKernel, mapDclName);
1599
1600 const unsigned int numElementsVarMap = (uint32_t) curKernel->getKernel()->getKernelDebugInfo()->getVarsMap().size();
1601
1602 for (unsigned int i = 0; i < numElementsVarMap; i++)
1603 {
1604 G4_Declare* dcl = curKernel->getKernel()->getKernelDebugInfo()->getVarsMap()[i]->dcl;
1605 if (mapDclName.find(dcl) == mapDclName.end())
1606 {
1607 continue;
1608 }
1609
1610 numItems++;
1611 }
1612
1613 // Emit out number of variable mapping items
1614 emitDataUInt32((uint32_t)numItems, t);
1615
1616 // Emit out actual Virtual Register:Physical Register mapping elements
1617 for (unsigned int i = 0; i < numElementsVarMap; i++)
1618 {
1619 G4_Declare* dcl = curKernel->getKernel()->getKernelDebugInfo()->getVarsMap()[i]->dcl;
1620 if (mapDclName.find(dcl) == mapDclName.end())
1621 {
1622 continue;
1623 }
1624
1625 const std::pair<const char*, unsigned int>& dclInfo = mapDclName.find(dcl)->second;
1626 std::string varName(dclInfo.first);
1627 // to_string support not present prior to gcc 4.6 and is a c++11 feature
1628
1629 #if ANDROID
1630 {
1631 char t_char[128];
1632 snprintf(t_char, sizeof(t_char), "%d", dclInfo.second);
1633 varName += std::string(t_char);
1634 }
1635 #elif defined(_MSC_VER) && _MSC_VER < 1700
1636 varName += std::to_string((_ULonglong)dclInfo.second);
1637 #else
1638 varName += std::to_string(dclInfo.second);
1639 #endif
1640
1641 if (curKernel->getOptions()->getOption(vISA_UseFriendlyNameInDbg))
1642 {
1643 varName = dcl->getName();
1644 }
1645 emitDataName(varName.c_str(), t);
1646
1647 // Insert live-interval information
1648 LiveIntervalInfo* lrInfo = curKernel->getKernel()->getKernelDebugInfo()->getLiveIntervalInfo(dcl, false);
1649 emitDataVarLiveInterval(curKernel, lrInfo, i, sizeof(uint16_t), t);
1650 }
1651
1652 // emit sub-routine data
1653 emitDataSubroutines(curKernel, t);
1654
1655 emitDataCallFrameInfo(curKernel, t);
1656 }
1657 }
1658
emitDebugInfo(VISAKernelImpl * curKernel,std::string filename)1659 void emitDebugInfo(VISAKernelImpl* curKernel, std::string filename)
1660 {
1661 std::list<VISAKernelImpl*> functions;
1662 emitDebugInfo(curKernel, functions, filename);
1663 }
1664
1665 extern "C" void* allocCodeBlock(size_t sz);
1666
emitDebugInfoToMem(VISAKernelImpl * kernel,std::list<VISAKernelImpl * > & functions,void * & info,unsigned & size)1667 void emitDebugInfoToMem(VISAKernelImpl* kernel, std::list<VISAKernelImpl*>& functions, void*& info, unsigned& size)
1668 {
1669 std::vector<unsigned char> vec;
1670 std::list<VISAKernelImpl*> compilationUnits;
1671 compilationUnits.push_back(kernel);
1672 auto funcItEnd = functions.end();
1673 for (auto funcIt = functions.begin();
1674 funcIt != funcItEnd;
1675 funcIt++)
1676 {
1677 if ((*funcIt)->getKernel()->getKernelDebugInfo()->getRelocOffset() != 0)
1678 {
1679 compilationUnits.push_back((*funcIt));
1680 }
1681 }
1682
1683 emitData<std::vector<unsigned char>&>(compilationUnits, vec);
1684
1685 info = allocCodeBlock(vec.size());
1686 memcpy_s(info, vec.size(), vec.data(), vec.size());
1687 size = (uint32_t) vec.size();
1688 }
1689
emitDebugInfoToMem(VISAKernelImpl * curKernel,void * & info,unsigned & size)1690 void emitDebugInfoToMem(VISAKernelImpl* curKernel, void*& info, unsigned& size)
1691 {
1692 std::list<VISAKernelImpl*> compilationUnits;
1693
1694 emitDebugInfoToMem(curKernel, compilationUnits, info, size);
1695 }
1696
operator new(size_t sz,Mem_Manager & m)1697 void* KernelDebugInfo::operator new(size_t sz, Mem_Manager& m)
1698 {
1699 return m.alloc(sz);
1700 }
1701
KernelDebugInfo()1702 KernelDebugInfo::KernelDebugInfo()
1703 {
1704 visaKernel = nullptr;
1705 saveCallerFP = nullptr;
1706 restoreCallerFP = nullptr;
1707 setupFP = nullptr;
1708 restoreSP = nullptr;
1709 frameSize = 0;
1710 fretVar = nullptr;
1711 reloc_offset = 0;
1712 missingVISAIdsComputed = false;
1713 }
1714
~KernelDebugInfo()1715 KernelDebugInfo::~KernelDebugInfo()
1716 {
1717 for (auto& item : debugInfoLiveIntervalMap)
1718 {
1719 item.second->~LiveIntervalInfo();
1720 }
1721 }
1722
updateRelocOffset()1723 void KernelDebugInfo::updateRelocOffset()
1724 {
1725 // This function updates reloc_offset field of kernel
1726 // reloc_offset field for kernels is 0.
1727 // reloc_offset field for stack call function is set
1728 // to byte offset of first gen binary instruction
1729 // in binary buffer.
1730
1731 bool done = false;
1732 BB_LIST_ITER bbItEnd = getKernel().fg.end();
1733 for (auto bbIt = getKernel().fg.begin();
1734 bbIt != bbItEnd && done == false;
1735 bbIt++)
1736 {
1737 G4_BB* bb = (*bbIt);
1738 INST_LIST_ITER instItEnd = bb->end();
1739 for (auto instIt = bb->begin();
1740 instIt != instItEnd;
1741 instIt++)
1742 {
1743 G4_INST* inst = (*instIt);
1744 if (inst->getGenOffset() != UNDEFINED_GEN_OFFSET)
1745 {
1746 reloc_offset = (uint32_t)inst->getGenOffset();
1747 done = true;
1748 break;
1749 }
1750 }
1751 }
1752 }
1753
emitDebugInfo(VISAKernelImpl * kernel,std::list<VISAKernelImpl * > & functions,std::string debugFileNameStr)1754 void emitDebugInfo(VISAKernelImpl* kernel, std::list<VISAKernelImpl*>& functions, std::string debugFileNameStr)
1755 {
1756 std::list<VISAKernelImpl*> compilationUnits;
1757 compilationUnits.push_back(kernel);
1758 auto funcItEnd = functions.end();
1759 for (auto funcIt = functions.begin();
1760 funcIt != funcItEnd;
1761 funcIt++)
1762 {
1763 if ((*funcIt)->getKernel()->getKernelDebugInfo()->getRelocOffset() != 0)
1764 {
1765 // Include compilation unit only if
1766 // it is referenced, ie reloc_offset
1767 // for gen binary is non-zero.
1768 compilationUnits.push_back((*funcIt));
1769 }
1770 }
1771 #ifdef DEBUG_VERBOSE_ON
1772 addCallFrameInfo(kernel);
1773
1774 for (auto& funcIt : functions)
1775 {
1776 addCallFrameInfo(funcIt);
1777 }
1778 #endif
1779
1780 FILE* dbgFile = fopen(debugFileNameStr.c_str(), "wb+");
1781
1782 if (dbgFile == NULL)
1783 {
1784 std::cerr << "Error opening debug file " << debugFileNameStr << ". Not emitting debug info.\n";
1785 return;
1786 }
1787
1788 emitData(compilationUnits, dbgFile);
1789
1790 fclose(dbgFile);
1791 }
1792
resetGenOffsets(G4_Kernel & kernel)1793 void resetGenOffsets(G4_Kernel& kernel)
1794 {
1795 // Iterate over all instructions in kernel and set gen
1796 // offset of BinInst instance to 0.
1797 auto bbItEnd = kernel.fg.end();
1798 for (auto bbIt = kernel.fg.begin();
1799 bbIt != bbItEnd;
1800 bbIt++)
1801 {
1802 G4_BB* bb = (*bbIt);
1803
1804 auto instItEnd = bb->end();
1805 for (auto instIt = bb->begin();
1806 instIt != instItEnd;
1807 instIt++)
1808 {
1809 G4_INST* inst = (*instIt);
1810
1811 if (inst->getGenOffset() != UNDEFINED_GEN_OFFSET)
1812 {
1813 inst->setGenOffset(UNDEFINED_GEN_OFFSET);
1814 }
1815 }
1816 }
1817 }
1818
updateDebugInfo(G4_Kernel & kernel,G4_INST * inst,const LivenessAnalysis & liveAnalysis,LiveRange * lrs[],BitSet & live,DebugInfoState * state,bool closeAllOpenIntervals)1819 void updateDebugInfo(G4_Kernel& kernel, G4_INST* inst, const LivenessAnalysis& liveAnalysis, LiveRange* lrs[], BitSet& live, DebugInfoState* state,
1820 bool closeAllOpenIntervals)
1821 {
1822 if (closeAllOpenIntervals && !state->getPrevInst())
1823 return;
1824
1825 auto krnlDbgInfo = kernel.getKernelDebugInfo();
1826
1827 // Update live-intervals only when bits change in bit-vector.
1828 // state parameter contains previous instruction and bit-vector.
1829 for (unsigned int i = 0; i < liveAnalysis.getNumSelectedVar(); i += NUM_BITS_PER_ELT)
1830 {
1831 auto elt = live.getElt(i / NUM_BITS_PER_ELT);
1832 auto prevElt = state->getPrevBitset() ? state->getPrevBitset()->getElt(i / NUM_BITS_PER_ELT) : 0;
1833
1834 if (elt != prevElt)
1835 {
1836 // Some variables have changed state in bit-vector, so update their states accordingly.
1837 //
1838 // If elt is set and prevElt is reset, it means the variable became live at current inst,
1839 // If elt is reset and prevElt is set, it means the variable was killed at current inst
1840 //
1841 for (unsigned int j = 0; j < NUM_BITS_PER_ELT; j++)
1842 {
1843 unsigned char eltJ = (elt >> j) & 0x1;
1844 unsigned char prevEltJ = (prevElt >> j) & 0x1;
1845
1846 if (eltJ == 1 && prevEltJ == 0)
1847 {
1848 if (inst->getCISAOff() != UNMAPPABLE_VISA_INDEX)
1849 {
1850 // This check guarantees that for an open
1851 // interval, at least the same CISA offset
1852 // can be used to close it. If there is no
1853 // instruction with valid CISA offset
1854 // between open/close IR instruction, then
1855 // the interval will not be recorded.
1856 auto idx = (i + j);
1857 G4_Declare* dcl = lrs[idx]->getVar()->getDeclare();
1858 auto lr = krnlDbgInfo->getLiveIntervalInfo(dcl);
1859
1860 lr->setStateOpen(inst->getCISAOff());
1861 }
1862 }
1863 else if (eltJ == 0 && prevEltJ == 1)
1864 {
1865 auto idx = (i + j);
1866 G4_Declare* dcl = lrs[idx]->getVar()->getDeclare();
1867
1868 auto lr = krnlDbgInfo->getLiveIntervalInfo(dcl);
1869
1870 if (lr->getState() == LiveIntervalInfo::DebugLiveIntervalState::Open)
1871 {
1872 auto closeAt = state->getPrevInst()->getCISAOff();
1873 while (closeAt >= 1 &&
1874 krnlDbgInfo->isMissingVISAId(closeAt - 1))
1875 {
1876 closeAt--;
1877 }
1878 lr->setStateClosed(closeAt);
1879 }
1880 }
1881 }
1882 }
1883
1884 if (closeAllOpenIntervals)
1885 {
1886 for (unsigned int j = 0; j < NUM_BITS_PER_ELT; j++)
1887 {
1888 unsigned char eltJ = (elt >> j) & 0x1;
1889
1890 if (eltJ)
1891 {
1892 auto idx = (i + j);
1893 G4_Declare* dcl = lrs[idx]->getVar()->getDeclare();
1894 auto lr = krnlDbgInfo->getLiveIntervalInfo(dcl);
1895
1896 if (lr->getState() == LiveIntervalInfo::DebugLiveIntervalState::Open)
1897 {
1898 uint32_t lastCISAOff = (inst->getCISAOff() != UNMAPPABLE_VISA_INDEX) ?
1899 inst->getCISAOff() : state->getPrevInst()->getCISAOff();
1900
1901 while (lastCISAOff >= 1 &&
1902 krnlDbgInfo->isMissingVISAId(lastCISAOff - 1))
1903 {
1904 lastCISAOff--;
1905 }
1906
1907 lr->setStateClosed(lastCISAOff);
1908 }
1909 }
1910 }
1911 }
1912 }
1913
1914 if (inst->getCISAOff() != UNMAPPABLE_VISA_INDEX &&
1915 !inst->isPseudoKill())
1916 {
1917 state->setPrevBitset(live);
1918 state->setPrevInst(inst);
1919 }
1920 }
1921
updateDebugInfo(vISA::G4_Kernel & kernel,std::vector<vISA::LSLiveRange * > & liveIntervals)1922 void updateDebugInfo(vISA::G4_Kernel& kernel, std::vector<vISA::LSLiveRange*>& liveIntervals)
1923 {
1924 for (auto lr : liveIntervals)
1925 {
1926 uint32_t start, end;
1927 G4_INST* startInst = lr->getFirstRef(start);
1928 G4_INST* endInst = lr->getLastRef(end);
1929
1930 if (!start || !end)
1931 continue;
1932
1933 start = startInst->getCISAOff();
1934 end = endInst->getCISAOff();
1935
1936 auto lrInfo = kernel.getKernelDebugInfo()->getLiveIntervalInfo(lr->getTopDcl());
1937 if (start != UNMAPPABLE_VISA_INDEX &&
1938 end != UNMAPPABLE_VISA_INDEX)
1939 {
1940 lrInfo->addLiveInterval(start, end);
1941 }
1942 }
1943 }
1944
updateDebugInfo(G4_Kernel & kernel,std::vector<vISA::LocalLiveRange * > & liveIntervals)1945 void updateDebugInfo(G4_Kernel& kernel, std::vector<vISA::LocalLiveRange*>& liveIntervals)
1946 {
1947 for (auto lr : liveIntervals)
1948 {
1949 if (lr->getAssigned())
1950 {
1951 uint32_t start, end;
1952 G4_INST* startInst = lr->getFirstRef(start);
1953 G4_INST* endInst = lr->getLastRef(end);
1954 start = startInst->getCISAOff();
1955 end = endInst->getCISAOff();
1956
1957 auto lrInfo = kernel.getKernelDebugInfo()->getLiveIntervalInfo(lr->getTopDcl());
1958 if (start != UNMAPPABLE_VISA_INDEX &&
1959 end != UNMAPPABLE_VISA_INDEX)
1960 {
1961 lrInfo->addLiveInterval(start, end);
1962 }
1963 }
1964 }
1965 }
1966
updateDebugInfo(G4_Kernel & kernel,std::vector<std::tuple<G4_Declare *,G4_INST *,G4_INST * >> augmentationLiveIntervals)1967 void updateDebugInfo(G4_Kernel& kernel, std::vector<std::tuple<G4_Declare*, G4_INST*, G4_INST*>> augmentationLiveIntervals)
1968 {
1969 // Invoked via augmentation pass
1970 for (auto& lr : augmentationLiveIntervals)
1971 {
1972 uint32_t start, end;
1973 G4_INST* startInst = std::get<1>(lr);
1974 G4_INST* endInst = std::get<2>(lr);
1975 start = startInst->getCISAOff();
1976 end = endInst->getCISAOff();
1977
1978 G4_Declare* topdcl = std::get<0>(lr);
1979 while (std::get<0>(lr)->getAliasDeclare() != NULL)
1980 {
1981 topdcl = topdcl->getAliasDeclare();
1982 }
1983
1984 auto lrInfo = kernel.getKernelDebugInfo()->getLiveIntervalInfo(topdcl);
1985 if (start != UNMAPPABLE_VISA_INDEX &&
1986 end != UNMAPPABLE_VISA_INDEX)
1987 {
1988 lrInfo->addLiveInterval(start, end);
1989 }
1990 }
1991 }
1992
updateDebugInfo(G4_Kernel & kernel,G4_Declare * dcl,uint32_t start,uint32_t end)1993 void updateDebugInfo(G4_Kernel& kernel, G4_Declare* dcl, uint32_t start, uint32_t end)
1994 {
1995 auto lrInfo = kernel.getKernelDebugInfo()->getLiveIntervalInfo(dcl);
1996 if (start != UNMAPPABLE_VISA_INDEX &&
1997 end != UNMAPPABLE_VISA_INDEX)
1998 {
1999 lrInfo->addLiveInterval(start, end);
2000 }
2001 }
2002
updateDebugInfo(G4_Kernel & kernel,G4_Declare * dcl,uint32_t offset)2003 void updateDebugInfo(G4_Kernel& kernel, G4_Declare* dcl, uint32_t offset)
2004 {
2005 auto lrInfo = kernel.getKernelDebugInfo()->getLiveIntervalInfo(dcl);
2006 lrInfo->liveAt(offset);
2007 }
2008
getBinInstSize(G4_INST * inst)2009 uint32_t getBinInstSize(G4_INST* inst)
2010 {
2011 uint32_t size = inst->isCompactedInst() ?
2012 (BYTES_PER_INST / 2) : BYTES_PER_INST;
2013
2014 return size;
2015 }
2016
computeDebugInfo(std::list<G4_BB * > & stackCallEntryBBs)2017 void KernelDebugInfo::computeDebugInfo(std::list<G4_BB*>& stackCallEntryBBs)
2018 {
2019 updateMapping(stackCallEntryBBs);
2020 updateRelocOffset();
2021 if (reloc_offset > 0)
2022 {
2023 updateCallStackLiveIntervals();
2024 }
2025 else
2026 {
2027 updateCallStackMain();
2028 }
2029 }
2030
updateCallStackMain()2031 void KernelDebugInfo::updateCallStackMain()
2032 {
2033 if (!getKernel().fg.getHasStackCalls())
2034 return;
2035
2036 // Set live-interval for BE_FP
2037 auto befp = getBEFP();
2038 if (befp)
2039 {
2040 uint32_t start = 0;
2041 if (getBEFPSetupInst())
2042 {
2043 start = (uint32_t)getBEFPSetupInst()->getGenOffset() +
2044 (uint32_t)getBinInstSize(getBEFPSetupInst());
2045 }
2046 updateDebugInfo(getKernel(), befp, start, mapCISAIndexGenOffset.back().GenOffset);
2047 }
2048 }
2049
updateCallStackLiveIntervals()2050 void KernelDebugInfo::updateCallStackLiveIntervals()
2051 {
2052 if (!getKernel().fg.getIsStackCallFunc() &&
2053 !getKernel().fg.getHasStackCalls())
2054 {
2055 return;
2056 }
2057
2058 uint32_t reloc_offset = 0;
2059 uint32_t start = 0xffffffff, end = 0;
2060
2061 // Update live-interval for following ranges:
2062 // be_fp, caller_be_fp, retval
2063 if (getKernel().fg.getIsStackCallFunc())
2064 {
2065 // Only stack call function has return variable
2066 auto fretVar = getKernel().getKernelDebugInfo()->getFretVar();
2067 auto fretVarLI = getKernel().getKernelDebugInfo()->getLiveIntervalInfo(fretVar);
2068 fretVarLI->clearLiveIntervals();
2069
2070 for (auto bbs : getKernel().fg)
2071 {
2072 for (auto insts : *bbs)
2073 {
2074 if (insts->getGenOffset() != UNDEFINED_GEN_OFFSET)
2075 {
2076 reloc_offset = (reloc_offset == 0) ?
2077 (uint32_t)insts->getGenOffset() : reloc_offset;
2078 break;
2079 }
2080 }
2081 if (reloc_offset > 0)
2082 break;
2083 }
2084
2085 uint32_t start = 0;
2086 if (getBEFPSetupInst())
2087 {
2088 // Frame descriptor can be addressed once once BE_FP is defined
2089 start = (uint32_t)getBEFPSetupInst()->getGenOffset() +
2090 getBinInstSize(getBEFPSetupInst());
2091 }
2092
2093 if (getCallerBEFPRestoreInst())
2094 {
2095 end = (uint32_t)getCallerBEFPRestoreInst()->getGenOffset();
2096 }
2097
2098 MUST_BE_TRUE(end >= reloc_offset, "Failed to update live-interval for retval");
2099 MUST_BE_TRUE(start >= reloc_offset, "Failed to update start for retval");
2100 MUST_BE_TRUE(end >= start, "end less then start for retval");
2101 for (uint32_t i = start - reloc_offset; i <= end - reloc_offset; i++)
2102 {
2103 updateDebugInfo(*kernel, fretVar, i);
2104 }
2105 }
2106
2107 auto befp = getBEFP();
2108 if (befp)
2109 {
2110 auto befpLIInfo = getLiveIntervalInfo(befp);
2111 befpLIInfo->clearLiveIntervals();
2112 auto befpSetupInst = getBEFPSetupInst();
2113 if (befpSetupInst)
2114 {
2115 start = (uint32_t)befpSetupInst->getGenOffset() +
2116 getBinInstSize(befpSetupInst);
2117 auto spRestoreInst = getCallerSPRestoreInst();
2118 if (spRestoreInst)
2119 {
2120 end = (uint32_t)spRestoreInst->getGenOffset();
2121 }
2122 for (uint32_t i = start - reloc_offset; i <= end - reloc_offset; i++)
2123 {
2124 updateDebugInfo(*kernel, befp, i);
2125 }
2126 }
2127
2128 MUST_BE_TRUE(start != 0xffffffff, "Cannot update stack vars1");
2129 MUST_BE_TRUE(end != 0, "Cannot update stack vars2");
2130 }
2131
2132 auto callerbefp = getCallerBEFP();
2133 if (callerbefp)
2134 {
2135 auto callerbefpLIInfo = getLiveIntervalInfo(callerbefp);
2136 callerbefpLIInfo->clearLiveIntervals();
2137 auto callerbeSaveInst = getCallerBEFPSaveInst();
2138 if (callerbeSaveInst)
2139 {
2140 auto callerbefpRestoreInst = getCallerBEFPRestoreInst();
2141 MUST_BE_TRUE(callerbefpRestoreInst != nullptr,
2142 "Instruction destroying caller be fp not found in epilog");
2143 start = (uint32_t)callerbeSaveInst->getGenOffset() - reloc_offset +
2144 getBinInstSize(callerbeSaveInst);
2145 end = (uint32_t)callerbefpRestoreInst->getGenOffset() - reloc_offset;
2146 for (uint32_t i = start;
2147 i <= end;
2148 i++)
2149 {
2150 updateDebugInfo(*kernel, callerbefp, i);
2151 }
2152 }
2153 }
2154 }
2155
updateExpandedIntrinsic(G4_InstIntrinsic * spillOrFill,G4_INST * inst)2156 void KernelDebugInfo::updateExpandedIntrinsic(G4_InstIntrinsic* spillOrFill, G4_INST* inst)
2157 {
2158 // This function looks up all caller/callee save code added.
2159 // Once it finds "spillOrFill", it adds inst to it. This is
2160 // because VISA now uses spill/fill intrinsics to model
2161 // save/restore. These intrinsics are expanded after RA is
2162 // done. So this method gets invoked after RA is done and
2163 // when intrinsics are expanded.
2164 for (auto& k : callerSaveRestore)
2165 {
2166 for (auto it = k.second.first.begin(); it != k.second.first.end(); ++it)
2167 {
2168 if ((*it) == spillOrFill)
2169 {
2170 k.second.first.insert(it, inst);
2171 return;
2172 }
2173 }
2174
2175 for (auto it = k.second.second.begin(); it != k.second.second.end(); ++it)
2176 {
2177 if ((*it) == spillOrFill)
2178 {
2179 k.second.second.insert(it, inst);
2180 return;
2181 }
2182 }
2183 }
2184
2185 for (auto it = calleeSaveRestore.first.begin(); it != calleeSaveRestore.first.end(); ++it)
2186 {
2187 if ((*it) == spillOrFill)
2188 {
2189 calleeSaveRestore.first.insert(it, inst);
2190 return;
2191 }
2192 }
2193
2194 for (auto it = calleeSaveRestore.second.begin(); it != calleeSaveRestore.second.end(); ++it)
2195 {
2196 if ((*it) == spillOrFill)
2197 {
2198 calleeSaveRestore.second.insert(it, inst);
2199 return;
2200 }
2201 }
2202 }
2203
addCallerSaveInst(G4_BB * fcallBB,G4_INST * inst)2204 void KernelDebugInfo::addCallerSaveInst(G4_BB* fcallBB, G4_INST* inst)
2205 {
2206 callerSaveRestore[fcallBB].first.push_back(inst);
2207 }
2208
addCallerRestoreInst(G4_BB * fcallBB,G4_INST * inst)2209 void KernelDebugInfo::addCallerRestoreInst(G4_BB* fcallBB, G4_INST* inst)
2210 {
2211 callerSaveRestore[fcallBB].second.push_back(inst);
2212 }
2213
addCalleeSaveInst(G4_INST * inst)2214 void KernelDebugInfo::addCalleeSaveInst(G4_INST* inst)
2215 {
2216 calleeSaveRestore.first.push_back(inst);
2217 }
2218
addCalleeRestoreInst(G4_INST * inst)2219 void KernelDebugInfo::addCalleeRestoreInst(G4_INST* inst)
2220 {
2221 calleeSaveRestore.second.push_back(inst);
2222 }
2223
getCallerSaveInsts(G4_BB * fcallBB)2224 std::vector<G4_INST*>& KernelDebugInfo::getCallerSaveInsts(G4_BB* fcallBB)
2225 {
2226 return callerSaveRestore[fcallBB].first;
2227 }
2228
getCallerRestoreInsts(G4_BB * fcallBB)2229 std::vector<G4_INST*>& KernelDebugInfo::getCallerRestoreInsts(G4_BB* fcallBB)
2230 {
2231 return callerSaveRestore[fcallBB].second;
2232 }
2233
getCalleeSaveInsts()2234 std::vector<G4_INST*>& KernelDebugInfo::getCalleeSaveInsts()
2235 {
2236 return calleeSaveRestore.first;
2237 }
2238
getCalleeRestoreInsts()2239 std::vector<G4_INST*>& KernelDebugInfo::getCalleeRestoreInsts()
2240 {
2241 return calleeSaveRestore.second;
2242 }
2243
isFcallWithSaveRestore(G4_BB * bb)2244 bool KernelDebugInfo::isFcallWithSaveRestore(G4_BB* bb)
2245 {
2246 // Debug emission happens after binary encoding
2247 // at which point all fcalls are converted to
2248 // calls. So G4_INST::isFCall() will always
2249 // be false
2250 bool retval = false;
2251 auto it = callerSaveRestore.find(bb);
2252 if (it != callerSaveRestore.end())
2253 {
2254 retval = true;
2255 }
2256
2257 return retval;
2258 }
2259
2260 // Compute extra instructions in insts over oldInsts list and
2261 // return a new list.
getDeltaInstructions(G4_BB * bb)2262 INST_LIST KernelDebugInfo::getDeltaInstructions(G4_BB* bb)
2263 {
2264 INST_LIST deltaInsts;
2265 for (auto instIt = bb->begin(); instIt != bb->end(); instIt++)
2266 deltaInsts.push_back(*instIt);
2267
2268 for (auto oldInstsIt : oldInsts)
2269 {
2270 deltaInsts.remove(oldInstsIt);
2271 }
2272
2273 return deltaInsts;
2274 }
2275
addInst(G4_INST * inst)2276 void SaveRestoreManager::addInst(G4_INST* inst)
2277 {
2278 SaveRestoreInfo newSVInfo;
2279 srInfo.push_back(newSVInfo);
2280 if (srInfo.size() > 1)
2281 {
2282 // Copy over from previous
2283 // so emitted data is
2284 // cumulative per IP.
2285 srInfo[srInfo.size() - 1].saveRestoreMap = srInfo[srInfo.size() - 2].saveRestoreMap;
2286 }
2287
2288 if (inst->opcode() == G4_add &&
2289 inst->getSrc(1) &&
2290 inst->getSrc(1)->isImm() &&
2291 inst->getSrc(0) &&
2292 inst->getSrc(0)->isSrcRegRegion() &&
2293 GetTopDclFromRegRegion(inst->getSrc(0)) == visaKernel->getKernel()->fg.builder->getBEFP())
2294 {
2295 memOffset = (int32_t)inst->getSrc(1)->asImm()->getImm();
2296 regWithMemOffset = inst->getDst()->getLinearizedStart() / numEltPerGRF<Type_UB>();
2297 absOffset = false;
2298 }
2299
2300 if (inst->opcode() == G4_mov &&
2301 inst->getSrc(0) &&
2302 inst->getSrc(0)->isImm() &&
2303 inst->getExecSize() == g4::SIMD1 &&
2304 inst->getDst() &&
2305 inst->getDst()->getLinearizedStart() % numEltPerGRF<Type_UB>() == 8)
2306 {
2307 memOffset = (int32_t)inst->getSrc(0)->asImm()->getImm();
2308 regWithMemOffset = inst->getDst()->getLinearizedStart() / numEltPerGRF<Type_UB>();
2309 absOffset = true;
2310 }
2311
2312 srInfo.back().update(inst, memOffset, regWithMemOffset, absOffset);
2313 }
2314
emitAll()2315 void SaveRestoreManager::emitAll()
2316 {
2317 for (auto it : srInfo)
2318 {
2319 #ifdef DEBUG_VERBOSE_ON
2320 it.getInst()->emit(std::cerr);
2321 #endif
2322 DEBUG_VERBOSE("\n");
2323
2324 for (auto mapIt : it.saveRestoreMap)
2325 {
2326 DEBUG_VERBOSE("\tr" << mapIt.first << ".0 (8):d saved to ");
2327 if (mapIt.second.first == SaveRestoreInfo::RegOrMem::Reg)
2328 {
2329 DEBUG_VERBOSE("r" << mapIt.second.second.regNum << ".0 (8):d\n");
2330 }
2331 else if (mapIt.second.first == SaveRestoreInfo::RegOrMem::MemAbs)
2332 {
2333 DEBUG_VERBOSE("mem at offset " << mapIt.second.second.offset << " bytes (abs)\n");
2334 }
2335 else if (mapIt.second.first == SaveRestoreInfo::RegOrMem::MemOffBEFP)
2336 {
2337 DEBUG_VERBOSE("mem at offset " << mapIt.second.second.offset << " bytes (off befp)\n");
2338 }
2339 }
2340 }
2341 }
2342
update(G4_INST * inst,int32_t memOffset,uint32_t regWithMemOffset,bool isOffAbs)2343 void SaveRestoreInfo::update(G4_INST* inst, int32_t memOffset, uint32_t regWithMemOffset, bool isOffAbs)
2344 {
2345 i = inst;
2346
2347 if (inst->getDst() &&
2348 inst->getDst()->isDstRegRegion())
2349 {
2350 auto dstreg = inst->getDst()->getLinearizedStart() / numEltPerGRF<Type_UB>();
2351
2352 // Remove any item in map that is saved as storage for some other reg.
2353 for (auto mapIt : saveRestoreMap)
2354 {
2355 if (mapIt.second.first == RegOrMem::Reg &&
2356 mapIt.second.second.regNum == dstreg)
2357 {
2358 DEBUG_VERBOSE("Removed r" << mapIt.second.second.regNum << ".0 (8):d\n");
2359 saveRestoreMap.erase(mapIt.first);
2360 break;
2361 }
2362 }
2363 }
2364
2365 if (inst->opcode() == G4_mov &&
2366 inst->getDst()->isDstRegRegion() &&
2367 inst->getSrc(0)->isSrcRegRegion())
2368 {
2369 unsigned int srcreg, dstreg;
2370 srcreg = inst->getSrc(0)->getLinearizedStart() / numEltPerGRF<Type_UB>();
2371 dstreg = inst->getDst()->getLinearizedStart() / numEltPerGRF<Type_UB>();
2372
2373 bool done = false;
2374 for (auto mapIt : saveRestoreMap)
2375 {
2376 if (mapIt.second.first == RegOrMem::Reg &&
2377 mapIt.second.second.regNum == srcreg &&
2378 mapIt.first == dstreg)
2379 {
2380 saveRestoreMap.erase(mapIt.first);
2381 done = true;
2382 DEBUG_VERBOSE("Restored r" << dstreg << ".0 (8):d from r" << srcreg << ".0 (8):d\n");
2383 break;
2384 }
2385 }
2386
2387 if (done == false)
2388 {
2389 auto it = saveRestoreMap.find(srcreg);
2390 if (it == saveRestoreMap.end())
2391 {
2392 // Entry not found so update map
2393 RegMap mapping;
2394 mapping.regNum = dstreg;
2395
2396 saveRestoreMap.insert(std::make_pair(srcreg, std::make_pair(RegOrMem::Reg, mapping)));
2397
2398 DEBUG_VERBOSE("Saved r" << srcreg << ".0 (8):d to r" << dstreg << ".0 (8):d\n");
2399 }
2400 }
2401 }
2402 else if (inst->isSend())
2403 {
2404 // send/read, send/write
2405 // sends/read, sends/write
2406 MUST_BE_TRUE(!inst->getMsgDesc()->isScratch(),
2407 "Not expecting scratch msg in save/restore code");
2408 if (inst->getMsgDesc()->isWrite())
2409 {
2410 uint32_t srcreg, extsrcreg = 0;
2411 srcreg = inst->getSrc(0)->getLinearizedStart() / numEltPerGRF<Type_UB>();
2412 if (inst->getMsgDesc()->getSrc1LenRegs() > 0)
2413 {
2414 extsrcreg = inst->getSrc(1)->getLinearizedStart()/numEltPerGRF<Type_UB>();
2415 }
2416
2417 MUST_BE_TRUE(memOffset != 0xffff, "Invalid mem offset");
2418 MUST_BE_TRUE(regWithMemOffset == srcreg, "Send src not initialized with offset");
2419
2420 std::vector<uint32_t> payloadRegs;
2421 for (uint32_t i = 1; i < (uint32_t)inst->getMsgDesc()->getSrc0LenRegs(); i++)
2422 {
2423 payloadRegs.push_back(i + srcreg);
2424 }
2425 for (uint32_t i = 0; i < (uint32_t)inst->getMsgDesc()->getSrc1LenRegs(); i++)
2426 {
2427 payloadRegs.push_back(i + extsrcreg);
2428 }
2429
2430 for (uint32_t i = 0; i < payloadRegs.size(); i++)
2431 {
2432 uint32_t payloadReg = payloadRegs[i];
2433 RegMap m;
2434 m.offset = (int32_t)((memOffset * numEltPerGRF<Type_UB>()/2) + (i * numEltPerGRF<Type_UB>()));
2435 m.isAbs = isOffAbs;
2436 saveRestoreMap.insert(std::make_pair(payloadReg,
2437 std::make_pair(isOffAbs ? RegOrMem::MemAbs : RegOrMem::MemOffBEFP, m)));
2438
2439 #ifdef DEBUG_VERBOSE_ON
2440 const char* offstr = isOffAbs ? "(abs)" : "(off besp)";
2441
2442 DEBUG_VERBOSE("Saved r" << payloadReg << ".0 (8):d to mem at offset "
2443 << m.offset << " bytes" << offstr << "\n");
2444 #endif
2445 }
2446 }
2447 else if (inst->getMsgDesc()->isRead())
2448 {
2449 uint32_t srcreg, dstreg;
2450 srcreg = inst->getSrc(0)->getLinearizedStart() / numEltPerGRF<Type_UB>();
2451 dstreg = inst->getDst()->getLinearizedStart() / numEltPerGRF<Type_UB>();
2452
2453 MUST_BE_TRUE(memOffset != 0xffff, "Invalid mem offset");
2454 MUST_BE_TRUE(regWithMemOffset == srcreg, "Send src not initialized with offset");
2455
2456 auto responselen = inst->getMsgDesc()->getDstLenRegs();
2457 int32_t startoff;
2458 startoff = memOffset * numEltPerGRF<Type_UB>() / 2;
2459
2460 for (auto reg = dstreg; reg < (responselen + dstreg); reg++)
2461 {
2462 int32_t offsetForReg = startoff + ((reg - dstreg) * numEltPerGRF<Type_UB>());
2463
2464 for (auto mapIt : saveRestoreMap)
2465 {
2466 if (mapIt.first == reg &&
2467 (mapIt.second.first == RegOrMem::MemAbs ||
2468 mapIt.second.first == RegOrMem::MemOffBEFP) &&
2469 mapIt.second.second.offset == offsetForReg)
2470 {
2471 saveRestoreMap.erase(mapIt.first);
2472
2473 #ifdef DEBUG_VERBOSE_ON
2474 const char* offstr = RegOrMem::MemAbs ? "abs" : "off befp";
2475 DEBUG_VERBOSE("Restored r" << reg << ".0 (8):d from mem offset " <<
2476 offsetForReg << " bytes (" << offstr << ")\n");
2477 #endif
2478 break;
2479 }
2480 }
2481 }
2482 }
2483 }
2484 }
2485
2486 #ifdef DEBUG_VERBOSE
dumpLiveInterval(LiveIntervalInfo * lv)2487 void dumpLiveInterval(LiveIntervalInfo* lv)
2488 {
2489 std::vector<std::pair<unsigned int, unsigned int>> v;
2490 lv->getLiveIntervals(v);
2491 for (auto it : v)
2492 {
2493 std::cerr << "(" << it.first << ", " << it.second << ")\n";
2494 }
2495 }
2496
emitSubRoutineInfo(VISAKernelImpl * visaKernel)2497 void emitSubRoutineInfo(VISAKernelImpl* visaKernel)
2498 {
2499 auto kernel = visaKernel->getKernel();
2500
2501 // Is there a single entry point for debugInfo?
2502 kernel->fg.setPhysicalPredSucc();
2503 for (auto bb : kernel->fg)
2504 {
2505 G4_INST* firstInst = nullptr;
2506 G4_INST* lastInst = nullptr;
2507 unsigned int start = 0, end = 0;
2508 G4_Declare* retval = nullptr;
2509 G4_Label* subLabel = nullptr;
2510 if (bb->isEndWithCall())
2511 {
2512 G4_BB* calleeBB = bb->Succs.front();
2513 while (firstInst == NULL && calleeBB != NULL)
2514 {
2515 if (calleeBB->size() > 0)
2516 {
2517 firstInst = calleeBB->front();
2518 start = firstInst->getCISAOff();
2519 subLabel = firstInst->getSrc(0)->asLabel();
2520 }
2521 }
2522
2523 calleeBB = bb->BBAfterCall()->Preds.front();
2524 while (lastInst == NULL && calleeBB != NULL)
2525 {
2526 if (calleeBB->size() > 0)
2527 {
2528 lastInst = calleeBB->back();
2529 end = lastInst->getCISAOff();
2530 MUST_BE_TRUE(lastInst->isReturn(), "Expecting to see G4_return as last inst in sub-routine");
2531 retval = lastInst->getSrc(0)->asSrcRegRegion()->getBase()->asRegVar()->getDeclare()->getRootDeclare();
2532 }
2533
2534 calleeBB = calleeBB->Preds.front();
2535 }
2536 std::cerr << "Func info id " << subLabel->getLabel() << "\n";
2537 std::cerr << "First inst " << start << ", last inst " << end << "\n";
2538 std::cerr << "Return value in dcl " << retval->getName() << "\n";
2539
2540 if (kernel->getKernelDebugInfo()->getLiveIntervalInfo(retval, false) != NULL)
2541 {
2542 std::cerr << "Found live-interval for retval range\n";
2543 auto lv = kernel->getKernelDebugInfo()->getLiveIntervalInfo(retval, false);
2544 dumpLiveInterval(lv);
2545 std::cerr << "\n";
2546 }
2547 }
2548 }
2549 }
2550
emitBEFP(VISAKernelImpl * visaKernel)2551 void emitBEFP(VISAKernelImpl* visaKernel)
2552 {
2553 // Compute both be fp of current frame and previous frame
2554 auto kernel = visaKernel->getKernel();
2555 auto befpDcl = kernel->getKernelDebugInfo()->getBEFP();
2556 if (befpDcl &&
2557 kernel->getKernelDebugInfo()->getLiveIntervalInfo(befpDcl, false))
2558 {
2559 std::cerr << "Found befp dcl at " << befpDcl->getName() << "\n";
2560 dumpLiveInterval(kernel->getKernelDebugInfo()->getLiveIntervalInfo(befpDcl, false));
2561 std::cerr << "\n";
2562 }
2563
2564 auto befpSetup = kernel->getKernelDebugInfo()->getBEFPSetupInst();
2565 if (befpSetup)
2566 {
2567 std::cerr << "befp setup inst found:\n";
2568 befpSetup->emit(std::cerr);
2569 std::cerr << "\n";
2570 }
2571
2572 auto spRestore = kernel->getKernelDebugInfo()->getCallerSPRestoreInst();
2573 if (spRestore)
2574 {
2575 std::cerr << "sp restore inst found:\n";
2576 spRestore->emit(std::cerr);
2577 std::cerr << "\n";
2578 }
2579
2580 auto callerfpdcl = kernel->getKernelDebugInfo()->getCallerBEFP();
2581 if (callerfpdcl &&
2582 kernel->getKernelDebugInfo()->getLiveIntervalInfo(callerfpdcl, false))
2583 {
2584 std::cerr << "Found caller befp dcl at " << callerfpdcl->getName() << "\n";
2585 dumpLiveInterval(kernel->getKernelDebugInfo()->getLiveIntervalInfo(callerfpdcl, false));
2586 std::cerr << "\n";
2587 }
2588
2589 auto fretVar = kernel->getKernelDebugInfo()->getFretVar();
2590 if (fretVar &&
2591 kernel->getKernelDebugInfo()->getLiveIntervalInfo(fretVar, false))
2592 {
2593 std::cerr << "fretvar " << fretVar->getName() << "\n";
2594 dumpLiveInterval(kernel->getKernelDebugInfo()->getLiveIntervalInfo(fretVar, false));
2595 std::cerr << "\n";
2596 }
2597
2598 auto frameSize = kernel->getKernelDebugInfo()->getFrameSize();
2599 std::cerr << "frame size = " << frameSize << " bytes" << "\n";
2600 }
2601
emitCallerSaveInfo(VISAKernelImpl * visaKernel)2602 void emitCallerSaveInfo(VISAKernelImpl* visaKernel)
2603 {
2604 auto kernel = visaKernel->getKernel();
2605
2606 for (auto bbs : kernel->fg)
2607 {
2608 if (bbs->size() > 0 &&
2609 kernel->getKernelDebugInfo()->isFcallWithSaveRestore(bbs))
2610 {
2611 auto& callerSaveInsts = kernel->getKernelDebugInfo()->getCallerSaveInsts(bbs);
2612 auto& callerRestoreInsts = kernel->getKernelDebugInfo()->getCallerRestoreInsts(bbs);
2613
2614 std::cerr << "Caller save for ";
2615 bbs->back()->emit(std::cerr);
2616 std::cerr << "\n";
2617
2618 SaveRestoreManager mgr(visaKernel);
2619 for (auto callerSave : callerSaveInsts)
2620 {
2621 mgr.addInst(callerSave);
2622 }
2623
2624 for (auto callerRestore : callerRestoreInsts)
2625 {
2626 mgr.addInst(callerRestore);
2627 }
2628
2629 mgr.emitAll();
2630
2631 std::cerr << "\n";
2632 }
2633 }
2634 }
2635
emitCalleeSaveInfo(VISAKernelImpl * visaKernel)2636 void emitCalleeSaveInfo(VISAKernelImpl* visaKernel)
2637 {
2638 G4_Kernel* kernel = visaKernel->getKernel();
2639
2640 std::cerr << "\nCallee save:\n";
2641 SaveRestoreManager mgr(visaKernel);
2642 for (auto calleeSave : kernel->getKernelDebugInfo()->getCalleeSaveInsts())
2643 {
2644 mgr.addInst(calleeSave);
2645 }
2646
2647 for (auto calleeRestore : kernel->getKernelDebugInfo()->getCalleeRestoreInsts())
2648 {
2649 mgr.addInst(calleeRestore);
2650 }
2651
2652 mgr.emitAll();
2653
2654 std::cerr << "\n";
2655 }
2656
dumpCFG(VISAKernelImpl * visaKernel)2657 void dumpCFG(VISAKernelImpl* visaKernel)
2658 {
2659 G4_Kernel* kernel = visaKernel->getKernel();
2660 auto reloc_offset = 0;
2661 bool done = false;
2662
2663 for (auto bbs : kernel->fg)
2664 {
2665 for (auto insts : *bbs)
2666 {
2667 if (insts->getGenOffset() != UNDEFINED_GEN_OFFSET)
2668 {
2669 if (!done)
2670 {
2671 reloc_offset = (uint32_t)insts->getGenOffset();
2672 done = true;
2673 }
2674 std::cerr << insts->getGenOffset() - reloc_offset;
2675 }
2676 std::cerr << "\t";
2677 insts->emit(std::cerr);
2678 std::cerr << "\n";
2679 }
2680 }
2681 }
2682
addCallFrameInfo(VISAKernelImpl * kernel)2683 void addCallFrameInfo(VISAKernelImpl* kernel)
2684 {
2685 std::cerr << "\n\n\n";
2686
2687 if (kernel->getKernel()->fg.getIsStackCallFunc())
2688 {
2689 std::cerr << "Stack call function " << kernel->getKernel()->getName() << "\n";
2690 }
2691 else
2692 {
2693 std::cerr << "Kernel " << kernel->getKernel()->getName() << "\n";
2694 }
2695 std::cerr << "\n";
2696
2697 emitSubRoutineInfo(kernel);
2698
2699 emitBEFP(kernel);
2700
2701 emitCallerSaveInfo(kernel);
2702
2703 emitCalleeSaveInfo(kernel);
2704
2705 dumpCFG(kernel);
2706 }
2707 #endif
2708
getLiveIntervalInfo(G4_Declare * dcl,bool createIfNULL)2709 LiveIntervalInfo* KernelDebugInfo::getLiveIntervalInfo(G4_Declare* dcl, bool createIfNULL)
2710 {
2711 dcl = dcl->getRootDeclare();
2712
2713 LiveIntervalInfo* lr = NULL;
2714 auto it = debugInfoLiveIntervalMap.find(dcl);
2715 if (it == debugInfoLiveIntervalMap.end())
2716 {
2717 if (createIfNULL)
2718 {
2719 lr = new (kernel->fg.mem) LiveIntervalInfo();
2720 debugInfoLiveIntervalMap.insert(std::make_pair(dcl, lr));
2721 }
2722 }
2723 else
2724 {
2725 lr = it->second;
2726 }
2727
2728 return lr;
2729 }
2730
2731 // TODO: Check result in presence of spill code and stack calling convention
2732