1 /*========================== begin_copyright_notice ============================
2 
3 Copyright (C) 2017-2021 Intel Corporation
4 
5 SPDX-License-Identifier: MIT
6 
7 ============================= end_copyright_notice ===========================*/
8 
9 #include "DebugInfo.h"
10 #include "Common_ISA.h"
11 #include "G4_IR.hpp"
12 #include "FlowGraph.h"
13 #include "BuildIR.h"
14 #include "Common_ISA_framework.h"
15 #include "VISAKernel.h"
16 #include "BitSet.h"
17 
18 #include <map>
19 
20 using namespace vISA;
21 
22 using std::fclose;
23 using std::fopen;
24 using std::fwrite;
25 using std::FILE;
26 
27 uint32_t getBinInstSize(G4_INST* inst);
28 
// Decodes a 31-bit two's-complement value stored in the low 31 bits of
// `data` and returns it sign-extended to 32 bits.
//
// The MSB (bit 31) of `data` is discarded; callers in this file use that
// bit as a separate "absolute offset" flag. Bit 30 is the sign bit of the
// 31-bit payload.
//
// The computation is done on unsigned/64-bit values so that it relies on
// neither left-shifting a negative signed integer nor on the
// implementation-defined behaviour of right-shifting one.
int32_t get32BitSignedIntFrom31BitSignedInt(uint32_t data)
{
    // Drop the flag bit, keep the 31-bit payload.
    const uint32_t payload = data & 0x7FFFFFFFu;
    const bool isNeg = (payload & 0x40000000u) != 0;

    if (!isNeg)
    {
        return static_cast<int32_t>(payload);
    }

    // Negative payload: its value is payload - 2^31, which lies in
    // [-2^30, -1] and therefore always fits in int32_t.
    const int64_t extended = static_cast<int64_t>(payload) - (static_cast<int64_t>(1) << 31);
    return static_cast<int32_t>(extended);
}
43 
ddName()44 void DbgDecoder::ddName()
45 {
46     uint16_t nameLen;
47     auto retval = fread(&nameLen, sizeof(uint16_t), 1, dbgFile);
48     if (!retval)
49         return;
50 
51     auto name = (char*)malloc(nameLen + 1);
52     retval = fread(name, sizeof(uint8_t), nameLen, dbgFile);
53     if (!retval)
54     {
55         free(name);
56         return;
57     }
58 
59     name[nameLen] = 0;
60 
61     std::cout << name;
62 
63     free(name);
64 }
65 
66 template<class T>
ddLiveInterval()67 void DbgDecoder::ddLiveInterval()
68 {
69     // Dump live-interval info
70     uint16_t numLiveIntervals;
71     T start, end;
72     auto retval = fread(&numLiveIntervals, sizeof(uint16_t), 1, dbgFile);
73     if (!retval)
74         return;
75 
76     std::cout << "\tLive intervals: \n";
77     for (uint16_t i = 0; i < numLiveIntervals; i++)
78     {
79         retval = fread(&start, sizeof(T), 1, dbgFile);
80         if (!retval)
81             return;
82 
83         retval = fread(&end, sizeof(T), 1, dbgFile);
84         if (!retval)
85             return;
86 
87         std::cout << "(" << start << ", " << end << ") @ ";
88 
89         uint8_t virtualType;
90         retval = fread(&virtualType, sizeof(uint8_t), 1, dbgFile);
91         if (!retval)
92             return;
93 
94         if (virtualType == VARMAP_VREG_FILE_ADDRESS)
95         {
96             std::cout << "\t";
97         }
98         else if (virtualType == VARMAP_VREG_FILE_FLAG)
99         {
100             std::cout << "\t";
101         }
102         else if (virtualType == VARMAP_VREG_FILE_GRF)
103         {
104             std::cout << "\t";
105         }
106         else
107         {
108             MUST_BE_TRUE(false, "Unknown virtual type found");
109         }
110 
111         uint8_t physicalType;
112         retval = fread(&physicalType, sizeof(uint8_t), 1, dbgFile);
113         if (!retval)
114             return;
115 
116         if (physicalType == VARMAP_PREG_FILE_ADDRESS)
117         {
118             std::cout << "a";
119         }
120         else if (physicalType == VARMAP_PREG_FILE_FLAG)
121         {
122             std::cout << "f";
123         }
124         else if (physicalType == VARMAP_PREG_FILE_GRF)
125         {
126             std::cout << "r";
127         }
128         else if (physicalType == VARMAP_PREG_FILE_MEMORY)
129         {
130             std::cout << "Spilled";
131         }
132         else
133         {
134             MUST_BE_TRUE(false, "Unknown physical type found");
135         }
136 
137         if (physicalType == VARMAP_PREG_FILE_MEMORY)
138         {
139             uint32_t memoryOffset;
140             bool isAbsoluteOffset = false;
141             retval = fread(&memoryOffset, sizeof(uint32_t), 1, dbgFile);
142             if (!retval)
143                 return;
144 
145             if (memoryOffset & 0x80000000)
146             {
147                 isAbsoluteOffset = true;
148             }
149 
150             std::cout << " (offset = " << get32BitSignedIntFrom31BitSignedInt(memoryOffset) << " bytes)" <<
151                 (isAbsoluteOffset ? " (absolute offset)" : " (off be_fp)") <<
152                 "\n";
153         }
154         else
155         {
156             uint16_t regNum, subRegNum;
157             retval = fread(&regNum, sizeof(uint16_t), 1, dbgFile);
158             if (!retval)
159                 return;
160 
161             retval = fread(&subRegNum, sizeof(uint16_t), 1, dbgFile);
162             if (!retval)
163                 return;
164 
165             std::cout << regNum << "." << subRegNum;
166 
167             if (physicalType == VARMAP_PREG_FILE_GRF)
168             {
169                 std::cout << ":ub";
170             }
171 
172             std::cout << "\n";
173 
174         }
175     }
176     std::cout << "\n";
177 }
178 
ddCalleeCallerSave(uint32_t relocOffset)179 void DbgDecoder::ddCalleeCallerSave(uint32_t relocOffset)
180 {
181     uint16_t num;
182 
183     if (feof(dbgFile))
184     {
185         return;
186     }
187 
188     auto retval = fread(&num, sizeof(uint16_t), 1, dbgFile);
189     if (!retval)
190         return;
191 
192     for (uint32_t i = 0; i < num; i++)
193     {
194         uint32_t genOffset;
195         retval = fread(&genOffset, sizeof(uint32_t), 1, dbgFile);
196         if (!retval)
197             return;
198 
199         std::cout << "Gen ISA offset: " << genOffset << "\n";
200 
201         uint16_t numElems;
202         retval = fread(&numElems, sizeof(uint16_t), 1, dbgFile);
203         if (!retval)
204             return;
205 
206         for (uint32_t j = 0; j < numElems; j++)
207         {
208             uint16_t srcReg, numBytes;
209             retval = fread(&srcReg, sizeof(uint16_t), 1, dbgFile);
210             if (!retval)
211                 return;
212 
213             retval = fread(&numBytes, sizeof(uint16_t), 1, dbgFile);
214             if (!retval)
215                 return;
216 
217             uint8_t subReg = srcReg%numEltPerGRF<Type_UB>();
218             MUST_BE_TRUE(subReg == 0, "Not expecting non-zero sub-reg in callee/caller save");
219             std::cout << "\tr" << (srcReg) / numEltPerGRF<Type_UB>() << "." <<
220                 (uint32_t)subReg << ":ub (" << numBytes << " bytes) -> ";
221 
222             uint8_t dstInReg;
223             retval = fread(&dstInReg, sizeof(uint8_t), 1, dbgFile);
224             if (!retval)
225                 return;
226 
227             if (dstInReg)
228             {
229                 uint16_t reg, subreg;
230                 retval = fread(&reg, sizeof(uint16_t), 1, dbgFile);
231                 if (!retval)
232                     return;
233 
234                 retval = fread(&subreg, sizeof(uint16_t), 1, dbgFile);
235                 if (!retval)
236                     return;
237 
238                 std::cout << "r" << reg << "." << subreg << ":ub" << "\n";
239             }
240             else
241             {
242                 uint32_t memOffset;
243                 retval = fread(&memOffset, sizeof(uint32_t), 1, dbgFile);
244                 if (!retval)
245                     return;
246 
247                 if (memOffset & 0x80000000)
248                 {
249                     std::cout << get32BitSignedIntFrom31BitSignedInt(memOffset);
250                 }
251                 else
252                 {
253                     std::cout << "BE_FP + " << memOffset;
254                 }
255 
256                 std::cout << " bytes" << "\n";
257             }
258         }
259     }
260 }
261 
ddDbg()262 int DbgDecoder::ddDbg()
263 {
264     dbgFile = fopen(filename, "rb");
265 
266     if (!dbgFile)
267     {
268         std::cerr << "Error opening and creating debug file: " << filename << "\n";
269         ASSERT_USER(false, "Unable to wrie debug file to disk.");
270         return -1;
271     }
272 
273     uint32_t magic;
274     auto retval = fread(&magic, sizeof(uint32_t), 1, dbgFile);
275     if (!retval)
276         return -1;
277 
278     std::cout << "=== Start of Debug Dump ===" << "\n";
279     std::cout << "Magic: " << "0x" << std::hex << magic << std::dec << "\n";
280     if (magic != DEBUG_MAGIC_NUMBER)
281     {
282         std::cout << "************ Magic expected = " << "0x" << std::hex << DEBUG_MAGIC_NUMBER << std::dec << " *************" << "\n";
283 
284         fclose(dbgFile);
285 
286         return -1;
287     }
288 
289     uint16_t numCompiledObjects;
290     retval = fread(&numCompiledObjects, sizeof(uint16_t), 1, dbgFile);
291     if (!retval)
292         return -1;
293 
294     std::cout << "Number of compiled objects: " << numCompiledObjects << "\n\n";
295 
296     for (unsigned int i = 0; i < numCompiledObjects; i++)
297     {
298         std::cout << "Current compiled object index: " << i << "\n";
299 
300         std::cout << "Kernel name: ";
301         ddName();
302         std::cout << "\n";
303 
304         uint32_t reloc_offset;
305         retval = fread(&reloc_offset, sizeof(uint32_t), 1, dbgFile);
306         if (!retval)
307             return -1;
308 
309         if (reloc_offset == 0)
310         {
311             std::cout << "(kernel)\n";
312         }
313         else
314         {
315             std::cout << "(function binary @ gen offset " << reloc_offset << " bytes)" << "\n";
316         }
317 
318         uint32_t numElementsCISAOffsetMap;
319         retval = fread(&numElementsCISAOffsetMap, sizeof(uint32_t), 1, dbgFile);
320         if (!retval)
321             return -1;
322 
323         std::cout << "CISA byte offset -> Gen byte offset mapping\n";
324 
325         for (unsigned int j = 0; j < numElementsCISAOffsetMap; j++)
326         {
327             uint32_t cisaOffset, genOffset;
328             retval = fread(&cisaOffset, sizeof(uint32_t), 1, dbgFile);
329             if (!retval)
330                 return -1;
331 
332             retval = fread(&genOffset, sizeof(uint32_t), 1, dbgFile);
333             if (!retval)
334                 return -1;
335 
336             std::cout << cisaOffset << "\t" << genOffset << "\n";
337         }
338 
339         std::cout << "\n";
340 
341         uint32_t numElementsCISAIndexMap;
342         retval = fread(&numElementsCISAIndexMap, sizeof(uint32_t), 1, dbgFile);
343         if (!retval)
344             return -1;
345 
346         std::cout << "CISA index -> Gen byte offset mapping\n";
347 
348         for (unsigned int j = 0; j < numElementsCISAIndexMap; j++)
349         {
350             uint32_t cisaIndex, genOffset;
351             retval = fread(&cisaIndex, sizeof(uint32_t), 1, dbgFile);
352             if (!retval)
353                 return -1;
354 
355             retval = fread(&genOffset, sizeof(uint32_t), 1, dbgFile);
356             if (!retval)
357                 return -1;
358 
359             std::cout << cisaIndex << "\t" << genOffset << "\n";
360         }
361 
362         std::cout << "\n";
363 
364         uint32_t numElementsVarMap;
365         retval = fread(&numElementsVarMap, sizeof(uint32_t), 1, dbgFile);
366         if (!retval)
367             return -1;
368 
369         std::cout << "Virtual Register -> Physical Register mapping\n";
370 
371         for (unsigned int j = 0; j < numElementsVarMap; j++)
372         {
373             ddName();
374 
375             ddLiveInterval<uint16_t>();
376         }
377         std::cout << "\n\n";
378 
379         // Read sub-info
380         uint16_t numSubs;
381         retval = fread(&numSubs, sizeof(uint16_t), 1, dbgFile);
382         if (!retval)
383             return -1;
384 
385         std::cout << "Number of subroutines: " << numSubs << "\n";
386 
387         for (unsigned int j = 0; j < numSubs; j++)
388         {
389             std::cout << "Subroutine name: ";
390             ddName();
391             std::cout << "\n";
392             uint32_t startoffset = 0, endOffset = 0;
393             retval = fread(&startoffset, sizeof(uint32_t), 1, dbgFile);
394             if (!retval)
395                 return -1;
396 
397             retval = fread(&endOffset, sizeof(uint32_t), 1, dbgFile);
398             if (!retval)
399                 return -1;
400 
401             std::cout << "Start VISA: " << startoffset << ", end VISA: " << endOffset << "\n";
402             std::cout << "Retval: \n";
403             ddLiveInterval<uint16_t>();
404         }
405 
406         std::cout << "\n";
407         uint16_t frameSize;
408         retval = fread(&frameSize, sizeof(uint16_t), 1, dbgFile);
409         if (!retval)
410             return -1;
411 
412         std::cout << "Frame size: " << frameSize << " bytes\n";
413 
414         uint8_t scratch;
415         retval = fread(&scratch, sizeof(uint8_t), 1, dbgFile);
416         if (!retval)
417             return -1;
418 
419         if (scratch)
420         {
421             std::cout << "BE_FP: \n";
422             ddLiveInterval<uint32_t>();
423         }
424         else
425         {
426             std::cout << "BE_FP not found";
427         }
428 
429         std::cout << "\n";
430 
431         retval = fread(&scratch, sizeof(uint8_t), 1, dbgFile);
432         if (!retval)
433             return -1;
434 
435         if (scratch)
436         {
437             std::cout << "Caller BE_FP saved at:\n";
438             ddLiveInterval<uint32_t>();
439         }
440         else
441         {
442             std::cout << "Caller BE_FP not saved";
443         }
444 
445         std::cout << "\n";
446 
447         retval = fread(&scratch, sizeof(uint8_t), 1, dbgFile);
448         if (!retval)
449             return -1;
450 
451         if (scratch)
452         {
453             std::cout << "Return addr saved at:\n";
454             ddLiveInterval<uint32_t>();
455         }
456         else
457         {
458             std::cout << "Return addr not stored";
459         }
460 
461         std::cout << "\n";
462 
463         std::cout << "Callee save:\n";
464         ddCalleeCallerSave(reloc_offset);
465         std::cout << "\n";
466 
467         std::cout << "Caller save:\n";
468         ddCalleeCallerSave(reloc_offset);
469         std::cout << "\n";
470     }
471 
472     std::cout << "=== End of Debug Dump ===\n";
473 
474     fclose(dbgFile);
475 
476     return 0;
477 }
478 
decodeAndDumpDebugInfo(char * filename)479 DEBUG_RELEASE_INTERNAL_DLL_EXPORT_ONLY int decodeAndDumpDebugInfo(char* filename)
480 {
481     DbgDecoder dd(filename);
482     return dd.ddDbg();
483 }
484 
getGRF(G4_Declare * dcl,unsigned int & regNum,unsigned int & subRegNumInBytes)485 void getGRF(G4_Declare* dcl, unsigned int& regNum, unsigned int& subRegNumInBytes)
486 {
487     if (dcl->getRegVar()->getPhyReg() != NULL)
488     {
489         regNum = dcl->getRegVar()->getPhyReg()->asGreg()->getRegNum();
490         subRegNumInBytes = dcl->getRegVar()->getPhyRegOff() * dcl->getElemSize();
491     }
492     else
493     {
494         regNum = 65535;
495         subRegNumInBytes = 65535;
496     }
497 }
498 
isMissingVISAId(unsigned int id)499 bool KernelDebugInfo::isMissingVISAId(unsigned int id)
500 {
501     if (!missingVISAIdsComputed)
502     {
503         computeMissingVISAIds();
504     }
505 
506     return (missingVISAIds.find(id) != missingVISAIds.end());
507 }
508 
markStackCallFuncDcls(G4_Kernel & function)509 void vISA::KernelDebugInfo::markStackCallFuncDcls(G4_Kernel& function)
510 {
511     // Store all dcls that appear in stack call functions. This is to allow
512     // debug info module to differentiate between dcls from kernel and stack call
513     // function. Stitching operation transfers all callee dcls to kernel, so
514     // kernel.Declares is a superset of kernel, stack call dcls.
515     for (auto dcl : function.Declares)
516     {
517         stackCallDcls.insert(dcl);
518     }
519 }
520 
computeMissingVISAIds()521 void KernelDebugInfo::computeMissingVISAIds()
522 {
523     unsigned int maxCISAId = 0;
524 
525     for (auto bb : getKernel().fg)
526     {
527         for (auto inst : *bb)
528         {
529             if (inst->getCISAOff() != UNMAPPABLE_VISA_INDEX &&
530                 (unsigned int)inst->getCISAOff() > maxCISAId)
531             {
532                 maxCISAId = inst->getCISAOff();
533             }
534         }
535     }
536 
537     std::vector<bool> seenVISAIds(maxCISAId+1, false);
538 
539     for (auto bb : getKernel().fg)
540     {
541         for (auto inst : *bb)
542         {
543             if (inst->getCISAOff() != UNMAPPABLE_VISA_INDEX)
544             {
545                 seenVISAIds[inst->getCISAOff()] = true;
546             }
547         }
548     }
549 
550     for (unsigned int i = 0, size = seenVISAIds.size(); i < size; i++)
551     {
552         if (!seenVISAIds[i])
553         {
554             missingVISAIds.insert(i);
555         }
556     }
557 
558     missingVISAIdsComputed = true;
559 }
560 
// Rebuilds the debug mappings held by this object.
//
// Calls reset() first (presumably to discard results of an earlier run --
// confirm against its definition), then runs the four generators in a fixed
// order. generateCISAByteOffsetFromOffset() iterates mapCISAIndexGenOffset,
// which generateByteOffsetMapping() fills, so it has to run after it.
//
// stackCallEntryBBs: entry blocks of the stack call functions in this
// compilation unit; forwarded to generateByteOffsetMapping().
void KernelDebugInfo::updateMapping(std::list<G4_BB*>& stackCallEntryBBs)
{
    reset();

    generateByteOffsetMapping(stackCallEntryBBs);
    emitRegisterMapping();
    generateCISAByteOffsetFromOffset();
    generateGenISAToVISAIndex();
}
570 
generateGenISAToVISAIndex()571 void KernelDebugInfo::generateGenISAToVISAIndex()
572 {
573     // Generate list of Gen ISA offset -> VISA index
574     // This is used to emit debug_ranges section in IGC.
575     // Inserting entries per Gen ISA offset guarantees
576     // all instructions will be present in the vector.
577     for (auto bb : kernel->fg)
578     {
579         for (auto inst : *bb)
580         {
581             if (inst->getGenOffset() == -1)
582                 continue;
583             genISAOffsetToVISAIndex.push_back(IDX_VDbgGen2CisaIndex{(unsigned int)inst->getGenOffset(), (unsigned int)inst->getCISAOff()});
584         }
585     }
586 }
587 
// Associates this debug-info object with the vISA kernel `k`: stores `k`
// in visaKernel and stores the result of k->getKernel() in `kernel`.
// `k` is dereferenced unconditionally, so it must not be null.
void KernelDebugInfo::setVISAKernel(VISAKernelImpl* k)
{
    visaKernel = k;
    kernel = k->getKernel();
}
593 
generateCISAByteOffsetFromOffset()594 void KernelDebugInfo::generateCISAByteOffsetFromOffset()
595 {
596     // Using map1 and map2, generate map3
597     for (decltype(mapCISAIndexGenOffset)::iterator it = mapCISAIndexGenOffset.begin();
598         it != mapCISAIndexGenOffset.end();
599         it++)
600     {
601         // Read each entry in CISA Index->Gen Offset then map CISA Index to CISA Offset.
602         // Push back results.
603         unsigned int cisaIndex = (*it).CisaIndex;
604         unsigned int genOffset = (*it).GenOffset;
605 
606         std::map<unsigned int, unsigned int>::iterator map_it = mapCISAOffset.find(cisaIndex);
607 
608         if (map_it != mapCISAOffset.end())
609         {
610             unsigned int cisaOffset = mapCISAOffset.find(cisaIndex)->second;
611             mapCISAOffsetGenOffset.push_back(IDX_VDbgCisaByte2Gen{cisaOffset, genOffset});
612         }
613     }
614 }
615 
generateByteOffsetMapping(std::list<G4_BB * > & stackCallEntryBBs)616 void KernelDebugInfo::generateByteOffsetMapping(std::list<G4_BB*>& stackCallEntryBBs)
617 {
618     // When compiling stack call functions, all stack call functions
619     // invoked are stitched to kernel being compiled. So G4_BBs of
620     // all stack call functions are appended to G4_Kernel's BB list.
621     // We need a way to differentiate between BBs of kernel and those
622     // of functions to emit out correct debug info. So a list is
623     // passed - stackCallEntryBBs that holds entryBBs of all stack
624     // call functions part of this compilation unit.
625 
626     bool done = false;
627     unsigned int maxVISAIndex = 0;
628     uint64_t maxGenIsaOffset = 0;
629     // Now traverse CFG, create pair of CISA byte offset, gen binary offset and push to vector
630     for (BB_LIST_ITER bb_it = kernel->fg.begin(), bbEnd = kernel->fg.end(); bb_it != bbEnd; bb_it++)
631     {
632         G4_BB* bb = (*bb_it);
633 
634         int isaPrevByteOffset = -1;
635 
636         if (kernel->fg.builder->getIsKernel())
637         {
638             auto entryBBend = stackCallEntryBBs.end();
639             for (auto entryBBIt = stackCallEntryBBs.begin();
640                 entryBBIt != entryBBend;
641                 entryBBIt++)
642             {
643                 if (bb == (*entryBBIt))
644                 {
645                     // Since we are traversing BBs in layout
646                     // order, we will parse all kernel BBs
647                     // first and as soon as we reach entryBB
648                     // of first stack call function, we stop
649                     // processing.
650                     done = true;
651                     break;
652                 }
653             }
654         }
655 
656         if (done == true)
657         {
658             break;
659         }
660 
661         for (INST_LIST_ITER inst_it = bb->begin(), bbEnd = bb->end();
662             inst_it != bbEnd;
663             inst_it++)
664         {
665             G4_INST* inst = (*inst_it);
666 
667             if (inst->getGenOffset() != UNDEFINED_GEN_OFFSET)
668             {
669                 int cisaByteIndex = inst->getCISAOff();
670                 maxGenIsaOffset = (uint64_t)inst->getGenOffset() +
671                                     (inst->isCompactedInst() ? 8 : 16);
672                 if (cisaByteIndex == -1)
673                 {
674                     continue;
675                 }
676 
677                 maxVISAIndex = std::max(maxVISAIndex, (unsigned int)cisaByteIndex);
678 
679                 if (isaPrevByteOffset != cisaByteIndex)
680                 {
681                     isaPrevByteOffset = cisaByteIndex;
682 
683                     // mapping holds pair of CISA bytecode index and gen Offset
684                     // Use VISAKernelImpl's member mapCISAOffset to convert
685                     // CISA bytecode index to CISA bytecode byte offset
686                     mapCISAIndexGenOffset.push_back(IDX_VDbgCisaIndex2Gen{(unsigned)cisaByteIndex, (unsigned)inst->getGenOffset()});
687                 }
688             }
689         }
690     }
691 
692     // Insert out-of-sequence entry in to VISA index->Gen offset map
693     mapCISAIndexGenOffset.push_back(IDX_VDbgCisaIndex2Gen{++maxVISAIndex, (unsigned int)maxGenIsaOffset});
694 }
695 
emitRegisterMapping()696 void KernelDebugInfo::emitRegisterMapping()
697 {
698     // Emit out mapping between
699     // virtual variables -> physical registers
700     // In case a variable has been spilled to memory,
701     // emit out memory offset.
702     // For address/flag registers, spill location is
703     // GRF registers. Only general variables, ie GRF
704     // candidates can be spilled to memory.
705 
706     for (DECLARE_LIST_ITER dcl_it = getKernel().Declares.begin();
707         dcl_it != getKernel().Declares.end();
708         dcl_it++)
709     {
710         G4_Declare* dcl = (*dcl_it);
711         if (getKernel().fg.isPseudoDcl(dcl) ||
712             (dcl->getRegVar()->getPhyReg() &&
713                 dcl->getRegVar()->getPhyReg()->isAreg() &&
714                 !dcl->getRegVar()->getPhyReg()->isFlag() &&
715                 !dcl->getRegVar()->getPhyReg()->isA0()))
716         {
717             // These pseudo nodes may or may not get
718             // an allocation depending on register
719             // pressure across fcall. There is no
720             // need to look at allocation results
721             // for these as far as debug info goes.
722             continue;
723         }
724 
725         if (!getKernel().fg.getIsStackCallFunc())
726         {
727             // Skip iterating over dcls of callee stack call function.
728             auto it = stackCallDcls.find(dcl);
729             if (it != stackCallDcls.end())
730                 continue;
731         }
732 
733         VarnameMap* varMap = (VarnameMap*)getKernel().fg.builder->mem.alloc(sizeof(struct VarnameMap));
734         varMap->dcl = dcl;
735 
736         if ((dcl->getRegFile() == G4_GRF || dcl->getRegFile() == G4_INPUT) &&
737             dcl->getRegVar()->isNullReg() == false)
738         {
739             // GRF candidate can be either in GRF or
740             // spilled to memory
741             bool isSpilled = dcl->isSpilled() && (dcl->getRegVar()->getPhyReg() == NULL);
742             varMap->virtualType = VARMAP_VREG_FILE_GRF;
743 
744             if (!isSpilled)
745             {
746                 unsigned int regNum, subRegNumInBytes;
747                 getGRF(dcl, regNum, subRegNumInBytes);
748                 varMap->physicalType = VARMAP_PREG_FILE_GRF;
749                 varMap->Mapping.Register.regNum = static_cast<uint16_t>(regNum);
750                 varMap->Mapping.Register.subRegNum = static_cast<uint16_t>(subRegNumInBytes);
751                 varsMap.push_back(varMap);
752             }
753             else
754             {
755                 unsigned int spillOffset = 0;
756                 while (dcl->getAliasDeclare() != NULL)
757                 {
758                     spillOffset += dcl->getAliasOffset();
759                     dcl = dcl->getAliasDeclare();
760                 }
761                 spillOffset += dcl->getRegVar()->getDisp();
762                 varMap->physicalType = VARMAP_PREG_FILE_MEMORY;
763                 if (getKernel().fg.getHasStackCalls() == false)
764                 {
765                     varMap->Mapping.Memory.isAbs = 1;
766                 }
767                 else
768                 {
769                     varMap->Mapping.Memory.isAbs = 0;
770                 }
771                 varMap->Mapping.Memory.memoryOffset = (int32_t)spillOffset;
772                 varsMap.push_back(varMap);
773             }
774         }
775         else if (dcl->getRegFile() == G4_ADDRESS)
776         {
777             bool isSpilled = dcl->isSpilled() && (dcl->getRegVar()->getPhyReg() == NULL);
778             varMap->virtualType = VARMAP_VREG_FILE_ADDRESS;
779 
780             if (!isSpilled)
781             {
782                 unsigned int subRegNum;
783                 subRegNum = dcl->getRegVar()->getPhyRegOff();
784                 varMap->physicalType = VARMAP_PREG_FILE_ADDRESS;
785                 varMap->Mapping.Register.regNum = 0;
786                 varMap->Mapping.Register.subRegNum = static_cast<uint16_t>(subRegNum);
787                 varsMap.push_back(varMap);
788             }
789             else
790             {
791                 // Spilled to GRF
792                 if (!dcl->getSpilledDeclare()->isSpilled())
793                 {
794                     unsigned int regNum, subRegNumInBytes;
795                     getGRF(dcl->getSpilledDeclare(), regNum, subRegNumInBytes);
796                     varMap->physicalType = VARMAP_PREG_FILE_GRF;
797                     varMap->Mapping.Register.regNum = static_cast<uint16_t>(regNum);
798                     varMap->Mapping.Register.subRegNum = static_cast<uint16_t>(subRegNumInBytes);
799                     varsMap.push_back(varMap);
800                 }
801                 else
802                 {
803                     unsigned int spillOffset = 0;
804                     //G4_Declare* origDcl = dcl;
805                     while (dcl->getAliasDeclare() != NULL)
806                     {
807                         spillOffset += dcl->getAliasOffset();
808                         dcl = dcl->getAliasDeclare();
809                     }
810                     spillOffset += dcl->getSpilledDeclare()->getRegVar()->getDisp();
811                     varMap->physicalType = VARMAP_PREG_FILE_MEMORY;
812                     if (getKernel().fg.getHasStackCalls() == false)
813                     {
814                         varMap->Mapping.Memory.isAbs = 1;
815                     }
816                     else
817                     {
818                         varMap->Mapping.Memory.isAbs = 0;
819                     }
820                     varMap->Mapping.Memory.memoryOffset = (int32_t)spillOffset;
821                     varsMap.push_back(varMap);
822                 }
823             }
824         }
825         else if (dcl->getRegFile() == G4_FLAG)
826         {
827             bool isSpilled = dcl->isSpilled() && (dcl->getRegVar()->getPhyReg() == NULL);
828             varMap->virtualType = VARMAP_VREG_FILE_FLAG;
829 
830             if (!isSpilled)
831             {
832                 unsigned int regNum, subRegNum;
833                 regNum = dcl->getRegVar()->getPhyReg()->asAreg()->getFlagNum();
834                 subRegNum = dcl->getRegVar()->getPhyRegOff();
835                 varMap->physicalType = VARMAP_PREG_FILE_FLAG;
836                 varMap->Mapping.Register.regNum = static_cast<uint16_t>(regNum);
837                 varMap->Mapping.Register.subRegNum = static_cast<uint16_t>(subRegNum);
838                 varsMap.push_back(varMap);
839             }
840             else
841             {
842                 // Spilled to GRF
843                 if (!dcl->getSpilledDeclare()->isSpilled())
844                 {
845                     unsigned int regNum, subRegNumInBytes;
846                     getGRF(dcl->getSpilledDeclare(), regNum, subRegNumInBytes);
847                     varMap->physicalType = VARMAP_PREG_FILE_GRF;
848                     varMap->Mapping.Register.regNum = static_cast<uint16_t>(regNum);
849                     varMap->Mapping.Register.subRegNum = static_cast<uint16_t>(subRegNumInBytes);
850                     varsMap.push_back(varMap);
851                 }
852                 else
853                 {
854                     unsigned int spillOffset = 0;
855                     //G4_Declare* origDcl = dcl;
856                     while (dcl->getAliasDeclare() != NULL)
857                     {
858                         spillOffset += dcl->getAliasOffset();
859                         dcl = dcl->getAliasDeclare();
860                     }
861                     spillOffset += dcl->getSpilledDeclare()->getRegVar()->getDisp();
862                     varMap->physicalType = VARMAP_PREG_FILE_MEMORY;
863                     if (getKernel().fg.getHasStackCalls() == false)
864                     {
865                         varMap->Mapping.Memory.isAbs = 1;
866                     }
867                     else
868                     {
869                         varMap->Mapping.Memory.isAbs = 0;
870                     }
871                     varMap->Mapping.Memory.memoryOffset = (int32_t)spillOffset;
872                     varsMap.push_back(varMap);
873                 }
874             }
875         }
876     }
877 }
878 
// Writes `size` bytes starting at `ptr` to the stream `f` as one element.
// The result of fwrite is not inspected, so a failed or short write is not
// reported to the caller.
void insertData(const void* ptr, unsigned size, FILE* f)
{
    const unsigned elementCount = 1;
    (void)fwrite(ptr, size, elementCount, f);
}
883 
// Appends `size` bytes starting at `ptr` to the end of `vec`.
// Existing contents of `vec` are kept. The bytes are added with a single
// range insert so the vector grows at most once per call instead of
// potentially once per byte. A zero `size` leaves `vec` unchanged.
void insertData(const void* ptr, unsigned size, std::vector<unsigned char>& vec)
{
    const unsigned char* bytes = static_cast<const unsigned char*>(ptr);
    vec.insert(vec.end(), bytes, bytes + size);
}
891 
populateMapDclName(VISAKernelImpl * kernel,std::map<G4_Declare *,std::pair<const char *,unsigned int>> & mapDclName)892 unsigned int populateMapDclName(VISAKernelImpl* kernel, std::map<G4_Declare*, std::pair<const char*, unsigned int>>& mapDclName)
893 {
894     std::list<CISA_GEN_VAR*> dclList;
895     for (uint32_t ctr = 0; ctr < kernel->getGenVarCount(); ctr++)
896     {
897         // Pre-defined gen vars are included in this list,
898         // but we dont want to emit them to debug info.
899         if (kernel->getGenVar((unsigned int)ctr)->index >= kernel->getNumPredVars())
900         {
901             dclList.push_back(kernel->getGenVar((unsigned int)ctr));
902         }
903     }
904 
905     for (uint32_t ctr = 0; ctr < kernel->getAddrVarCount(); ctr++)
906     {
907         dclList.push_back(kernel->getAddrVar((unsigned int)ctr));
908     }
909 
910     for (uint32_t ctr = 0; ctr < kernel->getPredVarCount(); ctr++)
911     {
912         dclList.push_back(kernel->getPredVar((unsigned int)ctr));
913     }
914 
915     for (uint32_t ctr = 0; ctr < kernel->getSurfaceVarCount(); ctr++)
916     {
917         dclList.push_back(kernel->getSurfaceVar((unsigned int)ctr));
918     }
919 
920     for (uint32_t ctr = 0; ctr < kernel->getSamplerVarCount(); ctr++)
921     {
922         dclList.push_back(kernel->getSamplerVar((unsigned int)ctr));
923     }
924 
925     auto start = dclList.begin();
926     auto end = dclList.end();
927 
928     for (auto it = start;
929         it != end;
930         it++)
931     {
932         CISA_GEN_VAR* var = (*it);
933 
934         if (var->type == GENERAL_VAR)
935         {
936             mapDclName.insert(std::make_pair(var->genVar.dcl, std::make_pair("V", var->index)));
937         }
938         else if (var->type == ADDRESS_VAR)
939         {
940             mapDclName.insert(std::make_pair(var->addrVar.dcl, std::make_pair("A", var->index)));
941         }
942         else if (var->type == PREDICATE_VAR)
943         {
944             mapDclName.insert(std::make_pair(var->predVar.dcl, std::make_pair("P", var->index)));
945         }
946         else if (var->type == SURFACE_VAR)
947         {
948             mapDclName.insert(std::make_pair(var->stateVar.dcl, std::make_pair("T", var->index)));
949         }
950         else if (var->type == SAMPLER_VAR)
951         {
952             mapDclName.insert(std::make_pair(var->stateVar.dcl, std::make_pair("S", var->index)));
953         }
954     }
955 
956     return (uint32_t) dclList.size();
957 }
958 
getVarIndex(G4_Declare * dcl)959 uint32_t KernelDebugInfo::getVarIndex(G4_Declare* dcl)
960 {
961     uint32_t retval = 0xffffffff;
962     for (uint32_t i = 0, size = varsMap.size(); i < size; i++)
963     {
964         if (dcl == varsMap[i]->dcl)
965         {
966             retval = i;
967             break;
968         }
969     }
970     return retval;
971 }
972 
// Emits a NUL-terminated string to sink `t` as a 16-bit length followed by
// that many characters (the terminator itself is not written).
// The length is strlen(name) converted to uint16_t.
template<class T>
void emitDataName(const char* name, T& t)
{
    const uint16_t nameLen = (uint16_t)strlen(name);

    // Length prefix
    insertData(&nameLen, sizeof(nameLen), t);

    // Characters of the name
    insertData(name, (uint32_t)nameLen, t);
}
982 
// Emits the in-memory representation of a 32-bit value to sink `t`.
template<class T>
void emitDataUInt32(uint32_t data, T& t)
{
    insertData(&data, sizeof(data), t);
}
988 
// Emits the in-memory representation of a 16-bit value to sink `t`.
template<class T>
void emitDataUInt16(uint16_t data, T& t)
{
    insertData(&data, sizeof(data), t);
}
994 
// Emits a single byte to sink `t`.
template<class T>
void emitDataUInt8(uint8_t data, T& t)
{
    insertData(&data, sizeof(data), t);
}
1000 
1001 template<class T>
emitDataVarLiveInterval(VISAKernelImpl * visaKernel,LiveIntervalInfo * lrInfo,uint32_t i,uint16_t size,T & t)1002 void emitDataVarLiveInterval(VISAKernelImpl* visaKernel, LiveIntervalInfo* lrInfo, uint32_t i, uint16_t size, T& t)
1003 {
1004     // given lrs and saverestore, prepare assembled list of ranges to write out
1005     KernelDebugInfo* dbgInfo = visaKernel->getKernel()->getKernelDebugInfo();
1006 
1007     // start cisa index, end cisa index
1008     std::vector<std::pair<uint32_t, uint32_t>> lrs;
1009     if (lrInfo)
1010     {
1011         lrInfo->getLiveIntervals(lrs);
1012     }
1013     uint16_t numLRs = (uint16_t)lrs.size();
1014     std::sort(lrs.begin(), lrs.end(), [](std::pair<uint32_t, uint32_t>& a, std::pair<uint32_t, uint32_t>& b) { return a.first < b.first; });
1015     emitDataUInt16(numLRs, t);
1016     for (auto& it : lrs)
1017     {
1018         const uint32_t start = (uint32_t)it.first;
1019         const uint32_t end = (uint32_t)it.second;
1020 
1021         if (size == 2)
1022         {
1023             emitDataUInt16((uint16_t)start, t);
1024             emitDataUInt16((uint16_t)end, t);
1025         }
1026         else
1027         {
1028             emitDataUInt32(start, t);
1029             emitDataUInt32(end, t);
1030         }
1031 
1032         auto& varsMap = dbgInfo->getVarsMap();
1033         const unsigned char virtualType = varsMap[i]->virtualType;
1034         // Write virtual register type
1035         emitDataUInt8((uint8_t)virtualType, t);
1036 
1037         const unsigned char physicalType = varsMap[i]->physicalType;
1038         // Write physical register type
1039         emitDataUInt8((uint8_t)physicalType, t);
1040 
1041         // If physical register assigned then write register number and
1042         // sub-register number. Else write memory spill offset.
1043         if (physicalType == VARMAP_PREG_FILE_MEMORY)
1044         {
1045             unsigned int memOffset = (unsigned int)varsMap[i]->Mapping.Memory.memoryOffset;
1046             if (visaKernel->getKernel()->fg.getHasStackCalls() == false)
1047             {
1048                 memOffset |= 0x80000000;
1049             }
1050             // Emit memory offset
1051             emitDataUInt32((uint32_t)memOffset, t);
1052         }
1053         else
1054         {
1055             const unsigned int regNum = varsMap[i]->Mapping.Register.regNum;
1056             const unsigned int subRegNum = varsMap[i]->Mapping.Register.subRegNum;
1057 
1058             // Emit register number
1059             emitDataUInt16((uint16_t)regNum, t);
1060 
1061             // Emit sub-register number
1062             emitDataUInt16((uint16_t)subRegNum, t);
1063         }
1064     }
1065 }
1066 
1067 template<class T>
emitFrameDescriptorOffsetLiveInterval(LiveIntervalInfo * lrInfo,StackCall::FrameDescriptorOfsets memOffset,T & t)1068 void emitFrameDescriptorOffsetLiveInterval(LiveIntervalInfo* lrInfo, StackCall::FrameDescriptorOfsets memOffset, T& t)
1069 {
1070     // Used to emit fields of Frame Descriptor
1071     // location = [start, end) @ BE_FP+offset
1072     std::vector<std::pair<uint32_t, uint32_t>> lrs;
1073     if (lrInfo)
1074         lrInfo->getLiveIntervals(lrs);
1075     else
1076         return;
1077 
1078     uint32_t start = 0, end = 0;
1079     if (lrs.size() > 0)
1080     {
1081         start = lrs.front().first;
1082         end = lrs.back().second;
1083     }
1084 
1085     std::sort(lrs.begin(), lrs.end(), [](std::pair<uint32_t, uint32_t>& a, std::pair<uint32_t, uint32_t>& b) { return a.first < b.first; });
1086 
1087     emitDataUInt16(1, t);
1088 
1089     emitDataUInt32(start, t);
1090     emitDataUInt32(end, t);
1091 
1092     emitDataUInt8((uint8_t)VARMAP_PREG_FILE_GRF, t);
1093 
1094     emitDataUInt8((uint8_t)VARMAP_PREG_FILE_MEMORY, t);
1095 
1096     emitDataUInt32((uint32_t)memOffset, t);
1097 }
1098 
populateUniqueSubs(G4_Kernel * kernel,std::unordered_map<G4_BB *,bool> & uniqueSubs)1099 void populateUniqueSubs(G4_Kernel* kernel, std::unordered_map<G4_BB*, bool>& uniqueSubs)
1100 {
1101     // Traverse kernel and populate all unique subs.
1102     // Iterating over all BBs of kernel visits all
1103     // subroutine call sites.
1104     auto isStackObj = kernel->fg.getHasStackCalls() || kernel->fg.getIsStackCallFunc();
1105     for (auto bb : kernel->fg)
1106     {
1107         if (&bb->getParent() != &kernel->fg)
1108             continue;
1109 
1110         if (bb->isEndWithCall())
1111         {
1112             if (!isStackObj || // definitely a subroutine since kernel has no stack calls
1113                 (isStackObj && // a subroutine iff call dst != pre-defined reg as per ABI
1114                     bb->back()->getDst()->getTopDcl()->getRegVar()->getPhyReg()->asGreg()->getRegNum() != kernel->getFPSPGRF()))
1115             {
1116                 // This is a subroutine call
1117                 uniqueSubs[bb->Succs.front()] = false;
1118             }
1119         }
1120     }
1121 }
1122 
1123 template<class T>
emitDataSubroutines(VISAKernelImpl * visaKernel,T & t)1124 void emitDataSubroutines(VISAKernelImpl* visaKernel, T& t)
1125 {
1126     auto kernel = visaKernel->getKernel();
1127     // map<Label, Written to t>
1128     std::unordered_map<G4_BB*, bool> uniqueSubs;
1129 
1130     populateUniqueSubs(kernel, uniqueSubs);
1131 
1132     emitDataUInt16((uint16_t) uniqueSubs.size(), t);
1133 
1134     kernel->fg.setPhysicalPredSucc();
1135     for (auto bb : kernel->fg)
1136     {
1137         G4_INST* firstInst = nullptr;
1138         G4_INST* lastInst = nullptr;
1139         unsigned int start = 0, end = 0;
1140         G4_Declare* retval = nullptr;
1141         G4_Label* subLabel = nullptr;
1142 
1143         if (bb->isEndWithCall())
1144         {
1145             auto subInfo = uniqueSubs.find(bb->Succs.front());
1146             if (subInfo != uniqueSubs.end() &&
1147                 subInfo->second == false)
1148             {
1149                 subInfo->second = true;
1150                 G4_BB* calleeBB = bb->Succs.front();
1151                 while (firstInst == NULL && calleeBB != NULL)
1152                 {
1153                     if (calleeBB->size() > 0)
1154                     {
1155                         firstInst = calleeBB->front();
1156                         start = firstInst->getCISAOff();
1157                         subLabel = firstInst->getSrc(0)->asLabel();
1158                     }
1159                 }
1160 
1161                 calleeBB = bb->BBAfterCall()->Preds.front();
1162                 while (lastInst == NULL && calleeBB != NULL)
1163                 {
1164                     if (calleeBB->size() > 0)
1165                     {
1166                         lastInst = calleeBB->back();
1167                         end = lastInst->getCISAOff();
1168                         MUST_BE_TRUE(lastInst->isReturn(), "Expecting to see G4_return as last inst in sub-routine");
1169                         retval = lastInst->getSrc(0)->asSrcRegRegion()->getBase()->asRegVar()->getDeclare()->getRootDeclare();
1170                     }
1171 
1172                     calleeBB = calleeBB->Preds.front();
1173                 }
1174                 emitDataName(subLabel->getLabel(), t);
1175                 emitDataUInt32(start, t);
1176                 emitDataUInt32(end, t);
1177 
1178                 if (kernel->getKernelDebugInfo()->getLiveIntervalInfo(retval, false) != NULL)
1179                 {
1180                     auto lv = kernel->getKernelDebugInfo()->getLiveIntervalInfo(retval, false);
1181                     uint32_t idx = kernel->getKernelDebugInfo()->getVarIndex(retval);
1182                     emitDataVarLiveInterval(visaKernel, lv, idx, sizeof(uint16_t), t);
1183                 }
1184                 else
1185                 {
1186                     emitDataUInt16(0, t);
1187                 }
1188             }
1189         }
1190     }
1191 }
1192 
1193 template<class T>
emitDataPhyRegSaveInfoPerIP(VISAKernelImpl * visaKernel,SaveRestoreManager & mgr,T & t)1194 void emitDataPhyRegSaveInfoPerIP(VISAKernelImpl* visaKernel, SaveRestoreManager& mgr, T& t)
1195 {
1196     auto& srInfo = mgr.getSRInfo();
1197     auto relocOffset = visaKernel->getKernel()->getKernelDebugInfo()->getRelocOffset();
1198 
1199     for (auto sr : srInfo)
1200     {
1201         if (sr.getInst()->getGenOffset() == UNDEFINED_GEN_OFFSET)
1202         {
1203             continue;
1204         }
1205 
1206         emitDataUInt32((uint32_t)sr.getInst()->getGenOffset() +
1207             getBinInstSize(sr.getInst()) - relocOffset, t);
1208         emitDataUInt16((uint16_t) sr.saveRestoreMap.size(), t);
1209         for (auto mapIt : sr.saveRestoreMap)
1210         {
1211             emitDataUInt16((uint16_t)mapIt.first * numEltPerGRF<Type_UB>(), t);
1212             emitDataUInt16((uint16_t)numEltPerGRF<Type_UB>(), t);
1213 
1214             if (mapIt.second.first == SaveRestoreInfo::RegOrMem::Reg)
1215             {
1216                 emitDataUInt8((uint8_t)1, t);
1217                 emitDataUInt16((uint16_t)mapIt.second.second.regNum, t);
1218                 emitDataUInt16((uint16_t)0, t);
1219             }
1220             else if (mapIt.second.first == SaveRestoreInfo::RegOrMem::MemOffBEFP)
1221             {
1222                 SaveRestoreInfo::RegMap tmp;
1223                 emitDataUInt8((uint8_t)0, t);
1224                 tmp = mapIt.second.second;
1225                 tmp.isAbs = 0;
1226                 uint32_t data = mapIt.second.second.memOff;
1227                 emitDataUInt32(data, t);
1228             }
1229             else if (mapIt.second.first == SaveRestoreInfo::RegOrMem::MemAbs)
1230             {
1231                 SaveRestoreInfo::RegMap tmp;
1232                 emitDataUInt8((uint8_t)0, t);
1233                 tmp = mapIt.second.second;
1234                 tmp.isAbs = 1;
1235                 uint32_t data = mapIt.second.second.memOff;
1236                 emitDataUInt32(data, t);
1237             }
1238         }
1239     }
1240 }
1241 
sieveInstructions(CallerOrCallee c)1242 void SaveRestoreManager::sieveInstructions(CallerOrCallee c)
1243 {
1244     // Remove entries that are not caller/callee
1245     // save/restore.
1246     for (auto& sr : srInfo)
1247     {
1248         for (auto entryIt = sr.saveRestoreMap.begin();
1249             entryIt != sr.saveRestoreMap.end();
1250            )
1251         {
1252             auto entry = (*entryIt);
1253 
1254             bool removeEntry = true;
1255             if (c == CallerOrCallee::Caller)
1256             {
1257                 // r1 - r60
1258                 // Remove temp movs emitted for send header
1259                 // creation since they are not technically
1260                 // caller save
1261                 if (entry.first < visaKernel->getKernel()->calleeSaveStart() &&
1262                     entry.first >= 0 &&
1263                     entry.second.first == SaveRestoreInfo::RegOrMem::MemOffBEFP)
1264                 {
1265                     removeEntry = false;
1266                 }
1267             }
1268             else if (c == CallerOrCallee::Callee)
1269             {
1270                 if (entry.first >= visaKernel->getKernel()->calleeSaveStart() &&
1271                     entry.second.first == SaveRestoreInfo::RegOrMem::MemOffBEFP)
1272                 {
1273                     removeEntry = false;
1274                 }
1275             }
1276 
1277             if (removeEntry)
1278             {
1279                 entryIt = sr.saveRestoreMap.erase(entryIt);
1280                 continue;
1281             }
1282 
1283             entryIt++;
1284         }
1285     }
1286 
1287 #if _DEBUG
1288     // Ensure ordering of elements is correct, ie ascending in key value
1289     for (auto& sr : srInfo)
1290     {
1291         uint32_t prev = 0;
1292         for (auto& item : sr.saveRestoreMap)
1293         {
1294             MUST_BE_TRUE(item.first >= prev, "Unexpected ordering in container");
1295             prev = item.first;
1296         }
1297     }
1298 #endif
1299 
1300     // Code below is to remove empty and duplicate entries
1301     // from both caller and callee save code.
1302     bool foundFirstNonEmpty = false;
1303     bool onSecond = false;
1304     SaveRestoreInfo prev;
1305 
1306     for (auto srIt = srInfo.begin();
1307         srIt != srInfo.end();
1308        )
1309     {
1310         auto& sr = (*srIt);
1311 
1312         if (!foundFirstNonEmpty)
1313         {
1314             if (sr.saveRestoreMap.size() == 0)
1315             {
1316                 srIt = srInfo.erase(srIt);
1317                 continue;
1318             }
1319             else
1320             {
1321                 foundFirstNonEmpty = true;
1322             }
1323         }
1324 
1325         if (onSecond)
1326         {
1327             // If this one and previous one are same, eliminate this entry
1328             if (sr.isEqual(prev))
1329             {
1330                 srIt = srInfo.erase(srIt);
1331                 continue;
1332             }
1333         }
1334 
1335         prev = (*srIt);
1336         onSecond = true;
1337         srIt++;
1338     }
1339 }
1340 
1341 template<class T>
emitDataCallerSave(VISAKernelImpl * visaKernel,T & t)1342 void emitDataCallerSave(VISAKernelImpl* visaKernel, T& t)
1343 {
1344     auto kernel = visaKernel->getKernel();
1345 
1346     uint16_t numCallerSaveEntries = 0;
1347     // Compute total caller save entries to emit
1348     for (auto bbs : kernel->fg)
1349     {
1350         if (bbs->size() > 0 &&
1351             kernel->getKernelDebugInfo()->isFcallWithSaveRestore(bbs))
1352         {
1353             auto& callerSaveInsts = kernel->getKernelDebugInfo()->getCallerSaveInsts(bbs);
1354             auto& callerRestoreInsts = kernel->getKernelDebugInfo()->getCallerRestoreInsts(bbs);
1355 
1356             SaveRestoreManager mgr(visaKernel);
1357             for (auto callerSave : callerSaveInsts)
1358             {
1359                 mgr.addInst(callerSave);
1360             }
1361 
1362             for (auto callerRestore : callerRestoreInsts)
1363             {
1364                 mgr.addInst(callerRestore);
1365             }
1366 
1367             auto& srInfo = mgr.getSRInfo();
1368 
1369             mgr.sieveInstructions(SaveRestoreManager::CallerOrCallee::Caller);
1370 
1371             numCallerSaveEntries += (uint16_t) srInfo.size();
1372             for (auto sr : srInfo)
1373             {
1374                 if (sr.getInst()->getGenOffset() == UNDEFINED_GEN_OFFSET)
1375                 {
1376                     numCallerSaveEntries--;
1377                 }
1378             }
1379         }
1380     }
1381 
1382     emitDataUInt16(numCallerSaveEntries, t);
1383 
1384     if (numCallerSaveEntries > 0)
1385     {
1386         for (auto bbs : kernel->fg)
1387         {
1388             if (bbs->size() > 0 &&
1389                 kernel->getKernelDebugInfo()->isFcallWithSaveRestore(bbs))
1390             {
1391                 auto& callerSaveInsts = kernel->getKernelDebugInfo()->getCallerSaveInsts(bbs);
1392                 auto& callerRestoreInsts = kernel->getKernelDebugInfo()->getCallerRestoreInsts(bbs);
1393 
1394                 SaveRestoreManager mgr(visaKernel);
1395                 for (auto callerSave : callerSaveInsts)
1396                 {
1397                     mgr.addInst(callerSave);
1398                 }
1399 
1400                 for (auto callerRestore : callerRestoreInsts)
1401                 {
1402                     mgr.addInst(callerRestore);
1403                 }
1404 
1405                 mgr.sieveInstructions(SaveRestoreManager::CallerOrCallee::Caller);
1406 
1407                 emitDataPhyRegSaveInfoPerIP(visaKernel, mgr, t);
1408             }
1409         }
1410     }
1411 }
1412 
1413 template<class T>
emitDataCalleeSave(VISAKernelImpl * visaKernel,T & t)1414 void emitDataCalleeSave(VISAKernelImpl* visaKernel, T& t)
1415 {
1416     G4_Kernel* kernel = visaKernel->getKernel();
1417 
1418     SaveRestoreManager mgr(visaKernel);
1419     for (auto calleeSave : kernel->getKernelDebugInfo()->getCalleeSaveInsts())
1420     {
1421         mgr.addInst(calleeSave);
1422     }
1423 
1424     for (auto calleeRestore : kernel->getKernelDebugInfo()->getCalleeRestoreInsts())
1425     {
1426         mgr.addInst(calleeRestore);
1427     }
1428 
1429     uint16_t numCalleeSaveEntries = 0;
1430     auto& srInfo = mgr.getSRInfo();
1431 
1432     mgr.sieveInstructions(SaveRestoreManager::CallerOrCallee::Callee);
1433 
1434     numCalleeSaveEntries += (uint16_t) srInfo.size();
1435     for (auto sr : srInfo)
1436     {
1437         if (sr.getInst()->getGenOffset() == UNDEFINED_GEN_OFFSET)
1438         {
1439             numCalleeSaveEntries--;
1440         }
1441     }
1442 
1443     emitDataUInt16(numCalleeSaveEntries, t);
1444 
1445     emitDataPhyRegSaveInfoPerIP(visaKernel, mgr, t);
1446 }
1447 
1448 template<class T>
emitDataCallFrameInfo(VISAKernelImpl * visaKernel,T & t)1449 void emitDataCallFrameInfo(VISAKernelImpl* visaKernel, T& t)
1450 {
1451     // Compute both be fp of current frame and previous frame
1452     auto kernel = visaKernel->getKernel();
1453 
1454     auto frameSize = kernel->getKernelDebugInfo()->getFrameSize();
1455     emitDataUInt16((uint16_t)frameSize, t);
1456 
1457     auto befpDcl = kernel->getKernelDebugInfo()->getBEFP();
1458     if (befpDcl)
1459     {
1460         auto befpLIInfo = kernel->getKernelDebugInfo()->getLiveIntervalInfo(befpDcl, false);
1461         if (befpLIInfo)
1462         {
1463             emitDataUInt8((uint8_t)1, t);
1464             uint32_t idx = kernel->getKernelDebugInfo()->getVarIndex(kernel->fg.framePtrDcl);
1465             emitDataVarLiveInterval(visaKernel, befpLIInfo, idx, sizeof(uint32_t), t);
1466         }
1467         else
1468         {
1469             emitDataUInt8((uint8_t)0, t);
1470         }
1471     }
1472     else
1473     {
1474         emitDataUInt8((uint8_t)0, t);
1475     }
1476 
1477     auto callerfpdcl = kernel->getKernelDebugInfo()->getCallerBEFP();
1478     if (callerfpdcl)
1479     {
1480         auto callerfpLIInfo = kernel->getKernelDebugInfo()->getLiveIntervalInfo(callerfpdcl, false);
1481         if (callerfpLIInfo)
1482         {
1483             emitDataUInt8((uint8_t)1, t);
1484             // Caller's be_fp is stored in frame descriptor
1485             emitFrameDescriptorOffsetLiveInterval(callerfpLIInfo, StackCall::FrameDescriptorOfsets::BE_FP, t);
1486         }
1487         else
1488         {
1489             emitDataUInt8((uint8_t)0, t);
1490         }
1491     }
1492     else
1493     {
1494         emitDataUInt8((uint8_t)0, t);
1495     }
1496 
1497     auto fretVar = kernel->getKernelDebugInfo()->getFretVar();
1498     if (fretVar)
1499     {
1500         auto fretVarLIInfo = kernel->getKernelDebugInfo()->getLiveIntervalInfo(fretVar, false);
1501         if (fretVarLIInfo)
1502         {
1503             emitDataUInt8((uint8_t)1, t);
1504             emitFrameDescriptorOffsetLiveInterval(fretVarLIInfo, StackCall::FrameDescriptorOfsets::Ret_IP, t);
1505         }
1506         else
1507         {
1508             emitDataUInt8((uint8_t)0, t);
1509         }
1510     }
1511     else
1512     {
1513         emitDataUInt8((uint8_t)0, t);
1514     }
1515 
1516     emitDataCalleeSave(visaKernel, t);
1517 
1518     emitDataCallerSave(visaKernel, t);
1519 }
1520 
1521 // compilationUnits has 1 kernel and stack call functions
1522 // referenced by it. In case stack call functions dont
1523 // exist in input, it only has a kernel.
1524 template<class T>
emitData(std::list<VISAKernelImpl * > & compilationUnits,T t)1525 void emitData(std::list<VISAKernelImpl*>& compilationUnits, T t)
1526 {
1527     const unsigned int magic = DEBUG_MAGIC_NUMBER;
1528     const unsigned int numKernels = (uint32_t) compilationUnits.size();
1529     // Magic
1530     emitDataUInt32((uint32_t)magic, t);
1531     // Num Kernels
1532     emitDataUInt16((uint16_t)numKernels, t);
1533 
1534     auto cunitsItEnd = compilationUnits.end();
1535     for (auto cunitsIt = compilationUnits.begin();
1536         cunitsIt != cunitsItEnd;
1537         cunitsIt++)
1538     {
1539         VISAKernelImpl* curKernel = (*cunitsIt);
1540 
1541         emitDataName(curKernel->getName(), t);
1542 
1543         uint32_t reloc_offset = 0;
1544         if (curKernel->getIsKernel())
1545         {
1546             emitDataUInt32((uint32_t)reloc_offset, t);
1547         }
1548         else
1549         {
1550             reloc_offset = curKernel->getKernel()->getKernelDebugInfo()->getRelocOffset();
1551             emitDataUInt32((uint32_t)reloc_offset, t);
1552         }
1553 
1554         // Emit CISA Offset:Gen Offset mapping
1555         const unsigned int numElementsCISAOffsetMap = (uint32_t) curKernel->getKernel()->getKernelDebugInfo()->getMapCISAOffsetGenOffset().size();
1556         // Num elements
1557         emitDataUInt32((uint32_t)numElementsCISAOffsetMap, t);
1558 
1559         // Emit out actual CISA Offset:Gen Offset mapping elements
1560         for (unsigned int i = 0; i < numElementsCISAOffsetMap; i++)
1561         {
1562             const auto & CisaOffset2Gen = curKernel->getKernel()->getKernelDebugInfo()->getMapCISAOffsetGenOffset()[i];
1563             const unsigned int cisaOffset = CisaOffset2Gen.CisaByteOffset;
1564             const unsigned int genOffset = CisaOffset2Gen.GenOffset - (unsigned int)reloc_offset;
1565 
1566             // Write cisa offset and gen offset
1567             emitDataUInt32((uint32_t)cisaOffset, t);
1568             emitDataUInt32((uint32_t)genOffset, t);
1569         }
1570 
1571         // Emit CISA index:Gen Offset mapping
1572         const unsigned int numElementsCISAIndexMap = (uint32_t) curKernel->getKernel()->getKernelDebugInfo()->getMapCISAIndexGenOffset().size();
1573         // Num elements
1574         emitDataUInt32((uint32_t)numElementsCISAIndexMap, t);
1575 
1576         // Emit out actual CISA index:Gen Offset mapping
1577         for (unsigned int i = 0; i < numElementsCISAIndexMap; i++)
1578         {
1579             const auto &CisaIndex2Gen = curKernel->getKernel()->getKernelDebugInfo()->getMapCISAIndexGenOffset()[i];
1580             const unsigned int cisaIndex = CisaIndex2Gen.CisaIndex;
1581             const unsigned int genOffset = CisaIndex2Gen.GenOffset - (unsigned int)reloc_offset;
1582 
1583             // Write cisa index and gen offset
1584             emitDataUInt32((uint32_t)cisaIndex, t);
1585             emitDataUInt32((uint32_t)genOffset, t);
1586         }
1587 
1588         // All variables present in varMap need not be present in
1589         // mapDclName. Only those variables seen when constructing
1590         // symbol table will be added to mapDclName. So compute
1591         // number of elements that will be written out.
1592         unsigned int numItems = 0;
1593         std::map<G4_Declare*, std::pair<const char*, unsigned int>> mapDclName;
1594         // Compute items to write to debug info.
1595         // Sum variables of all types present in symbol table
1596         // created at build time, and subtract number of pre-
1597         // defined variables.
1598         populateMapDclName(curKernel, mapDclName);
1599 
1600         const unsigned int numElementsVarMap = (uint32_t) curKernel->getKernel()->getKernelDebugInfo()->getVarsMap().size();
1601 
1602         for (unsigned int i = 0; i < numElementsVarMap; i++)
1603         {
1604             G4_Declare* dcl = curKernel->getKernel()->getKernelDebugInfo()->getVarsMap()[i]->dcl;
1605             if (mapDclName.find(dcl) == mapDclName.end())
1606             {
1607                 continue;
1608             }
1609 
1610             numItems++;
1611         }
1612 
1613         // Emit out number of variable mapping items
1614         emitDataUInt32((uint32_t)numItems, t);
1615 
1616         // Emit out actual Virtual Register:Physical Register mapping elements
1617         for (unsigned int i = 0; i < numElementsVarMap; i++)
1618         {
1619             G4_Declare* dcl = curKernel->getKernel()->getKernelDebugInfo()->getVarsMap()[i]->dcl;
1620             if (mapDclName.find(dcl) == mapDclName.end())
1621             {
1622                 continue;
1623             }
1624 
1625             const std::pair<const char*, unsigned int>& dclInfo = mapDclName.find(dcl)->second;
1626             std::string varName(dclInfo.first);
1627             // to_string support not present prior to gcc 4.6 and is a c++11 feature
1628 
1629 #if ANDROID
1630             {
1631                 char t_char[128];
1632                 snprintf(t_char, sizeof(t_char), "%d", dclInfo.second);
1633                 varName += std::string(t_char);
1634             }
1635 #elif defined(_MSC_VER) && _MSC_VER < 1700
1636             varName += std::to_string((_ULonglong)dclInfo.second);
1637 #else
1638             varName += std::to_string(dclInfo.second);
1639 #endif
1640 
1641             if (curKernel->getOptions()->getOption(vISA_UseFriendlyNameInDbg))
1642             {
1643                 varName = dcl->getName();
1644             }
1645             emitDataName(varName.c_str(), t);
1646 
1647             // Insert live-interval information
1648             LiveIntervalInfo* lrInfo = curKernel->getKernel()->getKernelDebugInfo()->getLiveIntervalInfo(dcl, false);
1649             emitDataVarLiveInterval(curKernel, lrInfo, i, sizeof(uint16_t), t);
1650         }
1651 
1652         // emit sub-routine data
1653         emitDataSubroutines(curKernel, t);
1654 
1655         emitDataCallFrameInfo(curKernel, t);
1656     }
1657 }
1658 
emitDebugInfo(VISAKernelImpl * curKernel,std::string filename)1659 void emitDebugInfo(VISAKernelImpl* curKernel, std::string filename)
1660 {
1661     std::list<VISAKernelImpl*> functions;
1662     emitDebugInfo(curKernel, functions, filename);
1663 }
1664 
1665 extern "C" void* allocCodeBlock(size_t sz);
1666 
emitDebugInfoToMem(VISAKernelImpl * kernel,std::list<VISAKernelImpl * > & functions,void * & info,unsigned & size)1667 void emitDebugInfoToMem(VISAKernelImpl* kernel, std::list<VISAKernelImpl*>& functions, void*& info, unsigned& size)
1668 {
1669     std::vector<unsigned char>  vec;
1670     std::list<VISAKernelImpl*> compilationUnits;
1671     compilationUnits.push_back(kernel);
1672     auto funcItEnd = functions.end();
1673     for (auto funcIt = functions.begin();
1674         funcIt != funcItEnd;
1675         funcIt++)
1676     {
1677         if ((*funcIt)->getKernel()->getKernelDebugInfo()->getRelocOffset() != 0)
1678         {
1679             compilationUnits.push_back((*funcIt));
1680         }
1681     }
1682 
1683     emitData<std::vector<unsigned char>&>(compilationUnits, vec);
1684 
1685     info = allocCodeBlock(vec.size());
1686     memcpy_s(info, vec.size(), vec.data(), vec.size());
1687     size = (uint32_t) vec.size();
1688 }
1689 
emitDebugInfoToMem(VISAKernelImpl * curKernel,void * & info,unsigned & size)1690 void emitDebugInfoToMem(VISAKernelImpl* curKernel, void*& info, unsigned& size)
1691 {
1692     std::list<VISAKernelImpl*> compilationUnits;
1693 
1694     emitDebugInfoToMem(curKernel, compilationUnits, info, size);
1695 }
1696 
operator new(size_t sz,Mem_Manager & m)1697 void* KernelDebugInfo::operator new(size_t sz, Mem_Manager& m)
1698 {
1699     return m.alloc(sz);
1700 }
1701 
KernelDebugInfo()1702 KernelDebugInfo::KernelDebugInfo()
1703 {
1704     visaKernel = nullptr;
1705     saveCallerFP = nullptr;
1706     restoreCallerFP = nullptr;
1707     setupFP = nullptr;
1708     restoreSP = nullptr;
1709     frameSize = 0;
1710     fretVar = nullptr;
1711     reloc_offset = 0;
1712     missingVISAIdsComputed = false;
1713 }
1714 
~KernelDebugInfo()1715 KernelDebugInfo::~KernelDebugInfo()
1716 {
1717     for (auto& item : debugInfoLiveIntervalMap)
1718     {
1719         item.second->~LiveIntervalInfo();
1720     }
1721 }
1722 
updateRelocOffset()1723 void KernelDebugInfo::updateRelocOffset()
1724 {
1725     // This function updates reloc_offset field of kernel
1726     // reloc_offset field for kernels is 0.
1727     // reloc_offset field for stack call function is set
1728     // to byte offset of first gen binary instruction
1729     // in binary buffer.
1730 
1731     bool done = false;
1732     BB_LIST_ITER bbItEnd = getKernel().fg.end();
1733     for (auto bbIt = getKernel().fg.begin();
1734         bbIt != bbItEnd && done == false;
1735         bbIt++)
1736     {
1737         G4_BB* bb = (*bbIt);
1738         INST_LIST_ITER instItEnd = bb->end();
1739         for (auto instIt = bb->begin();
1740             instIt != instItEnd;
1741             instIt++)
1742         {
1743             G4_INST* inst = (*instIt);
1744             if (inst->getGenOffset() != UNDEFINED_GEN_OFFSET)
1745             {
1746                 reloc_offset = (uint32_t)inst->getGenOffset();
1747                 done = true;
1748                 break;
1749             }
1750         }
1751     }
1752 }
1753 
emitDebugInfo(VISAKernelImpl * kernel,std::list<VISAKernelImpl * > & functions,std::string debugFileNameStr)1754 void emitDebugInfo(VISAKernelImpl* kernel, std::list<VISAKernelImpl*>& functions, std::string debugFileNameStr)
1755 {
1756     std::list<VISAKernelImpl*> compilationUnits;
1757     compilationUnits.push_back(kernel);
1758     auto funcItEnd = functions.end();
1759     for (auto funcIt = functions.begin();
1760         funcIt != funcItEnd;
1761         funcIt++)
1762     {
1763         if ((*funcIt)->getKernel()->getKernelDebugInfo()->getRelocOffset() != 0)
1764         {
1765             // Include compilation unit only if
1766             // it is referenced, ie reloc_offset
1767             // for gen binary is non-zero.
1768             compilationUnits.push_back((*funcIt));
1769         }
1770     }
1771 #ifdef DEBUG_VERBOSE_ON
1772     addCallFrameInfo(kernel);
1773 
1774     for (auto& funcIt : functions)
1775     {
1776         addCallFrameInfo(funcIt);
1777     }
1778 #endif
1779 
1780     FILE* dbgFile = fopen(debugFileNameStr.c_str(), "wb+");
1781 
1782     if (dbgFile == NULL)
1783     {
1784         std::cerr << "Error opening debug file " << debugFileNameStr << ". Not emitting debug info.\n";
1785         return;
1786     }
1787 
1788     emitData(compilationUnits, dbgFile);
1789 
1790     fclose(dbgFile);
1791 }
1792 
resetGenOffsets(G4_Kernel & kernel)1793 void resetGenOffsets(G4_Kernel& kernel)
1794 {
1795     // Iterate over all instructions in kernel and set gen
1796     // offset of BinInst instance to 0.
1797     auto bbItEnd = kernel.fg.end();
1798     for (auto bbIt = kernel.fg.begin();
1799         bbIt != bbItEnd;
1800         bbIt++)
1801     {
1802         G4_BB* bb = (*bbIt);
1803 
1804         auto instItEnd = bb->end();
1805         for (auto instIt = bb->begin();
1806             instIt != instItEnd;
1807             instIt++)
1808         {
1809             G4_INST* inst = (*instIt);
1810 
1811             if (inst->getGenOffset() != UNDEFINED_GEN_OFFSET)
1812             {
1813                 inst->setGenOffset(UNDEFINED_GEN_OFFSET);
1814             }
1815         }
1816     }
1817 }
1818 
updateDebugInfo(G4_Kernel & kernel,G4_INST * inst,const LivenessAnalysis & liveAnalysis,LiveRange * lrs[],BitSet & live,DebugInfoState * state,bool closeAllOpenIntervals)1819 void updateDebugInfo(G4_Kernel& kernel, G4_INST* inst, const LivenessAnalysis& liveAnalysis, LiveRange* lrs[], BitSet& live, DebugInfoState* state,
1820     bool closeAllOpenIntervals)
1821 {
1822     if (closeAllOpenIntervals && !state->getPrevInst())
1823         return;
1824 
1825     auto krnlDbgInfo = kernel.getKernelDebugInfo();
1826 
1827     // Update live-intervals only when bits change in bit-vector.
1828     // state parameter contains previous instruction and bit-vector.
1829     for (unsigned int i = 0; i < liveAnalysis.getNumSelectedVar(); i += NUM_BITS_PER_ELT)
1830     {
1831         auto elt = live.getElt(i / NUM_BITS_PER_ELT);
1832         auto prevElt = state->getPrevBitset() ? state->getPrevBitset()->getElt(i / NUM_BITS_PER_ELT) : 0;
1833 
1834         if (elt != prevElt)
1835         {
1836             // Some variables have changed state in bit-vector, so update their states accordingly.
1837             //
1838             // If elt is set and prevElt is reset, it means the variable became live at current inst,
1839             // If elt is reset and prevElt is set, it means the variable was killed at current inst
1840             //
1841             for (unsigned int j = 0; j < NUM_BITS_PER_ELT; j++)
1842             {
1843                 unsigned char eltJ = (elt >> j) & 0x1;
1844                 unsigned char prevEltJ = (prevElt >> j) & 0x1;
1845 
1846                 if (eltJ == 1 && prevEltJ == 0)
1847                 {
1848                     if (inst->getCISAOff() != UNMAPPABLE_VISA_INDEX)
1849                     {
1850                         // This check guarantees that for an open
1851                         // interval, at least the same CISA offset
1852                         // can be used to close it. If there is no
1853                         // instruction with valid CISA offset
1854                         // between open/close IR instruction, then
1855                         // the interval will not be recorded.
1856                         auto idx = (i + j);
1857                         G4_Declare* dcl = lrs[idx]->getVar()->getDeclare();
1858                         auto lr = krnlDbgInfo->getLiveIntervalInfo(dcl);
1859 
1860                         lr->setStateOpen(inst->getCISAOff());
1861                     }
1862                 }
1863                 else if (eltJ == 0 && prevEltJ == 1)
1864                 {
1865                     auto idx = (i + j);
1866                     G4_Declare* dcl = lrs[idx]->getVar()->getDeclare();
1867 
1868                     auto lr = krnlDbgInfo->getLiveIntervalInfo(dcl);
1869 
1870                     if (lr->getState() == LiveIntervalInfo::DebugLiveIntervalState::Open)
1871                     {
1872                         auto closeAt = state->getPrevInst()->getCISAOff();
1873                         while (closeAt >= 1 &&
1874                             krnlDbgInfo->isMissingVISAId(closeAt - 1))
1875                         {
1876                             closeAt--;
1877                         }
1878                         lr->setStateClosed(closeAt);
1879                     }
1880                 }
1881             }
1882         }
1883 
1884         if (closeAllOpenIntervals)
1885         {
1886             for (unsigned int j = 0; j < NUM_BITS_PER_ELT; j++)
1887             {
1888                 unsigned char eltJ = (elt >> j) & 0x1;
1889 
1890                 if (eltJ)
1891                 {
1892                     auto idx = (i + j);
1893                     G4_Declare* dcl = lrs[idx]->getVar()->getDeclare();
1894                     auto lr = krnlDbgInfo->getLiveIntervalInfo(dcl);
1895 
1896                     if (lr->getState() == LiveIntervalInfo::DebugLiveIntervalState::Open)
1897                     {
1898                         uint32_t lastCISAOff = (inst->getCISAOff() != UNMAPPABLE_VISA_INDEX) ?
1899                             inst->getCISAOff() : state->getPrevInst()->getCISAOff();
1900 
1901                         while (lastCISAOff >= 1 &&
1902                             krnlDbgInfo->isMissingVISAId(lastCISAOff - 1))
1903                         {
1904                             lastCISAOff--;
1905                         }
1906 
1907                         lr->setStateClosed(lastCISAOff);
1908                     }
1909                 }
1910             }
1911         }
1912     }
1913 
1914     if (inst->getCISAOff() != UNMAPPABLE_VISA_INDEX &&
1915         !inst->isPseudoKill())
1916     {
1917         state->setPrevBitset(live);
1918         state->setPrevInst(inst);
1919     }
1920 }
1921 
updateDebugInfo(vISA::G4_Kernel & kernel,std::vector<vISA::LSLiveRange * > & liveIntervals)1922 void updateDebugInfo(vISA::G4_Kernel& kernel, std::vector<vISA::LSLiveRange*>& liveIntervals)
1923 {
1924     for (auto lr : liveIntervals)
1925     {
1926         uint32_t start, end;
1927         G4_INST* startInst = lr->getFirstRef(start);
1928         G4_INST* endInst = lr->getLastRef(end);
1929 
1930         if (!start || !end)
1931             continue;
1932 
1933         start = startInst->getCISAOff();
1934         end = endInst->getCISAOff();
1935 
1936         auto lrInfo = kernel.getKernelDebugInfo()->getLiveIntervalInfo(lr->getTopDcl());
1937         if (start != UNMAPPABLE_VISA_INDEX &&
1938             end != UNMAPPABLE_VISA_INDEX)
1939         {
1940             lrInfo->addLiveInterval(start, end);
1941         }
1942     }
1943 }
1944 
updateDebugInfo(G4_Kernel & kernel,std::vector<vISA::LocalLiveRange * > & liveIntervals)1945 void updateDebugInfo(G4_Kernel& kernel, std::vector<vISA::LocalLiveRange*>& liveIntervals)
1946 {
1947     for (auto lr : liveIntervals)
1948     {
1949         if (lr->getAssigned())
1950         {
1951             uint32_t start, end;
1952             G4_INST* startInst = lr->getFirstRef(start);
1953             G4_INST* endInst = lr->getLastRef(end);
1954             start = startInst->getCISAOff();
1955             end = endInst->getCISAOff();
1956 
1957             auto lrInfo = kernel.getKernelDebugInfo()->getLiveIntervalInfo(lr->getTopDcl());
1958             if (start != UNMAPPABLE_VISA_INDEX &&
1959                 end != UNMAPPABLE_VISA_INDEX)
1960             {
1961                 lrInfo->addLiveInterval(start, end);
1962             }
1963         }
1964     }
1965 }
1966 
updateDebugInfo(G4_Kernel & kernel,std::vector<std::tuple<G4_Declare *,G4_INST *,G4_INST * >> augmentationLiveIntervals)1967 void updateDebugInfo(G4_Kernel& kernel, std::vector<std::tuple<G4_Declare*, G4_INST*, G4_INST*>> augmentationLiveIntervals)
1968 {
1969     // Invoked via augmentation pass
1970     for (auto& lr : augmentationLiveIntervals)
1971     {
1972         uint32_t start, end;
1973         G4_INST* startInst = std::get<1>(lr);
1974         G4_INST* endInst = std::get<2>(lr);
1975         start = startInst->getCISAOff();
1976         end = endInst->getCISAOff();
1977 
1978         G4_Declare* topdcl = std::get<0>(lr);
1979         while (std::get<0>(lr)->getAliasDeclare() != NULL)
1980         {
1981             topdcl = topdcl->getAliasDeclare();
1982         }
1983 
1984         auto lrInfo = kernel.getKernelDebugInfo()->getLiveIntervalInfo(topdcl);
1985         if (start != UNMAPPABLE_VISA_INDEX &&
1986             end != UNMAPPABLE_VISA_INDEX)
1987         {
1988             lrInfo->addLiveInterval(start, end);
1989         }
1990     }
1991 }
1992 
updateDebugInfo(G4_Kernel & kernel,G4_Declare * dcl,uint32_t start,uint32_t end)1993 void updateDebugInfo(G4_Kernel& kernel, G4_Declare* dcl, uint32_t start, uint32_t end)
1994 {
1995     auto lrInfo = kernel.getKernelDebugInfo()->getLiveIntervalInfo(dcl);
1996     if (start != UNMAPPABLE_VISA_INDEX &&
1997         end != UNMAPPABLE_VISA_INDEX)
1998     {
1999         lrInfo->addLiveInterval(start, end);
2000     }
2001 }
2002 
updateDebugInfo(G4_Kernel & kernel,G4_Declare * dcl,uint32_t offset)2003 void updateDebugInfo(G4_Kernel& kernel, G4_Declare* dcl, uint32_t offset)
2004 {
2005     auto lrInfo = kernel.getKernelDebugInfo()->getLiveIntervalInfo(dcl);
2006     lrInfo->liveAt(offset);
2007 }
2008 
getBinInstSize(G4_INST * inst)2009 uint32_t getBinInstSize(G4_INST* inst)
2010 {
2011     uint32_t size = inst->isCompactedInst() ?
2012         (BYTES_PER_INST / 2) : BYTES_PER_INST;
2013 
2014     return size;
2015 }
2016 
computeDebugInfo(std::list<G4_BB * > & stackCallEntryBBs)2017 void KernelDebugInfo::computeDebugInfo(std::list<G4_BB*>& stackCallEntryBBs)
2018 {
2019     updateMapping(stackCallEntryBBs);
2020     updateRelocOffset();
2021     if (reloc_offset > 0)
2022     {
2023         updateCallStackLiveIntervals();
2024     }
2025     else
2026     {
2027         updateCallStackMain();
2028     }
2029 }
2030 
updateCallStackMain()2031 void KernelDebugInfo::updateCallStackMain()
2032 {
2033     if (!getKernel().fg.getHasStackCalls())
2034         return;
2035 
2036     // Set live-interval for BE_FP
2037     auto befp = getBEFP();
2038     if (befp)
2039     {
2040         uint32_t start = 0;
2041         if (getBEFPSetupInst())
2042         {
2043             start = (uint32_t)getBEFPSetupInst()->getGenOffset() +
2044                 (uint32_t)getBinInstSize(getBEFPSetupInst());
2045         }
2046         updateDebugInfo(getKernel(), befp, start, mapCISAIndexGenOffset.back().GenOffset);
2047     }
2048 }
2049 
updateCallStackLiveIntervals()2050 void KernelDebugInfo::updateCallStackLiveIntervals()
2051 {
2052     if (!getKernel().fg.getIsStackCallFunc() &&
2053         !getKernel().fg.getHasStackCalls())
2054     {
2055         return;
2056     }
2057 
2058     uint32_t reloc_offset = 0;
2059     uint32_t start = 0xffffffff, end = 0;
2060 
2061     // Update live-interval for following ranges:
2062     // be_fp, caller_be_fp, retval
2063     if (getKernel().fg.getIsStackCallFunc())
2064     {
2065         // Only stack call function has return variable
2066         auto fretVar = getKernel().getKernelDebugInfo()->getFretVar();
2067         auto fretVarLI = getKernel().getKernelDebugInfo()->getLiveIntervalInfo(fretVar);
2068         fretVarLI->clearLiveIntervals();
2069 
2070         for (auto bbs : getKernel().fg)
2071         {
2072             for (auto insts : *bbs)
2073             {
2074                 if (insts->getGenOffset() != UNDEFINED_GEN_OFFSET)
2075                 {
2076                     reloc_offset = (reloc_offset == 0) ?
2077                         (uint32_t)insts->getGenOffset() : reloc_offset;
2078                     break;
2079                 }
2080             }
2081             if (reloc_offset > 0)
2082                 break;
2083         }
2084 
2085         uint32_t start = 0;
2086         if (getBEFPSetupInst())
2087         {
2088             // Frame descriptor can be addressed once once BE_FP is defined
2089             start = (uint32_t)getBEFPSetupInst()->getGenOffset() +
2090                 getBinInstSize(getBEFPSetupInst());
2091         }
2092 
2093         if (getCallerBEFPRestoreInst())
2094         {
2095             end = (uint32_t)getCallerBEFPRestoreInst()->getGenOffset();
2096         }
2097 
2098         MUST_BE_TRUE(end >= reloc_offset, "Failed to update live-interval for retval");
2099         MUST_BE_TRUE(start >= reloc_offset, "Failed to update start for retval");
2100         MUST_BE_TRUE(end >= start, "end less then start for retval");
2101         for (uint32_t i = start - reloc_offset; i <= end - reloc_offset; i++)
2102         {
2103             updateDebugInfo(*kernel, fretVar, i);
2104         }
2105     }
2106 
2107     auto befp = getBEFP();
2108     if (befp)
2109     {
2110         auto befpLIInfo = getLiveIntervalInfo(befp);
2111         befpLIInfo->clearLiveIntervals();
2112         auto befpSetupInst = getBEFPSetupInst();
2113         if (befpSetupInst)
2114         {
2115             start = (uint32_t)befpSetupInst->getGenOffset() +
2116                 getBinInstSize(befpSetupInst);
2117             auto spRestoreInst = getCallerSPRestoreInst();
2118             if (spRestoreInst)
2119             {
2120                 end = (uint32_t)spRestoreInst->getGenOffset();
2121             }
2122             for (uint32_t i = start - reloc_offset; i <= end - reloc_offset; i++)
2123             {
2124                 updateDebugInfo(*kernel, befp, i);
2125             }
2126         }
2127 
2128         MUST_BE_TRUE(start != 0xffffffff, "Cannot update stack vars1");
2129         MUST_BE_TRUE(end != 0, "Cannot update stack vars2");
2130     }
2131 
2132     auto callerbefp = getCallerBEFP();
2133     if (callerbefp)
2134     {
2135         auto callerbefpLIInfo = getLiveIntervalInfo(callerbefp);
2136         callerbefpLIInfo->clearLiveIntervals();
2137         auto callerbeSaveInst = getCallerBEFPSaveInst();
2138         if (callerbeSaveInst)
2139         {
2140             auto callerbefpRestoreInst = getCallerBEFPRestoreInst();
2141             MUST_BE_TRUE(callerbefpRestoreInst != nullptr,
2142                 "Instruction destroying caller be fp not found in epilog");
2143             start = (uint32_t)callerbeSaveInst->getGenOffset() - reloc_offset +
2144                 getBinInstSize(callerbeSaveInst);
2145             end = (uint32_t)callerbefpRestoreInst->getGenOffset() - reloc_offset;
2146             for (uint32_t i = start;
2147                 i <= end;
2148                 i++)
2149             {
2150                 updateDebugInfo(*kernel, callerbefp, i);
2151             }
2152         }
2153     }
2154 }
2155 
updateExpandedIntrinsic(G4_InstIntrinsic * spillOrFill,G4_INST * inst)2156 void KernelDebugInfo::updateExpandedIntrinsic(G4_InstIntrinsic* spillOrFill, G4_INST* inst)
2157 {
2158     // This function looks up all caller/callee save code added.
2159     // Once it finds "spillOrFill", it adds inst to it. This is
2160     // because VISA now uses spill/fill intrinsics to model
2161     // save/restore. These intrinsics are expanded after RA is
2162     // done. So this method gets invoked after RA is done and
2163     // when intrinsics are expanded.
2164     for (auto& k : callerSaveRestore)
2165     {
2166         for (auto it = k.second.first.begin(); it != k.second.first.end(); ++it)
2167         {
2168             if ((*it) == spillOrFill)
2169             {
2170                 k.second.first.insert(it, inst);
2171                 return;
2172             }
2173         }
2174 
2175         for (auto it = k.second.second.begin(); it != k.second.second.end(); ++it)
2176         {
2177             if ((*it) == spillOrFill)
2178             {
2179                 k.second.second.insert(it, inst);
2180                 return;
2181             }
2182         }
2183     }
2184 
2185     for (auto it = calleeSaveRestore.first.begin(); it != calleeSaveRestore.first.end(); ++it)
2186     {
2187         if ((*it) == spillOrFill)
2188         {
2189             calleeSaveRestore.first.insert(it, inst);
2190             return;
2191         }
2192     }
2193 
2194     for (auto it = calleeSaveRestore.second.begin(); it != calleeSaveRestore.second.end(); ++it)
2195     {
2196         if ((*it) == spillOrFill)
2197         {
2198             calleeSaveRestore.second.insert(it, inst);
2199             return;
2200         }
2201     }
2202 }
2203 
addCallerSaveInst(G4_BB * fcallBB,G4_INST * inst)2204 void KernelDebugInfo::addCallerSaveInst(G4_BB* fcallBB, G4_INST* inst)
2205 {
2206     callerSaveRestore[fcallBB].first.push_back(inst);
2207 }
2208 
addCallerRestoreInst(G4_BB * fcallBB,G4_INST * inst)2209 void KernelDebugInfo::addCallerRestoreInst(G4_BB* fcallBB, G4_INST* inst)
2210 {
2211     callerSaveRestore[fcallBB].second.push_back(inst);
2212 }
2213 
addCalleeSaveInst(G4_INST * inst)2214 void KernelDebugInfo::addCalleeSaveInst(G4_INST* inst)
2215 {
2216     calleeSaveRestore.first.push_back(inst);
2217 }
2218 
addCalleeRestoreInst(G4_INST * inst)2219 void KernelDebugInfo::addCalleeRestoreInst(G4_INST* inst)
2220 {
2221     calleeSaveRestore.second.push_back(inst);
2222 }
2223 
getCallerSaveInsts(G4_BB * fcallBB)2224 std::vector<G4_INST*>& KernelDebugInfo::getCallerSaveInsts(G4_BB* fcallBB)
2225 {
2226     return callerSaveRestore[fcallBB].first;
2227 }
2228 
getCallerRestoreInsts(G4_BB * fcallBB)2229 std::vector<G4_INST*>& KernelDebugInfo::getCallerRestoreInsts(G4_BB* fcallBB)
2230 {
2231     return callerSaveRestore[fcallBB].second;
2232 }
2233 
getCalleeSaveInsts()2234 std::vector<G4_INST*>& KernelDebugInfo::getCalleeSaveInsts()
2235 {
2236     return calleeSaveRestore.first;
2237 }
2238 
getCalleeRestoreInsts()2239 std::vector<G4_INST*>& KernelDebugInfo::getCalleeRestoreInsts()
2240 {
2241     return calleeSaveRestore.second;
2242 }
2243 
isFcallWithSaveRestore(G4_BB * bb)2244 bool KernelDebugInfo::isFcallWithSaveRestore(G4_BB* bb)
2245 {
2246     // Debug emission happens after binary encoding
2247     // at which point all fcalls are converted to
2248     // calls. So G4_INST::isFCall() will always
2249     // be false
2250     bool retval = false;
2251     auto it = callerSaveRestore.find(bb);
2252     if (it != callerSaveRestore.end())
2253     {
2254         retval = true;
2255     }
2256 
2257     return retval;
2258 }
2259 
2260 // Compute extra instructions in insts over oldInsts list and
2261 // return a new list.
getDeltaInstructions(G4_BB * bb)2262 INST_LIST KernelDebugInfo::getDeltaInstructions(G4_BB* bb)
2263 {
2264     INST_LIST deltaInsts;
2265     for (auto instIt = bb->begin(); instIt != bb->end(); instIt++)
2266         deltaInsts.push_back(*instIt);
2267 
2268     for (auto oldInstsIt : oldInsts)
2269     {
2270         deltaInsts.remove(oldInstsIt);
2271     }
2272 
2273     return deltaInsts;
2274 }
2275 
addInst(G4_INST * inst)2276 void SaveRestoreManager::addInst(G4_INST* inst)
2277 {
2278     SaveRestoreInfo newSVInfo;
2279     srInfo.push_back(newSVInfo);
2280     if (srInfo.size() > 1)
2281     {
2282         // Copy over from previous
2283         // so emitted data is
2284         // cumulative per IP.
2285         srInfo[srInfo.size() - 1].saveRestoreMap = srInfo[srInfo.size() - 2].saveRestoreMap;
2286     }
2287 
2288     if (inst->opcode() == G4_add &&
2289         inst->getSrc(1) &&
2290         inst->getSrc(1)->isImm() &&
2291         inst->getSrc(0) &&
2292         inst->getSrc(0)->isSrcRegRegion() &&
2293         GetTopDclFromRegRegion(inst->getSrc(0)) == visaKernel->getKernel()->fg.builder->getBEFP())
2294     {
2295         memOffset = (int32_t)inst->getSrc(1)->asImm()->getImm();
2296         regWithMemOffset = inst->getDst()->getLinearizedStart() / numEltPerGRF<Type_UB>();
2297         absOffset = false;
2298     }
2299 
2300     if (inst->opcode() == G4_mov &&
2301         inst->getSrc(0) &&
2302         inst->getSrc(0)->isImm() &&
2303         inst->getExecSize() == g4::SIMD1 &&
2304         inst->getDst() &&
2305         inst->getDst()->getLinearizedStart() % numEltPerGRF<Type_UB>() == 8)
2306     {
2307         memOffset = (int32_t)inst->getSrc(0)->asImm()->getImm();
2308         regWithMemOffset = inst->getDst()->getLinearizedStart() / numEltPerGRF<Type_UB>();
2309         absOffset = true;
2310     }
2311 
2312     srInfo.back().update(inst, memOffset, regWithMemOffset, absOffset);
2313 }
2314 
emitAll()2315 void SaveRestoreManager::emitAll()
2316 {
2317     for (auto it : srInfo)
2318     {
2319 #ifdef DEBUG_VERBOSE_ON
2320         it.getInst()->emit(std::cerr);
2321 #endif
2322         DEBUG_VERBOSE("\n");
2323 
2324         for (auto mapIt : it.saveRestoreMap)
2325         {
2326             DEBUG_VERBOSE("\tr" << mapIt.first << ".0 (8):d saved to ");
2327             if (mapIt.second.first == SaveRestoreInfo::RegOrMem::Reg)
2328             {
2329                 DEBUG_VERBOSE("r" << mapIt.second.second.regNum << ".0 (8):d\n");
2330             }
2331             else if (mapIt.second.first == SaveRestoreInfo::RegOrMem::MemAbs)
2332             {
2333                 DEBUG_VERBOSE("mem at offset " << mapIt.second.second.offset << " bytes (abs)\n");
2334             }
2335             else if (mapIt.second.first == SaveRestoreInfo::RegOrMem::MemOffBEFP)
2336             {
2337                 DEBUG_VERBOSE("mem at offset " << mapIt.second.second.offset << " bytes (off befp)\n");
2338             }
2339         }
2340     }
2341 }
2342 
update(G4_INST * inst,int32_t memOffset,uint32_t regWithMemOffset,bool isOffAbs)2343 void SaveRestoreInfo::update(G4_INST* inst, int32_t memOffset, uint32_t regWithMemOffset, bool isOffAbs)
2344 {
2345     i = inst;
2346 
2347     if (inst->getDst() &&
2348         inst->getDst()->isDstRegRegion())
2349     {
2350         auto dstreg = inst->getDst()->getLinearizedStart() / numEltPerGRF<Type_UB>();
2351 
2352         // Remove any item in map that is saved as storage for some other reg.
2353         for (auto mapIt : saveRestoreMap)
2354         {
2355             if (mapIt.second.first == RegOrMem::Reg &&
2356                 mapIt.second.second.regNum == dstreg)
2357             {
2358                 DEBUG_VERBOSE("Removed r" << mapIt.second.second.regNum << ".0 (8):d\n");
2359                 saveRestoreMap.erase(mapIt.first);
2360                 break;
2361             }
2362         }
2363     }
2364 
2365     if (inst->opcode() == G4_mov &&
2366         inst->getDst()->isDstRegRegion() &&
2367         inst->getSrc(0)->isSrcRegRegion())
2368     {
2369         unsigned int srcreg, dstreg;
2370         srcreg = inst->getSrc(0)->getLinearizedStart() / numEltPerGRF<Type_UB>();
2371         dstreg = inst->getDst()->getLinearizedStart() / numEltPerGRF<Type_UB>();
2372 
2373         bool done = false;
2374         for (auto mapIt : saveRestoreMap)
2375         {
2376             if (mapIt.second.first == RegOrMem::Reg &&
2377                 mapIt.second.second.regNum == srcreg &&
2378                 mapIt.first == dstreg)
2379             {
2380                 saveRestoreMap.erase(mapIt.first);
2381                 done = true;
2382                 DEBUG_VERBOSE("Restored r" << dstreg << ".0 (8):d from r" << srcreg << ".0 (8):d\n");
2383                 break;
2384             }
2385         }
2386 
2387         if (done == false)
2388         {
2389             auto it = saveRestoreMap.find(srcreg);
2390             if (it == saveRestoreMap.end())
2391             {
2392                 // Entry not found so update map
2393                 RegMap mapping;
2394                 mapping.regNum = dstreg;
2395 
2396                 saveRestoreMap.insert(std::make_pair(srcreg, std::make_pair(RegOrMem::Reg, mapping)));
2397 
2398                 DEBUG_VERBOSE("Saved r" << srcreg << ".0 (8):d to r" << dstreg << ".0 (8):d\n");
2399             }
2400         }
2401     }
2402     else if (inst->isSend())
2403     {
2404         // send/read, send/write
2405         // sends/read, sends/write
2406         MUST_BE_TRUE(!inst->getMsgDesc()->isScratch(),
2407             "Not expecting scratch msg in save/restore code");
2408         if (inst->getMsgDesc()->isWrite())
2409         {
2410             uint32_t srcreg, extsrcreg = 0;
2411             srcreg = inst->getSrc(0)->getLinearizedStart() / numEltPerGRF<Type_UB>();
2412             if (inst->getMsgDesc()->getSrc1LenRegs() > 0)
2413             {
2414                 extsrcreg = inst->getSrc(1)->getLinearizedStart()/numEltPerGRF<Type_UB>();
2415             }
2416 
2417             MUST_BE_TRUE(memOffset != 0xffff, "Invalid mem offset");
2418             MUST_BE_TRUE(regWithMemOffset == srcreg, "Send src not initialized with offset");
2419 
2420             std::vector<uint32_t> payloadRegs;
2421             for (uint32_t i = 1; i < (uint32_t)inst->getMsgDesc()->getSrc0LenRegs(); i++)
2422             {
2423                 payloadRegs.push_back(i + srcreg);
2424             }
2425             for (uint32_t i = 0; i < (uint32_t)inst->getMsgDesc()->getSrc1LenRegs(); i++)
2426             {
2427                 payloadRegs.push_back(i + extsrcreg);
2428             }
2429 
2430             for (uint32_t i = 0; i < payloadRegs.size(); i++)
2431             {
2432                 uint32_t payloadReg = payloadRegs[i];
2433                 RegMap m;
2434                 m.offset = (int32_t)((memOffset * numEltPerGRF<Type_UB>()/2) + (i * numEltPerGRF<Type_UB>()));
2435                 m.isAbs = isOffAbs;
2436                 saveRestoreMap.insert(std::make_pair(payloadReg,
2437                     std::make_pair(isOffAbs ? RegOrMem::MemAbs : RegOrMem::MemOffBEFP, m)));
2438 
2439 #ifdef DEBUG_VERBOSE_ON
2440                 const char* offstr = isOffAbs ? "(abs)" : "(off besp)";
2441 
2442                 DEBUG_VERBOSE("Saved r" << payloadReg << ".0 (8):d to mem at offset "
2443                     << m.offset << " bytes" << offstr << "\n");
2444 #endif
2445             }
2446         }
2447         else if (inst->getMsgDesc()->isRead())
2448         {
2449             uint32_t srcreg, dstreg;
2450             srcreg = inst->getSrc(0)->getLinearizedStart() / numEltPerGRF<Type_UB>();
2451             dstreg = inst->getDst()->getLinearizedStart() / numEltPerGRF<Type_UB>();
2452 
2453             MUST_BE_TRUE(memOffset != 0xffff, "Invalid mem offset");
2454             MUST_BE_TRUE(regWithMemOffset == srcreg, "Send src not initialized with offset");
2455 
2456             auto responselen = inst->getMsgDesc()->getDstLenRegs();
2457             int32_t startoff;
2458             startoff = memOffset * numEltPerGRF<Type_UB>() / 2;
2459 
2460             for (auto reg = dstreg; reg < (responselen + dstreg); reg++)
2461             {
2462                 int32_t offsetForReg = startoff + ((reg - dstreg) * numEltPerGRF<Type_UB>());
2463 
2464                 for (auto mapIt : saveRestoreMap)
2465                 {
2466                     if (mapIt.first == reg &&
2467                         (mapIt.second.first == RegOrMem::MemAbs ||
2468                         mapIt.second.first == RegOrMem::MemOffBEFP) &&
2469                         mapIt.second.second.offset == offsetForReg)
2470                     {
2471                         saveRestoreMap.erase(mapIt.first);
2472 
2473 #ifdef DEBUG_VERBOSE_ON
2474                         const char* offstr = RegOrMem::MemAbs ? "abs" : "off befp";
2475                         DEBUG_VERBOSE("Restored r" << reg << ".0 (8):d from mem offset " <<
2476                             offsetForReg << " bytes (" << offstr << ")\n");
2477 #endif
2478                         break;
2479                     }
2480                 }
2481             }
2482         }
2483     }
2484 }
2485 
2486 #ifdef DEBUG_VERBOSE
dumpLiveInterval(LiveIntervalInfo * lv)2487 void dumpLiveInterval(LiveIntervalInfo* lv)
2488 {
2489     std::vector<std::pair<unsigned int, unsigned int>> v;
2490     lv->getLiveIntervals(v);
2491     for (auto it : v)
2492     {
2493         std::cerr << "(" << it.first << ", " << it.second << ")\n";
2494     }
2495 }
2496 
emitSubRoutineInfo(VISAKernelImpl * visaKernel)2497 void emitSubRoutineInfo(VISAKernelImpl* visaKernel)
2498 {
2499     auto kernel = visaKernel->getKernel();
2500 
2501     // Is there a single entry point for debugInfo?
2502     kernel->fg.setPhysicalPredSucc();
2503     for (auto bb : kernel->fg)
2504     {
2505         G4_INST* firstInst = nullptr;
2506         G4_INST* lastInst = nullptr;
2507         unsigned int start = 0, end = 0;
2508         G4_Declare* retval = nullptr;
2509         G4_Label* subLabel = nullptr;
2510         if (bb->isEndWithCall())
2511         {
2512             G4_BB* calleeBB = bb->Succs.front();
2513             while (firstInst == NULL && calleeBB != NULL)
2514             {
2515                 if (calleeBB->size() > 0)
2516                 {
2517                     firstInst = calleeBB->front();
2518                     start = firstInst->getCISAOff();
2519                     subLabel = firstInst->getSrc(0)->asLabel();
2520                 }
2521             }
2522 
2523             calleeBB = bb->BBAfterCall()->Preds.front();
2524             while (lastInst == NULL && calleeBB != NULL)
2525             {
2526                 if (calleeBB->size() > 0)
2527                 {
2528                     lastInst = calleeBB->back();
2529                     end = lastInst->getCISAOff();
2530                     MUST_BE_TRUE(lastInst->isReturn(), "Expecting to see G4_return as last inst in sub-routine");
2531                     retval = lastInst->getSrc(0)->asSrcRegRegion()->getBase()->asRegVar()->getDeclare()->getRootDeclare();
2532                 }
2533 
2534                 calleeBB = calleeBB->Preds.front();
2535             }
2536             std::cerr << "Func info id " << subLabel->getLabel() << "\n";
2537             std::cerr << "First inst " << start << ", last inst " << end << "\n";
2538             std::cerr << "Return value in dcl " << retval->getName() << "\n";
2539 
2540             if (kernel->getKernelDebugInfo()->getLiveIntervalInfo(retval, false) != NULL)
2541             {
2542                 std::cerr << "Found live-interval for retval range\n";
2543                 auto lv = kernel->getKernelDebugInfo()->getLiveIntervalInfo(retval, false);
2544                 dumpLiveInterval(lv);
2545                 std::cerr << "\n";
2546             }
2547         }
2548     }
2549 }
2550 
emitBEFP(VISAKernelImpl * visaKernel)2551 void emitBEFP(VISAKernelImpl* visaKernel)
2552 {
2553     // Compute both be fp of current frame and previous frame
2554     auto kernel = visaKernel->getKernel();
2555     auto befpDcl = kernel->getKernelDebugInfo()->getBEFP();
2556     if (befpDcl &&
2557         kernel->getKernelDebugInfo()->getLiveIntervalInfo(befpDcl, false))
2558     {
2559         std::cerr << "Found befp dcl at " << befpDcl->getName() << "\n";
2560         dumpLiveInterval(kernel->getKernelDebugInfo()->getLiveIntervalInfo(befpDcl, false));
2561         std::cerr << "\n";
2562     }
2563 
2564     auto befpSetup = kernel->getKernelDebugInfo()->getBEFPSetupInst();
2565     if (befpSetup)
2566     {
2567         std::cerr << "befp setup inst found:\n";
2568         befpSetup->emit(std::cerr);
2569         std::cerr << "\n";
2570     }
2571 
2572     auto spRestore = kernel->getKernelDebugInfo()->getCallerSPRestoreInst();
2573     if (spRestore)
2574     {
2575         std::cerr << "sp restore inst found:\n";
2576         spRestore->emit(std::cerr);
2577         std::cerr << "\n";
2578     }
2579 
2580     auto callerfpdcl = kernel->getKernelDebugInfo()->getCallerBEFP();
2581     if (callerfpdcl &&
2582         kernel->getKernelDebugInfo()->getLiveIntervalInfo(callerfpdcl, false))
2583     {
2584         std::cerr << "Found caller befp dcl at " << callerfpdcl->getName() << "\n";
2585         dumpLiveInterval(kernel->getKernelDebugInfo()->getLiveIntervalInfo(callerfpdcl, false));
2586         std::cerr << "\n";
2587     }
2588 
2589     auto fretVar = kernel->getKernelDebugInfo()->getFretVar();
2590     if (fretVar &&
2591         kernel->getKernelDebugInfo()->getLiveIntervalInfo(fretVar, false))
2592     {
2593         std::cerr << "fretvar " << fretVar->getName() << "\n";
2594         dumpLiveInterval(kernel->getKernelDebugInfo()->getLiveIntervalInfo(fretVar, false));
2595         std::cerr << "\n";
2596     }
2597 
2598     auto frameSize = kernel->getKernelDebugInfo()->getFrameSize();
2599     std::cerr << "frame size = " << frameSize << " bytes" << "\n";
2600 }
2601 
emitCallerSaveInfo(VISAKernelImpl * visaKernel)2602 void emitCallerSaveInfo(VISAKernelImpl* visaKernel)
2603 {
2604     auto kernel = visaKernel->getKernel();
2605 
2606     for (auto bbs : kernel->fg)
2607     {
2608         if (bbs->size() > 0 &&
2609             kernel->getKernelDebugInfo()->isFcallWithSaveRestore(bbs))
2610         {
2611             auto& callerSaveInsts = kernel->getKernelDebugInfo()->getCallerSaveInsts(bbs);
2612             auto& callerRestoreInsts = kernel->getKernelDebugInfo()->getCallerRestoreInsts(bbs);
2613 
2614             std::cerr << "Caller save for ";
2615             bbs->back()->emit(std::cerr);
2616             std::cerr << "\n";
2617 
2618             SaveRestoreManager mgr(visaKernel);
2619             for (auto callerSave : callerSaveInsts)
2620             {
2621                 mgr.addInst(callerSave);
2622             }
2623 
2624             for (auto callerRestore : callerRestoreInsts)
2625             {
2626                 mgr.addInst(callerRestore);
2627             }
2628 
2629             mgr.emitAll();
2630 
2631             std::cerr << "\n";
2632         }
2633     }
2634 }
2635 
emitCalleeSaveInfo(VISAKernelImpl * visaKernel)2636 void emitCalleeSaveInfo(VISAKernelImpl* visaKernel)
2637 {
2638     G4_Kernel* kernel = visaKernel->getKernel();
2639 
2640     std::cerr << "\nCallee save:\n";
2641     SaveRestoreManager mgr(visaKernel);
2642     for (auto calleeSave : kernel->getKernelDebugInfo()->getCalleeSaveInsts())
2643     {
2644         mgr.addInst(calleeSave);
2645     }
2646 
2647     for (auto calleeRestore : kernel->getKernelDebugInfo()->getCalleeRestoreInsts())
2648     {
2649         mgr.addInst(calleeRestore);
2650     }
2651 
2652     mgr.emitAll();
2653 
2654     std::cerr << "\n";
2655 }
2656 
dumpCFG(VISAKernelImpl * visaKernel)2657 void dumpCFG(VISAKernelImpl* visaKernel)
2658 {
2659     G4_Kernel* kernel = visaKernel->getKernel();
2660     auto reloc_offset = 0;
2661     bool done = false;
2662 
2663     for (auto bbs : kernel->fg)
2664     {
2665         for (auto insts : *bbs)
2666         {
2667             if (insts->getGenOffset() != UNDEFINED_GEN_OFFSET)
2668             {
2669                 if (!done)
2670                 {
2671                     reloc_offset = (uint32_t)insts->getGenOffset();
2672                     done = true;
2673                 }
2674                 std::cerr << insts->getGenOffset() - reloc_offset;
2675             }
2676             std::cerr << "\t";
2677             insts->emit(std::cerr);
2678             std::cerr << "\n";
2679         }
2680     }
2681 }
2682 
addCallFrameInfo(VISAKernelImpl * kernel)2683 void addCallFrameInfo(VISAKernelImpl* kernel)
2684 {
2685     std::cerr << "\n\n\n";
2686 
2687     if (kernel->getKernel()->fg.getIsStackCallFunc())
2688     {
2689         std::cerr << "Stack call function " << kernel->getKernel()->getName() << "\n";
2690     }
2691     else
2692     {
2693         std::cerr << "Kernel " << kernel->getKernel()->getName() << "\n";
2694     }
2695     std::cerr << "\n";
2696 
2697     emitSubRoutineInfo(kernel);
2698 
2699     emitBEFP(kernel);
2700 
2701     emitCallerSaveInfo(kernel);
2702 
2703     emitCalleeSaveInfo(kernel);
2704 
2705     dumpCFG(kernel);
2706 }
2707 #endif
2708 
getLiveIntervalInfo(G4_Declare * dcl,bool createIfNULL)2709 LiveIntervalInfo* KernelDebugInfo::getLiveIntervalInfo(G4_Declare* dcl, bool createIfNULL)
2710 {
2711     dcl = dcl->getRootDeclare();
2712 
2713     LiveIntervalInfo* lr = NULL;
2714     auto it = debugInfoLiveIntervalMap.find(dcl);
2715     if (it == debugInfoLiveIntervalMap.end())
2716     {
2717         if (createIfNULL)
2718         {
2719             lr = new (kernel->fg.mem) LiveIntervalInfo();
2720             debugInfoLiveIntervalMap.insert(std::make_pair(dcl, lr));
2721         }
2722     }
2723     else
2724     {
2725         lr = it->second;
2726     }
2727 
2728     return lr;
2729 }
2730 
2731 // TODO: Check result in presence of spill code and stack calling convention
2732