1 /*========================== begin_copyright_notice ============================
2 
3 Copyright (C) 2017-2021 Intel Corporation
4 
5 SPDX-License-Identifier: MIT
6 
7 ============================= end_copyright_notice ===========================*/
8 
9 #include "visa_igc_common_header.h"
10 
11 #include "Common_ISA.h"
12 #include "Mem_Manager.h"
13 #include "VISADefines.h"
14 
15 #include "IGC/common/StringMacros.hpp"
16 
// Makes the enclosing function return 1 when the allocation result X is NULL.
// The body is wrapped in do { } while (0) so that the macro expands to exactly
// one statement: it can be followed by ';' and used as the body of an if/else
// without capturing the 'else'. The argument is parenthesized so that any
// expression can be passed safely.
#define ALLOC_ASSERT(X)                 \
    do {                                \
        if ((X) == NULL) return 1;      \
    } while (0)
19 
20 const char *implictKindStrings[IMPLICIT_INPUT_COUNT] = {
21     "EXPLICIT", "LOCAL_SIZE", "GROUP_COUNT", "LOCAL_ID", "PSEUDO_INPUT"};
22 
23 const char* Rel_op_str[ISA_CMP_UNDEF + 1] =
24 {
25     "eq",  // equal
26     "ne", // not equal
27     "gt",  // greater
28     "ge", // greater or equal
29     "lt",  // less
30     "le", // less or equal
31     " "
32 };
33 
34 const char* media_ld_mod_str[MEDIA_LD_Mod_NUM] =
35 {
36     "nomod",
37     "modified",
38     "top",
39     "bottom",
40     "top_mod",
41     "bottom_mod"
42 };
43 
44 const char* media_st_mod_str[MEDIA_ST_Mod_NUM] =
45 {
46     "nomod",
47     "reserved", // this is useless since it is for MEDIA_ST_reserved
48     "top",
49     "bottom"
50 };
51 
52 const char* channel_mask_str[CHANNEL_MASK_NUM] =
53 {
54     "",    // 0000
55     "R",   // 0001
56     "G",   // 0010
57     "RG",  // 0011
58     "B",   // 0100
59     "RB",  // 0101
60     "GB",  // 0110
61     "RGB", // 0111
62     "A",   // 1000
63     "RA",  // 1001
64     "GA",  // 1010
65     "RGA", // 1011
66     "BA",  // 1100
67     "RBA", // 1101
68     "GBA", // 1110
69     "RGBA" // 1111
70 };
71 
72 const char* channel_mask_slm_str[CHANNEL_MASK_NUM] =
73 {
74     "RGBA",
75     "GBA",
76     "RBA",
77     "BA",
78     "RGA",
79     "GA",
80     "RA",
81     "A",
82     "RGB",
83     "GB",
84     "RB",
85     "B",
86     "RG",
87     "G",
88     "R",
89     "0000"
90 };
91 
// Names of the four sampler channel output formats.
const char *sampler_channel_output_str[4] = {
    "16-full",        // index 0
    "16-downsampled", // index 1
    "8-full",         // index 2
    "8-downsampled"   // index 3
};
99 
100 const char* vme_op_mode_str[VME_OP_MODE_NUM] =
101 {
102     "inter",
103     "intra",
104     "both"
105 };
106 
107 const char* emask_str[vISA_NUM_EMASK+1] =
108 {
109     "M1",
110     "M2",
111     "M3",
112     "M4",
113     "M5",
114     "M6",
115     "M7",
116     "M8",
117     "M1_NM",
118     "M2_NM",
119     "M3_NM",
120     "M4_NM",
121     "M5_NM",
122     "M6_NM",
123     "M7_NM",
124     "M8_NM",
125     "NoMask"
126 };
127 
128 
getSampleOp3DNameOrNull(int opcode)129 static const char* getSampleOp3DNameOrNull(int opcode)
130 {
131     switch (opcode)
132     {
133     case VISA_3D_SAMPLE:        // 0x0
134         return "sample_3d";
135     case VISA_3D_SAMPLE_B:      // 0x1
136         return "sample_b";
137     case VISA_3D_SAMPLE_L:      // 0x2
138         return "sample_l";
139     case VISA_3D_SAMPLE_C:      // 0x3
140         return "sample_c";
141     case VISA_3D_SAMPLE_D:      // 0x4
142         return "sample_d";
143     case VISA_3D_SAMPLE_B_C:    // 0x5
144         return "sample_b_c";
145     case VISA_3D_SAMPLE_L_C:    // 0x6
146         return "sample_l_c";
147     case VISA_3D_LD:            // 0x7
148         return "load_3d";
149     case VISA_3D_GATHER4:       // 0x8
150         return "sample4";
151     case VISA_3D_LOD:           // 0x9
152         return "lod";
153     case VISA_3D_RESINFO:       // 0xA
154         return "resinfo";
155     case VISA_3D_SAMPLEINFO:    // 0xB
156         return "sampleinfo";
157     case VISA_3D_SAMPLE_KILLPIX:// 0xC
158         return "sample+killpix";
159     case VISA_3D_GATHER4_C:     // 0x10
160         return "sample4_c";
161     case VISA_3D_GATHER4_PO:    // 0x11
162         return "sample4_po";
163     case VISA_3D_GATHER4_PO_C:  // 0x12
164         return "sample4_po_c";
165     case VISA_3D_SAMPLE_D_C:    // 0x14
166         return "sample_d_c";
167     case VISA_3D_SAMPLE_LZ:     // 0x18
168         return "sample_lz";
169     case VISA_3D_SAMPLE_C_LZ:   // 0x19
170         return "sample_c_lz";
171     case VISA_3D_LD_LZ:         // 0x1A
172         return "load_lz";
173     case VISA_3D_LD2DMS_W:      // 0x1C
174         return "load_2dms_w";
175     case VISA_3D_LD_MCS:        // 0x1D
176         return "load_mcs";
177     default:
178         return nullptr;
179     }
180 }
getSampleOp3DName(int opcode)181 const char* getSampleOp3DName(int opcode)
182 {
183     const char *name = getSampleOp3DNameOrNull(opcode);
184     assert(name && "invalid sampler opcode");
185     if (!name)
186         return "sampler_unknown";
187     return name;
188 }
getSampleOpFromName(const char * str)189 VISASampler3DSubOpCode getSampleOpFromName(const char *str)
190 {
191     for (int i = 0; i < ISA_NUM_OPCODE; i++) {
192         const char *symI = getSampleOp3DNameOrNull(i);
193         if (symI && strcmp(symI, str) == 0)
194             return (VISASampler3DSubOpCode)i;
195     }
196     return (VISASampler3DSubOpCode)-1;
197 }
198 
// Names of the VA sub-operations; the trailing comment on each entry is its
// array index in hexadecimal.
const char *va_sub_names[26] = {
    "avs",                 // 0x00
    "convolve",            // 0x01
    "minmax",              // 0x02
    "minmaxfilter",        // 0x03
    "erode",               // 0x04
    "dilate",              // 0x05
    "boolcentroid",        // 0x06
    "centroid",            // 0x07
    "CONV_1D_HORIZONTAL",  // 0x08
    "CONV_1D_VERTICAL",    // 0x09
    "CONV_1PIXEL",         // 0x0A
    "FLOOD_FILL",          // 0x0B
    "LBP_CREATION",        // 0x0C
    "LBP_CORRELATION",     // 0x0D
    "",                    // 0x0E (no name)
    "CORRELATION_SEARCH",  // 0x0F
    "HDC_CONVOLVE_2D",     // 0x10
    "HDC_MIN_MAX_FILTER",  // 0x11
    "HDC_ERODE",           // 0x12
    "HDC_DILATE",          // 0x13
    "HDC_LBP_CORRELATION", // 0x14
    "HDC_LBP_CREATION",    // 0x15
    "HDC_CONVOLVE_1D_H",   // 0x16
    "HDC_CONVOLVE_1D_V",   // 0x17
    "HDC_CONVOLVE_1P",     // 0x18
    "UNDEFINED"            // 0x19
};
228 
// Names of the two VA pixel formats.
const char *pixel_size_str[2] = {
    "VA_Y16_FORMAT", // index 0
    "VA_Y8_FORMAT"   // index 1
};
234 
// Names of the LBP creation modes.
const char *lbp_creation_mode[3] = {
    "VA_5x5_mode", // index 0
    "VA_3x3_mode", // index 1
    "VA_BOTH_mode" // index 2
};
241 
// Names of the AVS output format controls.
const char *avs_control_str[4] = {
    "AVS_16_FULL",        // index 0
    "AVS_16_DOWN_SAMPLE", // index 1
    "AVS_8_FULL",         // index 2
    "AVS_8_DOWN_SAMPLE"   // index 3
};
248 
// Names of the AVS execution modes; index 1 is not a valid mode.
const char *avs_exec_mode[3] = {
    "AVS_16x4",    // index 0
    "AVS_INVALID", // index 1
    "AVS_16x8"     // index 2
};
254 
// Names of the min/max-filter execution modes; index 1 is not a valid mode.
const char *mmf_exec_mode[4] = {
    "VA_MMF_16x4",    // index 0
    "VA_MMF_INVALID", // index 1
    "VA_MMF_16x1",    // index 2
    "VA_MMF_1x1"      // index 3
};
261 
// Names of the min/max enable modes.
const char *mmf_enable_mode[3] = {
    "VA_MINMAX_ENABLE", // index 0
    "VA_MAX_ENABLE",    // index 1
    "VA_MIN_ENABLE"     // index 2
};
267 
// Names of the erode/dilate execution modes.
const char *ed_exec_mode[4] = {
    "VA_ED_64x4", // index 0
    "VA_ED_32x4", // index 1
    "VA_ED_64x1", // index 2
    "VA_ED_32x1"  // index 3
};
274 
// Names of the convolve execution modes; index 1 is not a valid mode.
const char *conv_exec_mode[4] = {
    "VA_CONV_16x4",    // index 0
    "VA_CONV_INVALID", // index 1
    "VA_CONV_16x1",    // index 2
    "VA_CONV_1x1"      // index 3
};
281 
// Byte size associated with each AVS output format control.
unsigned format_control_byteSize2[4] = {
    4, // AVS_16_FULL
    2, // AVS_16_DOWN_SAMPLE (not valid)
    2, // AVS_8_FULL
    1  // AVS_8_DOWN_SAMPLE (not valid)
};
288 
// Size in bytes for each erode/dilate execution mode: width * height bits
// divided by 8.
unsigned ed_exec_mode_byte_size[4] = {
    64 * 4 / 8, // VA_ED_64x4
    32 * 4 / 8, // VA_ED_32x4
    64 * 1 / 8, // VA_ED_64x1
    32 * 1 / 8  // VA_ED_32x1
};
295 
// Element count for each convolve execution mode.
unsigned conv_exec_mode_size[4] = {
    16 * 4, // 16x4
    1,      // invalid
    16 * 1, // 16x1
    1       // 1x1, 1-pixel convolve only
};
302 
// Element count for each min/max-filter execution mode.
unsigned mmf_exec_mode_size[4] = {
    16 * 4, // 16x4
    1,      // invalid
    16 * 1, // 16x1
    1 * 1   // 1x1
};
309 
// Element count for each LBP creation mode.
// NOTE(review): the per-entry labels below (BOTH, 3x3, 5x5) are in the
// opposite order to the names in lbp_creation_mode (5x5, 3x3, BOTH). Confirm
// which ordering the index really follows before relying on entries 0 and 2.
unsigned lbp_creation_exec_mode_size[3] = {
    16 * 8, // BOTH
    16 * 4, // 3x3
    16 * 4  // 5x5
};
315 
// Element count for each LBP correlation mode.
unsigned lbp_correlation_mode_size[3] = {
    16 * 4, // 16x4
    1,      // invalid
    16 * 1  // 16x1
};
321 
// Bit width associated with each min/max-filter execution mode.
unsigned mmf_exec_mode_bit_size[4] = {
    16, // 16x4: 16 bits
    1,  // invalid
    16, // 16x1: 16 bits
    8   // 1x1:  8 bits
};
328 
// Size value for each of the four output format controls.
unsigned output_format_control_size[4] = {
    16, // index 0
    16, // index 1
    8,  // index 2
    8   // index 3
};
335 
// Mnemonics of the atomic operations, indexed by operation value.
// NOTE: the order must stay in sync with CMAtomicOperations. Values
// 0xd..0xf are unused and hold empty strings so later indices line up.
const char *CISAAtomicOpNames[] = {
    "add",     // 0x00 ATOMIC_ADD
    "sub",     // 0x01 ATOMIC_SUB
    "inc",     // 0x02 ATOMIC_INC
    "dec",     // 0x03 ATOMIC_DEC
    "min",     // 0x04 ATOMIC_MIN
    "max",     // 0x05 ATOMIC_MAX
    "xchg",    // 0x06 ATOMIC_XCHG
    "cmpxchg", // 0x07 ATOMIC_CMPXCHG
    "and",     // 0x08 ATOMIC_AND
    "or",      // 0x09 ATOMIC_OR
    "xor",     // 0x0a ATOMIC_XOR
    "minsint", // 0x0b ATOMIC_IMIN
    "maxsint", // 0x0c ATOMIC_IMAX
    "",        // 0x0d (skipped)
    "",        // 0x0e (skipped)
    "",        // 0x0f (skipped)
    "fmax",    // 0x10 ATOMIC_FMAX
    "fmin",    // 0x11 ATOMIC_FMIN
    "fcmpwr",  // 0x12 ATOMIC_FCMPWR
    "fadd",    // 0x13 ATOMIC_FADD
    "fsub",    // 0x14 ATOMIC_FSUB
};
360 
361 CISATypeInfo CISATypeTable[ISA_TYPE_NUM] =
362 {
363     { ISA_TYPE_UD,  "ud",   4 },
364     { ISA_TYPE_D,   "d",    4 },
365     { ISA_TYPE_UW,  "uw",   2 },
366     { ISA_TYPE_W,   "w",    2 },
367     { ISA_TYPE_UB,  "ub",   1 },
368     { ISA_TYPE_B,   "b",    1 },
369     { ISA_TYPE_DF,  "df",   8 },
370     { ISA_TYPE_F,   "f",    4 },
371     { ISA_TYPE_V,   "v",    4 },
372     { ISA_TYPE_VF,  "vf",   4 },
373     { ISA_TYPE_BOOL,"bool", 1 },
374     { ISA_TYPE_UQ,  "uq",   8 },
375     { ISA_TYPE_UV,  "uv",   4 },
376     { ISA_TYPE_Q,   "q",    8 },
377     { ISA_TYPE_HF,  "hf",   2 },
378     { ISA_TYPE_BF,  "bf",   2 }
379 };
380 
processCommonISAHeader(common_isa_header & cisaHdr,unsigned & byte_pos,const void * cisaBuffer,vISA::Mem_Manager * mem)381 int processCommonISAHeader(
382     common_isa_header& cisaHdr,
383     unsigned& byte_pos,
384     const void* cisaBuffer,
385     vISA::Mem_Manager* mem)
386 {
387     const char *buf = (const char *)cisaBuffer;
388     READ_FIELD_FROM_BUF(cisaHdr.magic_number, uint32_t);
389     READ_FIELD_FROM_BUF(cisaHdr.major_version, uint8_t);
390     READ_FIELD_FROM_BUF(cisaHdr.minor_version, uint8_t);
391     READ_FIELD_FROM_BUF(cisaHdr.num_kernels, uint16_t);
392 
393     MUST_BE_TRUE(cisaHdr.major_version >= 3, "only vISA version 3.0 and above are supported");
394 
395     if (cisaHdr.num_kernels) {
396         cisaHdr.kernels =
397             (kernel_info_t *)mem->alloc(
398                     sizeof(kernel_info_t) * cisaHdr.num_kernels);
399         ALLOC_ASSERT(cisaHdr.kernels);
400     }
401     else {
402         cisaHdr.kernels = NULL;
403     }
404 
405     for (int i = 0; i < cisaHdr.num_kernels; i++) {
406         if (cisaHdr.major_version == 3 && cisaHdr.minor_version < 7)
407         {
408             READ_FIELD_FROM_BUF(cisaHdr.kernels[i].name_len, uint8_t);
409         }
410         else
411         {
412             READ_FIELD_FROM_BUF(cisaHdr.kernels[i].name_len, uint16_t);
413         }
414         cisaHdr.kernels[i].name = (char*)mem->alloc(cisaHdr.kernels[i].name_len + 1);
415         memcpy_s(
416             cisaHdr.kernels[i].name, cisaHdr.kernels[i].name_len * sizeof(uint8_t), &buf[byte_pos],
417                 cisaHdr.kernels[i].name_len * sizeof(uint8_t));
418         cisaHdr.kernels[i].name[cisaHdr.kernels[i].name_len] = '\0';
419         byte_pos += cisaHdr.kernels[i].name_len;
420         READ_FIELD_FROM_BUF(cisaHdr.kernels[i].offset, uint32_t);
421         READ_FIELD_FROM_BUF(cisaHdr.kernels[i].size, uint32_t);
422         READ_FIELD_FROM_BUF(cisaHdr.kernels[i].input_offset, uint32_t);
423 
424         READ_FIELD_FROM_BUF(
425             cisaHdr.kernels[i].variable_reloc_symtab.num_syms,
426             uint16_t);
427         assert(cisaHdr.kernels[i].variable_reloc_symtab.num_syms == 0 && "relocation symbols not allowed");
428         cisaHdr.kernels[i].variable_reloc_symtab.reloc_syms = nullptr;
429 
430         READ_FIELD_FROM_BUF(
431             cisaHdr.kernels[i].function_reloc_symtab.num_syms,
432             uint16_t);
433 
434         assert(cisaHdr.kernels[i].function_reloc_symtab.num_syms == 0 && "relocation symbols not allowed");
435         cisaHdr.kernels[i].function_reloc_symtab.reloc_syms = nullptr;
436         READ_FIELD_FROM_BUF(cisaHdr.kernels[i].num_gen_binaries, uint8_t);
437         cisaHdr.kernels[i].gen_binaries =
438             (gen_binary_info*)mem->alloc(cisaHdr.kernels[i].num_gen_binaries * sizeof(gen_binary_info));
439         for (int j = 0; j < cisaHdr.kernels[i].num_gen_binaries; j++) {
440             READ_FIELD_FROM_BUF(cisaHdr.kernels[i].gen_binaries[j].platform, uint8_t);
441             READ_FIELD_FROM_BUF(cisaHdr.kernels[i].gen_binaries[j].binary_offset, uint32_t);
442             READ_FIELD_FROM_BUF(cisaHdr.kernels[i].gen_binaries[j].binary_size, uint32_t);
443         }
444 
445         cisaHdr.kernels[i].cisa_binary_buffer = NULL;
446         cisaHdr.kernels[i].genx_binary_buffer = NULL;
447     }
448 
449     READ_FIELD_FROM_BUF(cisaHdr.num_filescope_variables, uint16_t);
450     assert(cisaHdr.num_filescope_variables == 0 && "file scope variables are no longer supported");
451 
452     READ_FIELD_FROM_BUF(cisaHdr.num_functions, uint16_t);
453 
454     if (cisaHdr.num_functions) {
455         cisaHdr.functions =
456             (function_info_t *)mem->alloc(
457             sizeof(function_info_t)* cisaHdr.num_functions);
458         ALLOC_ASSERT(cisaHdr.functions);
459     }
460     else {
461         cisaHdr.functions = NULL;
462     }
463 
464     for (int i = 0; i < cisaHdr.num_functions; i++) {
465         // field is deprecated
466         READ_FIELD_FROM_BUF(cisaHdr.functions[i].linkage, uint8_t);
467 
468         if (cisaHdr.major_version == 3 && cisaHdr.minor_version < 7)
469         {
470             READ_FIELD_FROM_BUF(cisaHdr.functions[i].name_len, uint8_t);
471         }
472         else
473         {
474             READ_FIELD_FROM_BUF(cisaHdr.functions[i].name_len, uint16_t);
475         }
476         cisaHdr.functions[i].name = (char*)mem->alloc(cisaHdr.functions[i].name_len + 1);
477         memcpy_s(
478             cisaHdr.functions[i].name, cisaHdr.functions[i].name_len * sizeof(uint8_t), &buf[byte_pos],
479             cisaHdr.functions[i].name_len * sizeof(uint8_t));
480         cisaHdr.functions[i].name[
481             cisaHdr.functions[i].name_len] = '\0';
482             byte_pos += cisaHdr.functions[i].name_len;
483             READ_FIELD_FROM_BUF(cisaHdr.functions[i].offset, uint32_t);
484             READ_FIELD_FROM_BUF(cisaHdr.functions[i].size, uint32_t);
485 
486             READ_FIELD_FROM_BUF(
487                 cisaHdr.functions[i].variable_reloc_symtab.num_syms,
488                 uint16_t);
489             assert(cisaHdr.functions[i].variable_reloc_symtab.num_syms == 0 && "variable relocation not supported");
490             cisaHdr.functions[i].variable_reloc_symtab.reloc_syms = nullptr;
491 
492             READ_FIELD_FROM_BUF(
493                 cisaHdr.functions[i].function_reloc_symtab.num_syms,
494                 uint16_t);
495             assert(cisaHdr.functions[i].function_reloc_symtab.num_syms == 0 && "function relocation not supported");
496             cisaHdr.functions[i].function_reloc_symtab.reloc_syms = nullptr;
497 
498             cisaHdr.functions[i].cisa_binary_buffer = NULL;
499             cisaHdr.functions[i].genx_binary_buffer = NULL;
500     }
501 
502     return 0;
503 }
504 
505 const char *ChannelMask::Names[] = {
506     "0000", // ABGR
507     "R",    // 0001
508     "G",    // 0010
509     "RG",   // 0011
510     "B",    // 0100
511     "RB",   // 0101
512     "GB",   // 0110
513     "RGB",  // 0111
514     "A",    // 1000
515     "RA",   // 1001
516     "GA",   // 1010
517     "RGA",  // 1011
518     "BA",   // 1100
519     "RBA",  // 1101
520     "GBA",  // 1110
521     "RGBA"  // 1111
522 };
523