1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Jason Ekstrand (jason@jlekstrand.net)
25 *
26 */
27
28 #include "vtn_private.h"
29 #include "nir/nir_vla.h"
30 #include "nir/nir_control_flow.h"
31 #include "nir/nir_constant_expressions.h"
32 #include "nir/nir_deref.h"
33 #include "spirv_info.h"
34
35 #include "util/format/u_format.h"
36 #include "util/u_math.h"
37 #include "util/u_string.h"
38
39 #include <stdio.h>
40
41 #ifndef NDEBUG
42 static enum nir_spirv_debug_level
vtn_default_log_level(void)43 vtn_default_log_level(void)
44 {
45 enum nir_spirv_debug_level level = NIR_SPIRV_DEBUG_LEVEL_WARNING;
46 const char *vtn_log_level_strings[] = {
47 [NIR_SPIRV_DEBUG_LEVEL_WARNING] = "warning",
48 [NIR_SPIRV_DEBUG_LEVEL_INFO] = "info",
49 [NIR_SPIRV_DEBUG_LEVEL_ERROR] = "error",
50 };
51 const char *str = getenv("MESA_SPIRV_LOG_LEVEL");
52
53 if (str == NULL)
54 return NIR_SPIRV_DEBUG_LEVEL_WARNING;
55
56 for (int i = 0; i < ARRAY_SIZE(vtn_log_level_strings); i++) {
57 if (strcasecmp(str, vtn_log_level_strings[i]) == 0) {
58 level = i;
59 break;
60 }
61 }
62
63 return level;
64 }
65 #endif
66
67 void
vtn_log(struct vtn_builder * b,enum nir_spirv_debug_level level,size_t spirv_offset,const char * message)68 vtn_log(struct vtn_builder *b, enum nir_spirv_debug_level level,
69 size_t spirv_offset, const char *message)
70 {
71 if (b->options->debug.func) {
72 b->options->debug.func(b->options->debug.private_data,
73 level, spirv_offset, message);
74 }
75
76 #ifndef NDEBUG
77 static enum nir_spirv_debug_level default_level =
78 NIR_SPIRV_DEBUG_LEVEL_INVALID;
79
80 if (default_level == NIR_SPIRV_DEBUG_LEVEL_INVALID)
81 default_level = vtn_default_log_level();
82
83 if (level >= default_level)
84 fprintf(stderr, "%s\n", message);
85 #endif
86 }
87
88 void
vtn_logf(struct vtn_builder * b,enum nir_spirv_debug_level level,size_t spirv_offset,const char * fmt,...)89 vtn_logf(struct vtn_builder *b, enum nir_spirv_debug_level level,
90 size_t spirv_offset, const char *fmt, ...)
91 {
92 va_list args;
93 char *msg;
94
95 va_start(args, fmt);
96 msg = ralloc_vasprintf(NULL, fmt, args);
97 va_end(args);
98
99 vtn_log(b, level, spirv_offset, msg);
100
101 ralloc_free(msg);
102 }
103
104 static void
vtn_log_err(struct vtn_builder * b,enum nir_spirv_debug_level level,const char * prefix,const char * file,unsigned line,const char * fmt,va_list args)105 vtn_log_err(struct vtn_builder *b,
106 enum nir_spirv_debug_level level, const char *prefix,
107 const char *file, unsigned line,
108 const char *fmt, va_list args)
109 {
110 char *msg;
111
112 msg = ralloc_strdup(NULL, prefix);
113
114 #ifndef NDEBUG
115 ralloc_asprintf_append(&msg, " In file %s:%u\n", file, line);
116 #endif
117
118 ralloc_asprintf_append(&msg, " ");
119
120 ralloc_vasprintf_append(&msg, fmt, args);
121
122 ralloc_asprintf_append(&msg, "\n %zu bytes into the SPIR-V binary",
123 b->spirv_offset);
124
125 if (b->file) {
126 ralloc_asprintf_append(&msg,
127 "\n in SPIR-V source file %s, line %d, col %d",
128 b->file, b->line, b->col);
129 }
130
131 vtn_log(b, level, b->spirv_offset, msg);
132
133 ralloc_free(msg);
134 }
135
136 static void
vtn_dump_shader(struct vtn_builder * b,const char * path,const char * prefix)137 vtn_dump_shader(struct vtn_builder *b, const char *path, const char *prefix)
138 {
139 static int idx = 0;
140
141 char filename[1024];
142 int len = snprintf(filename, sizeof(filename), "%s/%s-%d.spirv",
143 path, prefix, idx++);
144 if (len < 0 || len >= sizeof(filename))
145 return;
146
147 FILE *f = fopen(filename, "w");
148 if (f == NULL)
149 return;
150
151 fwrite(b->spirv, sizeof(*b->spirv), b->spirv_word_count, f);
152 fclose(f);
153
154 vtn_info("SPIR-V shader dumped to %s", filename);
155 }
156
157 void
_vtn_warn(struct vtn_builder * b,const char * file,unsigned line,const char * fmt,...)158 _vtn_warn(struct vtn_builder *b, const char *file, unsigned line,
159 const char *fmt, ...)
160 {
161 va_list args;
162
163 va_start(args, fmt);
164 vtn_log_err(b, NIR_SPIRV_DEBUG_LEVEL_WARNING, "SPIR-V WARNING:\n",
165 file, line, fmt, args);
166 va_end(args);
167 }
168
169 void
_vtn_err(struct vtn_builder * b,const char * file,unsigned line,const char * fmt,...)170 _vtn_err(struct vtn_builder *b, const char *file, unsigned line,
171 const char *fmt, ...)
172 {
173 va_list args;
174
175 va_start(args, fmt);
176 vtn_log_err(b, NIR_SPIRV_DEBUG_LEVEL_ERROR, "SPIR-V ERROR:\n",
177 file, line, fmt, args);
178 va_end(args);
179 }
180
181 void
_vtn_fail(struct vtn_builder * b,const char * file,unsigned line,const char * fmt,...)182 _vtn_fail(struct vtn_builder *b, const char *file, unsigned line,
183 const char *fmt, ...)
184 {
185 va_list args;
186
187 va_start(args, fmt);
188 vtn_log_err(b, NIR_SPIRV_DEBUG_LEVEL_ERROR, "SPIR-V parsing FAILED:\n",
189 file, line, fmt, args);
190 va_end(args);
191
192 const char *dump_path = getenv("MESA_SPIRV_FAIL_DUMP_PATH");
193 if (dump_path)
194 vtn_dump_shader(b, dump_path, "fail");
195
196 vtn_longjmp(b->fail_jump, 1);
197 }
198
199 static struct vtn_ssa_value *
vtn_undef_ssa_value(struct vtn_builder * b,const struct glsl_type * type)200 vtn_undef_ssa_value(struct vtn_builder *b, const struct glsl_type *type)
201 {
202 struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value);
203 val->type = glsl_get_bare_type(type);
204
205 if (glsl_type_is_vector_or_scalar(type)) {
206 unsigned num_components = glsl_get_vector_elements(val->type);
207 unsigned bit_size = glsl_get_bit_size(val->type);
208 val->def = nir_ssa_undef(&b->nb, num_components, bit_size);
209 } else {
210 unsigned elems = glsl_get_length(val->type);
211 val->elems = ralloc_array(b, struct vtn_ssa_value *, elems);
212 if (glsl_type_is_array_or_matrix(type)) {
213 const struct glsl_type *elem_type = glsl_get_array_element(type);
214 for (unsigned i = 0; i < elems; i++)
215 val->elems[i] = vtn_undef_ssa_value(b, elem_type);
216 } else {
217 vtn_assert(glsl_type_is_struct_or_ifc(type));
218 for (unsigned i = 0; i < elems; i++) {
219 const struct glsl_type *elem_type = glsl_get_struct_field(type, i);
220 val->elems[i] = vtn_undef_ssa_value(b, elem_type);
221 }
222 }
223 }
224
225 return val;
226 }
227
228 struct vtn_ssa_value *
vtn_const_ssa_value(struct vtn_builder * b,nir_constant * constant,const struct glsl_type * type)229 vtn_const_ssa_value(struct vtn_builder *b, nir_constant *constant,
230 const struct glsl_type *type)
231 {
232 struct hash_entry *entry = _mesa_hash_table_search(b->const_table, constant);
233
234 if (entry)
235 return entry->data;
236
237 struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value);
238 val->type = glsl_get_bare_type(type);
239
240 if (glsl_type_is_vector_or_scalar(type)) {
241 unsigned num_components = glsl_get_vector_elements(val->type);
242 unsigned bit_size = glsl_get_bit_size(type);
243 nir_load_const_instr *load =
244 nir_load_const_instr_create(b->shader, num_components, bit_size);
245
246 memcpy(load->value, constant->values,
247 sizeof(nir_const_value) * num_components);
248
249 nir_instr_insert_before_cf_list(&b->nb.impl->body, &load->instr);
250 val->def = &load->def;
251 } else {
252 unsigned elems = glsl_get_length(val->type);
253 val->elems = ralloc_array(b, struct vtn_ssa_value *, elems);
254 if (glsl_type_is_array_or_matrix(type)) {
255 const struct glsl_type *elem_type = glsl_get_array_element(type);
256 for (unsigned i = 0; i < elems; i++) {
257 val->elems[i] = vtn_const_ssa_value(b, constant->elements[i],
258 elem_type);
259 }
260 } else {
261 vtn_assert(glsl_type_is_struct_or_ifc(type));
262 for (unsigned i = 0; i < elems; i++) {
263 const struct glsl_type *elem_type = glsl_get_struct_field(type, i);
264 val->elems[i] = vtn_const_ssa_value(b, constant->elements[i],
265 elem_type);
266 }
267 }
268 }
269
270 return val;
271 }
272
273 struct vtn_ssa_value *
vtn_ssa_value(struct vtn_builder * b,uint32_t value_id)274 vtn_ssa_value(struct vtn_builder *b, uint32_t value_id)
275 {
276 struct vtn_value *val = vtn_untyped_value(b, value_id);
277 switch (val->value_type) {
278 case vtn_value_type_undef:
279 return vtn_undef_ssa_value(b, val->type->type);
280
281 case vtn_value_type_constant:
282 return vtn_const_ssa_value(b, val->constant, val->type->type);
283
284 case vtn_value_type_ssa:
285 return val->ssa;
286
287 case vtn_value_type_pointer:
288 vtn_assert(val->pointer->ptr_type && val->pointer->ptr_type->type);
289 struct vtn_ssa_value *ssa =
290 vtn_create_ssa_value(b, val->pointer->ptr_type->type);
291 ssa->def = vtn_pointer_to_ssa(b, val->pointer);
292 return ssa;
293
294 default:
295 vtn_fail("Invalid type for an SSA value");
296 }
297 }
298
299 struct vtn_value *
vtn_push_ssa_value(struct vtn_builder * b,uint32_t value_id,struct vtn_ssa_value * ssa)300 vtn_push_ssa_value(struct vtn_builder *b, uint32_t value_id,
301 struct vtn_ssa_value *ssa)
302 {
303 struct vtn_type *type = vtn_get_value_type(b, value_id);
304
305 /* See vtn_create_ssa_value */
306 vtn_fail_if(ssa->type != glsl_get_bare_type(type->type),
307 "Type mismatch for SPIR-V SSA value");
308
309 struct vtn_value *val;
310 if (type->base_type == vtn_base_type_pointer) {
311 val = vtn_push_pointer(b, value_id, vtn_pointer_from_ssa(b, ssa->def, type));
312 } else {
313 /* Don't trip the value_type_ssa check in vtn_push_value */
314 val = vtn_push_value(b, value_id, vtn_value_type_invalid);
315 val->value_type = vtn_value_type_ssa;
316 val->ssa = ssa;
317 }
318
319 return val;
320 }
321
322 nir_ssa_def *
vtn_get_nir_ssa(struct vtn_builder * b,uint32_t value_id)323 vtn_get_nir_ssa(struct vtn_builder *b, uint32_t value_id)
324 {
325 struct vtn_ssa_value *ssa = vtn_ssa_value(b, value_id);
326 vtn_fail_if(!glsl_type_is_vector_or_scalar(ssa->type),
327 "Expected a vector or scalar type");
328 return ssa->def;
329 }
330
331 struct vtn_value *
vtn_push_nir_ssa(struct vtn_builder * b,uint32_t value_id,nir_ssa_def * def)332 vtn_push_nir_ssa(struct vtn_builder *b, uint32_t value_id, nir_ssa_def *def)
333 {
334 /* Types for all SPIR-V SSA values are set as part of a pre-pass so the
335 * type will be valid by the time we get here.
336 */
337 struct vtn_type *type = vtn_get_value_type(b, value_id);
338 vtn_fail_if(def->num_components != glsl_get_vector_elements(type->type) ||
339 def->bit_size != glsl_get_bit_size(type->type),
340 "Mismatch between NIR and SPIR-V type.");
341 struct vtn_ssa_value *ssa = vtn_create_ssa_value(b, type->type);
342 ssa->def = def;
343 return vtn_push_ssa_value(b, value_id, ssa);
344 }
345
346 static enum gl_access_qualifier
spirv_to_gl_access_qualifier(struct vtn_builder * b,SpvAccessQualifier access_qualifier)347 spirv_to_gl_access_qualifier(struct vtn_builder *b,
348 SpvAccessQualifier access_qualifier)
349 {
350 switch (access_qualifier) {
351 case SpvAccessQualifierReadOnly:
352 return ACCESS_NON_WRITEABLE;
353 case SpvAccessQualifierWriteOnly:
354 return ACCESS_NON_READABLE;
355 case SpvAccessQualifierReadWrite:
356 return 0;
357 default:
358 vtn_fail("Invalid image access qualifier");
359 }
360 }
361
362 static nir_deref_instr *
vtn_get_image(struct vtn_builder * b,uint32_t value_id,enum gl_access_qualifier * access)363 vtn_get_image(struct vtn_builder *b, uint32_t value_id,
364 enum gl_access_qualifier *access)
365 {
366 struct vtn_type *type = vtn_get_value_type(b, value_id);
367 vtn_assert(type->base_type == vtn_base_type_image);
368 if (access)
369 *access |= spirv_to_gl_access_qualifier(b, type->access_qualifier);
370 nir_variable_mode mode = glsl_type_is_image(type->glsl_image) ?
371 nir_var_image : nir_var_uniform;
372 return nir_build_deref_cast(&b->nb, vtn_get_nir_ssa(b, value_id),
373 mode, type->glsl_image, 0);
374 }
375
376 static void
vtn_push_image(struct vtn_builder * b,uint32_t value_id,nir_deref_instr * deref,bool propagate_non_uniform)377 vtn_push_image(struct vtn_builder *b, uint32_t value_id,
378 nir_deref_instr *deref, bool propagate_non_uniform)
379 {
380 struct vtn_type *type = vtn_get_value_type(b, value_id);
381 vtn_assert(type->base_type == vtn_base_type_image);
382 struct vtn_value *value = vtn_push_nir_ssa(b, value_id, &deref->dest.ssa);
383 value->propagated_non_uniform = propagate_non_uniform;
384 }
385
386 static nir_deref_instr *
vtn_get_sampler(struct vtn_builder * b,uint32_t value_id)387 vtn_get_sampler(struct vtn_builder *b, uint32_t value_id)
388 {
389 struct vtn_type *type = vtn_get_value_type(b, value_id);
390 vtn_assert(type->base_type == vtn_base_type_sampler);
391 return nir_build_deref_cast(&b->nb, vtn_get_nir_ssa(b, value_id),
392 nir_var_uniform, glsl_bare_sampler_type(), 0);
393 }
394
395 nir_ssa_def *
vtn_sampled_image_to_nir_ssa(struct vtn_builder * b,struct vtn_sampled_image si)396 vtn_sampled_image_to_nir_ssa(struct vtn_builder *b,
397 struct vtn_sampled_image si)
398 {
399 return nir_vec2(&b->nb, &si.image->dest.ssa, &si.sampler->dest.ssa);
400 }
401
402 static void
vtn_push_sampled_image(struct vtn_builder * b,uint32_t value_id,struct vtn_sampled_image si,bool propagate_non_uniform)403 vtn_push_sampled_image(struct vtn_builder *b, uint32_t value_id,
404 struct vtn_sampled_image si, bool propagate_non_uniform)
405 {
406 struct vtn_type *type = vtn_get_value_type(b, value_id);
407 vtn_assert(type->base_type == vtn_base_type_sampled_image);
408 struct vtn_value *value = vtn_push_nir_ssa(b, value_id,
409 vtn_sampled_image_to_nir_ssa(b, si));
410 value->propagated_non_uniform = propagate_non_uniform;
411 }
412
413 static struct vtn_sampled_image
vtn_get_sampled_image(struct vtn_builder * b,uint32_t value_id)414 vtn_get_sampled_image(struct vtn_builder *b, uint32_t value_id)
415 {
416 struct vtn_type *type = vtn_get_value_type(b, value_id);
417 vtn_assert(type->base_type == vtn_base_type_sampled_image);
418 nir_ssa_def *si_vec2 = vtn_get_nir_ssa(b, value_id);
419
420 /* Even though this is a sampled image, we can end up here with a storage
421 * image because OpenCL doesn't distinguish between the two.
422 */
423 const struct glsl_type *image_type = type->image->glsl_image;
424 nir_variable_mode image_mode = glsl_type_is_image(image_type) ?
425 nir_var_image : nir_var_uniform;
426
427 struct vtn_sampled_image si = { NULL, };
428 si.image = nir_build_deref_cast(&b->nb, nir_channel(&b->nb, si_vec2, 0),
429 image_mode, image_type, 0);
430 si.sampler = nir_build_deref_cast(&b->nb, nir_channel(&b->nb, si_vec2, 1),
431 nir_var_uniform,
432 glsl_bare_sampler_type(), 0);
433 return si;
434 }
435
436 const char *
vtn_string_literal(struct vtn_builder * b,const uint32_t * words,unsigned word_count,unsigned * words_used)437 vtn_string_literal(struct vtn_builder *b, const uint32_t *words,
438 unsigned word_count, unsigned *words_used)
439 {
440 /* From the SPIR-V spec:
441 *
442 * "A string is interpreted as a nul-terminated stream of characters.
443 * The character set is Unicode in the UTF-8 encoding scheme. The UTF-8
444 * octets (8-bit bytes) are packed four per word, following the
445 * little-endian convention (i.e., the first octet is in the
446 * lowest-order 8 bits of the word). The final word contains the
447 * string’s nul-termination character (0), and all contents past the
448 * end of the string in the final word are padded with 0."
449 *
450 * On big-endian, we need to byte-swap.
451 */
452 #if UTIL_ARCH_BIG_ENDIAN
453 {
454 uint32_t *copy = ralloc_array(b, uint32_t, word_count);
455 for (unsigned i = 0; i < word_count; i++)
456 copy[i] = util_bswap32(words[i]);
457 words = copy;
458 }
459 #endif
460
461 const char *str = (char *)words;
462 const char *end = memchr(str, 0, word_count * 4);
463 vtn_fail_if(end == NULL, "String is not null-terminated");
464
465 if (words_used)
466 *words_used = DIV_ROUND_UP(end - str + 1, sizeof(*words));
467
468 return str;
469 }
470
471 const uint32_t *
vtn_foreach_instruction(struct vtn_builder * b,const uint32_t * start,const uint32_t * end,vtn_instruction_handler handler)472 vtn_foreach_instruction(struct vtn_builder *b, const uint32_t *start,
473 const uint32_t *end, vtn_instruction_handler handler)
474 {
475 b->file = NULL;
476 b->line = -1;
477 b->col = -1;
478
479 const uint32_t *w = start;
480 while (w < end) {
481 SpvOp opcode = w[0] & SpvOpCodeMask;
482 unsigned count = w[0] >> SpvWordCountShift;
483 vtn_assert(count >= 1 && w + count <= end);
484
485 b->spirv_offset = (uint8_t *)w - (uint8_t *)b->spirv;
486
487 switch (opcode) {
488 case SpvOpNop:
489 break; /* Do nothing */
490
491 case SpvOpLine:
492 b->file = vtn_value(b, w[1], vtn_value_type_string)->str;
493 b->line = w[2];
494 b->col = w[3];
495 break;
496
497 case SpvOpNoLine:
498 b->file = NULL;
499 b->line = -1;
500 b->col = -1;
501 break;
502
503 default:
504 if (!handler(b, opcode, w, count))
505 return w;
506 break;
507 }
508
509 w += count;
510 }
511
512 b->spirv_offset = 0;
513 b->file = NULL;
514 b->line = -1;
515 b->col = -1;
516
517 assert(w == end);
518 return w;
519 }
520
521 static bool
vtn_handle_non_semantic_instruction(struct vtn_builder * b,SpvOp ext_opcode,const uint32_t * w,unsigned count)522 vtn_handle_non_semantic_instruction(struct vtn_builder *b, SpvOp ext_opcode,
523 const uint32_t *w, unsigned count)
524 {
525 /* Do nothing. */
526 return true;
527 }
528
529 static void
vtn_handle_extension(struct vtn_builder * b,SpvOp opcode,const uint32_t * w,unsigned count)530 vtn_handle_extension(struct vtn_builder *b, SpvOp opcode,
531 const uint32_t *w, unsigned count)
532 {
533 switch (opcode) {
534 case SpvOpExtInstImport: {
535 struct vtn_value *val = vtn_push_value(b, w[1], vtn_value_type_extension);
536 const char *ext = vtn_string_literal(b, &w[2], count - 2, NULL);
537 if (strcmp(ext, "GLSL.std.450") == 0) {
538 val->ext_handler = vtn_handle_glsl450_instruction;
539 } else if ((strcmp(ext, "SPV_AMD_gcn_shader") == 0)
540 && (b->options && b->options->caps.amd_gcn_shader)) {
541 val->ext_handler = vtn_handle_amd_gcn_shader_instruction;
542 } else if ((strcmp(ext, "SPV_AMD_shader_ballot") == 0)
543 && (b->options && b->options->caps.amd_shader_ballot)) {
544 val->ext_handler = vtn_handle_amd_shader_ballot_instruction;
545 } else if ((strcmp(ext, "SPV_AMD_shader_trinary_minmax") == 0)
546 && (b->options && b->options->caps.amd_trinary_minmax)) {
547 val->ext_handler = vtn_handle_amd_shader_trinary_minmax_instruction;
548 } else if ((strcmp(ext, "SPV_AMD_shader_explicit_vertex_parameter") == 0)
549 && (b->options && b->options->caps.amd_shader_explicit_vertex_parameter)) {
550 val->ext_handler = vtn_handle_amd_shader_explicit_vertex_parameter_instruction;
551 } else if (strcmp(ext, "OpenCL.std") == 0) {
552 val->ext_handler = vtn_handle_opencl_instruction;
553 } else if (strstr(ext, "NonSemantic.") == ext) {
554 val->ext_handler = vtn_handle_non_semantic_instruction;
555 } else {
556 vtn_fail("Unsupported extension: %s", ext);
557 }
558 break;
559 }
560
561 case SpvOpExtInst: {
562 struct vtn_value *val = vtn_value(b, w[3], vtn_value_type_extension);
563 bool handled = val->ext_handler(b, w[4], w, count);
564 vtn_assert(handled);
565 break;
566 }
567
568 default:
569 vtn_fail_with_opcode("Unhandled opcode", opcode);
570 }
571 }
572
573 static void
_foreach_decoration_helper(struct vtn_builder * b,struct vtn_value * base_value,int parent_member,struct vtn_value * value,vtn_decoration_foreach_cb cb,void * data)574 _foreach_decoration_helper(struct vtn_builder *b,
575 struct vtn_value *base_value,
576 int parent_member,
577 struct vtn_value *value,
578 vtn_decoration_foreach_cb cb, void *data)
579 {
580 for (struct vtn_decoration *dec = value->decoration; dec; dec = dec->next) {
581 int member;
582 if (dec->scope == VTN_DEC_DECORATION) {
583 member = parent_member;
584 } else if (dec->scope >= VTN_DEC_STRUCT_MEMBER0) {
585 vtn_fail_if(value->value_type != vtn_value_type_type ||
586 value->type->base_type != vtn_base_type_struct,
587 "OpMemberDecorate and OpGroupMemberDecorate are only "
588 "allowed on OpTypeStruct");
589 /* This means we haven't recursed yet */
590 assert(value == base_value);
591
592 member = dec->scope - VTN_DEC_STRUCT_MEMBER0;
593
594 vtn_fail_if(member >= base_value->type->length,
595 "OpMemberDecorate specifies member %d but the "
596 "OpTypeStruct has only %u members",
597 member, base_value->type->length);
598 } else {
599 /* Not a decoration */
600 assert(dec->scope == VTN_DEC_EXECUTION_MODE ||
601 dec->scope <= VTN_DEC_STRUCT_MEMBER_NAME0);
602 continue;
603 }
604
605 if (dec->group) {
606 assert(dec->group->value_type == vtn_value_type_decoration_group);
607 _foreach_decoration_helper(b, base_value, member, dec->group,
608 cb, data);
609 } else {
610 cb(b, base_value, member, dec, data);
611 }
612 }
613 }
614
615 /** Iterates (recursively if needed) over all of the decorations on a value
616 *
617 * This function iterates over all of the decorations applied to a given
618 * value. If it encounters a decoration group, it recurses into the group
619 * and iterates over all of those decorations as well.
620 */
621 void
vtn_foreach_decoration(struct vtn_builder * b,struct vtn_value * value,vtn_decoration_foreach_cb cb,void * data)622 vtn_foreach_decoration(struct vtn_builder *b, struct vtn_value *value,
623 vtn_decoration_foreach_cb cb, void *data)
624 {
625 _foreach_decoration_helper(b, value, -1, value, cb, data);
626 }
627
628 void
vtn_foreach_execution_mode(struct vtn_builder * b,struct vtn_value * value,vtn_execution_mode_foreach_cb cb,void * data)629 vtn_foreach_execution_mode(struct vtn_builder *b, struct vtn_value *value,
630 vtn_execution_mode_foreach_cb cb, void *data)
631 {
632 for (struct vtn_decoration *dec = value->decoration; dec; dec = dec->next) {
633 if (dec->scope != VTN_DEC_EXECUTION_MODE)
634 continue;
635
636 assert(dec->group == NULL);
637 cb(b, value, dec, data);
638 }
639 }
640
641 void
vtn_handle_decoration(struct vtn_builder * b,SpvOp opcode,const uint32_t * w,unsigned count)642 vtn_handle_decoration(struct vtn_builder *b, SpvOp opcode,
643 const uint32_t *w, unsigned count)
644 {
645 const uint32_t *w_end = w + count;
646 const uint32_t target = w[1];
647 w += 2;
648
649 switch (opcode) {
650 case SpvOpDecorationGroup:
651 vtn_push_value(b, target, vtn_value_type_decoration_group);
652 break;
653
654 case SpvOpDecorate:
655 case SpvOpDecorateId:
656 case SpvOpMemberDecorate:
657 case SpvOpDecorateString:
658 case SpvOpMemberDecorateString:
659 case SpvOpExecutionMode:
660 case SpvOpExecutionModeId: {
661 struct vtn_value *val = vtn_untyped_value(b, target);
662
663 struct vtn_decoration *dec = rzalloc(b, struct vtn_decoration);
664 switch (opcode) {
665 case SpvOpDecorate:
666 case SpvOpDecorateId:
667 case SpvOpDecorateString:
668 dec->scope = VTN_DEC_DECORATION;
669 break;
670 case SpvOpMemberDecorate:
671 case SpvOpMemberDecorateString:
672 dec->scope = VTN_DEC_STRUCT_MEMBER0 + *(w++);
673 vtn_fail_if(dec->scope < VTN_DEC_STRUCT_MEMBER0, /* overflow */
674 "Member argument of OpMemberDecorate too large");
675 break;
676 case SpvOpExecutionMode:
677 case SpvOpExecutionModeId:
678 dec->scope = VTN_DEC_EXECUTION_MODE;
679 break;
680 default:
681 unreachable("Invalid decoration opcode");
682 }
683 dec->decoration = *(w++);
684 dec->num_operands = w_end - w;
685 dec->operands = w;
686
687 /* Link into the list */
688 dec->next = val->decoration;
689 val->decoration = dec;
690 break;
691 }
692
693 case SpvOpMemberName: {
694 struct vtn_value *val = vtn_untyped_value(b, target);
695 struct vtn_decoration *dec = rzalloc(b, struct vtn_decoration);
696
697 dec->scope = VTN_DEC_STRUCT_MEMBER_NAME0 - *(w++);
698
699 dec->member_name = vtn_string_literal(b, w, w_end - w, NULL);
700
701 dec->next = val->decoration;
702 val->decoration = dec;
703 break;
704 }
705
706 case SpvOpGroupMemberDecorate:
707 case SpvOpGroupDecorate: {
708 struct vtn_value *group =
709 vtn_value(b, target, vtn_value_type_decoration_group);
710
711 for (; w < w_end; w++) {
712 struct vtn_value *val = vtn_untyped_value(b, *w);
713 struct vtn_decoration *dec = rzalloc(b, struct vtn_decoration);
714
715 dec->group = group;
716 if (opcode == SpvOpGroupDecorate) {
717 dec->scope = VTN_DEC_DECORATION;
718 } else {
719 dec->scope = VTN_DEC_STRUCT_MEMBER0 + *(++w);
720 vtn_fail_if(dec->scope < 0, /* Check for overflow */
721 "Member argument of OpGroupMemberDecorate too large");
722 }
723
724 /* Link into the list */
725 dec->next = val->decoration;
726 val->decoration = dec;
727 }
728 break;
729 }
730
731 default:
732 unreachable("Unhandled opcode");
733 }
734 }
735
736 struct member_decoration_ctx {
737 unsigned num_fields;
738 struct glsl_struct_field *fields;
739 struct vtn_type *type;
740 };
741
742 /**
743 * Returns true if the given type contains a struct decorated Block or
744 * BufferBlock
745 */
746 bool
vtn_type_contains_block(struct vtn_builder * b,struct vtn_type * type)747 vtn_type_contains_block(struct vtn_builder *b, struct vtn_type *type)
748 {
749 switch (type->base_type) {
750 case vtn_base_type_array:
751 return vtn_type_contains_block(b, type->array_element);
752 case vtn_base_type_struct:
753 if (type->block || type->buffer_block)
754 return true;
755 for (unsigned i = 0; i < type->length; i++) {
756 if (vtn_type_contains_block(b, type->members[i]))
757 return true;
758 }
759 return false;
760 default:
761 return false;
762 }
763 }
764
765 /** Returns true if two types are "compatible", i.e. you can do an OpLoad,
766 * OpStore, or OpCopyMemory between them without breaking anything.
767 * Technically, the SPIR-V rules require the exact same type ID but this lets
768 * us internally be a bit looser.
769 */
770 bool
vtn_types_compatible(struct vtn_builder * b,struct vtn_type * t1,struct vtn_type * t2)771 vtn_types_compatible(struct vtn_builder *b,
772 struct vtn_type *t1, struct vtn_type *t2)
773 {
774 if (t1->id == t2->id)
775 return true;
776
777 if (t1->base_type != t2->base_type)
778 return false;
779
780 switch (t1->base_type) {
781 case vtn_base_type_void:
782 case vtn_base_type_scalar:
783 case vtn_base_type_vector:
784 case vtn_base_type_matrix:
785 case vtn_base_type_image:
786 case vtn_base_type_sampler:
787 case vtn_base_type_sampled_image:
788 case vtn_base_type_event:
789 return t1->type == t2->type;
790
791 case vtn_base_type_array:
792 return t1->length == t2->length &&
793 vtn_types_compatible(b, t1->array_element, t2->array_element);
794
795 case vtn_base_type_pointer:
796 return vtn_types_compatible(b, t1->deref, t2->deref);
797
798 case vtn_base_type_struct:
799 if (t1->length != t2->length)
800 return false;
801
802 for (unsigned i = 0; i < t1->length; i++) {
803 if (!vtn_types_compatible(b, t1->members[i], t2->members[i]))
804 return false;
805 }
806 return true;
807
808 case vtn_base_type_accel_struct:
809 case vtn_base_type_ray_query:
810 return true;
811
812 case vtn_base_type_function:
813 /* This case shouldn't get hit since you can't copy around function
814 * types. Just require them to be identical.
815 */
816 return false;
817 }
818
819 vtn_fail("Invalid base type");
820 }
821
822 struct vtn_type *
vtn_type_without_array(struct vtn_type * type)823 vtn_type_without_array(struct vtn_type *type)
824 {
825 while (type->base_type == vtn_base_type_array)
826 type = type->array_element;
827 return type;
828 }
829
830 /* does a shallow copy of a vtn_type */
831
832 static struct vtn_type *
vtn_type_copy(struct vtn_builder * b,struct vtn_type * src)833 vtn_type_copy(struct vtn_builder *b, struct vtn_type *src)
834 {
835 struct vtn_type *dest = ralloc(b, struct vtn_type);
836 *dest = *src;
837
838 switch (src->base_type) {
839 case vtn_base_type_void:
840 case vtn_base_type_scalar:
841 case vtn_base_type_vector:
842 case vtn_base_type_matrix:
843 case vtn_base_type_array:
844 case vtn_base_type_pointer:
845 case vtn_base_type_image:
846 case vtn_base_type_sampler:
847 case vtn_base_type_sampled_image:
848 case vtn_base_type_event:
849 case vtn_base_type_accel_struct:
850 case vtn_base_type_ray_query:
851 /* Nothing more to do */
852 break;
853
854 case vtn_base_type_struct:
855 dest->members = ralloc_array(b, struct vtn_type *, src->length);
856 memcpy(dest->members, src->members,
857 src->length * sizeof(src->members[0]));
858
859 dest->offsets = ralloc_array(b, unsigned, src->length);
860 memcpy(dest->offsets, src->offsets,
861 src->length * sizeof(src->offsets[0]));
862 break;
863
864 case vtn_base_type_function:
865 dest->params = ralloc_array(b, struct vtn_type *, src->length);
866 memcpy(dest->params, src->params, src->length * sizeof(src->params[0]));
867 break;
868 }
869
870 return dest;
871 }
872
873 static bool
vtn_type_needs_explicit_layout(struct vtn_builder * b,struct vtn_type * type,enum vtn_variable_mode mode)874 vtn_type_needs_explicit_layout(struct vtn_builder *b, struct vtn_type *type,
875 enum vtn_variable_mode mode)
876 {
877 /* For OpenCL we never want to strip the info from the types, and it makes
878 * type comparisons easier in later stages.
879 */
880 if (b->options->environment == NIR_SPIRV_OPENCL)
881 return true;
882
883 switch (mode) {
884 case vtn_variable_mode_input:
885 case vtn_variable_mode_output:
886 /* Layout decorations kept because we need offsets for XFB arrays of
887 * blocks.
888 */
889 return b->shader->info.has_transform_feedback_varyings;
890
891 case vtn_variable_mode_ssbo:
892 case vtn_variable_mode_phys_ssbo:
893 case vtn_variable_mode_ubo:
894 case vtn_variable_mode_push_constant:
895 case vtn_variable_mode_shader_record:
896 return true;
897
898 case vtn_variable_mode_workgroup:
899 return b->options->caps.workgroup_memory_explicit_layout;
900
901 default:
902 return false;
903 }
904 }
905
906 const struct glsl_type *
vtn_type_get_nir_type(struct vtn_builder * b,struct vtn_type * type,enum vtn_variable_mode mode)907 vtn_type_get_nir_type(struct vtn_builder *b, struct vtn_type *type,
908 enum vtn_variable_mode mode)
909 {
910 if (mode == vtn_variable_mode_atomic_counter) {
911 vtn_fail_if(glsl_without_array(type->type) != glsl_uint_type(),
912 "Variables in the AtomicCounter storage class should be "
913 "(possibly arrays of arrays of) uint.");
914 return glsl_type_wrap_in_arrays(glsl_atomic_uint_type(), type->type);
915 }
916
917 if (mode == vtn_variable_mode_uniform) {
918 switch (type->base_type) {
919 case vtn_base_type_array: {
920 const struct glsl_type *elem_type =
921 vtn_type_get_nir_type(b, type->array_element, mode);
922
923 return glsl_array_type(elem_type, type->length,
924 glsl_get_explicit_stride(type->type));
925 }
926
927 case vtn_base_type_struct: {
928 bool need_new_struct = false;
929 const uint32_t num_fields = type->length;
930 NIR_VLA(struct glsl_struct_field, fields, num_fields);
931 for (unsigned i = 0; i < num_fields; i++) {
932 fields[i] = *glsl_get_struct_field_data(type->type, i);
933 const struct glsl_type *field_nir_type =
934 vtn_type_get_nir_type(b, type->members[i], mode);
935 if (fields[i].type != field_nir_type) {
936 fields[i].type = field_nir_type;
937 need_new_struct = true;
938 }
939 }
940 if (need_new_struct) {
941 if (glsl_type_is_interface(type->type)) {
942 return glsl_interface_type(fields, num_fields,
943 /* packing */ 0, false,
944 glsl_get_type_name(type->type));
945 } else {
946 return glsl_struct_type(fields, num_fields,
947 glsl_get_type_name(type->type),
948 glsl_struct_type_is_packed(type->type));
949 }
950 } else {
951 /* No changes, just pass it on */
952 return type->type;
953 }
954 }
955
956 case vtn_base_type_image:
957 vtn_assert(glsl_type_is_texture(type->glsl_image));
958 return type->glsl_image;
959
960 case vtn_base_type_sampler:
961 return glsl_bare_sampler_type();
962
963 case vtn_base_type_sampled_image:
964 return glsl_texture_type_to_sampler(type->image->glsl_image,
965 false /* is_shadow */);
966
967 default:
968 return type->type;
969 }
970 }
971
972 if (mode == vtn_variable_mode_image) {
973 struct vtn_type *image_type = vtn_type_without_array(type);
974 vtn_assert(image_type->base_type == vtn_base_type_image);
975 return glsl_type_wrap_in_arrays(image_type->glsl_image, type->type);
976 }
977
978 /* Layout decorations are allowed but ignored in certain conditions,
979 * to allow SPIR-V generators perform type deduplication. Discard
980 * unnecessary ones when passing to NIR.
981 */
982 if (!vtn_type_needs_explicit_layout(b, type, mode))
983 return glsl_get_bare_type(type->type);
984
985 return type->type;
986 }
987
988 static struct vtn_type *
mutable_matrix_member(struct vtn_builder * b,struct vtn_type * type,int member)989 mutable_matrix_member(struct vtn_builder *b, struct vtn_type *type, int member)
990 {
991 type->members[member] = vtn_type_copy(b, type->members[member]);
992 type = type->members[member];
993
994 /* We may have an array of matrices.... Oh, joy! */
995 while (glsl_type_is_array(type->type)) {
996 type->array_element = vtn_type_copy(b, type->array_element);
997 type = type->array_element;
998 }
999
1000 vtn_assert(glsl_type_is_matrix(type->type));
1001
1002 return type;
1003 }
1004
1005 static void
vtn_handle_access_qualifier(struct vtn_builder * b,struct vtn_type * type,int member,enum gl_access_qualifier access)1006 vtn_handle_access_qualifier(struct vtn_builder *b, struct vtn_type *type,
1007 int member, enum gl_access_qualifier access)
1008 {
1009 type->members[member] = vtn_type_copy(b, type->members[member]);
1010 type = type->members[member];
1011
1012 type->access |= access;
1013 }
1014
1015 static void
array_stride_decoration_cb(struct vtn_builder * b,struct vtn_value * val,int member,const struct vtn_decoration * dec,void * void_ctx)1016 array_stride_decoration_cb(struct vtn_builder *b,
1017 struct vtn_value *val, int member,
1018 const struct vtn_decoration *dec, void *void_ctx)
1019 {
1020 struct vtn_type *type = val->type;
1021
1022 if (dec->decoration == SpvDecorationArrayStride) {
1023 if (vtn_type_contains_block(b, type)) {
1024 vtn_warn("The ArrayStride decoration cannot be applied to an array "
1025 "type which contains a structure type decorated Block "
1026 "or BufferBlock");
1027 /* Ignore the decoration */
1028 } else {
1029 vtn_fail_if(dec->operands[0] == 0, "ArrayStride must be non-zero");
1030 type->stride = dec->operands[0];
1031 }
1032 }
1033 }
1034
1035 static void
struct_member_decoration_cb(struct vtn_builder * b,UNUSED struct vtn_value * val,int member,const struct vtn_decoration * dec,void * void_ctx)1036 struct_member_decoration_cb(struct vtn_builder *b,
1037 UNUSED struct vtn_value *val, int member,
1038 const struct vtn_decoration *dec, void *void_ctx)
1039 {
1040 struct member_decoration_ctx *ctx = void_ctx;
1041
1042 if (member < 0)
1043 return;
1044
1045 assert(member < ctx->num_fields);
1046
1047 switch (dec->decoration) {
1048 case SpvDecorationRelaxedPrecision:
1049 case SpvDecorationUniform:
1050 case SpvDecorationUniformId:
1051 break; /* FIXME: Do nothing with this for now. */
1052 case SpvDecorationNonWritable:
1053 vtn_handle_access_qualifier(b, ctx->type, member, ACCESS_NON_WRITEABLE);
1054 break;
1055 case SpvDecorationNonReadable:
1056 vtn_handle_access_qualifier(b, ctx->type, member, ACCESS_NON_READABLE);
1057 break;
1058 case SpvDecorationVolatile:
1059 vtn_handle_access_qualifier(b, ctx->type, member, ACCESS_VOLATILE);
1060 break;
1061 case SpvDecorationCoherent:
1062 vtn_handle_access_qualifier(b, ctx->type, member, ACCESS_COHERENT);
1063 break;
1064 case SpvDecorationNoPerspective:
1065 ctx->fields[member].interpolation = INTERP_MODE_NOPERSPECTIVE;
1066 break;
1067 case SpvDecorationFlat:
1068 ctx->fields[member].interpolation = INTERP_MODE_FLAT;
1069 break;
1070 case SpvDecorationExplicitInterpAMD:
1071 ctx->fields[member].interpolation = INTERP_MODE_EXPLICIT;
1072 break;
1073 case SpvDecorationCentroid:
1074 ctx->fields[member].centroid = true;
1075 break;
1076 case SpvDecorationSample:
1077 ctx->fields[member].sample = true;
1078 break;
1079 case SpvDecorationStream:
1080 /* This is handled later by var_decoration_cb in vtn_variables.c */
1081 break;
1082 case SpvDecorationLocation:
1083 ctx->fields[member].location = dec->operands[0];
1084 break;
1085 case SpvDecorationComponent:
1086 break; /* FIXME: What should we do with these? */
1087 case SpvDecorationBuiltIn:
1088 ctx->type->members[member] = vtn_type_copy(b, ctx->type->members[member]);
1089 ctx->type->members[member]->is_builtin = true;
1090 ctx->type->members[member]->builtin = dec->operands[0];
1091 ctx->type->builtin_block = true;
1092 break;
1093 case SpvDecorationOffset:
1094 ctx->type->offsets[member] = dec->operands[0];
1095 ctx->fields[member].offset = dec->operands[0];
1096 break;
1097 case SpvDecorationMatrixStride:
1098 /* Handled as a second pass */
1099 break;
1100 case SpvDecorationColMajor:
1101 break; /* Nothing to do here. Column-major is the default. */
1102 case SpvDecorationRowMajor:
1103 mutable_matrix_member(b, ctx->type, member)->row_major = true;
1104 break;
1105
1106 case SpvDecorationPatch:
1107 case SpvDecorationPerPrimitiveNV:
1108 case SpvDecorationPerTaskNV:
1109 case SpvDecorationPerViewNV:
1110 break;
1111
1112 case SpvDecorationSpecId:
1113 case SpvDecorationBlock:
1114 case SpvDecorationBufferBlock:
1115 case SpvDecorationArrayStride:
1116 case SpvDecorationGLSLShared:
1117 case SpvDecorationGLSLPacked:
1118 case SpvDecorationInvariant:
1119 case SpvDecorationRestrict:
1120 case SpvDecorationAliased:
1121 case SpvDecorationConstant:
1122 case SpvDecorationIndex:
1123 case SpvDecorationBinding:
1124 case SpvDecorationDescriptorSet:
1125 case SpvDecorationLinkageAttributes:
1126 case SpvDecorationNoContraction:
1127 case SpvDecorationInputAttachmentIndex:
1128 case SpvDecorationCPacked:
1129 vtn_warn("Decoration not allowed on struct members: %s",
1130 spirv_decoration_to_string(dec->decoration));
1131 break;
1132
1133 case SpvDecorationXfbBuffer:
1134 case SpvDecorationXfbStride:
1135 /* This is handled later by var_decoration_cb in vtn_variables.c */
1136 break;
1137
1138 case SpvDecorationSaturatedConversion:
1139 case SpvDecorationFuncParamAttr:
1140 case SpvDecorationFPRoundingMode:
1141 case SpvDecorationFPFastMathMode:
1142 case SpvDecorationAlignment:
1143 if (b->shader->info.stage != MESA_SHADER_KERNEL) {
1144 vtn_warn("Decoration only allowed for CL-style kernels: %s",
1145 spirv_decoration_to_string(dec->decoration));
1146 }
1147 break;
1148
1149 case SpvDecorationUserSemantic:
1150 case SpvDecorationUserTypeGOOGLE:
1151 /* User semantic decorations can safely be ignored by the driver. */
1152 break;
1153
1154 default:
1155 vtn_fail_with_decoration("Unhandled decoration", dec->decoration);
1156 }
1157 }
1158
1159 /** Chases the array type all the way down to the tail and rewrites the
1160 * glsl_types to be based off the tail's glsl_type.
1161 */
1162 static void
vtn_array_type_rewrite_glsl_type(struct vtn_type * type)1163 vtn_array_type_rewrite_glsl_type(struct vtn_type *type)
1164 {
1165 if (type->base_type != vtn_base_type_array)
1166 return;
1167
1168 vtn_array_type_rewrite_glsl_type(type->array_element);
1169
1170 type->type = glsl_array_type(type->array_element->type,
1171 type->length, type->stride);
1172 }
1173
1174 /* Matrix strides are handled as a separate pass because we need to know
1175 * whether the matrix is row-major or not first.
1176 */
1177 static void
struct_member_matrix_stride_cb(struct vtn_builder * b,UNUSED struct vtn_value * val,int member,const struct vtn_decoration * dec,void * void_ctx)1178 struct_member_matrix_stride_cb(struct vtn_builder *b,
1179 UNUSED struct vtn_value *val, int member,
1180 const struct vtn_decoration *dec,
1181 void *void_ctx)
1182 {
1183 if (dec->decoration != SpvDecorationMatrixStride)
1184 return;
1185
1186 vtn_fail_if(member < 0,
1187 "The MatrixStride decoration is only allowed on members "
1188 "of OpTypeStruct");
1189 vtn_fail_if(dec->operands[0] == 0, "MatrixStride must be non-zero");
1190
1191 struct member_decoration_ctx *ctx = void_ctx;
1192
1193 struct vtn_type *mat_type = mutable_matrix_member(b, ctx->type, member);
1194 if (mat_type->row_major) {
1195 mat_type->array_element = vtn_type_copy(b, mat_type->array_element);
1196 mat_type->stride = mat_type->array_element->stride;
1197 mat_type->array_element->stride = dec->operands[0];
1198
1199 mat_type->type = glsl_explicit_matrix_type(mat_type->type,
1200 dec->operands[0], true);
1201 mat_type->array_element->type = glsl_get_column_type(mat_type->type);
1202 } else {
1203 vtn_assert(mat_type->array_element->stride > 0);
1204 mat_type->stride = dec->operands[0];
1205
1206 mat_type->type = glsl_explicit_matrix_type(mat_type->type,
1207 dec->operands[0], false);
1208 }
1209
1210 /* Now that we've replaced the glsl_type with a properly strided matrix
1211 * type, rewrite the member type so that it's an array of the proper kind
1212 * of glsl_type.
1213 */
1214 vtn_array_type_rewrite_glsl_type(ctx->type->members[member]);
1215 ctx->fields[member].type = ctx->type->members[member]->type;
1216 }
1217
1218 static void
struct_packed_decoration_cb(struct vtn_builder * b,struct vtn_value * val,int member,const struct vtn_decoration * dec,void * void_ctx)1219 struct_packed_decoration_cb(struct vtn_builder *b,
1220 struct vtn_value *val, int member,
1221 const struct vtn_decoration *dec, void *void_ctx)
1222 {
1223 vtn_assert(val->type->base_type == vtn_base_type_struct);
1224 if (dec->decoration == SpvDecorationCPacked) {
1225 if (b->shader->info.stage != MESA_SHADER_KERNEL) {
1226 vtn_warn("Decoration only allowed for CL-style kernels: %s",
1227 spirv_decoration_to_string(dec->decoration));
1228 }
1229 val->type->packed = true;
1230 }
1231 }
1232
1233 static void
struct_block_decoration_cb(struct vtn_builder * b,struct vtn_value * val,int member,const struct vtn_decoration * dec,void * ctx)1234 struct_block_decoration_cb(struct vtn_builder *b,
1235 struct vtn_value *val, int member,
1236 const struct vtn_decoration *dec, void *ctx)
1237 {
1238 if (member != -1)
1239 return;
1240
1241 struct vtn_type *type = val->type;
1242 if (dec->decoration == SpvDecorationBlock)
1243 type->block = true;
1244 else if (dec->decoration == SpvDecorationBufferBlock)
1245 type->buffer_block = true;
1246 }
1247
1248 static void
type_decoration_cb(struct vtn_builder * b,struct vtn_value * val,int member,const struct vtn_decoration * dec,UNUSED void * ctx)1249 type_decoration_cb(struct vtn_builder *b,
1250 struct vtn_value *val, int member,
1251 const struct vtn_decoration *dec, UNUSED void *ctx)
1252 {
1253 struct vtn_type *type = val->type;
1254
1255 if (member != -1) {
1256 /* This should have been handled by OpTypeStruct */
1257 assert(val->type->base_type == vtn_base_type_struct);
1258 assert(member >= 0 && member < val->type->length);
1259 return;
1260 }
1261
1262 switch (dec->decoration) {
1263 case SpvDecorationArrayStride:
1264 vtn_assert(type->base_type == vtn_base_type_array ||
1265 type->base_type == vtn_base_type_pointer);
1266 break;
1267 case SpvDecorationBlock:
1268 vtn_assert(type->base_type == vtn_base_type_struct);
1269 vtn_assert(type->block);
1270 break;
1271 case SpvDecorationBufferBlock:
1272 vtn_assert(type->base_type == vtn_base_type_struct);
1273 vtn_assert(type->buffer_block);
1274 break;
1275 case SpvDecorationGLSLShared:
1276 case SpvDecorationGLSLPacked:
1277 /* Ignore these, since we get explicit offsets anyways */
1278 break;
1279
1280 case SpvDecorationRowMajor:
1281 case SpvDecorationColMajor:
1282 case SpvDecorationMatrixStride:
1283 case SpvDecorationBuiltIn:
1284 case SpvDecorationNoPerspective:
1285 case SpvDecorationFlat:
1286 case SpvDecorationPatch:
1287 case SpvDecorationCentroid:
1288 case SpvDecorationSample:
1289 case SpvDecorationExplicitInterpAMD:
1290 case SpvDecorationVolatile:
1291 case SpvDecorationCoherent:
1292 case SpvDecorationNonWritable:
1293 case SpvDecorationNonReadable:
1294 case SpvDecorationUniform:
1295 case SpvDecorationUniformId:
1296 case SpvDecorationLocation:
1297 case SpvDecorationComponent:
1298 case SpvDecorationOffset:
1299 case SpvDecorationXfbBuffer:
1300 case SpvDecorationXfbStride:
1301 case SpvDecorationUserSemantic:
1302 vtn_warn("Decoration only allowed for struct members: %s",
1303 spirv_decoration_to_string(dec->decoration));
1304 break;
1305
1306 case SpvDecorationStream:
1307 /* We don't need to do anything here, as stream is filled up when
1308 * aplying the decoration to a variable, just check that if it is not a
1309 * struct member, it should be a struct.
1310 */
1311 vtn_assert(type->base_type == vtn_base_type_struct);
1312 break;
1313
1314 case SpvDecorationRelaxedPrecision:
1315 case SpvDecorationSpecId:
1316 case SpvDecorationInvariant:
1317 case SpvDecorationRestrict:
1318 case SpvDecorationAliased:
1319 case SpvDecorationConstant:
1320 case SpvDecorationIndex:
1321 case SpvDecorationBinding:
1322 case SpvDecorationDescriptorSet:
1323 case SpvDecorationLinkageAttributes:
1324 case SpvDecorationNoContraction:
1325 case SpvDecorationInputAttachmentIndex:
1326 vtn_warn("Decoration not allowed on types: %s",
1327 spirv_decoration_to_string(dec->decoration));
1328 break;
1329
1330 case SpvDecorationCPacked:
1331 /* Handled when parsing a struct type, nothing to do here. */
1332 break;
1333
1334 case SpvDecorationSaturatedConversion:
1335 case SpvDecorationFuncParamAttr:
1336 case SpvDecorationFPRoundingMode:
1337 case SpvDecorationFPFastMathMode:
1338 case SpvDecorationAlignment:
1339 vtn_warn("Decoration only allowed for CL-style kernels: %s",
1340 spirv_decoration_to_string(dec->decoration));
1341 break;
1342
1343 case SpvDecorationUserTypeGOOGLE:
1344 /* User semantic decorations can safely be ignored by the driver. */
1345 break;
1346
1347 default:
1348 vtn_fail_with_decoration("Unhandled decoration", dec->decoration);
1349 }
1350 }
1351
1352 static unsigned
translate_image_format(struct vtn_builder * b,SpvImageFormat format)1353 translate_image_format(struct vtn_builder *b, SpvImageFormat format)
1354 {
1355 switch (format) {
1356 case SpvImageFormatUnknown: return PIPE_FORMAT_NONE;
1357 case SpvImageFormatRgba32f: return PIPE_FORMAT_R32G32B32A32_FLOAT;
1358 case SpvImageFormatRgba16f: return PIPE_FORMAT_R16G16B16A16_FLOAT;
1359 case SpvImageFormatR32f: return PIPE_FORMAT_R32_FLOAT;
1360 case SpvImageFormatRgba8: return PIPE_FORMAT_R8G8B8A8_UNORM;
1361 case SpvImageFormatRgba8Snorm: return PIPE_FORMAT_R8G8B8A8_SNORM;
1362 case SpvImageFormatRg32f: return PIPE_FORMAT_R32G32_FLOAT;
1363 case SpvImageFormatRg16f: return PIPE_FORMAT_R16G16_FLOAT;
1364 case SpvImageFormatR11fG11fB10f: return PIPE_FORMAT_R11G11B10_FLOAT;
1365 case SpvImageFormatR16f: return PIPE_FORMAT_R16_FLOAT;
1366 case SpvImageFormatRgba16: return PIPE_FORMAT_R16G16B16A16_UNORM;
1367 case SpvImageFormatRgb10A2: return PIPE_FORMAT_R10G10B10A2_UNORM;
1368 case SpvImageFormatRg16: return PIPE_FORMAT_R16G16_UNORM;
1369 case SpvImageFormatRg8: return PIPE_FORMAT_R8G8_UNORM;
1370 case SpvImageFormatR16: return PIPE_FORMAT_R16_UNORM;
1371 case SpvImageFormatR8: return PIPE_FORMAT_R8_UNORM;
1372 case SpvImageFormatRgba16Snorm: return PIPE_FORMAT_R16G16B16A16_SNORM;
1373 case SpvImageFormatRg16Snorm: return PIPE_FORMAT_R16G16_SNORM;
1374 case SpvImageFormatRg8Snorm: return PIPE_FORMAT_R8G8_SNORM;
1375 case SpvImageFormatR16Snorm: return PIPE_FORMAT_R16_SNORM;
1376 case SpvImageFormatR8Snorm: return PIPE_FORMAT_R8_SNORM;
1377 case SpvImageFormatRgba32i: return PIPE_FORMAT_R32G32B32A32_SINT;
1378 case SpvImageFormatRgba16i: return PIPE_FORMAT_R16G16B16A16_SINT;
1379 case SpvImageFormatRgba8i: return PIPE_FORMAT_R8G8B8A8_SINT;
1380 case SpvImageFormatR32i: return PIPE_FORMAT_R32_SINT;
1381 case SpvImageFormatRg32i: return PIPE_FORMAT_R32G32_SINT;
1382 case SpvImageFormatRg16i: return PIPE_FORMAT_R16G16_SINT;
1383 case SpvImageFormatRg8i: return PIPE_FORMAT_R8G8_SINT;
1384 case SpvImageFormatR16i: return PIPE_FORMAT_R16_SINT;
1385 case SpvImageFormatR8i: return PIPE_FORMAT_R8_SINT;
1386 case SpvImageFormatRgba32ui: return PIPE_FORMAT_R32G32B32A32_UINT;
1387 case SpvImageFormatRgba16ui: return PIPE_FORMAT_R16G16B16A16_UINT;
1388 case SpvImageFormatRgba8ui: return PIPE_FORMAT_R8G8B8A8_UINT;
1389 case SpvImageFormatR32ui: return PIPE_FORMAT_R32_UINT;
1390 case SpvImageFormatRgb10a2ui: return PIPE_FORMAT_R10G10B10A2_UINT;
1391 case SpvImageFormatRg32ui: return PIPE_FORMAT_R32G32_UINT;
1392 case SpvImageFormatRg16ui: return PIPE_FORMAT_R16G16_UINT;
1393 case SpvImageFormatRg8ui: return PIPE_FORMAT_R8G8_UINT;
1394 case SpvImageFormatR16ui: return PIPE_FORMAT_R16_UINT;
1395 case SpvImageFormatR8ui: return PIPE_FORMAT_R8_UINT;
1396 case SpvImageFormatR64ui: return PIPE_FORMAT_R64_UINT;
1397 case SpvImageFormatR64i: return PIPE_FORMAT_R64_SINT;
1398 default:
1399 vtn_fail("Invalid image format: %s (%u)",
1400 spirv_imageformat_to_string(format), format);
1401 }
1402 }
1403
1404 static void
vtn_handle_type(struct vtn_builder * b,SpvOp opcode,const uint32_t * w,unsigned count)1405 vtn_handle_type(struct vtn_builder *b, SpvOp opcode,
1406 const uint32_t *w, unsigned count)
1407 {
1408 struct vtn_value *val = NULL;
1409
1410 /* In order to properly handle forward declarations, we have to defer
1411 * allocation for pointer types.
1412 */
1413 if (opcode != SpvOpTypePointer && opcode != SpvOpTypeForwardPointer) {
1414 val = vtn_push_value(b, w[1], vtn_value_type_type);
1415 vtn_fail_if(val->type != NULL,
1416 "Only pointers can have forward declarations");
1417 val->type = rzalloc(b, struct vtn_type);
1418 val->type->id = w[1];
1419 }
1420
1421 switch (opcode) {
1422 case SpvOpTypeVoid:
1423 val->type->base_type = vtn_base_type_void;
1424 val->type->type = glsl_void_type();
1425 break;
1426 case SpvOpTypeBool:
1427 val->type->base_type = vtn_base_type_scalar;
1428 val->type->type = glsl_bool_type();
1429 val->type->length = 1;
1430 break;
1431 case SpvOpTypeInt: {
1432 int bit_size = w[2];
1433 const bool signedness = w[3];
1434 vtn_fail_if(bit_size != 8 && bit_size != 16 &&
1435 bit_size != 32 && bit_size != 64,
1436 "Invalid int bit size: %u", bit_size);
1437 val->type->base_type = vtn_base_type_scalar;
1438 val->type->type = signedness ? glsl_intN_t_type(bit_size) :
1439 glsl_uintN_t_type(bit_size);
1440 val->type->length = 1;
1441 break;
1442 }
1443
1444 case SpvOpTypeFloat: {
1445 int bit_size = w[2];
1446 val->type->base_type = vtn_base_type_scalar;
1447 vtn_fail_if(bit_size != 16 && bit_size != 32 && bit_size != 64,
1448 "Invalid float bit size: %u", bit_size);
1449 val->type->type = glsl_floatN_t_type(bit_size);
1450 val->type->length = 1;
1451 break;
1452 }
1453
1454 case SpvOpTypeVector: {
1455 struct vtn_type *base = vtn_get_type(b, w[2]);
1456 unsigned elems = w[3];
1457
1458 vtn_fail_if(base->base_type != vtn_base_type_scalar,
1459 "Base type for OpTypeVector must be a scalar");
1460 vtn_fail_if((elems < 2 || elems > 4) && (elems != 8) && (elems != 16),
1461 "Invalid component count for OpTypeVector");
1462
1463 val->type->base_type = vtn_base_type_vector;
1464 val->type->type = glsl_vector_type(glsl_get_base_type(base->type), elems);
1465 val->type->length = elems;
1466 val->type->stride = glsl_type_is_boolean(val->type->type)
1467 ? 4 : glsl_get_bit_size(base->type) / 8;
1468 val->type->array_element = base;
1469 break;
1470 }
1471
1472 case SpvOpTypeMatrix: {
1473 struct vtn_type *base = vtn_get_type(b, w[2]);
1474 unsigned columns = w[3];
1475
1476 vtn_fail_if(base->base_type != vtn_base_type_vector,
1477 "Base type for OpTypeMatrix must be a vector");
1478 vtn_fail_if(columns < 2 || columns > 4,
1479 "Invalid column count for OpTypeMatrix");
1480
1481 val->type->base_type = vtn_base_type_matrix;
1482 val->type->type = glsl_matrix_type(glsl_get_base_type(base->type),
1483 glsl_get_vector_elements(base->type),
1484 columns);
1485 vtn_fail_if(glsl_type_is_error(val->type->type),
1486 "Unsupported base type for OpTypeMatrix");
1487 assert(!glsl_type_is_error(val->type->type));
1488 val->type->length = columns;
1489 val->type->array_element = base;
1490 val->type->row_major = false;
1491 val->type->stride = 0;
1492 break;
1493 }
1494
1495 case SpvOpTypeRuntimeArray:
1496 case SpvOpTypeArray: {
1497 struct vtn_type *array_element = vtn_get_type(b, w[2]);
1498
1499 if (opcode == SpvOpTypeRuntimeArray) {
1500 /* A length of 0 is used to denote unsized arrays */
1501 val->type->length = 0;
1502 } else {
1503 val->type->length = vtn_constant_uint(b, w[3]);
1504 }
1505
1506 val->type->base_type = vtn_base_type_array;
1507 val->type->array_element = array_element;
1508
1509 vtn_foreach_decoration(b, val, array_stride_decoration_cb, NULL);
1510 val->type->type = glsl_array_type(array_element->type, val->type->length,
1511 val->type->stride);
1512 break;
1513 }
1514
1515 case SpvOpTypeStruct: {
1516 unsigned num_fields = count - 2;
1517 val->type->base_type = vtn_base_type_struct;
1518 val->type->length = num_fields;
1519 val->type->members = ralloc_array(b, struct vtn_type *, num_fields);
1520 val->type->offsets = ralloc_array(b, unsigned, num_fields);
1521 val->type->packed = false;
1522
1523 NIR_VLA(struct glsl_struct_field, fields, count);
1524 for (unsigned i = 0; i < num_fields; i++) {
1525 val->type->members[i] = vtn_get_type(b, w[i + 2]);
1526 const char *name = NULL;
1527 for (struct vtn_decoration *dec = val->decoration; dec; dec = dec->next) {
1528 if (dec->scope == VTN_DEC_STRUCT_MEMBER_NAME0 - i) {
1529 name = dec->member_name;
1530 break;
1531 }
1532 }
1533 if (!name)
1534 name = ralloc_asprintf(b, "field%d", i);
1535
1536 fields[i] = (struct glsl_struct_field) {
1537 .type = val->type->members[i]->type,
1538 .name = name,
1539 .location = -1,
1540 .offset = -1,
1541 };
1542 }
1543
1544 vtn_foreach_decoration(b, val, struct_packed_decoration_cb, NULL);
1545
1546 struct member_decoration_ctx ctx = {
1547 .num_fields = num_fields,
1548 .fields = fields,
1549 .type = val->type
1550 };
1551
1552 vtn_foreach_decoration(b, val, struct_member_decoration_cb, &ctx);
1553
1554 /* Propagate access specifiers that are present on all members to the overall type */
1555 enum gl_access_qualifier overall_access = ACCESS_COHERENT | ACCESS_VOLATILE |
1556 ACCESS_NON_READABLE | ACCESS_NON_WRITEABLE;
1557 for (unsigned i = 0; i < num_fields; ++i)
1558 overall_access &= val->type->members[i]->access;
1559 val->type->access = overall_access;
1560
1561 vtn_foreach_decoration(b, val, struct_member_matrix_stride_cb, &ctx);
1562
1563 vtn_foreach_decoration(b, val, struct_block_decoration_cb, NULL);
1564
1565 const char *name = val->name;
1566
1567 if (val->type->block || val->type->buffer_block) {
1568 /* Packing will be ignored since types coming from SPIR-V are
1569 * explicitly laid out.
1570 */
1571 val->type->type = glsl_interface_type(fields, num_fields,
1572 /* packing */ 0, false,
1573 name ? name : "block");
1574 } else {
1575 val->type->type = glsl_struct_type(fields, num_fields,
1576 name ? name : "struct",
1577 val->type->packed);
1578 }
1579 break;
1580 }
1581
1582 case SpvOpTypeFunction: {
1583 val->type->base_type = vtn_base_type_function;
1584 val->type->type = NULL;
1585
1586 val->type->return_type = vtn_get_type(b, w[2]);
1587
1588 const unsigned num_params = count - 3;
1589 val->type->length = num_params;
1590 val->type->params = ralloc_array(b, struct vtn_type *, num_params);
1591 for (unsigned i = 0; i < count - 3; i++) {
1592 val->type->params[i] = vtn_get_type(b, w[i + 3]);
1593 }
1594 break;
1595 }
1596
1597 case SpvOpTypePointer:
1598 case SpvOpTypeForwardPointer: {
1599 /* We can't blindly push the value because it might be a forward
1600 * declaration.
1601 */
1602 val = vtn_untyped_value(b, w[1]);
1603
1604 SpvStorageClass storage_class = w[2];
1605
1606 vtn_fail_if(opcode == SpvOpTypeForwardPointer &&
1607 b->shader->info.stage != MESA_SHADER_KERNEL &&
1608 storage_class != SpvStorageClassPhysicalStorageBuffer,
1609 "OpTypeForwardPointer is only allowed in Vulkan with "
1610 "the PhysicalStorageBuffer storage class");
1611
1612 struct vtn_type *deref_type = NULL;
1613 if (opcode == SpvOpTypePointer)
1614 deref_type = vtn_get_type(b, w[3]);
1615
1616 bool has_forward_pointer = false;
1617 if (val->value_type == vtn_value_type_invalid) {
1618 val->value_type = vtn_value_type_type;
1619 val->type = rzalloc(b, struct vtn_type);
1620 val->type->id = w[1];
1621 val->type->base_type = vtn_base_type_pointer;
1622 val->type->storage_class = storage_class;
1623
1624 /* These can actually be stored to nir_variables and used as SSA
1625 * values so they need a real glsl_type.
1626 */
1627 enum vtn_variable_mode mode = vtn_storage_class_to_mode(
1628 b, storage_class, deref_type, NULL);
1629
1630 /* The deref type should only matter for the UniformConstant storage
1631 * class. In particular, it should never matter for any storage
1632 * classes that are allowed in combination with OpTypeForwardPointer.
1633 */
1634 if (storage_class != SpvStorageClassUniform &&
1635 storage_class != SpvStorageClassUniformConstant) {
1636 assert(mode == vtn_storage_class_to_mode(b, storage_class,
1637 NULL, NULL));
1638 }
1639
1640 val->type->type = nir_address_format_to_glsl_type(
1641 vtn_mode_to_address_format(b, mode));
1642 } else {
1643 vtn_fail_if(val->type->storage_class != storage_class,
1644 "The storage classes of an OpTypePointer and any "
1645 "OpTypeForwardPointers that provide forward "
1646 "declarations of it must match.");
1647 has_forward_pointer = true;
1648 }
1649
1650 if (opcode == SpvOpTypePointer) {
1651 vtn_fail_if(val->type->deref != NULL,
1652 "While OpTypeForwardPointer can be used to provide a "
1653 "forward declaration of a pointer, OpTypePointer can "
1654 "only be used once for a given id.");
1655
1656 vtn_fail_if(has_forward_pointer &&
1657 deref_type->base_type != vtn_base_type_struct,
1658 "An OpTypePointer instruction must declare "
1659 "Pointer Type to be a pointer to an OpTypeStruct.");
1660
1661 val->type->deref = deref_type;
1662
1663 /* Only certain storage classes use ArrayStride. */
1664 switch (storage_class) {
1665 case SpvStorageClassWorkgroup:
1666 if (!b->options->caps.workgroup_memory_explicit_layout)
1667 break;
1668 FALLTHROUGH;
1669
1670 case SpvStorageClassUniform:
1671 case SpvStorageClassPushConstant:
1672 case SpvStorageClassStorageBuffer:
1673 case SpvStorageClassPhysicalStorageBuffer:
1674 vtn_foreach_decoration(b, val, array_stride_decoration_cb, NULL);
1675 break;
1676
1677 default:
1678 /* Nothing to do. */
1679 break;
1680 }
1681 }
1682 break;
1683 }
1684
1685 case SpvOpTypeImage: {
1686 val->type->base_type = vtn_base_type_image;
1687
1688 /* Images are represented in NIR as a scalar SSA value that is the
1689 * result of a deref instruction. An OpLoad on an OpTypeImage pointer
1690 * from UniformConstant memory just takes the NIR deref from the pointer
1691 * and turns it into an SSA value.
1692 */
1693 val->type->type = nir_address_format_to_glsl_type(
1694 vtn_mode_to_address_format(b, vtn_variable_mode_function));
1695
1696 const struct vtn_type *sampled_type = vtn_get_type(b, w[2]);
1697 if (b->shader->info.stage == MESA_SHADER_KERNEL) {
1698 vtn_fail_if(sampled_type->base_type != vtn_base_type_void,
1699 "Sampled type of OpTypeImage must be void for kernels");
1700 } else {
1701 vtn_fail_if(sampled_type->base_type != vtn_base_type_scalar,
1702 "Sampled type of OpTypeImage must be a scalar");
1703 if (b->options->caps.image_atomic_int64) {
1704 vtn_fail_if(glsl_get_bit_size(sampled_type->type) != 32 &&
1705 glsl_get_bit_size(sampled_type->type) != 64,
1706 "Sampled type of OpTypeImage must be a 32 or 64-bit "
1707 "scalar");
1708 } else {
1709 vtn_fail_if(glsl_get_bit_size(sampled_type->type) != 32,
1710 "Sampled type of OpTypeImage must be a 32-bit scalar");
1711 }
1712 }
1713
1714 enum glsl_sampler_dim dim;
1715 switch ((SpvDim)w[3]) {
1716 case SpvDim1D: dim = GLSL_SAMPLER_DIM_1D; break;
1717 case SpvDim2D: dim = GLSL_SAMPLER_DIM_2D; break;
1718 case SpvDim3D: dim = GLSL_SAMPLER_DIM_3D; break;
1719 case SpvDimCube: dim = GLSL_SAMPLER_DIM_CUBE; break;
1720 case SpvDimRect: dim = GLSL_SAMPLER_DIM_RECT; break;
1721 case SpvDimBuffer: dim = GLSL_SAMPLER_DIM_BUF; break;
1722 case SpvDimSubpassData: dim = GLSL_SAMPLER_DIM_SUBPASS; break;
1723 default:
1724 vtn_fail("Invalid SPIR-V image dimensionality: %s (%u)",
1725 spirv_dim_to_string((SpvDim)w[3]), w[3]);
1726 }
1727
1728 /* w[4]: as per Vulkan spec "Validation Rules within a Module",
1729 * The “Depth” operand of OpTypeImage is ignored.
1730 */
1731 bool is_array = w[5];
1732 bool multisampled = w[6];
1733 unsigned sampled = w[7];
1734 SpvImageFormat format = w[8];
1735
1736 if (count > 9)
1737 val->type->access_qualifier = w[9];
1738 else if (b->shader->info.stage == MESA_SHADER_KERNEL)
1739 /* Per the CL C spec: If no qualifier is provided, read_only is assumed. */
1740 val->type->access_qualifier = SpvAccessQualifierReadOnly;
1741 else
1742 val->type->access_qualifier = SpvAccessQualifierReadWrite;
1743
1744 if (multisampled) {
1745 if (dim == GLSL_SAMPLER_DIM_2D)
1746 dim = GLSL_SAMPLER_DIM_MS;
1747 else if (dim == GLSL_SAMPLER_DIM_SUBPASS)
1748 dim = GLSL_SAMPLER_DIM_SUBPASS_MS;
1749 else
1750 vtn_fail("Unsupported multisampled image type");
1751 }
1752
1753 val->type->image_format = translate_image_format(b, format);
1754
1755 enum glsl_base_type sampled_base_type =
1756 glsl_get_base_type(sampled_type->type);
1757 if (sampled == 1) {
1758 val->type->glsl_image = glsl_texture_type(dim, is_array,
1759 sampled_base_type);
1760 } else if (sampled == 2) {
1761 val->type->glsl_image = glsl_image_type(dim, is_array,
1762 sampled_base_type);
1763 } else if (b->shader->info.stage == MESA_SHADER_KERNEL) {
1764 val->type->glsl_image = glsl_image_type(dim, is_array,
1765 GLSL_TYPE_VOID);
1766 } else {
1767 vtn_fail("We need to know if the image will be sampled");
1768 }
1769 break;
1770 }
1771
1772 case SpvOpTypeSampledImage: {
1773 val->type->base_type = vtn_base_type_sampled_image;
1774 val->type->image = vtn_get_type(b, w[2]);
1775
1776 /* Sampled images are represented NIR as a vec2 SSA value where each
1777 * component is the result of a deref instruction. The first component
1778 * is the image and the second is the sampler. An OpLoad on an
1779 * OpTypeSampledImage pointer from UniformConstant memory just takes
1780 * the NIR deref from the pointer and duplicates it to both vector
1781 * components.
1782 */
1783 nir_address_format addr_format =
1784 vtn_mode_to_address_format(b, vtn_variable_mode_function);
1785 assert(nir_address_format_num_components(addr_format) == 1);
1786 unsigned bit_size = nir_address_format_bit_size(addr_format);
1787 assert(bit_size == 32 || bit_size == 64);
1788
1789 enum glsl_base_type base_type =
1790 bit_size == 32 ? GLSL_TYPE_UINT : GLSL_TYPE_UINT64;
1791 val->type->type = glsl_vector_type(base_type, 2);
1792 break;
1793 }
1794
1795 case SpvOpTypeSampler:
1796 val->type->base_type = vtn_base_type_sampler;
1797
1798 /* Samplers are represented in NIR as a scalar SSA value that is the
1799 * result of a deref instruction. An OpLoad on an OpTypeSampler pointer
1800 * from UniformConstant memory just takes the NIR deref from the pointer
1801 * and turns it into an SSA value.
1802 */
1803 val->type->type = nir_address_format_to_glsl_type(
1804 vtn_mode_to_address_format(b, vtn_variable_mode_function));
1805 break;
1806
1807 case SpvOpTypeAccelerationStructureKHR:
1808 val->type->base_type = vtn_base_type_accel_struct;
1809 val->type->type = glsl_uint64_t_type();
1810 break;
1811
1812
1813 case SpvOpTypeOpaque: {
1814 val->type->base_type = vtn_base_type_struct;
1815 const char *name = vtn_string_literal(b, &w[2], count - 2, NULL);
1816 val->type->type = glsl_struct_type(NULL, 0, name, false);
1817 break;
1818 }
1819
1820 case SpvOpTypeRayQueryKHR: {
1821 val->type->base_type = vtn_base_type_ray_query;
1822 const char *name = "RayQueryKHR";
1823 val->type->type = glsl_struct_type(NULL, 0, name, false);
1824 /* We may need to run queries on helper invocations. Here the parser
1825 * doesn't go through a deeper analysis on whether the result of a query
1826 * will be used in derivative instructions.
1827 *
1828 * An implementation willing to optimize this would look through the IR
1829 * and check if any derivative instruction uses the result of a query
1830 * and drop this flag if not.
1831 */
1832 if (b->shader->info.stage == MESA_SHADER_FRAGMENT)
1833 val->type->access = ACCESS_INCLUDE_HELPERS;
1834 break;
1835 }
1836
1837 case SpvOpTypeEvent:
1838 val->type->base_type = vtn_base_type_event;
1839 val->type->type = glsl_int_type();
1840 break;
1841
1842 case SpvOpTypeDeviceEvent:
1843 case SpvOpTypeReserveId:
1844 case SpvOpTypeQueue:
1845 case SpvOpTypePipe:
1846 default:
1847 vtn_fail_with_opcode("Unhandled opcode", opcode);
1848 }
1849
1850 vtn_foreach_decoration(b, val, type_decoration_cb, NULL);
1851
1852 if (val->type->base_type == vtn_base_type_struct &&
1853 (val->type->block || val->type->buffer_block)) {
1854 for (unsigned i = 0; i < val->type->length; i++) {
1855 vtn_fail_if(vtn_type_contains_block(b, val->type->members[i]),
1856 "Block and BufferBlock decorations cannot decorate a "
1857 "structure type that is nested at any level inside "
1858 "another structure type decorated with Block or "
1859 "BufferBlock.");
1860 }
1861 }
1862 }
1863
1864 static nir_constant *
vtn_null_constant(struct vtn_builder * b,struct vtn_type * type)1865 vtn_null_constant(struct vtn_builder *b, struct vtn_type *type)
1866 {
1867 nir_constant *c = rzalloc(b, nir_constant);
1868
1869 switch (type->base_type) {
1870 case vtn_base_type_scalar:
1871 case vtn_base_type_vector:
1872 /* Nothing to do here. It's already initialized to zero */
1873 break;
1874
1875 case vtn_base_type_pointer: {
1876 enum vtn_variable_mode mode = vtn_storage_class_to_mode(
1877 b, type->storage_class, type->deref, NULL);
1878 nir_address_format addr_format = vtn_mode_to_address_format(b, mode);
1879
1880 const nir_const_value *null_value = nir_address_format_null_value(addr_format);
1881 memcpy(c->values, null_value,
1882 sizeof(nir_const_value) * nir_address_format_num_components(addr_format));
1883 break;
1884 }
1885
1886 case vtn_base_type_void:
1887 case vtn_base_type_image:
1888 case vtn_base_type_sampler:
1889 case vtn_base_type_sampled_image:
1890 case vtn_base_type_function:
1891 case vtn_base_type_event:
1892 /* For those we have to return something but it doesn't matter what. */
1893 break;
1894
1895 case vtn_base_type_matrix:
1896 case vtn_base_type_array:
1897 vtn_assert(type->length > 0);
1898 c->num_elements = type->length;
1899 c->elements = ralloc_array(b, nir_constant *, c->num_elements);
1900
1901 c->elements[0] = vtn_null_constant(b, type->array_element);
1902 for (unsigned i = 1; i < c->num_elements; i++)
1903 c->elements[i] = c->elements[0];
1904 break;
1905
1906 case vtn_base_type_struct:
1907 c->num_elements = type->length;
1908 c->elements = ralloc_array(b, nir_constant *, c->num_elements);
1909 for (unsigned i = 0; i < c->num_elements; i++)
1910 c->elements[i] = vtn_null_constant(b, type->members[i]);
1911 break;
1912
1913 default:
1914 vtn_fail("Invalid type for null constant");
1915 }
1916
1917 return c;
1918 }
1919
1920 static void
spec_constant_decoration_cb(struct vtn_builder * b,UNUSED struct vtn_value * val,ASSERTED int member,const struct vtn_decoration * dec,void * data)1921 spec_constant_decoration_cb(struct vtn_builder *b, UNUSED struct vtn_value *val,
1922 ASSERTED int member,
1923 const struct vtn_decoration *dec, void *data)
1924 {
1925 vtn_assert(member == -1);
1926 if (dec->decoration != SpvDecorationSpecId)
1927 return;
1928
1929 nir_const_value *value = data;
1930 for (unsigned i = 0; i < b->num_specializations; i++) {
1931 if (b->specializations[i].id == dec->operands[0]) {
1932 *value = b->specializations[i].value;
1933 return;
1934 }
1935 }
1936 }
1937
1938 static void
handle_workgroup_size_decoration_cb(struct vtn_builder * b,struct vtn_value * val,ASSERTED int member,const struct vtn_decoration * dec,UNUSED void * data)1939 handle_workgroup_size_decoration_cb(struct vtn_builder *b,
1940 struct vtn_value *val,
1941 ASSERTED int member,
1942 const struct vtn_decoration *dec,
1943 UNUSED void *data)
1944 {
1945 vtn_assert(member == -1);
1946 if (dec->decoration != SpvDecorationBuiltIn ||
1947 dec->operands[0] != SpvBuiltInWorkgroupSize)
1948 return;
1949
1950 vtn_assert(val->type->type == glsl_vector_type(GLSL_TYPE_UINT, 3));
1951 b->workgroup_size_builtin = val;
1952 }
1953
1954 static void
vtn_handle_constant(struct vtn_builder * b,SpvOp opcode,const uint32_t * w,unsigned count)1955 vtn_handle_constant(struct vtn_builder *b, SpvOp opcode,
1956 const uint32_t *w, unsigned count)
1957 {
1958 struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_constant);
1959 val->constant = rzalloc(b, nir_constant);
1960 switch (opcode) {
1961 case SpvOpConstantTrue:
1962 case SpvOpConstantFalse:
1963 case SpvOpSpecConstantTrue:
1964 case SpvOpSpecConstantFalse: {
1965 vtn_fail_if(val->type->type != glsl_bool_type(),
1966 "Result type of %s must be OpTypeBool",
1967 spirv_op_to_string(opcode));
1968
1969 bool bval = (opcode == SpvOpConstantTrue ||
1970 opcode == SpvOpSpecConstantTrue);
1971
1972 nir_const_value u32val = nir_const_value_for_uint(bval, 32);
1973
1974 if (opcode == SpvOpSpecConstantTrue ||
1975 opcode == SpvOpSpecConstantFalse)
1976 vtn_foreach_decoration(b, val, spec_constant_decoration_cb, &u32val);
1977
1978 val->constant->values[0].b = u32val.u32 != 0;
1979 break;
1980 }
1981
1982 case SpvOpConstant:
1983 case SpvOpSpecConstant: {
1984 vtn_fail_if(val->type->base_type != vtn_base_type_scalar,
1985 "Result type of %s must be a scalar",
1986 spirv_op_to_string(opcode));
1987 int bit_size = glsl_get_bit_size(val->type->type);
1988 switch (bit_size) {
1989 case 64:
1990 val->constant->values[0].u64 = vtn_u64_literal(&w[3]);
1991 break;
1992 case 32:
1993 val->constant->values[0].u32 = w[3];
1994 break;
1995 case 16:
1996 val->constant->values[0].u16 = w[3];
1997 break;
1998 case 8:
1999 val->constant->values[0].u8 = w[3];
2000 break;
2001 default:
2002 vtn_fail("Unsupported SpvOpConstant bit size: %u", bit_size);
2003 }
2004
2005 if (opcode == SpvOpSpecConstant)
2006 vtn_foreach_decoration(b, val, spec_constant_decoration_cb,
2007 &val->constant->values[0]);
2008 break;
2009 }
2010
2011 case SpvOpSpecConstantComposite:
2012 case SpvOpConstantComposite: {
2013 unsigned elem_count = count - 3;
2014 vtn_fail_if(elem_count != val->type->length,
2015 "%s has %u constituents, expected %u",
2016 spirv_op_to_string(opcode), elem_count, val->type->length);
2017
2018 nir_constant **elems = ralloc_array(b, nir_constant *, elem_count);
2019 val->is_undef_constant = true;
2020 for (unsigned i = 0; i < elem_count; i++) {
2021 struct vtn_value *elem_val = vtn_untyped_value(b, w[i + 3]);
2022
2023 if (elem_val->value_type == vtn_value_type_constant) {
2024 elems[i] = elem_val->constant;
2025 val->is_undef_constant = val->is_undef_constant &&
2026 elem_val->is_undef_constant;
2027 } else {
2028 vtn_fail_if(elem_val->value_type != vtn_value_type_undef,
2029 "only constants or undefs allowed for "
2030 "SpvOpConstantComposite");
2031 /* to make it easier, just insert a NULL constant for now */
2032 elems[i] = vtn_null_constant(b, elem_val->type);
2033 }
2034 }
2035
2036 switch (val->type->base_type) {
2037 case vtn_base_type_vector: {
2038 assert(glsl_type_is_vector(val->type->type));
2039 for (unsigned i = 0; i < elem_count; i++)
2040 val->constant->values[i] = elems[i]->values[0];
2041 break;
2042 }
2043
2044 case vtn_base_type_matrix:
2045 case vtn_base_type_struct:
2046 case vtn_base_type_array:
2047 ralloc_steal(val->constant, elems);
2048 val->constant->num_elements = elem_count;
2049 val->constant->elements = elems;
2050 break;
2051
2052 default:
2053 vtn_fail("Result type of %s must be a composite type",
2054 spirv_op_to_string(opcode));
2055 }
2056 break;
2057 }
2058
2059 case SpvOpSpecConstantOp: {
2060 nir_const_value u32op = nir_const_value_for_uint(w[3], 32);
2061 vtn_foreach_decoration(b, val, spec_constant_decoration_cb, &u32op);
2062 SpvOp opcode = u32op.u32;
2063 switch (opcode) {
2064 case SpvOpVectorShuffle: {
2065 struct vtn_value *v0 = &b->values[w[4]];
2066 struct vtn_value *v1 = &b->values[w[5]];
2067
2068 vtn_assert(v0->value_type == vtn_value_type_constant ||
2069 v0->value_type == vtn_value_type_undef);
2070 vtn_assert(v1->value_type == vtn_value_type_constant ||
2071 v1->value_type == vtn_value_type_undef);
2072
2073 unsigned len0 = glsl_get_vector_elements(v0->type->type);
2074 unsigned len1 = glsl_get_vector_elements(v1->type->type);
2075
2076 vtn_assert(len0 + len1 < 16);
2077
2078 unsigned bit_size = glsl_get_bit_size(val->type->type);
2079 unsigned bit_size0 = glsl_get_bit_size(v0->type->type);
2080 unsigned bit_size1 = glsl_get_bit_size(v1->type->type);
2081
2082 vtn_assert(bit_size == bit_size0 && bit_size == bit_size1);
2083 (void)bit_size0; (void)bit_size1;
2084
2085 nir_const_value undef = { .u64 = 0xdeadbeefdeadbeef };
2086 nir_const_value combined[NIR_MAX_VEC_COMPONENTS * 2];
2087
2088 if (v0->value_type == vtn_value_type_constant) {
2089 for (unsigned i = 0; i < len0; i++)
2090 combined[i] = v0->constant->values[i];
2091 }
2092 if (v1->value_type == vtn_value_type_constant) {
2093 for (unsigned i = 0; i < len1; i++)
2094 combined[len0 + i] = v1->constant->values[i];
2095 }
2096
2097 for (unsigned i = 0, j = 0; i < count - 6; i++, j++) {
2098 uint32_t comp = w[i + 6];
2099 if (comp == (uint32_t)-1) {
2100 /* If component is not used, set the value to a known constant
2101 * to detect if it is wrongly used.
2102 */
2103 val->constant->values[j] = undef;
2104 } else {
2105 vtn_fail_if(comp >= len0 + len1,
2106 "All Component literals must either be FFFFFFFF "
2107 "or in [0, N - 1] (inclusive).");
2108 val->constant->values[j] = combined[comp];
2109 }
2110 }
2111 break;
2112 }
2113
2114 case SpvOpCompositeExtract:
2115 case SpvOpCompositeInsert: {
2116 struct vtn_value *comp;
2117 unsigned deref_start;
2118 struct nir_constant **c;
2119 if (opcode == SpvOpCompositeExtract) {
2120 comp = vtn_value(b, w[4], vtn_value_type_constant);
2121 deref_start = 5;
2122 c = &comp->constant;
2123 } else {
2124 comp = vtn_value(b, w[5], vtn_value_type_constant);
2125 deref_start = 6;
2126 val->constant = nir_constant_clone(comp->constant,
2127 (nir_variable *)b);
2128 c = &val->constant;
2129 }
2130
2131 int elem = -1;
2132 const struct vtn_type *type = comp->type;
2133 for (unsigned i = deref_start; i < count; i++) {
2134 vtn_fail_if(w[i] > type->length,
2135 "%uth index of %s is %u but the type has only "
2136 "%u elements", i - deref_start,
2137 spirv_op_to_string(opcode), w[i], type->length);
2138
2139 switch (type->base_type) {
2140 case vtn_base_type_vector:
2141 elem = w[i];
2142 type = type->array_element;
2143 break;
2144
2145 case vtn_base_type_matrix:
2146 case vtn_base_type_array:
2147 c = &(*c)->elements[w[i]];
2148 type = type->array_element;
2149 break;
2150
2151 case vtn_base_type_struct:
2152 c = &(*c)->elements[w[i]];
2153 type = type->members[w[i]];
2154 break;
2155
2156 default:
2157 vtn_fail("%s must only index into composite types",
2158 spirv_op_to_string(opcode));
2159 }
2160 }
2161
2162 if (opcode == SpvOpCompositeExtract) {
2163 if (elem == -1) {
2164 val->constant = *c;
2165 } else {
2166 unsigned num_components = type->length;
2167 for (unsigned i = 0; i < num_components; i++)
2168 val->constant->values[i] = (*c)->values[elem + i];
2169 }
2170 } else {
2171 struct vtn_value *insert =
2172 vtn_value(b, w[4], vtn_value_type_constant);
2173 vtn_assert(insert->type == type);
2174 if (elem == -1) {
2175 *c = insert->constant;
2176 } else {
2177 unsigned num_components = type->length;
2178 for (unsigned i = 0; i < num_components; i++)
2179 (*c)->values[elem + i] = insert->constant->values[i];
2180 }
2181 }
2182 break;
2183 }
2184
2185 default: {
2186 bool swap;
2187 nir_alu_type dst_alu_type = nir_get_nir_type_for_glsl_type(val->type->type);
2188 nir_alu_type src_alu_type = dst_alu_type;
2189 unsigned num_components = glsl_get_vector_elements(val->type->type);
2190 unsigned bit_size;
2191
2192 vtn_assert(count <= 7);
2193
2194 switch (opcode) {
2195 case SpvOpSConvert:
2196 case SpvOpFConvert:
2197 case SpvOpUConvert:
2198 /* We have a source in a conversion */
2199 src_alu_type =
2200 nir_get_nir_type_for_glsl_type(vtn_get_value_type(b, w[4])->type);
2201 /* We use the bitsize of the conversion source to evaluate the opcode later */
2202 bit_size = glsl_get_bit_size(vtn_get_value_type(b, w[4])->type);
2203 break;
2204 default:
2205 bit_size = glsl_get_bit_size(val->type->type);
2206 };
2207
2208 bool exact;
2209 nir_op op = vtn_nir_alu_op_for_spirv_opcode(b, opcode, &swap, &exact,
2210 nir_alu_type_get_type_size(src_alu_type),
2211 nir_alu_type_get_type_size(dst_alu_type));
2212
2213 /* No SPIR-V opcodes handled through this path should set exact.
2214 * Since it is ignored, assert on it.
2215 */
2216 assert(!exact);
2217
2218 nir_const_value src[3][NIR_MAX_VEC_COMPONENTS];
2219
2220 for (unsigned i = 0; i < count - 4; i++) {
2221 struct vtn_value *src_val =
2222 vtn_value(b, w[4 + i], vtn_value_type_constant);
2223
2224 /* If this is an unsized source, pull the bit size from the
2225 * source; otherwise, we'll use the bit size from the destination.
2226 */
2227 if (!nir_alu_type_get_type_size(nir_op_infos[op].input_types[i]))
2228 bit_size = glsl_get_bit_size(src_val->type->type);
2229
2230 unsigned src_comps = nir_op_infos[op].input_sizes[i] ?
2231 nir_op_infos[op].input_sizes[i] :
2232 num_components;
2233
2234 unsigned j = swap ? 1 - i : i;
2235 for (unsigned c = 0; c < src_comps; c++)
2236 src[j][c] = src_val->constant->values[c];
2237 }
2238
2239 /* fix up fixed size sources */
2240 switch (op) {
2241 case nir_op_ishl:
2242 case nir_op_ishr:
2243 case nir_op_ushr: {
2244 if (bit_size == 32)
2245 break;
2246 for (unsigned i = 0; i < num_components; ++i) {
2247 switch (bit_size) {
2248 case 64: src[1][i].u32 = src[1][i].u64; break;
2249 case 16: src[1][i].u32 = src[1][i].u16; break;
2250 case 8: src[1][i].u32 = src[1][i].u8; break;
2251 }
2252 }
2253 break;
2254 }
2255 default:
2256 break;
2257 }
2258
2259 nir_const_value *srcs[3] = {
2260 src[0], src[1], src[2],
2261 };
2262 nir_eval_const_opcode(op, val->constant->values,
2263 num_components, bit_size, srcs,
2264 b->shader->info.float_controls_execution_mode);
2265 break;
2266 } /* default */
2267 }
2268 break;
2269 }
2270
2271 case SpvOpConstantNull:
2272 val->constant = vtn_null_constant(b, val->type);
2273 val->is_null_constant = true;
2274 break;
2275
2276 default:
2277 vtn_fail_with_opcode("Unhandled opcode", opcode);
2278 }
2279
2280 /* Now that we have the value, update the workgroup size if needed */
2281 if (gl_shader_stage_uses_workgroup(b->entry_point_stage))
2282 vtn_foreach_decoration(b, val, handle_workgroup_size_decoration_cb,
2283 NULL);
2284 }
2285
2286 static void
vtn_split_barrier_semantics(struct vtn_builder * b,SpvMemorySemanticsMask semantics,SpvMemorySemanticsMask * before,SpvMemorySemanticsMask * after)2287 vtn_split_barrier_semantics(struct vtn_builder *b,
2288 SpvMemorySemanticsMask semantics,
2289 SpvMemorySemanticsMask *before,
2290 SpvMemorySemanticsMask *after)
2291 {
2292 /* For memory semantics embedded in operations, we split them into up to
2293 * two barriers, to be added before and after the operation. This is less
2294 * strict than if we propagated until the final backend stage, but still
2295 * result in correct execution.
2296 *
2297 * A further improvement could be pipe this information (and use!) into the
2298 * next compiler layers, at the expense of making the handling of barriers
2299 * more complicated.
2300 */
2301
2302 *before = SpvMemorySemanticsMaskNone;
2303 *after = SpvMemorySemanticsMaskNone;
2304
2305 SpvMemorySemanticsMask order_semantics =
2306 semantics & (SpvMemorySemanticsAcquireMask |
2307 SpvMemorySemanticsReleaseMask |
2308 SpvMemorySemanticsAcquireReleaseMask |
2309 SpvMemorySemanticsSequentiallyConsistentMask);
2310
2311 if (util_bitcount(order_semantics) > 1) {
2312 /* Old GLSLang versions incorrectly set all the ordering bits. This was
2313 * fixed in c51287d744fb6e7e9ccc09f6f8451e6c64b1dad6 of glslang repo,
2314 * and it is in GLSLang since revision "SPIRV99.1321" (from Jul-2016).
2315 */
2316 vtn_warn("Multiple memory ordering semantics specified, "
2317 "assuming AcquireRelease.");
2318 order_semantics = SpvMemorySemanticsAcquireReleaseMask;
2319 }
2320
2321 const SpvMemorySemanticsMask av_vis_semantics =
2322 semantics & (SpvMemorySemanticsMakeAvailableMask |
2323 SpvMemorySemanticsMakeVisibleMask);
2324
2325 const SpvMemorySemanticsMask storage_semantics =
2326 semantics & (SpvMemorySemanticsUniformMemoryMask |
2327 SpvMemorySemanticsSubgroupMemoryMask |
2328 SpvMemorySemanticsWorkgroupMemoryMask |
2329 SpvMemorySemanticsCrossWorkgroupMemoryMask |
2330 SpvMemorySemanticsAtomicCounterMemoryMask |
2331 SpvMemorySemanticsImageMemoryMask |
2332 SpvMemorySemanticsOutputMemoryMask);
2333
2334 const SpvMemorySemanticsMask other_semantics =
2335 semantics & ~(order_semantics | av_vis_semantics | storage_semantics |
2336 SpvMemorySemanticsVolatileMask);
2337
2338 if (other_semantics)
2339 vtn_warn("Ignoring unhandled memory semantics: %u\n", other_semantics);
2340
2341 /* SequentiallyConsistent is treated as AcquireRelease. */
2342
2343 /* The RELEASE barrier happens BEFORE the operation, and it is usually
2344 * associated with a Store. All the write operations with a matching
2345 * semantics will not be reordered after the Store.
2346 */
2347 if (order_semantics & (SpvMemorySemanticsReleaseMask |
2348 SpvMemorySemanticsAcquireReleaseMask |
2349 SpvMemorySemanticsSequentiallyConsistentMask)) {
2350 *before |= SpvMemorySemanticsReleaseMask | storage_semantics;
2351 }
2352
2353 /* The ACQUIRE barrier happens AFTER the operation, and it is usually
2354 * associated with a Load. All the operations with a matching semantics
2355 * will not be reordered before the Load.
2356 */
2357 if (order_semantics & (SpvMemorySemanticsAcquireMask |
2358 SpvMemorySemanticsAcquireReleaseMask |
2359 SpvMemorySemanticsSequentiallyConsistentMask)) {
2360 *after |= SpvMemorySemanticsAcquireMask | storage_semantics;
2361 }
2362
2363 if (av_vis_semantics & SpvMemorySemanticsMakeVisibleMask)
2364 *before |= SpvMemorySemanticsMakeVisibleMask | storage_semantics;
2365
2366 if (av_vis_semantics & SpvMemorySemanticsMakeAvailableMask)
2367 *after |= SpvMemorySemanticsMakeAvailableMask | storage_semantics;
2368 }
2369
2370 static nir_memory_semantics
vtn_mem_semantics_to_nir_mem_semantics(struct vtn_builder * b,SpvMemorySemanticsMask semantics)2371 vtn_mem_semantics_to_nir_mem_semantics(struct vtn_builder *b,
2372 SpvMemorySemanticsMask semantics)
2373 {
2374 nir_memory_semantics nir_semantics = 0;
2375
2376 SpvMemorySemanticsMask order_semantics =
2377 semantics & (SpvMemorySemanticsAcquireMask |
2378 SpvMemorySemanticsReleaseMask |
2379 SpvMemorySemanticsAcquireReleaseMask |
2380 SpvMemorySemanticsSequentiallyConsistentMask);
2381
2382 if (util_bitcount(order_semantics) > 1) {
2383 /* Old GLSLang versions incorrectly set all the ordering bits. This was
2384 * fixed in c51287d744fb6e7e9ccc09f6f8451e6c64b1dad6 of glslang repo,
2385 * and it is in GLSLang since revision "SPIRV99.1321" (from Jul-2016).
2386 */
2387 vtn_warn("Multiple memory ordering semantics bits specified, "
2388 "assuming AcquireRelease.");
2389 order_semantics = SpvMemorySemanticsAcquireReleaseMask;
2390 }
2391
2392 switch (order_semantics) {
2393 case 0:
2394 /* Not an ordering barrier. */
2395 break;
2396
2397 case SpvMemorySemanticsAcquireMask:
2398 nir_semantics = NIR_MEMORY_ACQUIRE;
2399 break;
2400
2401 case SpvMemorySemanticsReleaseMask:
2402 nir_semantics = NIR_MEMORY_RELEASE;
2403 break;
2404
2405 case SpvMemorySemanticsSequentiallyConsistentMask:
2406 FALLTHROUGH; /* Treated as AcquireRelease in Vulkan. */
2407 case SpvMemorySemanticsAcquireReleaseMask:
2408 nir_semantics = NIR_MEMORY_ACQUIRE | NIR_MEMORY_RELEASE;
2409 break;
2410
2411 default:
2412 unreachable("Invalid memory order semantics");
2413 }
2414
2415 if (semantics & SpvMemorySemanticsMakeAvailableMask) {
2416 vtn_fail_if(!b->options->caps.vk_memory_model,
2417 "To use MakeAvailable memory semantics the VulkanMemoryModel "
2418 "capability must be declared.");
2419 nir_semantics |= NIR_MEMORY_MAKE_AVAILABLE;
2420 }
2421
2422 if (semantics & SpvMemorySemanticsMakeVisibleMask) {
2423 vtn_fail_if(!b->options->caps.vk_memory_model,
2424 "To use MakeVisible memory semantics the VulkanMemoryModel "
2425 "capability must be declared.");
2426 nir_semantics |= NIR_MEMORY_MAKE_VISIBLE;
2427 }
2428
2429 return nir_semantics;
2430 }
2431
2432 static nir_variable_mode
vtn_mem_semantics_to_nir_var_modes(struct vtn_builder * b,SpvMemorySemanticsMask semantics)2433 vtn_mem_semantics_to_nir_var_modes(struct vtn_builder *b,
2434 SpvMemorySemanticsMask semantics)
2435 {
2436 /* Vulkan Environment for SPIR-V says "SubgroupMemory, CrossWorkgroupMemory,
2437 * and AtomicCounterMemory are ignored".
2438 */
2439 if (b->options->environment == NIR_SPIRV_VULKAN) {
2440 semantics &= ~(SpvMemorySemanticsSubgroupMemoryMask |
2441 SpvMemorySemanticsCrossWorkgroupMemoryMask |
2442 SpvMemorySemanticsAtomicCounterMemoryMask);
2443 }
2444
2445 nir_variable_mode modes = 0;
2446 if (semantics & SpvMemorySemanticsUniformMemoryMask) {
2447 modes |= nir_var_uniform |
2448 nir_var_mem_ubo |
2449 nir_var_mem_ssbo |
2450 nir_var_mem_global;
2451 }
2452 if (semantics & SpvMemorySemanticsImageMemoryMask)
2453 modes |= nir_var_image;
2454 if (semantics & SpvMemorySemanticsWorkgroupMemoryMask)
2455 modes |= nir_var_mem_shared;
2456 if (semantics & SpvMemorySemanticsCrossWorkgroupMemoryMask)
2457 modes |= nir_var_mem_global;
2458 if (semantics & SpvMemorySemanticsOutputMemoryMask) {
2459 modes |= nir_var_shader_out;
2460
2461 if (b->shader->info.stage == MESA_SHADER_TASK)
2462 modes |= nir_var_mem_task_payload;
2463 }
2464
2465 return modes;
2466 }
2467
2468 static nir_scope
vtn_scope_to_nir_scope(struct vtn_builder * b,SpvScope scope)2469 vtn_scope_to_nir_scope(struct vtn_builder *b, SpvScope scope)
2470 {
2471 nir_scope nir_scope;
2472 switch (scope) {
2473 case SpvScopeDevice:
2474 vtn_fail_if(b->options->caps.vk_memory_model &&
2475 !b->options->caps.vk_memory_model_device_scope,
2476 "If the Vulkan memory model is declared and any instruction "
2477 "uses Device scope, the VulkanMemoryModelDeviceScope "
2478 "capability must be declared.");
2479 nir_scope = NIR_SCOPE_DEVICE;
2480 break;
2481
2482 case SpvScopeQueueFamily:
2483 vtn_fail_if(!b->options->caps.vk_memory_model,
2484 "To use Queue Family scope, the VulkanMemoryModel capability "
2485 "must be declared.");
2486 nir_scope = NIR_SCOPE_QUEUE_FAMILY;
2487 break;
2488
2489 case SpvScopeWorkgroup:
2490 nir_scope = NIR_SCOPE_WORKGROUP;
2491 break;
2492
2493 case SpvScopeSubgroup:
2494 nir_scope = NIR_SCOPE_SUBGROUP;
2495 break;
2496
2497 case SpvScopeInvocation:
2498 nir_scope = NIR_SCOPE_INVOCATION;
2499 break;
2500
2501 case SpvScopeShaderCallKHR:
2502 nir_scope = NIR_SCOPE_SHADER_CALL;
2503 break;
2504
2505 default:
2506 vtn_fail("Invalid memory scope");
2507 }
2508
2509 return nir_scope;
2510 }
2511
2512 static void
vtn_emit_scoped_control_barrier(struct vtn_builder * b,SpvScope exec_scope,SpvScope mem_scope,SpvMemorySemanticsMask semantics)2513 vtn_emit_scoped_control_barrier(struct vtn_builder *b, SpvScope exec_scope,
2514 SpvScope mem_scope,
2515 SpvMemorySemanticsMask semantics)
2516 {
2517 nir_memory_semantics nir_semantics =
2518 vtn_mem_semantics_to_nir_mem_semantics(b, semantics);
2519 nir_variable_mode modes = vtn_mem_semantics_to_nir_var_modes(b, semantics);
2520 nir_scope nir_exec_scope = vtn_scope_to_nir_scope(b, exec_scope);
2521
2522 /* Memory semantics is optional for OpControlBarrier. */
2523 nir_scope nir_mem_scope;
2524 if (nir_semantics == 0 || modes == 0)
2525 nir_mem_scope = NIR_SCOPE_NONE;
2526 else
2527 nir_mem_scope = vtn_scope_to_nir_scope(b, mem_scope);
2528
2529 nir_scoped_barrier(&b->nb, .execution_scope=nir_exec_scope, .memory_scope=nir_mem_scope,
2530 .memory_semantics=nir_semantics, .memory_modes=modes);
2531 }
2532
2533 static void
vtn_emit_scoped_memory_barrier(struct vtn_builder * b,SpvScope scope,SpvMemorySemanticsMask semantics)2534 vtn_emit_scoped_memory_barrier(struct vtn_builder *b, SpvScope scope,
2535 SpvMemorySemanticsMask semantics)
2536 {
2537 nir_variable_mode modes = vtn_mem_semantics_to_nir_var_modes(b, semantics);
2538 nir_memory_semantics nir_semantics =
2539 vtn_mem_semantics_to_nir_mem_semantics(b, semantics);
2540
2541 /* No barrier to add. */
2542 if (nir_semantics == 0 || modes == 0)
2543 return;
2544
2545 nir_scoped_barrier(&b->nb, .memory_scope=vtn_scope_to_nir_scope(b, scope),
2546 .memory_semantics=nir_semantics,
2547 .memory_modes=modes);
2548 }
2549
2550 struct vtn_ssa_value *
vtn_create_ssa_value(struct vtn_builder * b,const struct glsl_type * type)2551 vtn_create_ssa_value(struct vtn_builder *b, const struct glsl_type *type)
2552 {
2553 /* Always use bare types for SSA values for a couple of reasons:
2554 *
2555 * 1. Code which emits deref chains should never listen to the explicit
2556 * layout information on the SSA value if any exists. If we've
2557 * accidentally been relying on this, we want to find those bugs.
2558 *
2559 * 2. We want to be able to quickly check that an SSA value being assigned
2560 * to a SPIR-V value has the right type. Using bare types everywhere
2561 * ensures that we can pointer-compare.
2562 */
2563 struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value);
2564 val->type = glsl_get_bare_type(type);
2565
2566
2567 if (!glsl_type_is_vector_or_scalar(type)) {
2568 unsigned elems = glsl_get_length(val->type);
2569 val->elems = ralloc_array(b, struct vtn_ssa_value *, elems);
2570 if (glsl_type_is_array_or_matrix(type)) {
2571 const struct glsl_type *elem_type = glsl_get_array_element(type);
2572 for (unsigned i = 0; i < elems; i++)
2573 val->elems[i] = vtn_create_ssa_value(b, elem_type);
2574 } else {
2575 vtn_assert(glsl_type_is_struct_or_ifc(type));
2576 for (unsigned i = 0; i < elems; i++) {
2577 const struct glsl_type *elem_type = glsl_get_struct_field(type, i);
2578 val->elems[i] = vtn_create_ssa_value(b, elem_type);
2579 }
2580 }
2581 }
2582
2583 return val;
2584 }
2585
2586 static nir_tex_src
vtn_tex_src(struct vtn_builder * b,unsigned index,nir_tex_src_type type)2587 vtn_tex_src(struct vtn_builder *b, unsigned index, nir_tex_src_type type)
2588 {
2589 nir_tex_src src;
2590 src.src = nir_src_for_ssa(vtn_get_nir_ssa(b, index));
2591 src.src_type = type;
2592 return src;
2593 }
2594
2595 static uint32_t
image_operand_arg(struct vtn_builder * b,const uint32_t * w,uint32_t count,uint32_t mask_idx,SpvImageOperandsMask op)2596 image_operand_arg(struct vtn_builder *b, const uint32_t *w, uint32_t count,
2597 uint32_t mask_idx, SpvImageOperandsMask op)
2598 {
2599 static const SpvImageOperandsMask ops_with_arg =
2600 SpvImageOperandsBiasMask |
2601 SpvImageOperandsLodMask |
2602 SpvImageOperandsGradMask |
2603 SpvImageOperandsConstOffsetMask |
2604 SpvImageOperandsOffsetMask |
2605 SpvImageOperandsConstOffsetsMask |
2606 SpvImageOperandsSampleMask |
2607 SpvImageOperandsMinLodMask |
2608 SpvImageOperandsMakeTexelAvailableMask |
2609 SpvImageOperandsMakeTexelVisibleMask;
2610
2611 assert(util_bitcount(op) == 1);
2612 assert(w[mask_idx] & op);
2613 assert(op & ops_with_arg);
2614
2615 uint32_t idx = util_bitcount(w[mask_idx] & (op - 1) & ops_with_arg) + 1;
2616
2617 /* Adjust indices for operands with two arguments. */
2618 static const SpvImageOperandsMask ops_with_two_args =
2619 SpvImageOperandsGradMask;
2620 idx += util_bitcount(w[mask_idx] & (op - 1) & ops_with_two_args);
2621
2622 idx += mask_idx;
2623
2624 vtn_fail_if(idx + (op & ops_with_two_args ? 1 : 0) >= count,
2625 "Image op claims to have %s but does not enough "
2626 "following operands", spirv_imageoperands_to_string(op));
2627
2628 return idx;
2629 }
2630
2631 static void
non_uniform_decoration_cb(struct vtn_builder * b,struct vtn_value * val,int member,const struct vtn_decoration * dec,void * void_ctx)2632 non_uniform_decoration_cb(struct vtn_builder *b,
2633 struct vtn_value *val, int member,
2634 const struct vtn_decoration *dec, void *void_ctx)
2635 {
2636 enum gl_access_qualifier *access = void_ctx;
2637 switch (dec->decoration) {
2638 case SpvDecorationNonUniformEXT:
2639 *access |= ACCESS_NON_UNIFORM;
2640 break;
2641
2642 default:
2643 break;
2644 }
2645 }
2646
2647 /* Apply SignExtend/ZeroExtend operands to get the actual result type for
2648 * image read/sample operations and source type for write operations.
2649 */
2650 static nir_alu_type
get_image_type(struct vtn_builder * b,nir_alu_type type,unsigned operands)2651 get_image_type(struct vtn_builder *b, nir_alu_type type, unsigned operands)
2652 {
2653 unsigned extend_operands =
2654 operands & (SpvImageOperandsSignExtendMask | SpvImageOperandsZeroExtendMask);
2655 vtn_fail_if(nir_alu_type_get_base_type(type) == nir_type_float && extend_operands,
2656 "SignExtend/ZeroExtend used on floating-point texel type");
2657 vtn_fail_if(extend_operands ==
2658 (SpvImageOperandsSignExtendMask | SpvImageOperandsZeroExtendMask),
2659 "SignExtend and ZeroExtend both specified");
2660
2661 if (operands & SpvImageOperandsSignExtendMask)
2662 return nir_type_int | nir_alu_type_get_type_size(type);
2663 if (operands & SpvImageOperandsZeroExtendMask)
2664 return nir_type_uint | nir_alu_type_get_type_size(type);
2665
2666 return type;
2667 }
2668
2669 static void
vtn_handle_texture(struct vtn_builder * b,SpvOp opcode,const uint32_t * w,unsigned count)2670 vtn_handle_texture(struct vtn_builder *b, SpvOp opcode,
2671 const uint32_t *w, unsigned count)
2672 {
2673 if (opcode == SpvOpSampledImage) {
2674 struct vtn_sampled_image si = {
2675 .image = vtn_get_image(b, w[3], NULL),
2676 .sampler = vtn_get_sampler(b, w[4]),
2677 };
2678
2679 enum gl_access_qualifier access = 0;
2680 vtn_foreach_decoration(b, vtn_untyped_value(b, w[3]),
2681 non_uniform_decoration_cb, &access);
2682 vtn_foreach_decoration(b, vtn_untyped_value(b, w[4]),
2683 non_uniform_decoration_cb, &access);
2684
2685 vtn_push_sampled_image(b, w[2], si, access & ACCESS_NON_UNIFORM);
2686 return;
2687 } else if (opcode == SpvOpImage) {
2688 struct vtn_sampled_image si = vtn_get_sampled_image(b, w[3]);
2689
2690 enum gl_access_qualifier access = 0;
2691 vtn_foreach_decoration(b, vtn_untyped_value(b, w[3]),
2692 non_uniform_decoration_cb, &access);
2693
2694 vtn_push_image(b, w[2], si.image, access & ACCESS_NON_UNIFORM);
2695 return;
2696 } else if (opcode == SpvOpImageSparseTexelsResident) {
2697 nir_ssa_def *code = vtn_get_nir_ssa(b, w[3]);
2698 vtn_push_nir_ssa(b, w[2], nir_is_sparse_texels_resident(&b->nb, code));
2699 return;
2700 }
2701
2702 nir_deref_instr *image = NULL, *sampler = NULL;
2703 struct vtn_value *sampled_val = vtn_untyped_value(b, w[3]);
2704 if (sampled_val->type->base_type == vtn_base_type_sampled_image) {
2705 struct vtn_sampled_image si = vtn_get_sampled_image(b, w[3]);
2706 image = si.image;
2707 sampler = si.sampler;
2708 } else {
2709 image = vtn_get_image(b, w[3], NULL);
2710 }
2711
2712 const enum glsl_sampler_dim sampler_dim = glsl_get_sampler_dim(image->type);
2713 const bool is_array = glsl_sampler_type_is_array(image->type);
2714 nir_alu_type dest_type = nir_type_invalid;
2715
2716 /* Figure out the base texture operation */
2717 nir_texop texop;
2718 switch (opcode) {
2719 case SpvOpImageSampleImplicitLod:
2720 case SpvOpImageSparseSampleImplicitLod:
2721 case SpvOpImageSampleDrefImplicitLod:
2722 case SpvOpImageSparseSampleDrefImplicitLod:
2723 case SpvOpImageSampleProjImplicitLod:
2724 case SpvOpImageSampleProjDrefImplicitLod:
2725 texop = nir_texop_tex;
2726 break;
2727
2728 case SpvOpImageSampleExplicitLod:
2729 case SpvOpImageSparseSampleExplicitLod:
2730 case SpvOpImageSampleDrefExplicitLod:
2731 case SpvOpImageSparseSampleDrefExplicitLod:
2732 case SpvOpImageSampleProjExplicitLod:
2733 case SpvOpImageSampleProjDrefExplicitLod:
2734 texop = nir_texop_txl;
2735 break;
2736
2737 case SpvOpImageFetch:
2738 case SpvOpImageSparseFetch:
2739 if (sampler_dim == GLSL_SAMPLER_DIM_MS) {
2740 texop = nir_texop_txf_ms;
2741 } else {
2742 texop = nir_texop_txf;
2743 }
2744 break;
2745
2746 case SpvOpImageGather:
2747 case SpvOpImageSparseGather:
2748 case SpvOpImageDrefGather:
2749 case SpvOpImageSparseDrefGather:
2750 texop = nir_texop_tg4;
2751 break;
2752
2753 case SpvOpImageQuerySizeLod:
2754 case SpvOpImageQuerySize:
2755 texop = nir_texop_txs;
2756 dest_type = nir_type_int32;
2757 break;
2758
2759 case SpvOpImageQueryLod:
2760 texop = nir_texop_lod;
2761 dest_type = nir_type_float32;
2762 break;
2763
2764 case SpvOpImageQueryLevels:
2765 texop = nir_texop_query_levels;
2766 dest_type = nir_type_int32;
2767 break;
2768
2769 case SpvOpImageQuerySamples:
2770 texop = nir_texop_texture_samples;
2771 dest_type = nir_type_int32;
2772 break;
2773
2774 case SpvOpFragmentFetchAMD:
2775 texop = nir_texop_fragment_fetch_amd;
2776 break;
2777
2778 case SpvOpFragmentMaskFetchAMD:
2779 texop = nir_texop_fragment_mask_fetch_amd;
2780 dest_type = nir_type_uint32;
2781 break;
2782
2783 default:
2784 vtn_fail_with_opcode("Unhandled opcode", opcode);
2785 }
2786
2787 nir_tex_src srcs[10]; /* 10 should be enough */
2788 nir_tex_src *p = srcs;
2789
2790 p->src = nir_src_for_ssa(&image->dest.ssa);
2791 p->src_type = nir_tex_src_texture_deref;
2792 p++;
2793
2794 switch (texop) {
2795 case nir_texop_tex:
2796 case nir_texop_txb:
2797 case nir_texop_txl:
2798 case nir_texop_txd:
2799 case nir_texop_tg4:
2800 case nir_texop_lod:
2801 vtn_fail_if(sampler == NULL,
2802 "%s requires an image of type OpTypeSampledImage",
2803 spirv_op_to_string(opcode));
2804 p->src = nir_src_for_ssa(&sampler->dest.ssa);
2805 p->src_type = nir_tex_src_sampler_deref;
2806 p++;
2807 break;
2808 case nir_texop_txf:
2809 case nir_texop_txf_ms:
2810 case nir_texop_txs:
2811 case nir_texop_query_levels:
2812 case nir_texop_texture_samples:
2813 case nir_texop_samples_identical:
2814 case nir_texop_fragment_fetch_amd:
2815 case nir_texop_fragment_mask_fetch_amd:
2816 /* These don't */
2817 break;
2818 case nir_texop_txf_ms_fb:
2819 vtn_fail("unexpected nir_texop_txf_ms_fb");
2820 break;
2821 case nir_texop_txf_ms_mcs_intel:
2822 vtn_fail("unexpected nir_texop_txf_ms_mcs");
2823 case nir_texop_tex_prefetch:
2824 vtn_fail("unexpected nir_texop_tex_prefetch");
2825 }
2826
2827 unsigned idx = 4;
2828
2829 struct nir_ssa_def *coord;
2830 unsigned coord_components;
2831 switch (opcode) {
2832 case SpvOpImageSampleImplicitLod:
2833 case SpvOpImageSparseSampleImplicitLod:
2834 case SpvOpImageSampleExplicitLod:
2835 case SpvOpImageSparseSampleExplicitLod:
2836 case SpvOpImageSampleDrefImplicitLod:
2837 case SpvOpImageSparseSampleDrefImplicitLod:
2838 case SpvOpImageSampleDrefExplicitLod:
2839 case SpvOpImageSparseSampleDrefExplicitLod:
2840 case SpvOpImageSampleProjImplicitLod:
2841 case SpvOpImageSampleProjExplicitLod:
2842 case SpvOpImageSampleProjDrefImplicitLod:
2843 case SpvOpImageSampleProjDrefExplicitLod:
2844 case SpvOpImageFetch:
2845 case SpvOpImageSparseFetch:
2846 case SpvOpImageGather:
2847 case SpvOpImageSparseGather:
2848 case SpvOpImageDrefGather:
2849 case SpvOpImageSparseDrefGather:
2850 case SpvOpImageQueryLod:
2851 case SpvOpFragmentFetchAMD:
2852 case SpvOpFragmentMaskFetchAMD: {
2853 /* All these types have the coordinate as their first real argument */
2854 coord_components = glsl_get_sampler_dim_coordinate_components(sampler_dim);
2855
2856 if (is_array && texop != nir_texop_lod)
2857 coord_components++;
2858
2859 struct vtn_ssa_value *coord_val = vtn_ssa_value(b, w[idx++]);
2860 coord = coord_val->def;
2861 /* From the SPIR-V spec verxion 1.5, rev. 5:
2862 *
2863 * "Coordinate must be a scalar or vector of floating-point type. It
2864 * contains (u[, v] ... [, array layer]) as needed by the definition
2865 * of Sampled Image. It may be a vector larger than needed, but all
2866 * unused components appear after all used components."
2867 */
2868 vtn_fail_if(coord->num_components < coord_components,
2869 "Coordinate value passed has fewer components than sampler dimensionality.");
2870 p->src = nir_src_for_ssa(nir_channels(&b->nb, coord,
2871 (1 << coord_components) - 1));
2872
2873 /* OpenCL allows integer sampling coordinates */
2874 if (glsl_type_is_integer(coord_val->type) &&
2875 opcode == SpvOpImageSampleExplicitLod) {
2876 vtn_fail_if(b->shader->info.stage != MESA_SHADER_KERNEL,
2877 "Unless the Kernel capability is being used, the coordinate parameter "
2878 "OpImageSampleExplicitLod must be floating point.");
2879
2880 nir_ssa_def *coords[4];
2881 nir_ssa_def *f0_5 = nir_imm_float(&b->nb, 0.5);
2882 for (unsigned i = 0; i < coord_components; i++) {
2883 coords[i] = nir_i2f32(&b->nb, nir_channel(&b->nb, p->src.ssa, i));
2884
2885 if (!is_array || i != coord_components - 1)
2886 coords[i] = nir_fadd(&b->nb, coords[i], f0_5);
2887 }
2888
2889 p->src = nir_src_for_ssa(nir_vec(&b->nb, coords, coord_components));
2890 }
2891
2892 p->src_type = nir_tex_src_coord;
2893 p++;
2894 break;
2895 }
2896
2897 default:
2898 coord = NULL;
2899 coord_components = 0;
2900 break;
2901 }
2902
2903 switch (opcode) {
2904 case SpvOpImageSampleProjImplicitLod:
2905 case SpvOpImageSampleProjExplicitLod:
2906 case SpvOpImageSampleProjDrefImplicitLod:
2907 case SpvOpImageSampleProjDrefExplicitLod:
2908 /* These have the projector as the last coordinate component */
2909 p->src = nir_src_for_ssa(nir_channel(&b->nb, coord, coord_components));
2910 p->src_type = nir_tex_src_projector;
2911 p++;
2912 break;
2913
2914 default:
2915 break;
2916 }
2917
2918 bool is_shadow = false;
2919 unsigned gather_component = 0;
2920 switch (opcode) {
2921 case SpvOpImageSampleDrefImplicitLod:
2922 case SpvOpImageSparseSampleDrefImplicitLod:
2923 case SpvOpImageSampleDrefExplicitLod:
2924 case SpvOpImageSparseSampleDrefExplicitLod:
2925 case SpvOpImageSampleProjDrefImplicitLod:
2926 case SpvOpImageSampleProjDrefExplicitLod:
2927 case SpvOpImageDrefGather:
2928 case SpvOpImageSparseDrefGather:
2929 /* These all have an explicit depth value as their next source */
2930 is_shadow = true;
2931 (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_comparator);
2932 break;
2933
2934 case SpvOpImageGather:
2935 case SpvOpImageSparseGather:
2936 /* This has a component as its next source */
2937 gather_component = vtn_constant_uint(b, w[idx++]);
2938 break;
2939
2940 default:
2941 break;
2942 }
2943
2944 bool is_sparse = false;
2945 switch (opcode) {
2946 case SpvOpImageSparseSampleImplicitLod:
2947 case SpvOpImageSparseSampleExplicitLod:
2948 case SpvOpImageSparseSampleDrefImplicitLod:
2949 case SpvOpImageSparseSampleDrefExplicitLod:
2950 case SpvOpImageSparseFetch:
2951 case SpvOpImageSparseGather:
2952 case SpvOpImageSparseDrefGather:
2953 is_sparse = true;
2954 break;
2955 default:
2956 break;
2957 }
2958
2959 /* For OpImageQuerySizeLod, we always have an LOD */
2960 if (opcode == SpvOpImageQuerySizeLod)
2961 (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_lod);
2962
2963 /* For OpFragmentFetchAMD, we always have a multisample index */
2964 if (opcode == SpvOpFragmentFetchAMD)
2965 (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ms_index);
2966
2967 /* Now we need to handle some number of optional arguments */
2968 struct vtn_value *gather_offsets = NULL;
2969 uint32_t operands = SpvImageOperandsMaskNone;
2970 if (idx < count) {
2971 operands = w[idx];
2972
2973 if (operands & SpvImageOperandsBiasMask) {
2974 vtn_assert(texop == nir_texop_tex ||
2975 texop == nir_texop_tg4);
2976 if (texop == nir_texop_tex)
2977 texop = nir_texop_txb;
2978 uint32_t arg = image_operand_arg(b, w, count, idx,
2979 SpvImageOperandsBiasMask);
2980 (*p++) = vtn_tex_src(b, w[arg], nir_tex_src_bias);
2981 }
2982
2983 if (operands & SpvImageOperandsLodMask) {
2984 vtn_assert(texop == nir_texop_txl || texop == nir_texop_txf ||
2985 texop == nir_texop_txs || texop == nir_texop_tg4);
2986 uint32_t arg = image_operand_arg(b, w, count, idx,
2987 SpvImageOperandsLodMask);
2988 (*p++) = vtn_tex_src(b, w[arg], nir_tex_src_lod);
2989 }
2990
2991 if (operands & SpvImageOperandsGradMask) {
2992 vtn_assert(texop == nir_texop_txl);
2993 texop = nir_texop_txd;
2994 uint32_t arg = image_operand_arg(b, w, count, idx,
2995 SpvImageOperandsGradMask);
2996 (*p++) = vtn_tex_src(b, w[arg], nir_tex_src_ddx);
2997 (*p++) = vtn_tex_src(b, w[arg + 1], nir_tex_src_ddy);
2998 }
2999
3000 vtn_fail_if(util_bitcount(operands & (SpvImageOperandsConstOffsetsMask |
3001 SpvImageOperandsOffsetMask |
3002 SpvImageOperandsConstOffsetMask)) > 1,
3003 "At most one of the ConstOffset, Offset, and ConstOffsets "
3004 "image operands can be used on a given instruction.");
3005
3006 if (operands & SpvImageOperandsOffsetMask) {
3007 uint32_t arg = image_operand_arg(b, w, count, idx,
3008 SpvImageOperandsOffsetMask);
3009 (*p++) = vtn_tex_src(b, w[arg], nir_tex_src_offset);
3010 }
3011
3012 if (operands & SpvImageOperandsConstOffsetMask) {
3013 uint32_t arg = image_operand_arg(b, w, count, idx,
3014 SpvImageOperandsConstOffsetMask);
3015 (*p++) = vtn_tex_src(b, w[arg], nir_tex_src_offset);
3016 }
3017
3018 if (operands & SpvImageOperandsConstOffsetsMask) {
3019 vtn_assert(texop == nir_texop_tg4);
3020 uint32_t arg = image_operand_arg(b, w, count, idx,
3021 SpvImageOperandsConstOffsetsMask);
3022 gather_offsets = vtn_value(b, w[arg], vtn_value_type_constant);
3023 }
3024
3025 if (operands & SpvImageOperandsSampleMask) {
3026 vtn_assert(texop == nir_texop_txf_ms);
3027 uint32_t arg = image_operand_arg(b, w, count, idx,
3028 SpvImageOperandsSampleMask);
3029 texop = nir_texop_txf_ms;
3030 (*p++) = vtn_tex_src(b, w[arg], nir_tex_src_ms_index);
3031 }
3032
3033 if (operands & SpvImageOperandsMinLodMask) {
3034 vtn_assert(texop == nir_texop_tex ||
3035 texop == nir_texop_txb ||
3036 texop == nir_texop_txd);
3037 uint32_t arg = image_operand_arg(b, w, count, idx,
3038 SpvImageOperandsMinLodMask);
3039 (*p++) = vtn_tex_src(b, w[arg], nir_tex_src_min_lod);
3040 }
3041 }
3042
3043 struct vtn_type *ret_type = vtn_get_type(b, w[1]);
3044 struct vtn_type *struct_type = NULL;
3045 if (is_sparse) {
3046 vtn_assert(glsl_type_is_struct_or_ifc(ret_type->type));
3047 struct_type = ret_type;
3048 ret_type = struct_type->members[1];
3049 }
3050
3051 nir_tex_instr *instr = nir_tex_instr_create(b->shader, p - srcs);
3052 instr->op = texop;
3053
3054 memcpy(instr->src, srcs, instr->num_srcs * sizeof(*instr->src));
3055
3056 instr->coord_components = coord_components;
3057 instr->sampler_dim = sampler_dim;
3058 instr->is_array = is_array;
3059 instr->is_shadow = is_shadow;
3060 instr->is_sparse = is_sparse;
3061 instr->is_new_style_shadow =
3062 is_shadow && glsl_get_components(ret_type->type) == 1;
3063 instr->component = gather_component;
3064
3065 /* The Vulkan spec says:
3066 *
3067 * "If an instruction loads from or stores to a resource (including
3068 * atomics and image instructions) and the resource descriptor being
3069 * accessed is not dynamically uniform, then the operand corresponding
3070 * to that resource (e.g. the pointer or sampled image operand) must be
3071 * decorated with NonUniform."
3072 *
3073 * It's very careful to specify that the exact operand must be decorated
3074 * NonUniform. The SPIR-V parser is not expected to chase through long
3075 * chains to find the NonUniform decoration. It's either right there or we
3076 * can assume it doesn't exist.
3077 */
3078 enum gl_access_qualifier access = 0;
3079 vtn_foreach_decoration(b, sampled_val, non_uniform_decoration_cb, &access);
3080
3081 if (operands & SpvImageOperandsNontemporalMask)
3082 access |= ACCESS_STREAM_CACHE_POLICY;
3083
3084 if (sampled_val->propagated_non_uniform)
3085 access |= ACCESS_NON_UNIFORM;
3086
3087 if (image && (access & ACCESS_NON_UNIFORM))
3088 instr->texture_non_uniform = true;
3089
3090 if (sampler && (access & ACCESS_NON_UNIFORM))
3091 instr->sampler_non_uniform = true;
3092
3093 /* for non-query ops, get dest_type from SPIR-V return type */
3094 if (dest_type == nir_type_invalid) {
3095 /* the return type should match the image type, unless the image type is
3096 * VOID (CL image), in which case the return type dictates the sampler
3097 */
3098 enum glsl_base_type sampler_base =
3099 glsl_get_sampler_result_type(image->type);
3100 enum glsl_base_type ret_base = glsl_get_base_type(ret_type->type);
3101 vtn_fail_if(sampler_base != ret_base && sampler_base != GLSL_TYPE_VOID,
3102 "SPIR-V return type mismatches image type. This is only valid "
3103 "for untyped images (OpenCL).");
3104 dest_type = nir_get_nir_type_for_glsl_base_type(ret_base);
3105 dest_type = get_image_type(b, dest_type, operands);
3106 }
3107
3108 instr->dest_type = dest_type;
3109
3110 nir_ssa_dest_init(&instr->instr, &instr->dest,
3111 nir_tex_instr_dest_size(instr), 32, NULL);
3112
3113 vtn_assert(glsl_get_vector_elements(ret_type->type) ==
3114 nir_tex_instr_result_size(instr));
3115
3116 if (gather_offsets) {
3117 vtn_fail_if(gather_offsets->type->base_type != vtn_base_type_array ||
3118 gather_offsets->type->length != 4,
3119 "ConstOffsets must be an array of size four of vectors "
3120 "of two integer components");
3121
3122 struct vtn_type *vec_type = gather_offsets->type->array_element;
3123 vtn_fail_if(vec_type->base_type != vtn_base_type_vector ||
3124 vec_type->length != 2 ||
3125 !glsl_type_is_integer(vec_type->type),
3126 "ConstOffsets must be an array of size four of vectors "
3127 "of two integer components");
3128
3129 unsigned bit_size = glsl_get_bit_size(vec_type->type);
3130 for (uint32_t i = 0; i < 4; i++) {
3131 const nir_const_value *cvec =
3132 gather_offsets->constant->elements[i]->values;
3133 for (uint32_t j = 0; j < 2; j++) {
3134 switch (bit_size) {
3135 case 8: instr->tg4_offsets[i][j] = cvec[j].i8; break;
3136 case 16: instr->tg4_offsets[i][j] = cvec[j].i16; break;
3137 case 32: instr->tg4_offsets[i][j] = cvec[j].i32; break;
3138 case 64: instr->tg4_offsets[i][j] = cvec[j].i64; break;
3139 default:
3140 vtn_fail("Unsupported bit size: %u", bit_size);
3141 }
3142 }
3143 }
3144 }
3145
3146 nir_builder_instr_insert(&b->nb, &instr->instr);
3147
3148 if (is_sparse) {
3149 struct vtn_ssa_value *dest = vtn_create_ssa_value(b, struct_type->type);
3150 unsigned result_size = glsl_get_vector_elements(ret_type->type);
3151 dest->elems[0]->def = nir_channel(&b->nb, &instr->dest.ssa, result_size);
3152 dest->elems[1]->def = nir_channels(&b->nb, &instr->dest.ssa,
3153 BITFIELD_MASK(result_size));
3154 vtn_push_ssa_value(b, w[2], dest);
3155 } else {
3156 vtn_push_nir_ssa(b, w[2], &instr->dest.ssa);
3157 }
3158 }
3159
3160 static void
fill_common_atomic_sources(struct vtn_builder * b,SpvOp opcode,const uint32_t * w,nir_src * src)3161 fill_common_atomic_sources(struct vtn_builder *b, SpvOp opcode,
3162 const uint32_t *w, nir_src *src)
3163 {
3164 const struct glsl_type *type = vtn_get_type(b, w[1])->type;
3165 unsigned bit_size = glsl_get_bit_size(type);
3166
3167 switch (opcode) {
3168 case SpvOpAtomicIIncrement:
3169 src[0] = nir_src_for_ssa(nir_imm_intN_t(&b->nb, 1, bit_size));
3170 break;
3171
3172 case SpvOpAtomicIDecrement:
3173 src[0] = nir_src_for_ssa(nir_imm_intN_t(&b->nb, -1, bit_size));
3174 break;
3175
3176 case SpvOpAtomicISub:
3177 src[0] =
3178 nir_src_for_ssa(nir_ineg(&b->nb, vtn_get_nir_ssa(b, w[6])));
3179 break;
3180
3181 case SpvOpAtomicCompareExchange:
3182 case SpvOpAtomicCompareExchangeWeak:
3183 src[0] = nir_src_for_ssa(vtn_get_nir_ssa(b, w[8]));
3184 src[1] = nir_src_for_ssa(vtn_get_nir_ssa(b, w[7]));
3185 break;
3186
3187 case SpvOpAtomicExchange:
3188 case SpvOpAtomicIAdd:
3189 case SpvOpAtomicSMin:
3190 case SpvOpAtomicUMin:
3191 case SpvOpAtomicSMax:
3192 case SpvOpAtomicUMax:
3193 case SpvOpAtomicAnd:
3194 case SpvOpAtomicOr:
3195 case SpvOpAtomicXor:
3196 case SpvOpAtomicFAddEXT:
3197 case SpvOpAtomicFMinEXT:
3198 case SpvOpAtomicFMaxEXT:
3199 src[0] = nir_src_for_ssa(vtn_get_nir_ssa(b, w[6]));
3200 break;
3201
3202 default:
3203 vtn_fail_with_opcode("Invalid SPIR-V atomic", opcode);
3204 }
3205 }
3206
3207 static nir_ssa_def *
get_image_coord(struct vtn_builder * b,uint32_t value)3208 get_image_coord(struct vtn_builder *b, uint32_t value)
3209 {
3210 nir_ssa_def *coord = vtn_get_nir_ssa(b, value);
3211 /* The image_load_store intrinsics assume a 4-dim coordinate */
3212 return nir_pad_vec4(&b->nb, coord);
3213 }
3214
3215 static void
vtn_handle_image(struct vtn_builder * b,SpvOp opcode,const uint32_t * w,unsigned count)3216 vtn_handle_image(struct vtn_builder *b, SpvOp opcode,
3217 const uint32_t *w, unsigned count)
3218 {
3219 /* Just get this one out of the way */
3220 if (opcode == SpvOpImageTexelPointer) {
3221 struct vtn_value *val =
3222 vtn_push_value(b, w[2], vtn_value_type_image_pointer);
3223 val->image = ralloc(b, struct vtn_image_pointer);
3224
3225 val->image->image = vtn_nir_deref(b, w[3]);
3226 val->image->coord = get_image_coord(b, w[4]);
3227 val->image->sample = vtn_get_nir_ssa(b, w[5]);
3228 val->image->lod = nir_imm_int(&b->nb, 0);
3229 return;
3230 }
3231
3232 struct vtn_image_pointer image;
3233 SpvScope scope = SpvScopeInvocation;
3234 SpvMemorySemanticsMask semantics = 0;
3235 SpvImageOperandsMask operands = SpvImageOperandsMaskNone;
3236
3237 enum gl_access_qualifier access = 0;
3238
3239 struct vtn_value *res_val;
3240 switch (opcode) {
3241 case SpvOpAtomicExchange:
3242 case SpvOpAtomicCompareExchange:
3243 case SpvOpAtomicCompareExchangeWeak:
3244 case SpvOpAtomicIIncrement:
3245 case SpvOpAtomicIDecrement:
3246 case SpvOpAtomicIAdd:
3247 case SpvOpAtomicISub:
3248 case SpvOpAtomicLoad:
3249 case SpvOpAtomicSMin:
3250 case SpvOpAtomicUMin:
3251 case SpvOpAtomicSMax:
3252 case SpvOpAtomicUMax:
3253 case SpvOpAtomicAnd:
3254 case SpvOpAtomicOr:
3255 case SpvOpAtomicXor:
3256 case SpvOpAtomicFAddEXT:
3257 case SpvOpAtomicFMinEXT:
3258 case SpvOpAtomicFMaxEXT:
3259 res_val = vtn_value(b, w[3], vtn_value_type_image_pointer);
3260 image = *res_val->image;
3261 scope = vtn_constant_uint(b, w[4]);
3262 semantics = vtn_constant_uint(b, w[5]);
3263 access |= ACCESS_COHERENT;
3264 break;
3265
3266 case SpvOpAtomicStore:
3267 res_val = vtn_value(b, w[1], vtn_value_type_image_pointer);
3268 image = *res_val->image;
3269 scope = vtn_constant_uint(b, w[2]);
3270 semantics = vtn_constant_uint(b, w[3]);
3271 access |= ACCESS_COHERENT;
3272 break;
3273
3274 case SpvOpImageQuerySizeLod:
3275 res_val = vtn_untyped_value(b, w[3]);
3276 image.image = vtn_get_image(b, w[3], &access);
3277 image.coord = NULL;
3278 image.sample = NULL;
3279 image.lod = vtn_ssa_value(b, w[4])->def;
3280 break;
3281
3282 case SpvOpImageQuerySize:
3283 case SpvOpImageQuerySamples:
3284 res_val = vtn_untyped_value(b, w[3]);
3285 image.image = vtn_get_image(b, w[3], &access);
3286 image.coord = NULL;
3287 image.sample = NULL;
3288 image.lod = NULL;
3289 break;
3290
3291 case SpvOpImageQueryFormat:
3292 case SpvOpImageQueryOrder:
3293 res_val = vtn_untyped_value(b, w[3]);
3294 image.image = vtn_get_image(b, w[3], &access);
3295 image.coord = NULL;
3296 image.sample = NULL;
3297 image.lod = NULL;
3298 break;
3299
3300 case SpvOpImageRead:
3301 case SpvOpImageSparseRead: {
3302 res_val = vtn_untyped_value(b, w[3]);
3303 image.image = vtn_get_image(b, w[3], &access);
3304 image.coord = get_image_coord(b, w[4]);
3305
3306 operands = count > 5 ? w[5] : SpvImageOperandsMaskNone;
3307
3308 if (operands & SpvImageOperandsSampleMask) {
3309 uint32_t arg = image_operand_arg(b, w, count, 5,
3310 SpvImageOperandsSampleMask);
3311 image.sample = vtn_get_nir_ssa(b, w[arg]);
3312 } else {
3313 image.sample = nir_ssa_undef(&b->nb, 1, 32);
3314 }
3315
3316 if (operands & SpvImageOperandsMakeTexelVisibleMask) {
3317 vtn_fail_if((operands & SpvImageOperandsNonPrivateTexelMask) == 0,
3318 "MakeTexelVisible requires NonPrivateTexel to also be set.");
3319 uint32_t arg = image_operand_arg(b, w, count, 5,
3320 SpvImageOperandsMakeTexelVisibleMask);
3321 semantics = SpvMemorySemanticsMakeVisibleMask;
3322 scope = vtn_constant_uint(b, w[arg]);
3323 }
3324
3325 if (operands & SpvImageOperandsLodMask) {
3326 uint32_t arg = image_operand_arg(b, w, count, 5,
3327 SpvImageOperandsLodMask);
3328 image.lod = vtn_get_nir_ssa(b, w[arg]);
3329 } else {
3330 image.lod = nir_imm_int(&b->nb, 0);
3331 }
3332
3333 if (operands & SpvImageOperandsVolatileTexelMask)
3334 access |= ACCESS_VOLATILE;
3335 if (operands & SpvImageOperandsNontemporalMask)
3336 access |= ACCESS_STREAM_CACHE_POLICY;
3337
3338 break;
3339 }
3340
3341 case SpvOpImageWrite: {
3342 res_val = vtn_untyped_value(b, w[1]);
3343 image.image = vtn_get_image(b, w[1], &access);
3344 image.coord = get_image_coord(b, w[2]);
3345
3346 /* texel = w[3] */
3347
3348 operands = count > 4 ? w[4] : SpvImageOperandsMaskNone;
3349
3350 if (operands & SpvImageOperandsSampleMask) {
3351 uint32_t arg = image_operand_arg(b, w, count, 4,
3352 SpvImageOperandsSampleMask);
3353 image.sample = vtn_get_nir_ssa(b, w[arg]);
3354 } else {
3355 image.sample = nir_ssa_undef(&b->nb, 1, 32);
3356 }
3357
3358 if (operands & SpvImageOperandsMakeTexelAvailableMask) {
3359 vtn_fail_if((operands & SpvImageOperandsNonPrivateTexelMask) == 0,
3360 "MakeTexelAvailable requires NonPrivateTexel to also be set.");
3361 uint32_t arg = image_operand_arg(b, w, count, 4,
3362 SpvImageOperandsMakeTexelAvailableMask);
3363 semantics = SpvMemorySemanticsMakeAvailableMask;
3364 scope = vtn_constant_uint(b, w[arg]);
3365 }
3366
3367 if (operands & SpvImageOperandsLodMask) {
3368 uint32_t arg = image_operand_arg(b, w, count, 4,
3369 SpvImageOperandsLodMask);
3370 image.lod = vtn_get_nir_ssa(b, w[arg]);
3371 } else {
3372 image.lod = nir_imm_int(&b->nb, 0);
3373 }
3374
3375 if (operands & SpvImageOperandsVolatileTexelMask)
3376 access |= ACCESS_VOLATILE;
3377 if (operands & SpvImageOperandsNontemporalMask)
3378 access |= ACCESS_STREAM_CACHE_POLICY;
3379
3380 break;
3381 }
3382
3383 default:
3384 vtn_fail_with_opcode("Invalid image opcode", opcode);
3385 }
3386
3387 if (semantics & SpvMemorySemanticsVolatileMask)
3388 access |= ACCESS_VOLATILE;
3389
3390 nir_intrinsic_op op;
3391 switch (opcode) {
3392 #define OP(S, N) case SpvOp##S: op = nir_intrinsic_image_deref_##N; break;
3393 OP(ImageQuerySize, size)
3394 OP(ImageQuerySizeLod, size)
3395 OP(ImageRead, load)
3396 OP(ImageSparseRead, sparse_load)
3397 OP(ImageWrite, store)
3398 OP(AtomicLoad, load)
3399 OP(AtomicStore, store)
3400 OP(AtomicExchange, atomic_exchange)
3401 OP(AtomicCompareExchange, atomic_comp_swap)
3402 OP(AtomicCompareExchangeWeak, atomic_comp_swap)
3403 OP(AtomicIIncrement, atomic_add)
3404 OP(AtomicIDecrement, atomic_add)
3405 OP(AtomicIAdd, atomic_add)
3406 OP(AtomicISub, atomic_add)
3407 OP(AtomicSMin, atomic_imin)
3408 OP(AtomicUMin, atomic_umin)
3409 OP(AtomicSMax, atomic_imax)
3410 OP(AtomicUMax, atomic_umax)
3411 OP(AtomicAnd, atomic_and)
3412 OP(AtomicOr, atomic_or)
3413 OP(AtomicXor, atomic_xor)
3414 OP(AtomicFAddEXT, atomic_fadd)
3415 OP(AtomicFMinEXT, atomic_fmin)
3416 OP(AtomicFMaxEXT, atomic_fmax)
3417 OP(ImageQueryFormat, format)
3418 OP(ImageQueryOrder, order)
3419 OP(ImageQuerySamples, samples)
3420 #undef OP
3421 default:
3422 vtn_fail_with_opcode("Invalid image opcode", opcode);
3423 }
3424
3425 nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->shader, op);
3426
3427 intrin->src[0] = nir_src_for_ssa(&image.image->dest.ssa);
3428 nir_intrinsic_set_image_dim(intrin, glsl_get_sampler_dim(image.image->type));
3429 nir_intrinsic_set_image_array(intrin,
3430 glsl_sampler_type_is_array(image.image->type));
3431
3432 switch (opcode) {
3433 case SpvOpImageQuerySamples:
3434 case SpvOpImageQuerySize:
3435 case SpvOpImageQuerySizeLod:
3436 case SpvOpImageQueryFormat:
3437 case SpvOpImageQueryOrder:
3438 break;
3439 default:
3440 /* The image coordinate is always 4 components but we may not have that
3441 * many. Swizzle to compensate.
3442 */
3443 intrin->src[1] = nir_src_for_ssa(nir_pad_vec4(&b->nb, image.coord));
3444 intrin->src[2] = nir_src_for_ssa(image.sample);
3445 break;
3446 }
3447
3448 /* The Vulkan spec says:
3449 *
3450 * "If an instruction loads from or stores to a resource (including
3451 * atomics and image instructions) and the resource descriptor being
3452 * accessed is not dynamically uniform, then the operand corresponding
3453 * to that resource (e.g. the pointer or sampled image operand) must be
3454 * decorated with NonUniform."
3455 *
3456 * It's very careful to specify that the exact operand must be decorated
3457 * NonUniform. The SPIR-V parser is not expected to chase through long
3458 * chains to find the NonUniform decoration. It's either right there or we
3459 * can assume it doesn't exist.
3460 */
3461 vtn_foreach_decoration(b, res_val, non_uniform_decoration_cb, &access);
3462 nir_intrinsic_set_access(intrin, access);
3463
3464 switch (opcode) {
3465 case SpvOpImageQuerySamples:
3466 case SpvOpImageQueryFormat:
3467 case SpvOpImageQueryOrder:
3468 /* No additional sources */
3469 break;
3470 case SpvOpImageQuerySize:
3471 intrin->src[1] = nir_src_for_ssa(nir_imm_int(&b->nb, 0));
3472 break;
3473 case SpvOpImageQuerySizeLod:
3474 intrin->src[1] = nir_src_for_ssa(image.lod);
3475 break;
3476 case SpvOpAtomicLoad:
3477 case SpvOpImageRead:
3478 case SpvOpImageSparseRead:
3479 /* Only OpImageRead can support a lod parameter if
3480 * SPV_AMD_shader_image_load_store_lod is used but the current NIR
3481 * intrinsics definition for atomics requires us to set it for
3482 * OpAtomicLoad.
3483 */
3484 intrin->src[3] = nir_src_for_ssa(image.lod);
3485 break;
3486 case SpvOpAtomicStore:
3487 case SpvOpImageWrite: {
3488 const uint32_t value_id = opcode == SpvOpAtomicStore ? w[4] : w[3];
3489 struct vtn_ssa_value *value = vtn_ssa_value(b, value_id);
3490 /* nir_intrinsic_image_deref_store always takes a vec4 value */
3491 assert(op == nir_intrinsic_image_deref_store);
3492 intrin->num_components = 4;
3493 intrin->src[3] = nir_src_for_ssa(nir_pad_vec4(&b->nb, value->def));
3494 /* Only OpImageWrite can support a lod parameter if
3495 * SPV_AMD_shader_image_load_store_lod is used but the current NIR
3496 * intrinsics definition for atomics requires us to set it for
3497 * OpAtomicStore.
3498 */
3499 intrin->src[4] = nir_src_for_ssa(image.lod);
3500
3501 if (opcode == SpvOpImageWrite) {
3502 nir_alu_type src_type =
3503 get_image_type(b, nir_get_nir_type_for_glsl_type(value->type), operands);
3504 nir_intrinsic_set_src_type(intrin, src_type);
3505 }
3506 break;
3507 }
3508
3509 case SpvOpAtomicCompareExchange:
3510 case SpvOpAtomicCompareExchangeWeak:
3511 case SpvOpAtomicIIncrement:
3512 case SpvOpAtomicIDecrement:
3513 case SpvOpAtomicExchange:
3514 case SpvOpAtomicIAdd:
3515 case SpvOpAtomicISub:
3516 case SpvOpAtomicSMin:
3517 case SpvOpAtomicUMin:
3518 case SpvOpAtomicSMax:
3519 case SpvOpAtomicUMax:
3520 case SpvOpAtomicAnd:
3521 case SpvOpAtomicOr:
3522 case SpvOpAtomicXor:
3523 case SpvOpAtomicFAddEXT:
3524 case SpvOpAtomicFMinEXT:
3525 case SpvOpAtomicFMaxEXT:
3526 fill_common_atomic_sources(b, opcode, w, &intrin->src[3]);
3527 break;
3528
3529 default:
3530 vtn_fail_with_opcode("Invalid image opcode", opcode);
3531 }
3532
3533 /* Image operations implicitly have the Image storage memory semantics. */
3534 semantics |= SpvMemorySemanticsImageMemoryMask;
3535
3536 SpvMemorySemanticsMask before_semantics;
3537 SpvMemorySemanticsMask after_semantics;
3538 vtn_split_barrier_semantics(b, semantics, &before_semantics, &after_semantics);
3539
3540 if (before_semantics)
3541 vtn_emit_memory_barrier(b, scope, before_semantics);
3542
3543 if (opcode != SpvOpImageWrite && opcode != SpvOpAtomicStore) {
3544 struct vtn_type *type = vtn_get_type(b, w[1]);
3545 struct vtn_type *struct_type = NULL;
3546 if (opcode == SpvOpImageSparseRead) {
3547 vtn_assert(glsl_type_is_struct_or_ifc(type->type));
3548 struct_type = type;
3549 type = struct_type->members[1];
3550 }
3551
3552 unsigned dest_components = glsl_get_vector_elements(type->type);
3553 if (opcode == SpvOpImageSparseRead)
3554 dest_components++;
3555
3556 if (nir_intrinsic_infos[op].dest_components == 0)
3557 intrin->num_components = dest_components;
3558
3559 nir_ssa_dest_init(&intrin->instr, &intrin->dest,
3560 nir_intrinsic_dest_components(intrin),
3561 glsl_get_bit_size(type->type), NULL);
3562
3563 nir_builder_instr_insert(&b->nb, &intrin->instr);
3564
3565 nir_ssa_def *result = &intrin->dest.ssa;
3566 if (nir_intrinsic_dest_components(intrin) != dest_components)
3567 result = nir_channels(&b->nb, result, (1 << dest_components) - 1);
3568
3569 if (opcode == SpvOpImageSparseRead) {
3570 struct vtn_ssa_value *dest = vtn_create_ssa_value(b, struct_type->type);
3571 unsigned res_type_size = glsl_get_vector_elements(type->type);
3572 dest->elems[0]->def = nir_channel(&b->nb, result, res_type_size);
3573 if (intrin->dest.ssa.bit_size != 32)
3574 dest->elems[0]->def = nir_u2u32(&b->nb, dest->elems[0]->def);
3575 dest->elems[1]->def = nir_channels(&b->nb, result,
3576 BITFIELD_MASK(res_type_size));
3577 vtn_push_ssa_value(b, w[2], dest);
3578 } else {
3579 vtn_push_nir_ssa(b, w[2], result);
3580 }
3581
3582 if (opcode == SpvOpImageRead || opcode == SpvOpImageSparseRead) {
3583 nir_alu_type dest_type =
3584 get_image_type(b, nir_get_nir_type_for_glsl_type(type->type), operands);
3585 nir_intrinsic_set_dest_type(intrin, dest_type);
3586 }
3587 } else {
3588 nir_builder_instr_insert(&b->nb, &intrin->instr);
3589 }
3590
3591 if (after_semantics)
3592 vtn_emit_memory_barrier(b, scope, after_semantics);
3593 }
3594
3595 static nir_intrinsic_op
get_uniform_nir_atomic_op(struct vtn_builder * b,SpvOp opcode)3596 get_uniform_nir_atomic_op(struct vtn_builder *b, SpvOp opcode)
3597 {
3598 switch (opcode) {
3599 #define OP(S, N) case SpvOp##S: return nir_intrinsic_atomic_counter_ ##N;
3600 OP(AtomicLoad, read_deref)
3601 OP(AtomicExchange, exchange)
3602 OP(AtomicCompareExchange, comp_swap)
3603 OP(AtomicCompareExchangeWeak, comp_swap)
3604 OP(AtomicIIncrement, inc_deref)
3605 OP(AtomicIDecrement, post_dec_deref)
3606 OP(AtomicIAdd, add_deref)
3607 OP(AtomicISub, add_deref)
3608 OP(AtomicUMin, min_deref)
3609 OP(AtomicUMax, max_deref)
3610 OP(AtomicAnd, and_deref)
3611 OP(AtomicOr, or_deref)
3612 OP(AtomicXor, xor_deref)
3613 #undef OP
3614 default:
3615 /* We left the following out: AtomicStore, AtomicSMin and
3616 * AtomicSmax. Right now there are not nir intrinsics for them. At this
3617 * moment Atomic Counter support is needed for ARB_spirv support, so is
3618 * only need to support GLSL Atomic Counters that are uints and don't
3619 * allow direct storage.
3620 */
3621 vtn_fail("Invalid uniform atomic");
3622 }
3623 }
3624
3625 static nir_intrinsic_op
get_deref_nir_atomic_op(struct vtn_builder * b,SpvOp opcode)3626 get_deref_nir_atomic_op(struct vtn_builder *b, SpvOp opcode)
3627 {
3628 switch (opcode) {
3629 case SpvOpAtomicLoad: return nir_intrinsic_load_deref;
3630 case SpvOpAtomicFlagClear:
3631 case SpvOpAtomicStore: return nir_intrinsic_store_deref;
3632 #define OP(S, N) case SpvOp##S: return nir_intrinsic_deref_##N;
3633 OP(AtomicExchange, atomic_exchange)
3634 OP(AtomicCompareExchange, atomic_comp_swap)
3635 OP(AtomicCompareExchangeWeak, atomic_comp_swap)
3636 OP(AtomicIIncrement, atomic_add)
3637 OP(AtomicIDecrement, atomic_add)
3638 OP(AtomicIAdd, atomic_add)
3639 OP(AtomicISub, atomic_add)
3640 OP(AtomicSMin, atomic_imin)
3641 OP(AtomicUMin, atomic_umin)
3642 OP(AtomicSMax, atomic_imax)
3643 OP(AtomicUMax, atomic_umax)
3644 OP(AtomicAnd, atomic_and)
3645 OP(AtomicOr, atomic_or)
3646 OP(AtomicXor, atomic_xor)
3647 OP(AtomicFAddEXT, atomic_fadd)
3648 OP(AtomicFMinEXT, atomic_fmin)
3649 OP(AtomicFMaxEXT, atomic_fmax)
3650 OP(AtomicFlagTestAndSet, atomic_comp_swap)
3651 #undef OP
3652 default:
3653 vtn_fail_with_opcode("Invalid shared atomic", opcode);
3654 }
3655 }
3656
3657 /*
3658 * Handles shared atomics, ssbo atomics and atomic counters.
3659 */
3660 static void
vtn_handle_atomics(struct vtn_builder * b,SpvOp opcode,const uint32_t * w,UNUSED unsigned count)3661 vtn_handle_atomics(struct vtn_builder *b, SpvOp opcode,
3662 const uint32_t *w, UNUSED unsigned count)
3663 {
3664 struct vtn_pointer *ptr;
3665 nir_intrinsic_instr *atomic;
3666
3667 SpvScope scope = SpvScopeInvocation;
3668 SpvMemorySemanticsMask semantics = 0;
3669 enum gl_access_qualifier access = 0;
3670
3671 switch (opcode) {
3672 case SpvOpAtomicLoad:
3673 case SpvOpAtomicExchange:
3674 case SpvOpAtomicCompareExchange:
3675 case SpvOpAtomicCompareExchangeWeak:
3676 case SpvOpAtomicIIncrement:
3677 case SpvOpAtomicIDecrement:
3678 case SpvOpAtomicIAdd:
3679 case SpvOpAtomicISub:
3680 case SpvOpAtomicSMin:
3681 case SpvOpAtomicUMin:
3682 case SpvOpAtomicSMax:
3683 case SpvOpAtomicUMax:
3684 case SpvOpAtomicAnd:
3685 case SpvOpAtomicOr:
3686 case SpvOpAtomicXor:
3687 case SpvOpAtomicFAddEXT:
3688 case SpvOpAtomicFMinEXT:
3689 case SpvOpAtomicFMaxEXT:
3690 case SpvOpAtomicFlagTestAndSet:
3691 ptr = vtn_pointer(b, w[3]);
3692 scope = vtn_constant_uint(b, w[4]);
3693 semantics = vtn_constant_uint(b, w[5]);
3694 break;
3695 case SpvOpAtomicFlagClear:
3696 case SpvOpAtomicStore:
3697 ptr = vtn_pointer(b, w[1]);
3698 scope = vtn_constant_uint(b, w[2]);
3699 semantics = vtn_constant_uint(b, w[3]);
3700 break;
3701
3702 default:
3703 vtn_fail_with_opcode("Invalid SPIR-V atomic", opcode);
3704 }
3705
3706 if (semantics & SpvMemorySemanticsVolatileMask)
3707 access |= ACCESS_VOLATILE;
3708
3709 /* uniform as "atomic counter uniform" */
3710 if (ptr->mode == vtn_variable_mode_atomic_counter) {
3711 nir_deref_instr *deref = vtn_pointer_to_deref(b, ptr);
3712 nir_intrinsic_op op = get_uniform_nir_atomic_op(b, opcode);
3713 atomic = nir_intrinsic_instr_create(b->nb.shader, op);
3714 atomic->src[0] = nir_src_for_ssa(&deref->dest.ssa);
3715
3716 /* SSBO needs to initialize index/offset. In this case we don't need to,
3717 * as that info is already stored on the ptr->var->var nir_variable (see
3718 * vtn_create_variable)
3719 */
3720
3721 switch (opcode) {
3722 case SpvOpAtomicLoad:
3723 case SpvOpAtomicExchange:
3724 case SpvOpAtomicCompareExchange:
3725 case SpvOpAtomicCompareExchangeWeak:
3726 case SpvOpAtomicIIncrement:
3727 case SpvOpAtomicIDecrement:
3728 case SpvOpAtomicIAdd:
3729 case SpvOpAtomicISub:
3730 case SpvOpAtomicSMin:
3731 case SpvOpAtomicUMin:
3732 case SpvOpAtomicSMax:
3733 case SpvOpAtomicUMax:
3734 case SpvOpAtomicAnd:
3735 case SpvOpAtomicOr:
3736 case SpvOpAtomicXor:
3737 /* Nothing: we don't need to call fill_common_atomic_sources here, as
3738 * atomic counter uniforms doesn't have sources
3739 */
3740 break;
3741
3742 default:
3743 unreachable("Invalid SPIR-V atomic");
3744
3745 }
3746 } else {
3747 nir_deref_instr *deref = vtn_pointer_to_deref(b, ptr);
3748 const struct glsl_type *deref_type = deref->type;
3749 nir_intrinsic_op op = get_deref_nir_atomic_op(b, opcode);
3750 atomic = nir_intrinsic_instr_create(b->nb.shader, op);
3751 atomic->src[0] = nir_src_for_ssa(&deref->dest.ssa);
3752
3753 if (ptr->mode != vtn_variable_mode_workgroup)
3754 access |= ACCESS_COHERENT;
3755
3756 nir_intrinsic_set_access(atomic, access);
3757
3758 switch (opcode) {
3759 case SpvOpAtomicLoad:
3760 atomic->num_components = glsl_get_vector_elements(deref_type);
3761 break;
3762
3763 case SpvOpAtomicStore:
3764 atomic->num_components = glsl_get_vector_elements(deref_type);
3765 nir_intrinsic_set_write_mask(atomic, (1 << atomic->num_components) - 1);
3766 atomic->src[1] = nir_src_for_ssa(vtn_get_nir_ssa(b, w[4]));
3767 break;
3768
3769 case SpvOpAtomicFlagClear:
3770 atomic->num_components = 1;
3771 nir_intrinsic_set_write_mask(atomic, 1);
3772 atomic->src[1] = nir_src_for_ssa(nir_imm_intN_t(&b->nb, 0, 32));
3773 break;
3774 case SpvOpAtomicFlagTestAndSet:
3775 atomic->src[1] = nir_src_for_ssa(nir_imm_intN_t(&b->nb, 0, 32));
3776 atomic->src[2] = nir_src_for_ssa(nir_imm_intN_t(&b->nb, -1, 32));
3777 break;
3778 case SpvOpAtomicExchange:
3779 case SpvOpAtomicCompareExchange:
3780 case SpvOpAtomicCompareExchangeWeak:
3781 case SpvOpAtomicIIncrement:
3782 case SpvOpAtomicIDecrement:
3783 case SpvOpAtomicIAdd:
3784 case SpvOpAtomicISub:
3785 case SpvOpAtomicSMin:
3786 case SpvOpAtomicUMin:
3787 case SpvOpAtomicSMax:
3788 case SpvOpAtomicUMax:
3789 case SpvOpAtomicAnd:
3790 case SpvOpAtomicOr:
3791 case SpvOpAtomicXor:
3792 case SpvOpAtomicFAddEXT:
3793 case SpvOpAtomicFMinEXT:
3794 case SpvOpAtomicFMaxEXT:
3795 fill_common_atomic_sources(b, opcode, w, &atomic->src[1]);
3796 break;
3797
3798 default:
3799 vtn_fail_with_opcode("Invalid SPIR-V atomic", opcode);
3800 }
3801 }
3802
3803 /* Atomic ordering operations will implicitly apply to the atomic operation
3804 * storage class, so include that too.
3805 */
3806 semantics |= vtn_mode_to_memory_semantics(ptr->mode);
3807
3808 SpvMemorySemanticsMask before_semantics;
3809 SpvMemorySemanticsMask after_semantics;
3810 vtn_split_barrier_semantics(b, semantics, &before_semantics, &after_semantics);
3811
3812 if (before_semantics)
3813 vtn_emit_memory_barrier(b, scope, before_semantics);
3814
3815 if (opcode != SpvOpAtomicStore && opcode != SpvOpAtomicFlagClear) {
3816 struct vtn_type *type = vtn_get_type(b, w[1]);
3817
3818 if (opcode == SpvOpAtomicFlagTestAndSet) {
3819 /* map atomic flag to a 32-bit atomic integer. */
3820 nir_ssa_dest_init(&atomic->instr, &atomic->dest,
3821 1, 32, NULL);
3822 } else {
3823 nir_ssa_dest_init(&atomic->instr, &atomic->dest,
3824 glsl_get_vector_elements(type->type),
3825 glsl_get_bit_size(type->type), NULL);
3826
3827 vtn_push_nir_ssa(b, w[2], &atomic->dest.ssa);
3828 }
3829 }
3830
3831 nir_builder_instr_insert(&b->nb, &atomic->instr);
3832
3833 if (opcode == SpvOpAtomicFlagTestAndSet) {
3834 vtn_push_nir_ssa(b, w[2], nir_i2b1(&b->nb, &atomic->dest.ssa));
3835 }
3836 if (after_semantics)
3837 vtn_emit_memory_barrier(b, scope, after_semantics);
3838 }
3839
3840 static nir_alu_instr *
create_vec(struct vtn_builder * b,unsigned num_components,unsigned bit_size)3841 create_vec(struct vtn_builder *b, unsigned num_components, unsigned bit_size)
3842 {
3843 nir_op op = nir_op_vec(num_components);
3844 nir_alu_instr *vec = nir_alu_instr_create(b->shader, op);
3845 nir_ssa_dest_init(&vec->instr, &vec->dest.dest, num_components,
3846 bit_size, NULL);
3847 vec->dest.write_mask = (1 << num_components) - 1;
3848
3849 return vec;
3850 }
3851
3852 struct vtn_ssa_value *
vtn_ssa_transpose(struct vtn_builder * b,struct vtn_ssa_value * src)3853 vtn_ssa_transpose(struct vtn_builder *b, struct vtn_ssa_value *src)
3854 {
3855 if (src->transposed)
3856 return src->transposed;
3857
3858 struct vtn_ssa_value *dest =
3859 vtn_create_ssa_value(b, glsl_transposed_type(src->type));
3860
3861 for (unsigned i = 0; i < glsl_get_matrix_columns(dest->type); i++) {
3862 nir_alu_instr *vec = create_vec(b, glsl_get_matrix_columns(src->type),
3863 glsl_get_bit_size(src->type));
3864 if (glsl_type_is_vector_or_scalar(src->type)) {
3865 vec->src[0].src = nir_src_for_ssa(src->def);
3866 vec->src[0].swizzle[0] = i;
3867 } else {
3868 for (unsigned j = 0; j < glsl_get_matrix_columns(src->type); j++) {
3869 vec->src[j].src = nir_src_for_ssa(src->elems[j]->def);
3870 vec->src[j].swizzle[0] = i;
3871 }
3872 }
3873 nir_builder_instr_insert(&b->nb, &vec->instr);
3874 dest->elems[i]->def = &vec->dest.dest.ssa;
3875 }
3876
3877 dest->transposed = src;
3878
3879 return dest;
3880 }
3881
3882 static nir_ssa_def *
vtn_vector_shuffle(struct vtn_builder * b,unsigned num_components,nir_ssa_def * src0,nir_ssa_def * src1,const uint32_t * indices)3883 vtn_vector_shuffle(struct vtn_builder *b, unsigned num_components,
3884 nir_ssa_def *src0, nir_ssa_def *src1,
3885 const uint32_t *indices)
3886 {
3887 nir_alu_instr *vec = create_vec(b, num_components, src0->bit_size);
3888
3889 for (unsigned i = 0; i < num_components; i++) {
3890 uint32_t index = indices[i];
3891 if (index == 0xffffffff) {
3892 vec->src[i].src =
3893 nir_src_for_ssa(nir_ssa_undef(&b->nb, 1, src0->bit_size));
3894 } else if (index < src0->num_components) {
3895 vec->src[i].src = nir_src_for_ssa(src0);
3896 vec->src[i].swizzle[0] = index;
3897 } else {
3898 vec->src[i].src = nir_src_for_ssa(src1);
3899 vec->src[i].swizzle[0] = index - src0->num_components;
3900 }
3901 }
3902
3903 nir_builder_instr_insert(&b->nb, &vec->instr);
3904
3905 return &vec->dest.dest.ssa;
3906 }
3907
3908 /*
3909 * Concatentates a number of vectors/scalars together to produce a vector
3910 */
3911 static nir_ssa_def *
vtn_vector_construct(struct vtn_builder * b,unsigned num_components,unsigned num_srcs,nir_ssa_def ** srcs)3912 vtn_vector_construct(struct vtn_builder *b, unsigned num_components,
3913 unsigned num_srcs, nir_ssa_def **srcs)
3914 {
3915 nir_alu_instr *vec = create_vec(b, num_components, srcs[0]->bit_size);
3916
3917 /* From the SPIR-V 1.1 spec for OpCompositeConstruct:
3918 *
3919 * "When constructing a vector, there must be at least two Constituent
3920 * operands."
3921 */
3922 vtn_assert(num_srcs >= 2);
3923
3924 unsigned dest_idx = 0;
3925 for (unsigned i = 0; i < num_srcs; i++) {
3926 nir_ssa_def *src = srcs[i];
3927 vtn_assert(dest_idx + src->num_components <= num_components);
3928 for (unsigned j = 0; j < src->num_components; j++) {
3929 vec->src[dest_idx].src = nir_src_for_ssa(src);
3930 vec->src[dest_idx].swizzle[0] = j;
3931 dest_idx++;
3932 }
3933 }
3934
3935 /* From the SPIR-V 1.1 spec for OpCompositeConstruct:
3936 *
3937 * "When constructing a vector, the total number of components in all
3938 * the operands must equal the number of components in Result Type."
3939 */
3940 vtn_assert(dest_idx == num_components);
3941
3942 nir_builder_instr_insert(&b->nb, &vec->instr);
3943
3944 return &vec->dest.dest.ssa;
3945 }
3946
3947 static struct vtn_ssa_value *
vtn_composite_copy(void * mem_ctx,struct vtn_ssa_value * src)3948 vtn_composite_copy(void *mem_ctx, struct vtn_ssa_value *src)
3949 {
3950 struct vtn_ssa_value *dest = rzalloc(mem_ctx, struct vtn_ssa_value);
3951 dest->type = src->type;
3952
3953 if (glsl_type_is_vector_or_scalar(src->type)) {
3954 dest->def = src->def;
3955 } else {
3956 unsigned elems = glsl_get_length(src->type);
3957
3958 dest->elems = ralloc_array(mem_ctx, struct vtn_ssa_value *, elems);
3959 for (unsigned i = 0; i < elems; i++)
3960 dest->elems[i] = vtn_composite_copy(mem_ctx, src->elems[i]);
3961 }
3962
3963 return dest;
3964 }
3965
3966 static struct vtn_ssa_value *
vtn_composite_insert(struct vtn_builder * b,struct vtn_ssa_value * src,struct vtn_ssa_value * insert,const uint32_t * indices,unsigned num_indices)3967 vtn_composite_insert(struct vtn_builder *b, struct vtn_ssa_value *src,
3968 struct vtn_ssa_value *insert, const uint32_t *indices,
3969 unsigned num_indices)
3970 {
3971 struct vtn_ssa_value *dest = vtn_composite_copy(b, src);
3972
3973 struct vtn_ssa_value *cur = dest;
3974 unsigned i;
3975 for (i = 0; i < num_indices - 1; i++) {
3976 /* If we got a vector here, that means the next index will be trying to
3977 * dereference a scalar.
3978 */
3979 vtn_fail_if(glsl_type_is_vector_or_scalar(cur->type),
3980 "OpCompositeInsert has too many indices.");
3981 vtn_fail_if(indices[i] >= glsl_get_length(cur->type),
3982 "All indices in an OpCompositeInsert must be in-bounds");
3983 cur = cur->elems[indices[i]];
3984 }
3985
3986 if (glsl_type_is_vector_or_scalar(cur->type)) {
3987 vtn_fail_if(indices[i] >= glsl_get_vector_elements(cur->type),
3988 "All indices in an OpCompositeInsert must be in-bounds");
3989
3990 /* According to the SPIR-V spec, OpCompositeInsert may work down to
3991 * the component granularity. In that case, the last index will be
3992 * the index to insert the scalar into the vector.
3993 */
3994
3995 cur->def = nir_vector_insert_imm(&b->nb, cur->def, insert->def, indices[i]);
3996 } else {
3997 vtn_fail_if(indices[i] >= glsl_get_length(cur->type),
3998 "All indices in an OpCompositeInsert must be in-bounds");
3999 cur->elems[indices[i]] = insert;
4000 }
4001
4002 return dest;
4003 }
4004
4005 static struct vtn_ssa_value *
vtn_composite_extract(struct vtn_builder * b,struct vtn_ssa_value * src,const uint32_t * indices,unsigned num_indices)4006 vtn_composite_extract(struct vtn_builder *b, struct vtn_ssa_value *src,
4007 const uint32_t *indices, unsigned num_indices)
4008 {
4009 struct vtn_ssa_value *cur = src;
4010 for (unsigned i = 0; i < num_indices; i++) {
4011 if (glsl_type_is_vector_or_scalar(cur->type)) {
4012 vtn_assert(i == num_indices - 1);
4013 vtn_fail_if(indices[i] >= glsl_get_vector_elements(cur->type),
4014 "All indices in an OpCompositeExtract must be in-bounds");
4015
4016 /* According to the SPIR-V spec, OpCompositeExtract may work down to
4017 * the component granularity. The last index will be the index of the
4018 * vector to extract.
4019 */
4020
4021 const struct glsl_type *scalar_type =
4022 glsl_scalar_type(glsl_get_base_type(cur->type));
4023 struct vtn_ssa_value *ret = vtn_create_ssa_value(b, scalar_type);
4024 ret->def = nir_channel(&b->nb, cur->def, indices[i]);
4025 return ret;
4026 } else {
4027 vtn_fail_if(indices[i] >= glsl_get_length(cur->type),
4028 "All indices in an OpCompositeExtract must be in-bounds");
4029 cur = cur->elems[indices[i]];
4030 }
4031 }
4032
4033 return cur;
4034 }
4035
4036 static void
vtn_handle_composite(struct vtn_builder * b,SpvOp opcode,const uint32_t * w,unsigned count)4037 vtn_handle_composite(struct vtn_builder *b, SpvOp opcode,
4038 const uint32_t *w, unsigned count)
4039 {
4040 struct vtn_type *type = vtn_get_type(b, w[1]);
4041 struct vtn_ssa_value *ssa = vtn_create_ssa_value(b, type->type);
4042
4043 switch (opcode) {
4044 case SpvOpVectorExtractDynamic:
4045 ssa->def = nir_vector_extract(&b->nb, vtn_get_nir_ssa(b, w[3]),
4046 vtn_get_nir_ssa(b, w[4]));
4047 break;
4048
4049 case SpvOpVectorInsertDynamic:
4050 ssa->def = nir_vector_insert(&b->nb, vtn_get_nir_ssa(b, w[3]),
4051 vtn_get_nir_ssa(b, w[4]),
4052 vtn_get_nir_ssa(b, w[5]));
4053 break;
4054
4055 case SpvOpVectorShuffle:
4056 ssa->def = vtn_vector_shuffle(b, glsl_get_vector_elements(type->type),
4057 vtn_get_nir_ssa(b, w[3]),
4058 vtn_get_nir_ssa(b, w[4]),
4059 w + 5);
4060 break;
4061
4062 case SpvOpCompositeConstruct: {
4063 unsigned elems = count - 3;
4064 assume(elems >= 1);
4065 if (glsl_type_is_vector_or_scalar(type->type)) {
4066 nir_ssa_def *srcs[NIR_MAX_VEC_COMPONENTS];
4067 for (unsigned i = 0; i < elems; i++)
4068 srcs[i] = vtn_get_nir_ssa(b, w[3 + i]);
4069 ssa->def =
4070 vtn_vector_construct(b, glsl_get_vector_elements(type->type),
4071 elems, srcs);
4072 } else {
4073 ssa->elems = ralloc_array(b, struct vtn_ssa_value *, elems);
4074 for (unsigned i = 0; i < elems; i++)
4075 ssa->elems[i] = vtn_ssa_value(b, w[3 + i]);
4076 }
4077 break;
4078 }
4079 case SpvOpCompositeExtract:
4080 ssa = vtn_composite_extract(b, vtn_ssa_value(b, w[3]),
4081 w + 4, count - 4);
4082 break;
4083
4084 case SpvOpCompositeInsert:
4085 ssa = vtn_composite_insert(b, vtn_ssa_value(b, w[4]),
4086 vtn_ssa_value(b, w[3]),
4087 w + 5, count - 5);
4088 break;
4089
4090 case SpvOpCopyLogical:
4091 ssa = vtn_composite_copy(b, vtn_ssa_value(b, w[3]));
4092 break;
4093 case SpvOpCopyObject:
4094 vtn_copy_value(b, w[3], w[2]);
4095 return;
4096
4097 default:
4098 vtn_fail_with_opcode("unknown composite operation", opcode);
4099 }
4100
4101 vtn_push_ssa_value(b, w[2], ssa);
4102 }
4103
4104 void
vtn_emit_memory_barrier(struct vtn_builder * b,SpvScope scope,SpvMemorySemanticsMask semantics)4105 vtn_emit_memory_barrier(struct vtn_builder *b, SpvScope scope,
4106 SpvMemorySemanticsMask semantics)
4107 {
4108 if (b->shader->options->use_scoped_barrier) {
4109 vtn_emit_scoped_memory_barrier(b, scope, semantics);
4110 return;
4111 }
4112
4113 static const SpvMemorySemanticsMask all_memory_semantics =
4114 SpvMemorySemanticsUniformMemoryMask |
4115 SpvMemorySemanticsWorkgroupMemoryMask |
4116 SpvMemorySemanticsAtomicCounterMemoryMask |
4117 SpvMemorySemanticsImageMemoryMask |
4118 SpvMemorySemanticsOutputMemoryMask;
4119
4120 /* If we're not actually doing a memory barrier, bail */
4121 if (!(semantics & all_memory_semantics))
4122 return;
4123
4124 /* GL and Vulkan don't have these */
4125 vtn_assert(scope != SpvScopeCrossDevice);
4126
4127 if (scope == SpvScopeSubgroup)
4128 return; /* Nothing to do here */
4129
4130 if (scope == SpvScopeWorkgroup) {
4131 nir_group_memory_barrier(&b->nb);
4132 return;
4133 }
4134
4135 /* There's only two scopes thing left */
4136 vtn_assert(scope == SpvScopeInvocation || scope == SpvScopeDevice);
4137
4138 /* Map the GLSL memoryBarrier() construct and any barriers with more than one
4139 * semantic to the corresponding NIR one.
4140 */
4141 if (util_bitcount(semantics & all_memory_semantics) > 1) {
4142 nir_memory_barrier(&b->nb);
4143 if (semantics & SpvMemorySemanticsOutputMemoryMask) {
4144 /* GLSL memoryBarrier() (and the corresponding NIR one) doesn't include
4145 * TCS outputs, so we have to emit it's own intrinsic for that. We
4146 * then need to emit another memory_barrier to prevent moving
4147 * non-output operations to before the tcs_patch barrier.
4148 */
4149 nir_memory_barrier_tcs_patch(&b->nb);
4150 nir_memory_barrier(&b->nb);
4151 }
4152 return;
4153 }
4154
4155 /* Issue a more specific barrier */
4156 switch (semantics & all_memory_semantics) {
4157 case SpvMemorySemanticsUniformMemoryMask:
4158 nir_memory_barrier_buffer(&b->nb);
4159 break;
4160 case SpvMemorySemanticsWorkgroupMemoryMask:
4161 nir_memory_barrier_shared(&b->nb);
4162 break;
4163 case SpvMemorySemanticsAtomicCounterMemoryMask:
4164 nir_memory_barrier_atomic_counter(&b->nb);
4165 break;
4166 case SpvMemorySemanticsImageMemoryMask:
4167 nir_memory_barrier_image(&b->nb);
4168 break;
4169 case SpvMemorySemanticsOutputMemoryMask:
4170 if (b->nb.shader->info.stage == MESA_SHADER_TESS_CTRL)
4171 nir_memory_barrier_tcs_patch(&b->nb);
4172 break;
4173 default:
4174 break;
4175 }
4176 }
4177
4178 static void
vtn_handle_barrier(struct vtn_builder * b,SpvOp opcode,const uint32_t * w,UNUSED unsigned count)4179 vtn_handle_barrier(struct vtn_builder *b, SpvOp opcode,
4180 const uint32_t *w, UNUSED unsigned count)
4181 {
4182 switch (opcode) {
4183 case SpvOpEmitVertex:
4184 case SpvOpEmitStreamVertex:
4185 case SpvOpEndPrimitive:
4186 case SpvOpEndStreamPrimitive: {
4187 unsigned stream = 0;
4188 if (opcode == SpvOpEmitStreamVertex || opcode == SpvOpEndStreamPrimitive)
4189 stream = vtn_constant_uint(b, w[1]);
4190
4191 switch (opcode) {
4192 case SpvOpEmitStreamVertex:
4193 case SpvOpEmitVertex:
4194 nir_emit_vertex(&b->nb, stream);
4195 break;
4196 case SpvOpEndPrimitive:
4197 case SpvOpEndStreamPrimitive:
4198 nir_end_primitive(&b->nb, stream);
4199 break;
4200 default:
4201 unreachable("Invalid opcode");
4202 }
4203 break;
4204 }
4205
4206 case SpvOpMemoryBarrier: {
4207 SpvScope scope = vtn_constant_uint(b, w[1]);
4208 SpvMemorySemanticsMask semantics = vtn_constant_uint(b, w[2]);
4209 vtn_emit_memory_barrier(b, scope, semantics);
4210 return;
4211 }
4212
4213 case SpvOpControlBarrier: {
4214 SpvScope execution_scope = vtn_constant_uint(b, w[1]);
4215 SpvScope memory_scope = vtn_constant_uint(b, w[2]);
4216 SpvMemorySemanticsMask memory_semantics = vtn_constant_uint(b, w[3]);
4217
4218 /* GLSLang, prior to commit 8297936dd6eb3, emitted OpControlBarrier with
4219 * memory semantics of None for GLSL barrier().
4220 * And before that, prior to c3f1cdfa, emitted the OpControlBarrier with
4221 * Device instead of Workgroup for execution scope.
4222 */
4223 if (b->wa_glslang_cs_barrier &&
4224 b->nb.shader->info.stage == MESA_SHADER_COMPUTE &&
4225 (execution_scope == SpvScopeWorkgroup ||
4226 execution_scope == SpvScopeDevice) &&
4227 memory_semantics == SpvMemorySemanticsMaskNone) {
4228 execution_scope = SpvScopeWorkgroup;
4229 memory_scope = SpvScopeWorkgroup;
4230 memory_semantics = SpvMemorySemanticsAcquireReleaseMask |
4231 SpvMemorySemanticsWorkgroupMemoryMask;
4232 }
4233
4234 /* From the SPIR-V spec:
4235 *
4236 * "When used with the TessellationControl execution model, it also
4237 * implicitly synchronizes the Output Storage Class: Writes to Output
4238 * variables performed by any invocation executed prior to a
4239 * OpControlBarrier will be visible to any other invocation after
4240 * return from that OpControlBarrier."
4241 *
4242 * The same applies to VK_NV_mesh_shader.
4243 */
4244 if (b->nb.shader->info.stage == MESA_SHADER_TESS_CTRL ||
4245 b->nb.shader->info.stage == MESA_SHADER_TASK ||
4246 b->nb.shader->info.stage == MESA_SHADER_MESH) {
4247 memory_semantics &= ~(SpvMemorySemanticsAcquireMask |
4248 SpvMemorySemanticsReleaseMask |
4249 SpvMemorySemanticsAcquireReleaseMask |
4250 SpvMemorySemanticsSequentiallyConsistentMask);
4251 memory_semantics |= SpvMemorySemanticsAcquireReleaseMask |
4252 SpvMemorySemanticsOutputMemoryMask;
4253 }
4254
4255 if (b->shader->options->use_scoped_barrier) {
4256 vtn_emit_scoped_control_barrier(b, execution_scope, memory_scope,
4257 memory_semantics);
4258 } else {
4259 vtn_emit_memory_barrier(b, memory_scope, memory_semantics);
4260
4261 if (execution_scope == SpvScopeWorkgroup)
4262 nir_control_barrier(&b->nb);
4263 }
4264 break;
4265 }
4266
4267 default:
4268 unreachable("unknown barrier instruction");
4269 }
4270 }
4271
4272 static enum tess_primitive_mode
tess_primitive_mode_from_spv_execution_mode(struct vtn_builder * b,SpvExecutionMode mode)4273 tess_primitive_mode_from_spv_execution_mode(struct vtn_builder *b,
4274 SpvExecutionMode mode)
4275 {
4276 switch (mode) {
4277 case SpvExecutionModeTriangles:
4278 return TESS_PRIMITIVE_TRIANGLES;
4279 case SpvExecutionModeQuads:
4280 return TESS_PRIMITIVE_QUADS;
4281 case SpvExecutionModeIsolines:
4282 return TESS_PRIMITIVE_ISOLINES;
4283 default:
4284 vtn_fail("Invalid tess primitive type: %s (%u)",
4285 spirv_executionmode_to_string(mode), mode);
4286 }
4287 }
4288
4289 static enum shader_prim
primitive_from_spv_execution_mode(struct vtn_builder * b,SpvExecutionMode mode)4290 primitive_from_spv_execution_mode(struct vtn_builder *b,
4291 SpvExecutionMode mode)
4292 {
4293 switch (mode) {
4294 case SpvExecutionModeInputPoints:
4295 case SpvExecutionModeOutputPoints:
4296 return SHADER_PRIM_POINTS;
4297 case SpvExecutionModeInputLines:
4298 case SpvExecutionModeOutputLinesNV:
4299 return SHADER_PRIM_LINES;
4300 case SpvExecutionModeInputLinesAdjacency:
4301 return SHADER_PRIM_LINES_ADJACENCY;
4302 case SpvExecutionModeTriangles:
4303 case SpvExecutionModeOutputTrianglesNV:
4304 return SHADER_PRIM_TRIANGLES;
4305 case SpvExecutionModeInputTrianglesAdjacency:
4306 return SHADER_PRIM_TRIANGLES_ADJACENCY;
4307 case SpvExecutionModeQuads:
4308 return SHADER_PRIM_QUADS;
4309 case SpvExecutionModeOutputLineStrip:
4310 return SHADER_PRIM_LINE_STRIP;
4311 case SpvExecutionModeOutputTriangleStrip:
4312 return SHADER_PRIM_TRIANGLE_STRIP;
4313 default:
4314 vtn_fail("Invalid primitive type: %s (%u)",
4315 spirv_executionmode_to_string(mode), mode);
4316 }
4317 }
4318
4319 static unsigned
vertices_in_from_spv_execution_mode(struct vtn_builder * b,SpvExecutionMode mode)4320 vertices_in_from_spv_execution_mode(struct vtn_builder *b,
4321 SpvExecutionMode mode)
4322 {
4323 switch (mode) {
4324 case SpvExecutionModeInputPoints:
4325 return 1;
4326 case SpvExecutionModeInputLines:
4327 return 2;
4328 case SpvExecutionModeInputLinesAdjacency:
4329 return 4;
4330 case SpvExecutionModeTriangles:
4331 return 3;
4332 case SpvExecutionModeInputTrianglesAdjacency:
4333 return 6;
4334 default:
4335 vtn_fail("Invalid GS input mode: %s (%u)",
4336 spirv_executionmode_to_string(mode), mode);
4337 }
4338 }
4339
4340 static gl_shader_stage
stage_for_execution_model(struct vtn_builder * b,SpvExecutionModel model)4341 stage_for_execution_model(struct vtn_builder *b, SpvExecutionModel model)
4342 {
4343 switch (model) {
4344 case SpvExecutionModelVertex:
4345 return MESA_SHADER_VERTEX;
4346 case SpvExecutionModelTessellationControl:
4347 return MESA_SHADER_TESS_CTRL;
4348 case SpvExecutionModelTessellationEvaluation:
4349 return MESA_SHADER_TESS_EVAL;
4350 case SpvExecutionModelGeometry:
4351 return MESA_SHADER_GEOMETRY;
4352 case SpvExecutionModelFragment:
4353 return MESA_SHADER_FRAGMENT;
4354 case SpvExecutionModelGLCompute:
4355 return MESA_SHADER_COMPUTE;
4356 case SpvExecutionModelKernel:
4357 return MESA_SHADER_KERNEL;
4358 case SpvExecutionModelRayGenerationKHR:
4359 return MESA_SHADER_RAYGEN;
4360 case SpvExecutionModelAnyHitKHR:
4361 return MESA_SHADER_ANY_HIT;
4362 case SpvExecutionModelClosestHitKHR:
4363 return MESA_SHADER_CLOSEST_HIT;
4364 case SpvExecutionModelMissKHR:
4365 return MESA_SHADER_MISS;
4366 case SpvExecutionModelIntersectionKHR:
4367 return MESA_SHADER_INTERSECTION;
4368 case SpvExecutionModelCallableKHR:
4369 return MESA_SHADER_CALLABLE;
4370 case SpvExecutionModelTaskNV:
4371 return MESA_SHADER_TASK;
4372 case SpvExecutionModelMeshNV:
4373 return MESA_SHADER_MESH;
4374 default:
4375 vtn_fail("Unsupported execution model: %s (%u)",
4376 spirv_executionmodel_to_string(model), model);
4377 }
4378 }
4379
4380 #define spv_check_supported(name, cap) do { \
4381 if (!(b->options && b->options->caps.name)) \
4382 vtn_warn("Unsupported SPIR-V capability: %s (%u)", \
4383 spirv_capability_to_string(cap), cap); \
4384 } while(0)
4385
4386
4387 void
vtn_handle_entry_point(struct vtn_builder * b,const uint32_t * w,unsigned count)4388 vtn_handle_entry_point(struct vtn_builder *b, const uint32_t *w,
4389 unsigned count)
4390 {
4391 struct vtn_value *entry_point = &b->values[w[2]];
4392 /* Let this be a name label regardless */
4393 unsigned name_words;
4394 entry_point->name = vtn_string_literal(b, &w[3], count - 3, &name_words);
4395
4396 if (strcmp(entry_point->name, b->entry_point_name) != 0 ||
4397 stage_for_execution_model(b, w[1]) != b->entry_point_stage)
4398 return;
4399
4400 vtn_assert(b->entry_point == NULL);
4401 b->entry_point = entry_point;
4402
4403 /* Entry points enumerate which global variables are used. */
4404 size_t start = 3 + name_words;
4405 b->interface_ids_count = count - start;
4406 b->interface_ids = ralloc_array(b, uint32_t, b->interface_ids_count);
4407 memcpy(b->interface_ids, &w[start], b->interface_ids_count * 4);
4408 qsort(b->interface_ids, b->interface_ids_count, 4, cmp_uint32_t);
4409 }
4410
4411 static bool
vtn_handle_preamble_instruction(struct vtn_builder * b,SpvOp opcode,const uint32_t * w,unsigned count)4412 vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode,
4413 const uint32_t *w, unsigned count)
4414 {
4415 switch (opcode) {
4416 case SpvOpSource: {
4417 const char *lang;
4418 switch (w[1]) {
4419 default:
4420 case SpvSourceLanguageUnknown: lang = "unknown"; break;
4421 case SpvSourceLanguageESSL: lang = "ESSL"; break;
4422 case SpvSourceLanguageGLSL: lang = "GLSL"; break;
4423 case SpvSourceLanguageOpenCL_C: lang = "OpenCL C"; break;
4424 case SpvSourceLanguageOpenCL_CPP: lang = "OpenCL C++"; break;
4425 case SpvSourceLanguageHLSL: lang = "HLSL"; break;
4426 }
4427
4428 uint32_t version = w[2];
4429
4430 const char *file =
4431 (count > 3) ? vtn_value(b, w[3], vtn_value_type_string)->str : "";
4432
4433 vtn_info("Parsing SPIR-V from %s %u source file %s", lang, version, file);
4434
4435 b->source_lang = w[1];
4436 break;
4437 }
4438
4439 case SpvOpSourceExtension:
4440 case SpvOpSourceContinued:
4441 case SpvOpExtension:
4442 case SpvOpModuleProcessed:
4443 /* Unhandled, but these are for debug so that's ok. */
4444 break;
4445
4446 case SpvOpCapability: {
4447 SpvCapability cap = w[1];
4448 switch (cap) {
4449 case SpvCapabilityMatrix:
4450 case SpvCapabilityShader:
4451 case SpvCapabilityGeometry:
4452 case SpvCapabilityGeometryPointSize:
4453 case SpvCapabilityUniformBufferArrayDynamicIndexing:
4454 case SpvCapabilitySampledImageArrayDynamicIndexing:
4455 case SpvCapabilityStorageBufferArrayDynamicIndexing:
4456 case SpvCapabilityStorageImageArrayDynamicIndexing:
4457 case SpvCapabilityImageRect:
4458 case SpvCapabilitySampledRect:
4459 case SpvCapabilitySampled1D:
4460 case SpvCapabilityImage1D:
4461 case SpvCapabilitySampledCubeArray:
4462 case SpvCapabilityImageCubeArray:
4463 case SpvCapabilitySampledBuffer:
4464 case SpvCapabilityImageBuffer:
4465 case SpvCapabilityImageQuery:
4466 case SpvCapabilityDerivativeControl:
4467 case SpvCapabilityInterpolationFunction:
4468 case SpvCapabilityMultiViewport:
4469 case SpvCapabilitySampleRateShading:
4470 case SpvCapabilityClipDistance:
4471 case SpvCapabilityCullDistance:
4472 case SpvCapabilityInputAttachment:
4473 case SpvCapabilityImageGatherExtended:
4474 case SpvCapabilityStorageImageExtendedFormats:
4475 case SpvCapabilityVector16:
4476 case SpvCapabilityDotProduct:
4477 case SpvCapabilityDotProductInputAll:
4478 case SpvCapabilityDotProductInput4x8Bit:
4479 case SpvCapabilityDotProductInput4x8BitPacked:
4480 break;
4481
4482 case SpvCapabilityLinkage:
4483 if (!b->options->create_library)
4484 vtn_warn("Unsupported SPIR-V capability: %s",
4485 spirv_capability_to_string(cap));
4486 spv_check_supported(linkage, cap);
4487 vtn_warn("The SPIR-V Linkage capability is not fully supported");
4488 break;
4489
4490 case SpvCapabilitySparseResidency:
4491 spv_check_supported(sparse_residency, cap);
4492 break;
4493
4494 case SpvCapabilityMinLod:
4495 spv_check_supported(min_lod, cap);
4496 break;
4497
4498 case SpvCapabilityAtomicStorage:
4499 spv_check_supported(atomic_storage, cap);
4500 break;
4501
4502 case SpvCapabilityFloat64:
4503 spv_check_supported(float64, cap);
4504 break;
4505 case SpvCapabilityInt64:
4506 spv_check_supported(int64, cap);
4507 break;
4508 case SpvCapabilityInt16:
4509 spv_check_supported(int16, cap);
4510 break;
4511 case SpvCapabilityInt8:
4512 spv_check_supported(int8, cap);
4513 break;
4514
4515 case SpvCapabilityTransformFeedback:
4516 spv_check_supported(transform_feedback, cap);
4517 break;
4518
4519 case SpvCapabilityGeometryStreams:
4520 spv_check_supported(geometry_streams, cap);
4521 break;
4522
4523 case SpvCapabilityInt64Atomics:
4524 spv_check_supported(int64_atomics, cap);
4525 break;
4526
4527 case SpvCapabilityStorageImageMultisample:
4528 spv_check_supported(storage_image_ms, cap);
4529 break;
4530
4531 case SpvCapabilityAddresses:
4532 spv_check_supported(address, cap);
4533 break;
4534
4535 case SpvCapabilityKernel:
4536 case SpvCapabilityFloat16Buffer:
4537 spv_check_supported(kernel, cap);
4538 break;
4539
4540 case SpvCapabilityGenericPointer:
4541 spv_check_supported(generic_pointers, cap);
4542 break;
4543
4544 case SpvCapabilityImageBasic:
4545 spv_check_supported(kernel_image, cap);
4546 break;
4547
4548 case SpvCapabilityImageReadWrite:
4549 spv_check_supported(kernel_image_read_write, cap);
4550 break;
4551
4552 case SpvCapabilityLiteralSampler:
4553 spv_check_supported(literal_sampler, cap);
4554 break;
4555
4556 case SpvCapabilityImageMipmap:
4557 case SpvCapabilityPipes:
4558 case SpvCapabilityDeviceEnqueue:
4559 vtn_warn("Unsupported OpenCL-style SPIR-V capability: %s",
4560 spirv_capability_to_string(cap));
4561 break;
4562
4563 case SpvCapabilityImageMSArray:
4564 spv_check_supported(image_ms_array, cap);
4565 break;
4566
4567 case SpvCapabilityTessellation:
4568 case SpvCapabilityTessellationPointSize:
4569 spv_check_supported(tessellation, cap);
4570 break;
4571
4572 case SpvCapabilityDrawParameters:
4573 spv_check_supported(draw_parameters, cap);
4574 break;
4575
4576 case SpvCapabilityStorageImageReadWithoutFormat:
4577 spv_check_supported(image_read_without_format, cap);
4578 break;
4579
4580 case SpvCapabilityStorageImageWriteWithoutFormat:
4581 spv_check_supported(image_write_without_format, cap);
4582 break;
4583
4584 case SpvCapabilityDeviceGroup:
4585 spv_check_supported(device_group, cap);
4586 break;
4587
4588 case SpvCapabilityMultiView:
4589 spv_check_supported(multiview, cap);
4590 break;
4591
4592 case SpvCapabilityGroupNonUniform:
4593 spv_check_supported(subgroup_basic, cap);
4594 break;
4595
4596 case SpvCapabilitySubgroupVoteKHR:
4597 case SpvCapabilityGroupNonUniformVote:
4598 spv_check_supported(subgroup_vote, cap);
4599 break;
4600
4601 case SpvCapabilitySubgroupBallotKHR:
4602 case SpvCapabilityGroupNonUniformBallot:
4603 spv_check_supported(subgroup_ballot, cap);
4604 break;
4605
4606 case SpvCapabilityGroupNonUniformShuffle:
4607 case SpvCapabilityGroupNonUniformShuffleRelative:
4608 spv_check_supported(subgroup_shuffle, cap);
4609 break;
4610
4611 case SpvCapabilityGroupNonUniformQuad:
4612 spv_check_supported(subgroup_quad, cap);
4613 break;
4614
4615 case SpvCapabilityGroupNonUniformArithmetic:
4616 case SpvCapabilityGroupNonUniformClustered:
4617 spv_check_supported(subgroup_arithmetic, cap);
4618 break;
4619
4620 case SpvCapabilityGroups:
4621 spv_check_supported(groups, cap);
4622 break;
4623
4624 case SpvCapabilitySubgroupDispatch:
4625 spv_check_supported(subgroup_dispatch, cap);
4626 /* Missing :
4627 * - SpvOpGetKernelLocalSizeForSubgroupCount
4628 * - SpvOpGetKernelMaxNumSubgroups
4629 * - SpvExecutionModeSubgroupsPerWorkgroup
4630 * - SpvExecutionModeSubgroupsPerWorkgroupId
4631 */
4632 vtn_warn("Not fully supported capability: %s",
4633 spirv_capability_to_string(cap));
4634 break;
4635
4636 case SpvCapabilityVariablePointersStorageBuffer:
4637 case SpvCapabilityVariablePointers:
4638 spv_check_supported(variable_pointers, cap);
4639 b->variable_pointers = true;
4640 break;
4641
4642 case SpvCapabilityStorageUniformBufferBlock16:
4643 case SpvCapabilityStorageUniform16:
4644 case SpvCapabilityStoragePushConstant16:
4645 case SpvCapabilityStorageInputOutput16:
4646 spv_check_supported(storage_16bit, cap);
4647 break;
4648
4649 case SpvCapabilityShaderLayer:
4650 case SpvCapabilityShaderViewportIndex:
4651 case SpvCapabilityShaderViewportIndexLayerEXT:
4652 spv_check_supported(shader_viewport_index_layer, cap);
4653 break;
4654
4655 case SpvCapabilityStorageBuffer8BitAccess:
4656 case SpvCapabilityUniformAndStorageBuffer8BitAccess:
4657 case SpvCapabilityStoragePushConstant8:
4658 spv_check_supported(storage_8bit, cap);
4659 break;
4660
4661 case SpvCapabilityShaderNonUniformEXT:
4662 spv_check_supported(descriptor_indexing, cap);
4663 break;
4664
4665 case SpvCapabilityInputAttachmentArrayDynamicIndexingEXT:
4666 case SpvCapabilityUniformTexelBufferArrayDynamicIndexingEXT:
4667 case SpvCapabilityStorageTexelBufferArrayDynamicIndexingEXT:
4668 spv_check_supported(descriptor_array_dynamic_indexing, cap);
4669 break;
4670
4671 case SpvCapabilityUniformBufferArrayNonUniformIndexingEXT:
4672 case SpvCapabilitySampledImageArrayNonUniformIndexingEXT:
4673 case SpvCapabilityStorageBufferArrayNonUniformIndexingEXT:
4674 case SpvCapabilityStorageImageArrayNonUniformIndexingEXT:
4675 case SpvCapabilityInputAttachmentArrayNonUniformIndexingEXT:
4676 case SpvCapabilityUniformTexelBufferArrayNonUniformIndexingEXT:
4677 case SpvCapabilityStorageTexelBufferArrayNonUniformIndexingEXT:
4678 spv_check_supported(descriptor_array_non_uniform_indexing, cap);
4679 break;
4680
4681 case SpvCapabilityRuntimeDescriptorArrayEXT:
4682 spv_check_supported(runtime_descriptor_array, cap);
4683 break;
4684
4685 case SpvCapabilityStencilExportEXT:
4686 spv_check_supported(stencil_export, cap);
4687 break;
4688
4689 case SpvCapabilitySampleMaskPostDepthCoverage:
4690 spv_check_supported(post_depth_coverage, cap);
4691 break;
4692
4693 case SpvCapabilityDenormFlushToZero:
4694 case SpvCapabilityDenormPreserve:
4695 case SpvCapabilitySignedZeroInfNanPreserve:
4696 case SpvCapabilityRoundingModeRTE:
4697 case SpvCapabilityRoundingModeRTZ:
4698 spv_check_supported(float_controls, cap);
4699 break;
4700
4701 case SpvCapabilityPhysicalStorageBufferAddresses:
4702 spv_check_supported(physical_storage_buffer_address, cap);
4703 break;
4704
4705 case SpvCapabilityComputeDerivativeGroupQuadsNV:
4706 case SpvCapabilityComputeDerivativeGroupLinearNV:
4707 spv_check_supported(derivative_group, cap);
4708 break;
4709
4710 case SpvCapabilityFloat16:
4711 spv_check_supported(float16, cap);
4712 break;
4713
4714 case SpvCapabilityFragmentShaderSampleInterlockEXT:
4715 spv_check_supported(fragment_shader_sample_interlock, cap);
4716 break;
4717
4718 case SpvCapabilityFragmentShaderPixelInterlockEXT:
4719 spv_check_supported(fragment_shader_pixel_interlock, cap);
4720 break;
4721
4722 case SpvCapabilityDemoteToHelperInvocation:
4723 spv_check_supported(demote_to_helper_invocation, cap);
4724 b->uses_demote_to_helper_invocation = true;
4725 break;
4726
4727 case SpvCapabilityShaderClockKHR:
4728 spv_check_supported(shader_clock, cap);
4729 break;
4730
4731 case SpvCapabilityVulkanMemoryModel:
4732 spv_check_supported(vk_memory_model, cap);
4733 break;
4734
4735 case SpvCapabilityVulkanMemoryModelDeviceScope:
4736 spv_check_supported(vk_memory_model_device_scope, cap);
4737 break;
4738
4739 case SpvCapabilityImageReadWriteLodAMD:
4740 spv_check_supported(amd_image_read_write_lod, cap);
4741 break;
4742
4743 case SpvCapabilityIntegerFunctions2INTEL:
4744 spv_check_supported(integer_functions2, cap);
4745 break;
4746
4747 case SpvCapabilityFragmentMaskAMD:
4748 spv_check_supported(amd_fragment_mask, cap);
4749 break;
4750
4751 case SpvCapabilityImageGatherBiasLodAMD:
4752 spv_check_supported(amd_image_gather_bias_lod, cap);
4753 break;
4754
4755 case SpvCapabilityAtomicFloat16AddEXT:
4756 spv_check_supported(float16_atomic_add, cap);
4757 break;
4758
4759 case SpvCapabilityAtomicFloat32AddEXT:
4760 spv_check_supported(float32_atomic_add, cap);
4761 break;
4762
4763 case SpvCapabilityAtomicFloat64AddEXT:
4764 spv_check_supported(float64_atomic_add, cap);
4765 break;
4766
4767 case SpvCapabilitySubgroupShuffleINTEL:
4768 spv_check_supported(intel_subgroup_shuffle, cap);
4769 break;
4770
4771 case SpvCapabilitySubgroupBufferBlockIOINTEL:
4772 spv_check_supported(intel_subgroup_buffer_block_io, cap);
4773 break;
4774
4775 case SpvCapabilityRayTracingKHR:
4776 spv_check_supported(ray_tracing, cap);
4777 break;
4778
4779 case SpvCapabilityRayQueryKHR:
4780 spv_check_supported(ray_query, cap);
4781 break;
4782
4783 case SpvCapabilityRayTraversalPrimitiveCullingKHR:
4784 spv_check_supported(ray_traversal_primitive_culling, cap);
4785 break;
4786
4787 case SpvCapabilityInt64ImageEXT:
4788 spv_check_supported(image_atomic_int64, cap);
4789 break;
4790
4791 case SpvCapabilityFragmentShadingRateKHR:
4792 spv_check_supported(fragment_shading_rate, cap);
4793 break;
4794
4795 case SpvCapabilityWorkgroupMemoryExplicitLayoutKHR:
4796 spv_check_supported(workgroup_memory_explicit_layout, cap);
4797 break;
4798
4799 case SpvCapabilityWorkgroupMemoryExplicitLayout8BitAccessKHR:
4800 spv_check_supported(workgroup_memory_explicit_layout, cap);
4801 spv_check_supported(storage_8bit, cap);
4802 break;
4803
4804 case SpvCapabilityWorkgroupMemoryExplicitLayout16BitAccessKHR:
4805 spv_check_supported(workgroup_memory_explicit_layout, cap);
4806 spv_check_supported(storage_16bit, cap);
4807 break;
4808
4809 case SpvCapabilityAtomicFloat16MinMaxEXT:
4810 spv_check_supported(float16_atomic_min_max, cap);
4811 break;
4812
4813 case SpvCapabilityAtomicFloat32MinMaxEXT:
4814 spv_check_supported(float32_atomic_min_max, cap);
4815 break;
4816
4817 case SpvCapabilityAtomicFloat64MinMaxEXT:
4818 spv_check_supported(float64_atomic_min_max, cap);
4819 break;
4820
4821 case SpvCapabilityMeshShadingNV:
4822 spv_check_supported(mesh_shading_nv, cap);
4823 break;
4824
4825 case SpvCapabilityPerViewAttributesNV:
4826 spv_check_supported(per_view_attributes_nv, cap);
4827 break;
4828
4829 case SpvCapabilityShaderViewportMaskNV:
4830 spv_check_supported(shader_viewport_mask_nv, cap);
4831 break;
4832
4833 default:
4834 vtn_fail("Unhandled capability: %s (%u)",
4835 spirv_capability_to_string(cap), cap);
4836 }
4837 break;
4838 }
4839
4840 case SpvOpExtInstImport:
4841 vtn_handle_extension(b, opcode, w, count);
4842 break;
4843
4844 case SpvOpMemoryModel:
4845 switch (w[1]) {
4846 case SpvAddressingModelPhysical32:
4847 vtn_fail_if(b->shader->info.stage != MESA_SHADER_KERNEL,
4848 "AddressingModelPhysical32 only supported for kernels");
4849 b->shader->info.cs.ptr_size = 32;
4850 b->physical_ptrs = true;
4851 assert(nir_address_format_bit_size(b->options->global_addr_format) == 32);
4852 assert(nir_address_format_num_components(b->options->global_addr_format) == 1);
4853 assert(nir_address_format_bit_size(b->options->shared_addr_format) == 32);
4854 assert(nir_address_format_num_components(b->options->shared_addr_format) == 1);
4855 assert(nir_address_format_bit_size(b->options->constant_addr_format) == 32);
4856 assert(nir_address_format_num_components(b->options->constant_addr_format) == 1);
4857 break;
4858 case SpvAddressingModelPhysical64:
4859 vtn_fail_if(b->shader->info.stage != MESA_SHADER_KERNEL,
4860 "AddressingModelPhysical64 only supported for kernels");
4861 b->shader->info.cs.ptr_size = 64;
4862 b->physical_ptrs = true;
4863 assert(nir_address_format_bit_size(b->options->global_addr_format) == 64);
4864 assert(nir_address_format_num_components(b->options->global_addr_format) == 1);
4865 assert(nir_address_format_bit_size(b->options->shared_addr_format) == 64);
4866 assert(nir_address_format_num_components(b->options->shared_addr_format) == 1);
4867 assert(nir_address_format_bit_size(b->options->constant_addr_format) == 64);
4868 assert(nir_address_format_num_components(b->options->constant_addr_format) == 1);
4869 break;
4870 case SpvAddressingModelLogical:
4871 vtn_fail_if(b->shader->info.stage == MESA_SHADER_KERNEL,
4872 "AddressingModelLogical only supported for shaders");
4873 b->physical_ptrs = false;
4874 break;
4875 case SpvAddressingModelPhysicalStorageBuffer64:
4876 vtn_fail_if(!b->options ||
4877 !b->options->caps.physical_storage_buffer_address,
4878 "AddressingModelPhysicalStorageBuffer64 not supported");
4879 break;
4880 default:
4881 vtn_fail("Unknown addressing model: %s (%u)",
4882 spirv_addressingmodel_to_string(w[1]), w[1]);
4883 break;
4884 }
4885
4886 b->mem_model = w[2];
4887 switch (w[2]) {
4888 case SpvMemoryModelSimple:
4889 case SpvMemoryModelGLSL450:
4890 case SpvMemoryModelOpenCL:
4891 break;
4892 case SpvMemoryModelVulkan:
4893 vtn_fail_if(!b->options->caps.vk_memory_model,
4894 "Vulkan memory model is unsupported by this driver");
4895 break;
4896 default:
4897 vtn_fail("Unsupported memory model: %s",
4898 spirv_memorymodel_to_string(w[2]));
4899 break;
4900 }
4901 break;
4902
4903 case SpvOpEntryPoint:
4904 vtn_handle_entry_point(b, w, count);
4905 break;
4906
4907 case SpvOpString:
4908 vtn_push_value(b, w[1], vtn_value_type_string)->str =
4909 vtn_string_literal(b, &w[2], count - 2, NULL);
4910 break;
4911
4912 case SpvOpName:
4913 b->values[w[1]].name = vtn_string_literal(b, &w[2], count - 2, NULL);
4914 break;
4915
4916 case SpvOpMemberName:
4917 case SpvOpExecutionMode:
4918 case SpvOpExecutionModeId:
4919 case SpvOpDecorationGroup:
4920 case SpvOpDecorate:
4921 case SpvOpDecorateId:
4922 case SpvOpMemberDecorate:
4923 case SpvOpGroupDecorate:
4924 case SpvOpGroupMemberDecorate:
4925 case SpvOpDecorateString:
4926 case SpvOpMemberDecorateString:
4927 vtn_handle_decoration(b, opcode, w, count);
4928 break;
4929
4930 case SpvOpExtInst: {
4931 struct vtn_value *val = vtn_value(b, w[3], vtn_value_type_extension);
4932 if (val->ext_handler == vtn_handle_non_semantic_instruction) {
4933 /* NonSemantic extended instructions are acceptable in preamble. */
4934 vtn_handle_non_semantic_instruction(b, w[4], w, count);
4935 return true;
4936 } else {
4937 return false; /* End of preamble. */
4938 }
4939 }
4940
4941 default:
4942 return false; /* End of preamble */
4943 }
4944
4945 return true;
4946 }
4947
4948 static void
vtn_handle_execution_mode(struct vtn_builder * b,struct vtn_value * entry_point,const struct vtn_decoration * mode,UNUSED void * data)4949 vtn_handle_execution_mode(struct vtn_builder *b, struct vtn_value *entry_point,
4950 const struct vtn_decoration *mode, UNUSED void *data)
4951 {
4952 vtn_assert(b->entry_point == entry_point);
4953
4954 switch(mode->exec_mode) {
4955 case SpvExecutionModeOriginUpperLeft:
4956 case SpvExecutionModeOriginLowerLeft:
4957 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
4958 b->shader->info.fs.origin_upper_left =
4959 (mode->exec_mode == SpvExecutionModeOriginUpperLeft);
4960 break;
4961
4962 case SpvExecutionModeEarlyFragmentTests:
4963 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
4964 b->shader->info.fs.early_fragment_tests = true;
4965 break;
4966
4967 case SpvExecutionModePostDepthCoverage:
4968 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
4969 b->shader->info.fs.post_depth_coverage = true;
4970 break;
4971
4972 case SpvExecutionModeInvocations:
4973 vtn_assert(b->shader->info.stage == MESA_SHADER_GEOMETRY);
4974 b->shader->info.gs.invocations = MAX2(1, mode->operands[0]);
4975 break;
4976
4977 case SpvExecutionModeDepthReplacing:
4978 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
4979 b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_ANY;
4980 break;
4981 case SpvExecutionModeDepthGreater:
4982 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
4983 b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_GREATER;
4984 break;
4985 case SpvExecutionModeDepthLess:
4986 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
4987 b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_LESS;
4988 break;
4989 case SpvExecutionModeDepthUnchanged:
4990 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
4991 b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_UNCHANGED;
4992 break;
4993
4994 case SpvExecutionModeLocalSizeHint:
4995 vtn_assert(b->shader->info.stage == MESA_SHADER_KERNEL);
4996 b->shader->info.cs.workgroup_size_hint[0] = mode->operands[0];
4997 b->shader->info.cs.workgroup_size_hint[1] = mode->operands[1];
4998 b->shader->info.cs.workgroup_size_hint[2] = mode->operands[2];
4999 break;
5000
5001 case SpvExecutionModeLocalSize:
5002 if (gl_shader_stage_uses_workgroup(b->shader->info.stage)) {
5003 b->shader->info.workgroup_size[0] = mode->operands[0];
5004 b->shader->info.workgroup_size[1] = mode->operands[1];
5005 b->shader->info.workgroup_size[2] = mode->operands[2];
5006 } else {
5007 vtn_fail("Execution mode LocalSize not supported in stage %s",
5008 _mesa_shader_stage_to_string(b->shader->info.stage));
5009 }
5010 break;
5011
5012 case SpvExecutionModeOutputVertices:
5013 switch (b->shader->info.stage) {
5014 case MESA_SHADER_TESS_CTRL:
5015 case MESA_SHADER_TESS_EVAL:
5016 b->shader->info.tess.tcs_vertices_out = mode->operands[0];
5017 break;
5018 case MESA_SHADER_GEOMETRY:
5019 b->shader->info.gs.vertices_out = mode->operands[0];
5020 break;
5021 case MESA_SHADER_MESH:
5022 b->shader->info.mesh.max_vertices_out = mode->operands[0];
5023 break;
5024 default:
5025 vtn_fail("Execution mode OutputVertices not supported in stage %s",
5026 _mesa_shader_stage_to_string(b->shader->info.stage));
5027 break;
5028 }
5029 break;
5030
5031 case SpvExecutionModeInputPoints:
5032 case SpvExecutionModeInputLines:
5033 case SpvExecutionModeInputLinesAdjacency:
5034 case SpvExecutionModeTriangles:
5035 case SpvExecutionModeInputTrianglesAdjacency:
5036 case SpvExecutionModeQuads:
5037 case SpvExecutionModeIsolines:
5038 if (b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
5039 b->shader->info.stage == MESA_SHADER_TESS_EVAL) {
5040 b->shader->info.tess._primitive_mode =
5041 tess_primitive_mode_from_spv_execution_mode(b, mode->exec_mode);
5042 } else {
5043 vtn_assert(b->shader->info.stage == MESA_SHADER_GEOMETRY);
5044 b->shader->info.gs.vertices_in =
5045 vertices_in_from_spv_execution_mode(b, mode->exec_mode);
5046 b->shader->info.gs.input_primitive =
5047 primitive_from_spv_execution_mode(b, mode->exec_mode);
5048 }
5049 break;
5050
5051 case SpvExecutionModeOutputPrimitivesNV:
5052 vtn_assert(b->shader->info.stage == MESA_SHADER_MESH);
5053 b->shader->info.mesh.max_primitives_out = mode->operands[0];
5054 break;
5055
5056 case SpvExecutionModeOutputLinesNV:
5057 case SpvExecutionModeOutputTrianglesNV:
5058 vtn_assert(b->shader->info.stage == MESA_SHADER_MESH);
5059 b->shader->info.mesh.primitive_type =
5060 primitive_from_spv_execution_mode(b, mode->exec_mode);
5061 break;
5062
5063 case SpvExecutionModeOutputPoints: {
5064 const unsigned primitive =
5065 primitive_from_spv_execution_mode(b, mode->exec_mode);
5066
5067 switch (b->shader->info.stage) {
5068 case MESA_SHADER_GEOMETRY:
5069 b->shader->info.gs.output_primitive = primitive;
5070 break;
5071 case MESA_SHADER_MESH:
5072 b->shader->info.mesh.primitive_type = primitive;
5073 break;
5074 default:
5075 vtn_fail("Execution mode OutputPoints not supported in stage %s",
5076 _mesa_shader_stage_to_string(b->shader->info.stage));
5077 break;
5078 }
5079 break;
5080 }
5081
5082 case SpvExecutionModeOutputLineStrip:
5083 case SpvExecutionModeOutputTriangleStrip:
5084 vtn_assert(b->shader->info.stage == MESA_SHADER_GEOMETRY);
5085 b->shader->info.gs.output_primitive =
5086 primitive_from_spv_execution_mode(b, mode->exec_mode);
5087 break;
5088
5089 case SpvExecutionModeSpacingEqual:
5090 vtn_assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
5091 b->shader->info.stage == MESA_SHADER_TESS_EVAL);
5092 b->shader->info.tess.spacing = TESS_SPACING_EQUAL;
5093 break;
5094 case SpvExecutionModeSpacingFractionalEven:
5095 vtn_assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
5096 b->shader->info.stage == MESA_SHADER_TESS_EVAL);
5097 b->shader->info.tess.spacing = TESS_SPACING_FRACTIONAL_EVEN;
5098 break;
5099 case SpvExecutionModeSpacingFractionalOdd:
5100 vtn_assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
5101 b->shader->info.stage == MESA_SHADER_TESS_EVAL);
5102 b->shader->info.tess.spacing = TESS_SPACING_FRACTIONAL_ODD;
5103 break;
5104 case SpvExecutionModeVertexOrderCw:
5105 vtn_assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
5106 b->shader->info.stage == MESA_SHADER_TESS_EVAL);
5107 b->shader->info.tess.ccw = false;
5108 break;
5109 case SpvExecutionModeVertexOrderCcw:
5110 vtn_assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
5111 b->shader->info.stage == MESA_SHADER_TESS_EVAL);
5112 b->shader->info.tess.ccw = true;
5113 break;
5114 case SpvExecutionModePointMode:
5115 vtn_assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
5116 b->shader->info.stage == MESA_SHADER_TESS_EVAL);
5117 b->shader->info.tess.point_mode = true;
5118 break;
5119
5120 case SpvExecutionModePixelCenterInteger:
5121 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5122 b->shader->info.fs.pixel_center_integer = true;
5123 break;
5124
5125 case SpvExecutionModeXfb:
5126 b->shader->info.has_transform_feedback_varyings = true;
5127 break;
5128
5129 case SpvExecutionModeVecTypeHint:
5130 break; /* OpenCL */
5131
5132 case SpvExecutionModeContractionOff:
5133 if (b->shader->info.stage != MESA_SHADER_KERNEL)
5134 vtn_warn("ExectionMode only allowed for CL-style kernels: %s",
5135 spirv_executionmode_to_string(mode->exec_mode));
5136 else
5137 b->exact = true;
5138 break;
5139
5140 case SpvExecutionModeStencilRefReplacingEXT:
5141 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5142 break;
5143
5144 case SpvExecutionModeDerivativeGroupQuadsNV:
5145 vtn_assert(b->shader->info.stage == MESA_SHADER_COMPUTE);
5146 b->shader->info.cs.derivative_group = DERIVATIVE_GROUP_QUADS;
5147 break;
5148
5149 case SpvExecutionModeDerivativeGroupLinearNV:
5150 vtn_assert(b->shader->info.stage == MESA_SHADER_COMPUTE);
5151 b->shader->info.cs.derivative_group = DERIVATIVE_GROUP_LINEAR;
5152 break;
5153
5154 case SpvExecutionModePixelInterlockOrderedEXT:
5155 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5156 b->shader->info.fs.pixel_interlock_ordered = true;
5157 break;
5158
5159 case SpvExecutionModePixelInterlockUnorderedEXT:
5160 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5161 b->shader->info.fs.pixel_interlock_unordered = true;
5162 break;
5163
5164 case SpvExecutionModeSampleInterlockOrderedEXT:
5165 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5166 b->shader->info.fs.sample_interlock_ordered = true;
5167 break;
5168
5169 case SpvExecutionModeSampleInterlockUnorderedEXT:
5170 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
5171 b->shader->info.fs.sample_interlock_unordered = true;
5172 break;
5173
5174 case SpvExecutionModeDenormPreserve:
5175 case SpvExecutionModeDenormFlushToZero:
5176 case SpvExecutionModeSignedZeroInfNanPreserve:
5177 case SpvExecutionModeRoundingModeRTE:
5178 case SpvExecutionModeRoundingModeRTZ: {
5179 unsigned execution_mode = 0;
5180 switch (mode->exec_mode) {
5181 case SpvExecutionModeDenormPreserve:
5182 switch (mode->operands[0]) {
5183 case 16: execution_mode = FLOAT_CONTROLS_DENORM_PRESERVE_FP16; break;
5184 case 32: execution_mode = FLOAT_CONTROLS_DENORM_PRESERVE_FP32; break;
5185 case 64: execution_mode = FLOAT_CONTROLS_DENORM_PRESERVE_FP64; break;
5186 default: vtn_fail("Floating point type not supported");
5187 }
5188 break;
5189 case SpvExecutionModeDenormFlushToZero:
5190 switch (mode->operands[0]) {
5191 case 16: execution_mode = FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP16; break;
5192 case 32: execution_mode = FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32; break;
5193 case 64: execution_mode = FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP64; break;
5194 default: vtn_fail("Floating point type not supported");
5195 }
5196 break;
5197 case SpvExecutionModeSignedZeroInfNanPreserve:
5198 switch (mode->operands[0]) {
5199 case 16: execution_mode = FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP16; break;
5200 case 32: execution_mode = FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP32; break;
5201 case 64: execution_mode = FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP64; break;
5202 default: vtn_fail("Floating point type not supported");
5203 }
5204 break;
5205 case SpvExecutionModeRoundingModeRTE:
5206 switch (mode->operands[0]) {
5207 case 16: execution_mode = FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP16; break;
5208 case 32: execution_mode = FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP32; break;
5209 case 64: execution_mode = FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP64; break;
5210 default: vtn_fail("Floating point type not supported");
5211 }
5212 break;
5213 case SpvExecutionModeRoundingModeRTZ:
5214 switch (mode->operands[0]) {
5215 case 16: execution_mode = FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP16; break;
5216 case 32: execution_mode = FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP32; break;
5217 case 64: execution_mode = FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP64; break;
5218 default: vtn_fail("Floating point type not supported");
5219 }
5220 break;
5221 default:
5222 break;
5223 }
5224
5225 b->shader->info.float_controls_execution_mode |= execution_mode;
5226
5227 for (unsigned bit_size = 16; bit_size <= 64; bit_size *= 2) {
5228 vtn_fail_if(nir_is_denorm_flush_to_zero(b->shader->info.float_controls_execution_mode, bit_size) &&
5229 nir_is_denorm_preserve(b->shader->info.float_controls_execution_mode, bit_size),
5230 "Cannot flush to zero and preserve denorms for the same bit size.");
5231 vtn_fail_if(nir_is_rounding_mode_rtne(b->shader->info.float_controls_execution_mode, bit_size) &&
5232 nir_is_rounding_mode_rtz(b->shader->info.float_controls_execution_mode, bit_size),
5233 "Cannot set rounding mode to RTNE and RTZ for the same bit size.");
5234 }
5235 break;
5236 }
5237
5238 case SpvExecutionModeLocalSizeId:
5239 case SpvExecutionModeLocalSizeHintId:
5240 /* Handled later by vtn_handle_execution_mode_id(). */
5241 break;
5242
5243 case SpvExecutionModeSubgroupSize:
5244 vtn_assert(b->shader->info.stage == MESA_SHADER_KERNEL);
5245 b->shader->info.cs.subgroup_size = mode->operands[0];
5246 break;
5247
5248 case SpvExecutionModeSubgroupUniformControlFlowKHR:
5249 /* There's no corresponding SPIR-V capability, so check here. */
5250 vtn_fail_if(!b->options->caps.subgroup_uniform_control_flow,
5251 "SpvExecutionModeSubgroupUniformControlFlowKHR not supported.");
5252 break;
5253
5254 default:
5255 vtn_fail("Unhandled execution mode: %s (%u)",
5256 spirv_executionmode_to_string(mode->exec_mode),
5257 mode->exec_mode);
5258 }
5259 }
5260
5261 static void
vtn_handle_execution_mode_id(struct vtn_builder * b,struct vtn_value * entry_point,const struct vtn_decoration * mode,UNUSED void * data)5262 vtn_handle_execution_mode_id(struct vtn_builder *b, struct vtn_value *entry_point,
5263 const struct vtn_decoration *mode, UNUSED void *data)
5264 {
5265
5266 vtn_assert(b->entry_point == entry_point);
5267
5268 switch (mode->exec_mode) {
5269 case SpvExecutionModeLocalSizeId:
5270 if (gl_shader_stage_uses_workgroup(b->shader->info.stage)) {
5271 b->shader->info.workgroup_size[0] = vtn_constant_uint(b, mode->operands[0]);
5272 b->shader->info.workgroup_size[1] = vtn_constant_uint(b, mode->operands[1]);
5273 b->shader->info.workgroup_size[2] = vtn_constant_uint(b, mode->operands[2]);
5274 } else {
5275 vtn_fail("Execution mode LocalSizeId not supported in stage %s",
5276 _mesa_shader_stage_to_string(b->shader->info.stage));
5277 }
5278 break;
5279
5280 case SpvExecutionModeLocalSizeHintId:
5281 vtn_assert(b->shader->info.stage == MESA_SHADER_KERNEL);
5282 b->shader->info.cs.workgroup_size_hint[0] = vtn_constant_uint(b, mode->operands[0]);
5283 b->shader->info.cs.workgroup_size_hint[1] = vtn_constant_uint(b, mode->operands[1]);
5284 b->shader->info.cs.workgroup_size_hint[2] = vtn_constant_uint(b, mode->operands[2]);
5285 break;
5286
5287 default:
5288 /* Nothing to do. Literal execution modes already handled by
5289 * vtn_handle_execution_mode(). */
5290 break;
5291 }
5292 }
5293
5294 static bool
vtn_handle_variable_or_type_instruction(struct vtn_builder * b,SpvOp opcode,const uint32_t * w,unsigned count)5295 vtn_handle_variable_or_type_instruction(struct vtn_builder *b, SpvOp opcode,
5296 const uint32_t *w, unsigned count)
5297 {
5298 vtn_set_instruction_result_type(b, opcode, w, count);
5299
5300 switch (opcode) {
5301 case SpvOpSource:
5302 case SpvOpSourceContinued:
5303 case SpvOpSourceExtension:
5304 case SpvOpExtension:
5305 case SpvOpCapability:
5306 case SpvOpExtInstImport:
5307 case SpvOpMemoryModel:
5308 case SpvOpEntryPoint:
5309 case SpvOpExecutionMode:
5310 case SpvOpString:
5311 case SpvOpName:
5312 case SpvOpMemberName:
5313 case SpvOpDecorationGroup:
5314 case SpvOpDecorate:
5315 case SpvOpDecorateId:
5316 case SpvOpMemberDecorate:
5317 case SpvOpGroupDecorate:
5318 case SpvOpGroupMemberDecorate:
5319 case SpvOpDecorateString:
5320 case SpvOpMemberDecorateString:
5321 vtn_fail("Invalid opcode types and variables section");
5322 break;
5323
5324 case SpvOpTypeVoid:
5325 case SpvOpTypeBool:
5326 case SpvOpTypeInt:
5327 case SpvOpTypeFloat:
5328 case SpvOpTypeVector:
5329 case SpvOpTypeMatrix:
5330 case SpvOpTypeImage:
5331 case SpvOpTypeSampler:
5332 case SpvOpTypeSampledImage:
5333 case SpvOpTypeArray:
5334 case SpvOpTypeRuntimeArray:
5335 case SpvOpTypeStruct:
5336 case SpvOpTypeOpaque:
5337 case SpvOpTypePointer:
5338 case SpvOpTypeForwardPointer:
5339 case SpvOpTypeFunction:
5340 case SpvOpTypeEvent:
5341 case SpvOpTypeDeviceEvent:
5342 case SpvOpTypeReserveId:
5343 case SpvOpTypeQueue:
5344 case SpvOpTypePipe:
5345 case SpvOpTypeAccelerationStructureKHR:
5346 case SpvOpTypeRayQueryKHR:
5347 vtn_handle_type(b, opcode, w, count);
5348 break;
5349
5350 case SpvOpConstantTrue:
5351 case SpvOpConstantFalse:
5352 case SpvOpConstant:
5353 case SpvOpConstantComposite:
5354 case SpvOpConstantNull:
5355 case SpvOpSpecConstantTrue:
5356 case SpvOpSpecConstantFalse:
5357 case SpvOpSpecConstant:
5358 case SpvOpSpecConstantComposite:
5359 case SpvOpSpecConstantOp:
5360 vtn_handle_constant(b, opcode, w, count);
5361 break;
5362
5363 case SpvOpUndef:
5364 case SpvOpVariable:
5365 case SpvOpConstantSampler:
5366 vtn_handle_variables(b, opcode, w, count);
5367 break;
5368
5369 case SpvOpExtInst: {
5370 struct vtn_value *val = vtn_value(b, w[3], vtn_value_type_extension);
5371 /* NonSemantic extended instructions are acceptable in preamble, others
5372 * will indicate the end of preamble.
5373 */
5374 return val->ext_handler == vtn_handle_non_semantic_instruction;
5375 }
5376
5377 default:
5378 return false; /* End of preamble */
5379 }
5380
5381 return true;
5382 }
5383
5384 static struct vtn_ssa_value *
vtn_nir_select(struct vtn_builder * b,struct vtn_ssa_value * src0,struct vtn_ssa_value * src1,struct vtn_ssa_value * src2)5385 vtn_nir_select(struct vtn_builder *b, struct vtn_ssa_value *src0,
5386 struct vtn_ssa_value *src1, struct vtn_ssa_value *src2)
5387 {
5388 struct vtn_ssa_value *dest = rzalloc(b, struct vtn_ssa_value);
5389 dest->type = src1->type;
5390
5391 if (glsl_type_is_vector_or_scalar(src1->type)) {
5392 dest->def = nir_bcsel(&b->nb, src0->def, src1->def, src2->def);
5393 } else {
5394 unsigned elems = glsl_get_length(src1->type);
5395
5396 dest->elems = ralloc_array(b, struct vtn_ssa_value *, elems);
5397 for (unsigned i = 0; i < elems; i++) {
5398 dest->elems[i] = vtn_nir_select(b, src0,
5399 src1->elems[i], src2->elems[i]);
5400 }
5401 }
5402
5403 return dest;
5404 }
5405
5406 static void
vtn_handle_select(struct vtn_builder * b,SpvOp opcode,const uint32_t * w,unsigned count)5407 vtn_handle_select(struct vtn_builder *b, SpvOp opcode,
5408 const uint32_t *w, unsigned count)
5409 {
5410 /* Handle OpSelect up-front here because it needs to be able to handle
5411 * pointers and not just regular vectors and scalars.
5412 */
5413 struct vtn_value *res_val = vtn_untyped_value(b, w[2]);
5414 struct vtn_value *cond_val = vtn_untyped_value(b, w[3]);
5415 struct vtn_value *obj1_val = vtn_untyped_value(b, w[4]);
5416 struct vtn_value *obj2_val = vtn_untyped_value(b, w[5]);
5417
5418 vtn_fail_if(obj1_val->type != res_val->type ||
5419 obj2_val->type != res_val->type,
5420 "Object types must match the result type in OpSelect");
5421
5422 vtn_fail_if((cond_val->type->base_type != vtn_base_type_scalar &&
5423 cond_val->type->base_type != vtn_base_type_vector) ||
5424 !glsl_type_is_boolean(cond_val->type->type),
5425 "OpSelect must have either a vector of booleans or "
5426 "a boolean as Condition type");
5427
5428 vtn_fail_if(cond_val->type->base_type == vtn_base_type_vector &&
5429 (res_val->type->base_type != vtn_base_type_vector ||
5430 res_val->type->length != cond_val->type->length),
5431 "When Condition type in OpSelect is a vector, the Result "
5432 "type must be a vector of the same length");
5433
5434 switch (res_val->type->base_type) {
5435 case vtn_base_type_scalar:
5436 case vtn_base_type_vector:
5437 case vtn_base_type_matrix:
5438 case vtn_base_type_array:
5439 case vtn_base_type_struct:
5440 /* OK. */
5441 break;
5442 case vtn_base_type_pointer:
5443 /* We need to have actual storage for pointer types. */
5444 vtn_fail_if(res_val->type->type == NULL,
5445 "Invalid pointer result type for OpSelect");
5446 break;
5447 default:
5448 vtn_fail("Result type of OpSelect must be a scalar, composite, or pointer");
5449 }
5450
5451 vtn_push_ssa_value(b, w[2],
5452 vtn_nir_select(b, vtn_ssa_value(b, w[3]),
5453 vtn_ssa_value(b, w[4]),
5454 vtn_ssa_value(b, w[5])));
5455 }
5456
5457 static void
vtn_handle_ptr(struct vtn_builder * b,SpvOp opcode,const uint32_t * w,unsigned count)5458 vtn_handle_ptr(struct vtn_builder *b, SpvOp opcode,
5459 const uint32_t *w, unsigned count)
5460 {
5461 struct vtn_type *type1 = vtn_get_value_type(b, w[3]);
5462 struct vtn_type *type2 = vtn_get_value_type(b, w[4]);
5463 vtn_fail_if(type1->base_type != vtn_base_type_pointer ||
5464 type2->base_type != vtn_base_type_pointer,
5465 "%s operands must have pointer types",
5466 spirv_op_to_string(opcode));
5467 vtn_fail_if(type1->storage_class != type2->storage_class,
5468 "%s operands must have the same storage class",
5469 spirv_op_to_string(opcode));
5470
5471 struct vtn_type *vtn_type = vtn_get_type(b, w[1]);
5472 const struct glsl_type *type = vtn_type->type;
5473
5474 nir_address_format addr_format = vtn_mode_to_address_format(
5475 b, vtn_storage_class_to_mode(b, type1->storage_class, NULL, NULL));
5476
5477 nir_ssa_def *def;
5478
5479 switch (opcode) {
5480 case SpvOpPtrDiff: {
5481 /* OpPtrDiff returns the difference in number of elements (not byte offset). */
5482 unsigned elem_size, elem_align;
5483 glsl_get_natural_size_align_bytes(type1->deref->type,
5484 &elem_size, &elem_align);
5485
5486 def = nir_build_addr_isub(&b->nb,
5487 vtn_get_nir_ssa(b, w[3]),
5488 vtn_get_nir_ssa(b, w[4]),
5489 addr_format);
5490 def = nir_idiv(&b->nb, def, nir_imm_intN_t(&b->nb, elem_size, def->bit_size));
5491 def = nir_i2i(&b->nb, def, glsl_get_bit_size(type));
5492 break;
5493 }
5494
5495 case SpvOpPtrEqual:
5496 case SpvOpPtrNotEqual: {
5497 def = nir_build_addr_ieq(&b->nb,
5498 vtn_get_nir_ssa(b, w[3]),
5499 vtn_get_nir_ssa(b, w[4]),
5500 addr_format);
5501 if (opcode == SpvOpPtrNotEqual)
5502 def = nir_inot(&b->nb, def);
5503 break;
5504 }
5505
5506 default:
5507 unreachable("Invalid ptr operation");
5508 }
5509
5510 vtn_push_nir_ssa(b, w[2], def);
5511 }
5512
5513 static void
vtn_handle_ray_intrinsic(struct vtn_builder * b,SpvOp opcode,const uint32_t * w,unsigned count)5514 vtn_handle_ray_intrinsic(struct vtn_builder *b, SpvOp opcode,
5515 const uint32_t *w, unsigned count)
5516 {
5517 nir_intrinsic_instr *intrin;
5518
5519 switch (opcode) {
5520 case SpvOpTraceNV:
5521 case SpvOpTraceRayKHR: {
5522 intrin = nir_intrinsic_instr_create(b->nb.shader,
5523 nir_intrinsic_trace_ray);
5524
5525 /* The sources are in the same order in the NIR intrinsic */
5526 for (unsigned i = 0; i < 10; i++)
5527 intrin->src[i] = nir_src_for_ssa(vtn_ssa_value(b, w[i + 1])->def);
5528
5529 nir_deref_instr *payload;
5530 if (opcode == SpvOpTraceNV)
5531 payload = vtn_get_call_payload_for_location(b, w[11]);
5532 else
5533 payload = vtn_nir_deref(b, w[11]);
5534 intrin->src[10] = nir_src_for_ssa(&payload->dest.ssa);
5535 nir_builder_instr_insert(&b->nb, &intrin->instr);
5536 break;
5537 }
5538
5539 case SpvOpReportIntersectionKHR: {
5540 intrin = nir_intrinsic_instr_create(b->nb.shader,
5541 nir_intrinsic_report_ray_intersection);
5542 intrin->src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[3])->def);
5543 intrin->src[1] = nir_src_for_ssa(vtn_ssa_value(b, w[4])->def);
5544 nir_ssa_dest_init(&intrin->instr, &intrin->dest, 1, 1, NULL);
5545 nir_builder_instr_insert(&b->nb, &intrin->instr);
5546 vtn_push_nir_ssa(b, w[2], &intrin->dest.ssa);
5547 break;
5548 }
5549
5550 case SpvOpIgnoreIntersectionNV:
5551 intrin = nir_intrinsic_instr_create(b->nb.shader,
5552 nir_intrinsic_ignore_ray_intersection);
5553 nir_builder_instr_insert(&b->nb, &intrin->instr);
5554 break;
5555
5556 case SpvOpTerminateRayNV:
5557 intrin = nir_intrinsic_instr_create(b->nb.shader,
5558 nir_intrinsic_terminate_ray);
5559 nir_builder_instr_insert(&b->nb, &intrin->instr);
5560 break;
5561
5562 case SpvOpExecuteCallableNV:
5563 case SpvOpExecuteCallableKHR: {
5564 intrin = nir_intrinsic_instr_create(b->nb.shader,
5565 nir_intrinsic_execute_callable);
5566 intrin->src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[1])->def);
5567 nir_deref_instr *payload;
5568 if (opcode == SpvOpExecuteCallableNV)
5569 payload = vtn_get_call_payload_for_location(b, w[2]);
5570 else
5571 payload = vtn_nir_deref(b, w[2]);
5572 intrin->src[1] = nir_src_for_ssa(&payload->dest.ssa);
5573 nir_builder_instr_insert(&b->nb, &intrin->instr);
5574 break;
5575 }
5576
5577 default:
5578 vtn_fail_with_opcode("Unhandled opcode", opcode);
5579 }
5580 }
5581
5582 static void
vtn_handle_write_packed_primitive_indices(struct vtn_builder * b,SpvOp opcode,const uint32_t * w,unsigned count)5583 vtn_handle_write_packed_primitive_indices(struct vtn_builder *b, SpvOp opcode,
5584 const uint32_t *w, unsigned count)
5585 {
5586 vtn_assert(opcode == SpvOpWritePackedPrimitiveIndices4x8NV);
5587
5588 /* TODO(mesh): Use or create a primitive that allow the unpacking to
5589 * happen in the backend. What we have here is functional but too
5590 * blunt.
5591 */
5592
5593 struct vtn_type *offset_type = vtn_get_value_type(b, w[1]);
5594 vtn_fail_if(offset_type->base_type != vtn_base_type_scalar ||
5595 offset_type->type != glsl_uint_type(),
5596 "Index Offset type of OpWritePackedPrimitiveIndices4x8NV "
5597 "must be an OpTypeInt with 32-bit Width and 0 Signedness.");
5598
5599 struct vtn_type *packed_type = vtn_get_value_type(b, w[2]);
5600 vtn_fail_if(packed_type->base_type != vtn_base_type_scalar ||
5601 packed_type->type != glsl_uint_type(),
5602 "Packed Indices type of OpWritePackedPrimitiveIndices4x8NV "
5603 "must be an OpTypeInt with 32-bit Width and 0 Signedness.");
5604
5605 nir_deref_instr *indices = NULL;
5606 nir_foreach_variable_with_modes(var, b->nb.shader, nir_var_shader_out) {
5607 if (var->data.location == VARYING_SLOT_PRIMITIVE_INDICES) {
5608 indices = nir_build_deref_var(&b->nb, var);
5609 break;
5610 }
5611 }
5612
5613 /* It may be the case that the variable is not present in the
5614 * entry point interface list.
5615 *
5616 * See https://github.com/KhronosGroup/SPIRV-Registry/issues/104.
5617 */
5618
5619 if (!indices) {
5620 unsigned vertices_per_prim =
5621 num_mesh_vertices_per_primitive(b->shader->info.mesh.primitive_type);
5622 unsigned max_prim_indices =
5623 vertices_per_prim * b->shader->info.mesh.max_primitives_out;
5624 const struct glsl_type *t =
5625 glsl_array_type(glsl_uint_type(), max_prim_indices, 0);
5626 nir_variable *var =
5627 nir_variable_create(b->shader, nir_var_shader_out, t,
5628 "gl_PrimitiveIndicesNV");
5629
5630 var->data.location = VARYING_SLOT_PRIMITIVE_INDICES;
5631 var->data.interpolation = INTERP_MODE_NONE;
5632 indices = nir_build_deref_var(&b->nb, var);
5633 }
5634
5635 nir_ssa_def *offset = vtn_get_nir_ssa(b, w[1]);
5636 nir_ssa_def *packed = vtn_get_nir_ssa(b, w[2]);
5637 nir_ssa_def *unpacked = nir_unpack_bits(&b->nb, packed, 8);
5638 for (int i = 0; i < 4; i++) {
5639 nir_deref_instr *offset_deref =
5640 nir_build_deref_array(&b->nb, indices,
5641 nir_iadd_imm(&b->nb, offset, i));
5642 nir_ssa_def *val = nir_u2u32(&b->nb, nir_channel(&b->nb, unpacked, i));
5643
5644 nir_store_deref(&b->nb, offset_deref, val, 0x1);
5645 }
5646 }
5647
5648 struct ray_query_value {
5649 nir_ray_query_value nir_value;
5650 const struct glsl_type *glsl_type;
5651 };
5652
5653 static struct ray_query_value
spirv_to_nir_type_ray_query_intrinsic(struct vtn_builder * b,SpvOp opcode)5654 spirv_to_nir_type_ray_query_intrinsic(struct vtn_builder *b,
5655 SpvOp opcode)
5656 {
5657 switch (opcode) {
5658 #define CASE(_spv, _nir, _type) case SpvOpRayQueryGet##_spv: \
5659 return (struct ray_query_value) { .nir_value = nir_ray_query_value_##_nir, .glsl_type = _type }
5660 CASE(RayTMinKHR, tmin, glsl_floatN_t_type(32));
5661 CASE(RayFlagsKHR, flags, glsl_uint_type());
5662 CASE(WorldRayDirectionKHR, world_ray_direction, glsl_vec_type(3));
5663 CASE(WorldRayOriginKHR, world_ray_origin, glsl_vec_type(3));
5664 CASE(IntersectionTypeKHR, intersection_type, glsl_uint_type());
5665 CASE(IntersectionTKHR, intersection_t, glsl_floatN_t_type(32));
5666 CASE(IntersectionInstanceCustomIndexKHR, intersection_instance_custom_index, glsl_int_type());
5667 CASE(IntersectionInstanceIdKHR, intersection_instance_id, glsl_int_type());
5668 CASE(IntersectionInstanceShaderBindingTableRecordOffsetKHR, intersection_instance_sbt_index, glsl_uint_type());
5669 CASE(IntersectionGeometryIndexKHR, intersection_geometry_index, glsl_int_type());
5670 CASE(IntersectionPrimitiveIndexKHR, intersection_primitive_index, glsl_int_type());
5671 CASE(IntersectionBarycentricsKHR, intersection_barycentrics, glsl_vec_type(2));
5672 CASE(IntersectionFrontFaceKHR, intersection_front_face, glsl_bool_type());
5673 CASE(IntersectionCandidateAABBOpaqueKHR, intersection_candidate_aabb_opaque, glsl_bool_type());
5674 CASE(IntersectionObjectToWorldKHR, intersection_object_to_world, glsl_matrix_type(glsl_get_base_type(glsl_float_type()), 3, 4));
5675 CASE(IntersectionWorldToObjectKHR, intersection_world_to_object, glsl_matrix_type(glsl_get_base_type(glsl_float_type()), 3, 4));
5676 CASE(IntersectionObjectRayOriginKHR, intersection_object_ray_origin, glsl_vec_type(3));
5677 CASE(IntersectionObjectRayDirectionKHR, intersection_object_ray_direction, glsl_vec_type(3));
5678 #undef CASE
5679 default:
5680 vtn_fail_with_opcode("Unhandled opcode", opcode);
5681 }
5682 }
5683
5684 static void
ray_query_load_intrinsic_create(struct vtn_builder * b,SpvOp opcode,const uint32_t * w,nir_ssa_def * src0,nir_ssa_def * src1)5685 ray_query_load_intrinsic_create(struct vtn_builder *b, SpvOp opcode,
5686 const uint32_t *w, nir_ssa_def *src0,
5687 nir_ssa_def *src1)
5688 {
5689 struct ray_query_value value =
5690 spirv_to_nir_type_ray_query_intrinsic(b, opcode);
5691
5692 if (glsl_type_is_matrix(value.glsl_type)) {
5693 const struct glsl_type *elem_type = glsl_get_array_element(value.glsl_type);
5694 const unsigned elems = glsl_get_length(value.glsl_type);
5695
5696 struct vtn_ssa_value *ssa = vtn_create_ssa_value(b, value.glsl_type);
5697 for (unsigned i = 0; i < elems; i++) {
5698 ssa->elems[i]->def =
5699 nir_build_rq_load(&b->nb,
5700 glsl_get_vector_elements(elem_type),
5701 glsl_get_bit_size(elem_type),
5702 src0, src1,
5703 .base = value.nir_value,
5704 .column = i);
5705 }
5706
5707 vtn_push_ssa_value(b, w[2], ssa);
5708 } else {
5709 assert(glsl_type_is_vector_or_scalar(value.glsl_type));
5710
5711 vtn_push_nir_ssa(b, w[2],
5712 nir_rq_load(&b->nb,
5713 glsl_get_vector_elements(value.glsl_type),
5714 glsl_get_bit_size(value.glsl_type),
5715 src0, src1,
5716 .base = value.nir_value));
5717 }
5718 }
5719
5720 static void
vtn_handle_ray_query_intrinsic(struct vtn_builder * b,SpvOp opcode,const uint32_t * w,unsigned count)5721 vtn_handle_ray_query_intrinsic(struct vtn_builder *b, SpvOp opcode,
5722 const uint32_t *w, unsigned count)
5723 {
5724 switch (opcode) {
5725 case SpvOpRayQueryInitializeKHR: {
5726 nir_intrinsic_instr *intrin =
5727 nir_intrinsic_instr_create(b->nb.shader,
5728 nir_intrinsic_rq_initialize);
5729 /* The sources are in the same order in the NIR intrinsic */
5730 for (unsigned i = 0; i < 8; i++)
5731 intrin->src[i] = nir_src_for_ssa(vtn_ssa_value(b, w[i + 1])->def);
5732 nir_builder_instr_insert(&b->nb, &intrin->instr);
5733 break;
5734 }
5735
5736 case SpvOpRayQueryTerminateKHR:
5737 nir_rq_terminate(&b->nb, vtn_ssa_value(b, w[1])->def);
5738 break;
5739
5740 case SpvOpRayQueryProceedKHR:
5741 vtn_push_nir_ssa(b, w[2],
5742 nir_rq_proceed(&b->nb, 1, vtn_ssa_value(b, w[3])->def));
5743 break;
5744
5745 case SpvOpRayQueryGenerateIntersectionKHR:
5746 nir_rq_generate_intersection(&b->nb,
5747 vtn_ssa_value(b, w[1])->def,
5748 vtn_ssa_value(b, w[2])->def);
5749 break;
5750
5751 case SpvOpRayQueryConfirmIntersectionKHR:
5752 nir_rq_confirm_intersection(&b->nb, vtn_ssa_value(b, w[1])->def);
5753 break;
5754
5755 case SpvOpRayQueryGetIntersectionTKHR:
5756 case SpvOpRayQueryGetIntersectionTypeKHR:
5757 case SpvOpRayQueryGetIntersectionInstanceCustomIndexKHR:
5758 case SpvOpRayQueryGetIntersectionInstanceIdKHR:
5759 case SpvOpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR:
5760 case SpvOpRayQueryGetIntersectionGeometryIndexKHR:
5761 case SpvOpRayQueryGetIntersectionPrimitiveIndexKHR:
5762 case SpvOpRayQueryGetIntersectionBarycentricsKHR:
5763 case SpvOpRayQueryGetIntersectionFrontFaceKHR:
5764 case SpvOpRayQueryGetIntersectionObjectRayDirectionKHR:
5765 case SpvOpRayQueryGetIntersectionObjectRayOriginKHR:
5766 case SpvOpRayQueryGetIntersectionObjectToWorldKHR:
5767 case SpvOpRayQueryGetIntersectionWorldToObjectKHR:
5768 ray_query_load_intrinsic_create(b, opcode, w,
5769 vtn_ssa_value(b, w[3])->def,
5770 nir_i2b1(&b->nb, vtn_ssa_value(b, w[4])->def));
5771 break;
5772
5773 case SpvOpRayQueryGetRayTMinKHR:
5774 case SpvOpRayQueryGetRayFlagsKHR:
5775 case SpvOpRayQueryGetWorldRayDirectionKHR:
5776 case SpvOpRayQueryGetWorldRayOriginKHR:
5777 case SpvOpRayQueryGetIntersectionCandidateAABBOpaqueKHR:
5778 ray_query_load_intrinsic_create(b, opcode, w,
5779 vtn_ssa_value(b, w[3])->def,
5780 /* Committed value is ignored for these */
5781 nir_imm_bool(&b->nb, false));
5782 break;
5783
5784 default:
5785 vtn_fail_with_opcode("Unhandled opcode", opcode);
5786 }
5787 }
5788
5789 static bool
vtn_handle_body_instruction(struct vtn_builder * b,SpvOp opcode,const uint32_t * w,unsigned count)5790 vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode,
5791 const uint32_t *w, unsigned count)
5792 {
5793 switch (opcode) {
5794 case SpvOpLabel:
5795 break;
5796
5797 case SpvOpLoopMerge:
5798 case SpvOpSelectionMerge:
5799 /* This is handled by cfg pre-pass and walk_blocks */
5800 break;
5801
5802 case SpvOpUndef: {
5803 struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_undef);
5804 val->type = vtn_get_type(b, w[1]);
5805 break;
5806 }
5807
5808 case SpvOpExtInst:
5809 vtn_handle_extension(b, opcode, w, count);
5810 break;
5811
5812 case SpvOpVariable:
5813 case SpvOpLoad:
5814 case SpvOpStore:
5815 case SpvOpCopyMemory:
5816 case SpvOpCopyMemorySized:
5817 case SpvOpAccessChain:
5818 case SpvOpPtrAccessChain:
5819 case SpvOpInBoundsAccessChain:
5820 case SpvOpInBoundsPtrAccessChain:
5821 case SpvOpArrayLength:
5822 case SpvOpConvertPtrToU:
5823 case SpvOpConvertUToPtr:
5824 case SpvOpGenericCastToPtrExplicit:
5825 case SpvOpGenericPtrMemSemantics:
5826 case SpvOpSubgroupBlockReadINTEL:
5827 case SpvOpSubgroupBlockWriteINTEL:
5828 case SpvOpConvertUToAccelerationStructureKHR:
5829 vtn_handle_variables(b, opcode, w, count);
5830 break;
5831
5832 case SpvOpFunctionCall:
5833 vtn_handle_function_call(b, opcode, w, count);
5834 break;
5835
5836 case SpvOpSampledImage:
5837 case SpvOpImage:
5838 case SpvOpImageSparseTexelsResident:
5839 case SpvOpImageSampleImplicitLod:
5840 case SpvOpImageSparseSampleImplicitLod:
5841 case SpvOpImageSampleExplicitLod:
5842 case SpvOpImageSparseSampleExplicitLod:
5843 case SpvOpImageSampleDrefImplicitLod:
5844 case SpvOpImageSparseSampleDrefImplicitLod:
5845 case SpvOpImageSampleDrefExplicitLod:
5846 case SpvOpImageSparseSampleDrefExplicitLod:
5847 case SpvOpImageSampleProjImplicitLod:
5848 case SpvOpImageSampleProjExplicitLod:
5849 case SpvOpImageSampleProjDrefImplicitLod:
5850 case SpvOpImageSampleProjDrefExplicitLod:
5851 case SpvOpImageFetch:
5852 case SpvOpImageSparseFetch:
5853 case SpvOpImageGather:
5854 case SpvOpImageSparseGather:
5855 case SpvOpImageDrefGather:
5856 case SpvOpImageSparseDrefGather:
5857 case SpvOpImageQueryLod:
5858 case SpvOpImageQueryLevels:
5859 vtn_handle_texture(b, opcode, w, count);
5860 break;
5861
5862 case SpvOpImageRead:
5863 case SpvOpImageSparseRead:
5864 case SpvOpImageWrite:
5865 case SpvOpImageTexelPointer:
5866 case SpvOpImageQueryFormat:
5867 case SpvOpImageQueryOrder:
5868 vtn_handle_image(b, opcode, w, count);
5869 break;
5870
5871 case SpvOpImageQuerySamples:
5872 case SpvOpImageQuerySizeLod:
5873 case SpvOpImageQuerySize: {
5874 struct vtn_type *image_type = vtn_get_value_type(b, w[3]);
5875 vtn_assert(image_type->base_type == vtn_base_type_image);
5876 if (glsl_type_is_image(image_type->glsl_image)) {
5877 vtn_handle_image(b, opcode, w, count);
5878 } else {
5879 vtn_assert(glsl_type_is_texture(image_type->glsl_image));
5880 vtn_handle_texture(b, opcode, w, count);
5881 }
5882 break;
5883 }
5884
5885 case SpvOpFragmentMaskFetchAMD:
5886 case SpvOpFragmentFetchAMD:
5887 vtn_handle_texture(b, opcode, w, count);
5888 break;
5889
5890 case SpvOpAtomicLoad:
5891 case SpvOpAtomicExchange:
5892 case SpvOpAtomicCompareExchange:
5893 case SpvOpAtomicCompareExchangeWeak:
5894 case SpvOpAtomicIIncrement:
5895 case SpvOpAtomicIDecrement:
5896 case SpvOpAtomicIAdd:
5897 case SpvOpAtomicISub:
5898 case SpvOpAtomicSMin:
5899 case SpvOpAtomicUMin:
5900 case SpvOpAtomicSMax:
5901 case SpvOpAtomicUMax:
5902 case SpvOpAtomicAnd:
5903 case SpvOpAtomicOr:
5904 case SpvOpAtomicXor:
5905 case SpvOpAtomicFAddEXT:
5906 case SpvOpAtomicFMinEXT:
5907 case SpvOpAtomicFMaxEXT:
5908 case SpvOpAtomicFlagTestAndSet: {
5909 struct vtn_value *pointer = vtn_untyped_value(b, w[3]);
5910 if (pointer->value_type == vtn_value_type_image_pointer) {
5911 vtn_handle_image(b, opcode, w, count);
5912 } else {
5913 vtn_assert(pointer->value_type == vtn_value_type_pointer);
5914 vtn_handle_atomics(b, opcode, w, count);
5915 }
5916 break;
5917 }
5918
5919 case SpvOpAtomicStore:
5920 case SpvOpAtomicFlagClear: {
5921 struct vtn_value *pointer = vtn_untyped_value(b, w[1]);
5922 if (pointer->value_type == vtn_value_type_image_pointer) {
5923 vtn_handle_image(b, opcode, w, count);
5924 } else {
5925 vtn_assert(pointer->value_type == vtn_value_type_pointer);
5926 vtn_handle_atomics(b, opcode, w, count);
5927 }
5928 break;
5929 }
5930
5931 case SpvOpSelect:
5932 vtn_handle_select(b, opcode, w, count);
5933 break;
5934
5935 case SpvOpSNegate:
5936 case SpvOpFNegate:
5937 case SpvOpNot:
5938 case SpvOpAny:
5939 case SpvOpAll:
5940 case SpvOpConvertFToU:
5941 case SpvOpConvertFToS:
5942 case SpvOpConvertSToF:
5943 case SpvOpConvertUToF:
5944 case SpvOpUConvert:
5945 case SpvOpSConvert:
5946 case SpvOpFConvert:
5947 case SpvOpQuantizeToF16:
5948 case SpvOpSatConvertSToU:
5949 case SpvOpSatConvertUToS:
5950 case SpvOpPtrCastToGeneric:
5951 case SpvOpGenericCastToPtr:
5952 case SpvOpIsNan:
5953 case SpvOpIsInf:
5954 case SpvOpIsFinite:
5955 case SpvOpIsNormal:
5956 case SpvOpSignBitSet:
5957 case SpvOpLessOrGreater:
5958 case SpvOpOrdered:
5959 case SpvOpUnordered:
5960 case SpvOpIAdd:
5961 case SpvOpFAdd:
5962 case SpvOpISub:
5963 case SpvOpFSub:
5964 case SpvOpIMul:
5965 case SpvOpFMul:
5966 case SpvOpUDiv:
5967 case SpvOpSDiv:
5968 case SpvOpFDiv:
5969 case SpvOpUMod:
5970 case SpvOpSRem:
5971 case SpvOpSMod:
5972 case SpvOpFRem:
5973 case SpvOpFMod:
5974 case SpvOpVectorTimesScalar:
5975 case SpvOpDot:
5976 case SpvOpIAddCarry:
5977 case SpvOpISubBorrow:
5978 case SpvOpUMulExtended:
5979 case SpvOpSMulExtended:
5980 case SpvOpShiftRightLogical:
5981 case SpvOpShiftRightArithmetic:
5982 case SpvOpShiftLeftLogical:
5983 case SpvOpLogicalEqual:
5984 case SpvOpLogicalNotEqual:
5985 case SpvOpLogicalOr:
5986 case SpvOpLogicalAnd:
5987 case SpvOpLogicalNot:
5988 case SpvOpBitwiseOr:
5989 case SpvOpBitwiseXor:
5990 case SpvOpBitwiseAnd:
5991 case SpvOpIEqual:
5992 case SpvOpFOrdEqual:
5993 case SpvOpFUnordEqual:
5994 case SpvOpINotEqual:
5995 case SpvOpFOrdNotEqual:
5996 case SpvOpFUnordNotEqual:
5997 case SpvOpULessThan:
5998 case SpvOpSLessThan:
5999 case SpvOpFOrdLessThan:
6000 case SpvOpFUnordLessThan:
6001 case SpvOpUGreaterThan:
6002 case SpvOpSGreaterThan:
6003 case SpvOpFOrdGreaterThan:
6004 case SpvOpFUnordGreaterThan:
6005 case SpvOpULessThanEqual:
6006 case SpvOpSLessThanEqual:
6007 case SpvOpFOrdLessThanEqual:
6008 case SpvOpFUnordLessThanEqual:
6009 case SpvOpUGreaterThanEqual:
6010 case SpvOpSGreaterThanEqual:
6011 case SpvOpFOrdGreaterThanEqual:
6012 case SpvOpFUnordGreaterThanEqual:
6013 case SpvOpDPdx:
6014 case SpvOpDPdy:
6015 case SpvOpFwidth:
6016 case SpvOpDPdxFine:
6017 case SpvOpDPdyFine:
6018 case SpvOpFwidthFine:
6019 case SpvOpDPdxCoarse:
6020 case SpvOpDPdyCoarse:
6021 case SpvOpFwidthCoarse:
6022 case SpvOpBitFieldInsert:
6023 case SpvOpBitFieldSExtract:
6024 case SpvOpBitFieldUExtract:
6025 case SpvOpBitReverse:
6026 case SpvOpBitCount:
6027 case SpvOpTranspose:
6028 case SpvOpOuterProduct:
6029 case SpvOpMatrixTimesScalar:
6030 case SpvOpVectorTimesMatrix:
6031 case SpvOpMatrixTimesVector:
6032 case SpvOpMatrixTimesMatrix:
6033 case SpvOpUCountLeadingZerosINTEL:
6034 case SpvOpUCountTrailingZerosINTEL:
6035 case SpvOpAbsISubINTEL:
6036 case SpvOpAbsUSubINTEL:
6037 case SpvOpIAddSatINTEL:
6038 case SpvOpUAddSatINTEL:
6039 case SpvOpIAverageINTEL:
6040 case SpvOpUAverageINTEL:
6041 case SpvOpIAverageRoundedINTEL:
6042 case SpvOpUAverageRoundedINTEL:
6043 case SpvOpISubSatINTEL:
6044 case SpvOpUSubSatINTEL:
6045 case SpvOpIMul32x16INTEL:
6046 case SpvOpUMul32x16INTEL:
6047 vtn_handle_alu(b, opcode, w, count);
6048 break;
6049
6050 case SpvOpSDotKHR:
6051 case SpvOpUDotKHR:
6052 case SpvOpSUDotKHR:
6053 case SpvOpSDotAccSatKHR:
6054 case SpvOpUDotAccSatKHR:
6055 case SpvOpSUDotAccSatKHR:
6056 vtn_handle_integer_dot(b, opcode, w, count);
6057 break;
6058
6059 case SpvOpBitcast:
6060 vtn_handle_bitcast(b, w, count);
6061 break;
6062
6063 case SpvOpVectorExtractDynamic:
6064 case SpvOpVectorInsertDynamic:
6065 case SpvOpVectorShuffle:
6066 case SpvOpCompositeConstruct:
6067 case SpvOpCompositeExtract:
6068 case SpvOpCompositeInsert:
6069 case SpvOpCopyLogical:
6070 case SpvOpCopyObject:
6071 vtn_handle_composite(b, opcode, w, count);
6072 break;
6073
6074 case SpvOpEmitVertex:
6075 case SpvOpEndPrimitive:
6076 case SpvOpEmitStreamVertex:
6077 case SpvOpEndStreamPrimitive:
6078 case SpvOpControlBarrier:
6079 case SpvOpMemoryBarrier:
6080 vtn_handle_barrier(b, opcode, w, count);
6081 break;
6082
6083 case SpvOpGroupNonUniformElect:
6084 case SpvOpGroupNonUniformAll:
6085 case SpvOpGroupNonUniformAny:
6086 case SpvOpGroupNonUniformAllEqual:
6087 case SpvOpGroupNonUniformBroadcast:
6088 case SpvOpGroupNonUniformBroadcastFirst:
6089 case SpvOpGroupNonUniformBallot:
6090 case SpvOpGroupNonUniformInverseBallot:
6091 case SpvOpGroupNonUniformBallotBitExtract:
6092 case SpvOpGroupNonUniformBallotBitCount:
6093 case SpvOpGroupNonUniformBallotFindLSB:
6094 case SpvOpGroupNonUniformBallotFindMSB:
6095 case SpvOpGroupNonUniformShuffle:
6096 case SpvOpGroupNonUniformShuffleXor:
6097 case SpvOpGroupNonUniformShuffleUp:
6098 case SpvOpGroupNonUniformShuffleDown:
6099 case SpvOpGroupNonUniformIAdd:
6100 case SpvOpGroupNonUniformFAdd:
6101 case SpvOpGroupNonUniformIMul:
6102 case SpvOpGroupNonUniformFMul:
6103 case SpvOpGroupNonUniformSMin:
6104 case SpvOpGroupNonUniformUMin:
6105 case SpvOpGroupNonUniformFMin:
6106 case SpvOpGroupNonUniformSMax:
6107 case SpvOpGroupNonUniformUMax:
6108 case SpvOpGroupNonUniformFMax:
6109 case SpvOpGroupNonUniformBitwiseAnd:
6110 case SpvOpGroupNonUniformBitwiseOr:
6111 case SpvOpGroupNonUniformBitwiseXor:
6112 case SpvOpGroupNonUniformLogicalAnd:
6113 case SpvOpGroupNonUniformLogicalOr:
6114 case SpvOpGroupNonUniformLogicalXor:
6115 case SpvOpGroupNonUniformQuadBroadcast:
6116 case SpvOpGroupNonUniformQuadSwap:
6117 case SpvOpGroupAll:
6118 case SpvOpGroupAny:
6119 case SpvOpGroupBroadcast:
6120 case SpvOpGroupIAdd:
6121 case SpvOpGroupFAdd:
6122 case SpvOpGroupFMin:
6123 case SpvOpGroupUMin:
6124 case SpvOpGroupSMin:
6125 case SpvOpGroupFMax:
6126 case SpvOpGroupUMax:
6127 case SpvOpGroupSMax:
6128 case SpvOpSubgroupBallotKHR:
6129 case SpvOpSubgroupFirstInvocationKHR:
6130 case SpvOpSubgroupReadInvocationKHR:
6131 case SpvOpSubgroupAllKHR:
6132 case SpvOpSubgroupAnyKHR:
6133 case SpvOpSubgroupAllEqualKHR:
6134 case SpvOpGroupIAddNonUniformAMD:
6135 case SpvOpGroupFAddNonUniformAMD:
6136 case SpvOpGroupFMinNonUniformAMD:
6137 case SpvOpGroupUMinNonUniformAMD:
6138 case SpvOpGroupSMinNonUniformAMD:
6139 case SpvOpGroupFMaxNonUniformAMD:
6140 case SpvOpGroupUMaxNonUniformAMD:
6141 case SpvOpGroupSMaxNonUniformAMD:
6142 case SpvOpSubgroupShuffleINTEL:
6143 case SpvOpSubgroupShuffleDownINTEL:
6144 case SpvOpSubgroupShuffleUpINTEL:
6145 case SpvOpSubgroupShuffleXorINTEL:
6146 vtn_handle_subgroup(b, opcode, w, count);
6147 break;
6148
6149 case SpvOpPtrDiff:
6150 case SpvOpPtrEqual:
6151 case SpvOpPtrNotEqual:
6152 vtn_handle_ptr(b, opcode, w, count);
6153 break;
6154
6155 case SpvOpBeginInvocationInterlockEXT:
6156 nir_begin_invocation_interlock(&b->nb);
6157 break;
6158
6159 case SpvOpEndInvocationInterlockEXT:
6160 nir_end_invocation_interlock(&b->nb);
6161 break;
6162
6163 case SpvOpDemoteToHelperInvocation: {
6164 nir_demote(&b->nb);
6165 break;
6166 }
6167
6168 case SpvOpIsHelperInvocationEXT: {
6169 vtn_push_nir_ssa(b, w[2], nir_is_helper_invocation(&b->nb, 1));
6170 break;
6171 }
6172
6173 case SpvOpReadClockKHR: {
6174 SpvScope scope = vtn_constant_uint(b, w[3]);
6175 nir_scope nir_scope;
6176
6177 switch (scope) {
6178 case SpvScopeDevice:
6179 nir_scope = NIR_SCOPE_DEVICE;
6180 break;
6181 case SpvScopeSubgroup:
6182 nir_scope = NIR_SCOPE_SUBGROUP;
6183 break;
6184 default:
6185 vtn_fail("invalid read clock scope");
6186 }
6187
6188 /* Operation supports two result types: uvec2 and uint64_t. The NIR
6189 * intrinsic gives uvec2, so pack the result for the other case.
6190 */
6191 nir_ssa_def *result = nir_shader_clock(&b->nb, nir_scope);
6192
6193 struct vtn_type *type = vtn_get_type(b, w[1]);
6194 const struct glsl_type *dest_type = type->type;
6195
6196 if (glsl_type_is_vector(dest_type)) {
6197 assert(dest_type == glsl_vector_type(GLSL_TYPE_UINT, 2));
6198 } else {
6199 assert(glsl_type_is_scalar(dest_type));
6200 assert(glsl_get_base_type(dest_type) == GLSL_TYPE_UINT64);
6201 result = nir_pack_64_2x32(&b->nb, result);
6202 }
6203
6204 vtn_push_nir_ssa(b, w[2], result);
6205 break;
6206 }
6207
6208 case SpvOpTraceNV:
6209 case SpvOpTraceRayKHR:
6210 case SpvOpReportIntersectionKHR:
6211 case SpvOpIgnoreIntersectionNV:
6212 case SpvOpTerminateRayNV:
6213 case SpvOpExecuteCallableNV:
6214 case SpvOpExecuteCallableKHR:
6215 vtn_handle_ray_intrinsic(b, opcode, w, count);
6216 break;
6217
6218 case SpvOpRayQueryInitializeKHR:
6219 case SpvOpRayQueryTerminateKHR:
6220 case SpvOpRayQueryGenerateIntersectionKHR:
6221 case SpvOpRayQueryConfirmIntersectionKHR:
6222 case SpvOpRayQueryProceedKHR:
6223 case SpvOpRayQueryGetIntersectionTypeKHR:
6224 case SpvOpRayQueryGetRayTMinKHR:
6225 case SpvOpRayQueryGetRayFlagsKHR:
6226 case SpvOpRayQueryGetIntersectionTKHR:
6227 case SpvOpRayQueryGetIntersectionInstanceCustomIndexKHR:
6228 case SpvOpRayQueryGetIntersectionInstanceIdKHR:
6229 case SpvOpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR:
6230 case SpvOpRayQueryGetIntersectionGeometryIndexKHR:
6231 case SpvOpRayQueryGetIntersectionPrimitiveIndexKHR:
6232 case SpvOpRayQueryGetIntersectionBarycentricsKHR:
6233 case SpvOpRayQueryGetIntersectionFrontFaceKHR:
6234 case SpvOpRayQueryGetIntersectionCandidateAABBOpaqueKHR:
6235 case SpvOpRayQueryGetIntersectionObjectRayDirectionKHR:
6236 case SpvOpRayQueryGetIntersectionObjectRayOriginKHR:
6237 case SpvOpRayQueryGetWorldRayDirectionKHR:
6238 case SpvOpRayQueryGetWorldRayOriginKHR:
6239 case SpvOpRayQueryGetIntersectionObjectToWorldKHR:
6240 case SpvOpRayQueryGetIntersectionWorldToObjectKHR:
6241 vtn_handle_ray_query_intrinsic(b, opcode, w, count);
6242 break;
6243
6244 case SpvOpLifetimeStart:
6245 case SpvOpLifetimeStop:
6246 break;
6247
6248 case SpvOpGroupAsyncCopy:
6249 case SpvOpGroupWaitEvents:
6250 vtn_handle_opencl_core_instruction(b, opcode, w, count);
6251 break;
6252
6253 case SpvOpWritePackedPrimitiveIndices4x8NV:
6254 vtn_handle_write_packed_primitive_indices(b, opcode, w, count);
6255 break;
6256
6257 default:
6258 vtn_fail_with_opcode("Unhandled opcode", opcode);
6259 }
6260
6261 return true;
6262 }
6263
6264 struct vtn_builder*
vtn_create_builder(const uint32_t * words,size_t word_count,gl_shader_stage stage,const char * entry_point_name,const struct spirv_to_nir_options * options)6265 vtn_create_builder(const uint32_t *words, size_t word_count,
6266 gl_shader_stage stage, const char *entry_point_name,
6267 const struct spirv_to_nir_options *options)
6268 {
6269 /* Initialize the vtn_builder object */
6270 struct vtn_builder *b = rzalloc(NULL, struct vtn_builder);
6271 struct spirv_to_nir_options *dup_options =
6272 ralloc(b, struct spirv_to_nir_options);
6273 *dup_options = *options;
6274
6275 b->spirv = words;
6276 b->spirv_word_count = word_count;
6277 b->file = NULL;
6278 b->line = -1;
6279 b->col = -1;
6280 list_inithead(&b->functions);
6281 b->entry_point_stage = stage;
6282 b->entry_point_name = entry_point_name;
6283 b->options = dup_options;
6284
6285 /*
6286 * Handle the SPIR-V header (first 5 dwords).
6287 * Can't use vtx_assert() as the setjmp(3) target isn't initialized yet.
6288 */
6289 if (word_count <= 5)
6290 goto fail;
6291
6292 if (words[0] != SpvMagicNumber) {
6293 vtn_err("words[0] was 0x%x, want 0x%x", words[0], SpvMagicNumber);
6294 goto fail;
6295 }
6296
6297 b->version = words[1];
6298 if (b->version < 0x10000) {
6299 vtn_err("version was 0x%x, want >= 0x10000", b->version);
6300 goto fail;
6301 }
6302
6303 b->generator_id = words[2] >> 16;
6304 uint16_t generator_version = words[2];
6305
6306 /* In GLSLang commit 8297936dd6eb3, their handling of barrier() was fixed
6307 * to provide correct memory semantics on compute shader barrier()
6308 * commands. Prior to that, we need to fix them up ourselves. This
6309 * GLSLang fix caused them to bump to generator version 3.
6310 */
6311 b->wa_glslang_cs_barrier =
6312 (b->generator_id == vtn_generator_glslang_reference_front_end &&
6313 generator_version < 3);
6314
6315 /* Identifying the LLVM-SPIRV translator:
6316 *
6317 * The LLVM-SPIRV translator currently doesn't store any generator ID [1].
6318 * Our use case involving the SPIRV-Tools linker also mean we want to check
6319 * for that tool instead. Finally the SPIRV-Tools linker also stores its
6320 * generator ID in the wrong location [2].
6321 *
6322 * [1] : https://github.com/KhronosGroup/SPIRV-LLVM-Translator/pull/1223
6323 * [2] : https://github.com/KhronosGroup/SPIRV-Tools/pull/4549
6324 */
6325 const bool is_llvm_spirv_translator =
6326 (b->generator_id == 0 &&
6327 generator_version == vtn_generator_spirv_tools_linker) ||
6328 b->generator_id == vtn_generator_spirv_tools_linker;
6329
6330 /* The LLVM-SPIRV translator generates Undef initializers for _local
6331 * variables [1].
6332 *
6333 * [1] : https://github.com/KhronosGroup/SPIRV-LLVM-Translator/issues/1224
6334 */
6335 b->wa_llvm_spirv_ignore_workgroup_initializer =
6336 b->options->environment == NIR_SPIRV_OPENCL && is_llvm_spirv_translator;
6337
6338 /* words[2] == generator magic */
6339 unsigned value_id_bound = words[3];
6340 if (words[4] != 0) {
6341 vtn_err("words[4] was %u, want 0", words[4]);
6342 goto fail;
6343 }
6344
6345 b->value_id_bound = value_id_bound;
6346 b->values = rzalloc_array(b, struct vtn_value, value_id_bound);
6347
6348 if (b->options->environment == NIR_SPIRV_VULKAN && b->version < 0x10400)
6349 b->vars_used_indirectly = _mesa_pointer_set_create(b);
6350
6351 return b;
6352 fail:
6353 ralloc_free(b);
6354 return NULL;
6355 }
6356
6357 static nir_function *
vtn_emit_kernel_entry_point_wrapper(struct vtn_builder * b,nir_function * entry_point)6358 vtn_emit_kernel_entry_point_wrapper(struct vtn_builder *b,
6359 nir_function *entry_point)
6360 {
6361 vtn_assert(entry_point == b->entry_point->func->nir_func);
6362 vtn_fail_if(!entry_point->name, "entry points are required to have a name");
6363 const char *func_name =
6364 ralloc_asprintf(b->shader, "__wrapped_%s", entry_point->name);
6365
6366 vtn_assert(b->shader->info.stage == MESA_SHADER_KERNEL);
6367
6368 nir_function *main_entry_point = nir_function_create(b->shader, func_name);
6369 main_entry_point->impl = nir_function_impl_create(main_entry_point);
6370 nir_builder_init(&b->nb, main_entry_point->impl);
6371 b->nb.cursor = nir_after_cf_list(&main_entry_point->impl->body);
6372 b->func_param_idx = 0;
6373
6374 nir_call_instr *call = nir_call_instr_create(b->nb.shader, entry_point);
6375
6376 for (unsigned i = 0; i < entry_point->num_params; ++i) {
6377 struct vtn_type *param_type = b->entry_point->func->type->params[i];
6378
6379 /* consider all pointers to function memory to be parameters passed
6380 * by value
6381 */
6382 bool is_by_val = param_type->base_type == vtn_base_type_pointer &&
6383 param_type->storage_class == SpvStorageClassFunction;
6384
6385 /* input variable */
6386 nir_variable *in_var = rzalloc(b->nb.shader, nir_variable);
6387
6388 if (is_by_val) {
6389 in_var->data.mode = nir_var_uniform;
6390 in_var->type = param_type->deref->type;
6391 } else if (param_type->base_type == vtn_base_type_image) {
6392 in_var->data.mode = nir_var_image;
6393 in_var->type = param_type->glsl_image;
6394 in_var->data.access =
6395 spirv_to_gl_access_qualifier(b, param_type->access_qualifier);
6396 } else if (param_type->base_type == vtn_base_type_sampler) {
6397 in_var->data.mode = nir_var_uniform;
6398 in_var->type = glsl_bare_sampler_type();
6399 } else {
6400 in_var->data.mode = nir_var_uniform;
6401 in_var->type = param_type->type;
6402 }
6403
6404 in_var->data.read_only = true;
6405 in_var->data.location = i;
6406
6407 nir_shader_add_variable(b->nb.shader, in_var);
6408
6409 /* we have to copy the entire variable into function memory */
6410 if (is_by_val) {
6411 nir_variable *copy_var =
6412 nir_local_variable_create(main_entry_point->impl, in_var->type,
6413 "copy_in");
6414 nir_copy_var(&b->nb, copy_var, in_var);
6415 call->params[i] =
6416 nir_src_for_ssa(&nir_build_deref_var(&b->nb, copy_var)->dest.ssa);
6417 } else if (param_type->base_type == vtn_base_type_image ||
6418 param_type->base_type == vtn_base_type_sampler) {
6419 /* Don't load the var, just pass a deref of it */
6420 call->params[i] = nir_src_for_ssa(&nir_build_deref_var(&b->nb, in_var)->dest.ssa);
6421 } else {
6422 call->params[i] = nir_src_for_ssa(nir_load_var(&b->nb, in_var));
6423 }
6424 }
6425
6426 nir_builder_instr_insert(&b->nb, &call->instr);
6427
6428 return main_entry_point;
6429 }
6430
6431 static bool
can_remove(nir_variable * var,void * data)6432 can_remove(nir_variable *var, void *data)
6433 {
6434 const struct set *vars_used_indirectly = data;
6435 return !_mesa_set_search(vars_used_indirectly, var);
6436 }
6437
6438 nir_shader *
spirv_to_nir(const uint32_t * words,size_t word_count,struct nir_spirv_specialization * spec,unsigned num_spec,gl_shader_stage stage,const char * entry_point_name,const struct spirv_to_nir_options * options,const nir_shader_compiler_options * nir_options)6439 spirv_to_nir(const uint32_t *words, size_t word_count,
6440 struct nir_spirv_specialization *spec, unsigned num_spec,
6441 gl_shader_stage stage, const char *entry_point_name,
6442 const struct spirv_to_nir_options *options,
6443 const nir_shader_compiler_options *nir_options)
6444
6445 {
6446 const uint32_t *word_end = words + word_count;
6447
6448 struct vtn_builder *b = vtn_create_builder(words, word_count,
6449 stage, entry_point_name,
6450 options);
6451
6452 if (b == NULL)
6453 return NULL;
6454
6455 /* See also _vtn_fail() */
6456 if (vtn_setjmp(b->fail_jump)) {
6457 ralloc_free(b);
6458 return NULL;
6459 }
6460
6461 /* Skip the SPIR-V header, handled at vtn_create_builder */
6462 words+= 5;
6463
6464 b->shader = nir_shader_create(b, stage, nir_options, NULL);
6465 b->shader->info.float_controls_execution_mode = options->float_controls_execution_mode;
6466
6467 /* Handle all the preamble instructions */
6468 words = vtn_foreach_instruction(b, words, word_end,
6469 vtn_handle_preamble_instruction);
6470
6471 /* DirectXShaderCompiler and glslang/shaderc both create OpKill from HLSL's
6472 * discard/clip, which uses demote semantics. DirectXShaderCompiler will use
6473 * demote if the extension is enabled, so we disable this workaround in that
6474 * case.
6475 *
6476 * Related glslang issue: https://github.com/KhronosGroup/glslang/issues/2416
6477 */
6478 bool glslang = b->generator_id == vtn_generator_glslang_reference_front_end ||
6479 b->generator_id == vtn_generator_shaderc_over_glslang;
6480 bool dxsc = b->generator_id == vtn_generator_spiregg;
6481 b->convert_discard_to_demote = ((dxsc && !b->uses_demote_to_helper_invocation) ||
6482 (glslang && b->source_lang == SpvSourceLanguageHLSL)) &&
6483 options->caps.demote_to_helper_invocation;
6484
6485 if (!options->create_library && b->entry_point == NULL) {
6486 vtn_fail("Entry point not found for %s shader \"%s\"",
6487 _mesa_shader_stage_to_string(stage), entry_point_name);
6488 ralloc_free(b);
6489 return NULL;
6490 }
6491
6492 /* Ensure a sane address mode is being used for function temps */
6493 assert(nir_address_format_bit_size(b->options->temp_addr_format) == nir_get_ptr_bitsize(b->shader));
6494 assert(nir_address_format_num_components(b->options->temp_addr_format) == 1);
6495
6496 /* Set shader info defaults */
6497 if (stage == MESA_SHADER_GEOMETRY)
6498 b->shader->info.gs.invocations = 1;
6499
6500 /* Parse execution modes. */
6501 if (!options->create_library)
6502 vtn_foreach_execution_mode(b, b->entry_point,
6503 vtn_handle_execution_mode, NULL);
6504
6505 b->specializations = spec;
6506 b->num_specializations = num_spec;
6507
6508 /* Handle all variable, type, and constant instructions */
6509 words = vtn_foreach_instruction(b, words, word_end,
6510 vtn_handle_variable_or_type_instruction);
6511
6512 /* Parse execution modes that depend on IDs. Must happen after we have
6513 * constants parsed.
6514 */
6515 if (!options->create_library)
6516 vtn_foreach_execution_mode(b, b->entry_point,
6517 vtn_handle_execution_mode_id, NULL);
6518
6519 if (b->workgroup_size_builtin) {
6520 vtn_assert(gl_shader_stage_uses_workgroup(stage));
6521 vtn_assert(b->workgroup_size_builtin->type->type ==
6522 glsl_vector_type(GLSL_TYPE_UINT, 3));
6523
6524 nir_const_value *const_size =
6525 b->workgroup_size_builtin->constant->values;
6526
6527 b->shader->info.workgroup_size[0] = const_size[0].u32;
6528 b->shader->info.workgroup_size[1] = const_size[1].u32;
6529 b->shader->info.workgroup_size[2] = const_size[2].u32;
6530 }
6531
6532 /* Set types on all vtn_values */
6533 vtn_foreach_instruction(b, words, word_end, vtn_set_instruction_result_type);
6534
6535 vtn_build_cfg(b, words, word_end);
6536
6537 if (!options->create_library) {
6538 assert(b->entry_point->value_type == vtn_value_type_function);
6539 b->entry_point->func->referenced = true;
6540 }
6541
6542 bool progress;
6543 do {
6544 progress = false;
6545 vtn_foreach_cf_node(node, &b->functions) {
6546 struct vtn_function *func = vtn_cf_node_as_function(node);
6547 if ((options->create_library || func->referenced) && !func->emitted) {
6548 b->const_table = _mesa_pointer_hash_table_create(b);
6549
6550 vtn_function_emit(b, func, vtn_handle_body_instruction);
6551 progress = true;
6552 }
6553 }
6554 } while (progress);
6555
6556 if (!options->create_library) {
6557 vtn_assert(b->entry_point->value_type == vtn_value_type_function);
6558 nir_function *entry_point = b->entry_point->func->nir_func;
6559 vtn_assert(entry_point);
6560
6561 /* post process entry_points with input params */
6562 if (entry_point->num_params && b->shader->info.stage == MESA_SHADER_KERNEL)
6563 entry_point = vtn_emit_kernel_entry_point_wrapper(b, entry_point);
6564
6565 entry_point->is_entrypoint = true;
6566 }
6567
6568 /* structurize the CFG */
6569 nir_lower_goto_ifs(b->shader);
6570
6571 /* A SPIR-V module can have multiple shaders stages and also multiple
6572 * shaders of the same stage. Global variables are declared per-module.
6573 *
6574 * Starting in SPIR-V 1.4 the list of global variables is part of
6575 * OpEntryPoint, so only valid ones will be created. Previous versions
6576 * only have Input and Output variables listed, so remove dead variables to
6577 * clean up the remaining ones.
6578 */
6579 if (!options->create_library && b->version < 0x10400) {
6580 const nir_remove_dead_variables_options dead_opts = {
6581 .can_remove_var = can_remove,
6582 .can_remove_var_data = b->vars_used_indirectly,
6583 };
6584 nir_remove_dead_variables(b->shader, ~(nir_var_function_temp |
6585 nir_var_shader_out |
6586 nir_var_shader_in |
6587 nir_var_system_value),
6588 b->vars_used_indirectly ? &dead_opts : NULL);
6589 }
6590
6591 nir_foreach_variable_in_shader(var, b->shader) {
6592 switch (var->data.mode) {
6593 case nir_var_mem_ubo:
6594 b->shader->info.num_ubos++;
6595 break;
6596 case nir_var_mem_ssbo:
6597 b->shader->info.num_ssbos++;
6598 break;
6599 case nir_var_mem_push_const:
6600 vtn_assert(b->shader->num_uniforms == 0);
6601 b->shader->num_uniforms =
6602 glsl_get_explicit_size(glsl_without_array(var->type), false);
6603 break;
6604 }
6605 }
6606
6607 /* We sometimes generate bogus derefs that, while never used, give the
6608 * validator a bit of heartburn. Run dead code to get rid of them.
6609 */
6610 nir_opt_dce(b->shader);
6611
6612 /* Per SPV_KHR_workgroup_storage_explicit_layout, if one shared variable is
6613 * a Block, all of them will be and Blocks are explicitly laid out.
6614 */
6615 nir_foreach_variable_with_modes(var, b->shader, nir_var_mem_shared) {
6616 if (glsl_type_is_interface(var->type)) {
6617 assert(b->options->caps.workgroup_memory_explicit_layout);
6618 b->shader->info.shared_memory_explicit_layout = true;
6619 break;
6620 }
6621 }
6622 if (b->shader->info.shared_memory_explicit_layout) {
6623 unsigned size = 0;
6624 nir_foreach_variable_with_modes(var, b->shader, nir_var_mem_shared) {
6625 assert(glsl_type_is_interface(var->type));
6626 const bool align_to_stride = false;
6627 size = MAX2(size, glsl_get_explicit_size(var->type, align_to_stride));
6628 }
6629 b->shader->info.shared_size = size;
6630 }
6631
6632 /* Unparent the shader from the vtn_builder before we delete the builder */
6633 ralloc_steal(NULL, b->shader);
6634
6635 nir_shader *shader = b->shader;
6636 ralloc_free(b);
6637
6638 return shader;
6639 }
6640