1 /*
2 * Copyright 2015-2019 Arm Limited
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "spirv_glsl.hpp"
18 #include "GLSL.std.450.h"
19 #include "spirv_common.hpp"
20 #include <algorithm>
21 #include <assert.h>
22 #include <cmath>
23 #include <limits>
24 #include <locale.h>
25 #include <utility>
26
27 #ifndef _WIN32
28 #include <langinfo.h>
29 #endif
30 #include <locale.h>
31
32 using namespace spv;
33 using namespace SPIRV_CROSS_NAMESPACE;
34 using namespace std;
35
36 static bool is_unsigned_opcode(Op op)
37 {
38 // Don't have to be exhaustive, only relevant for legacy target checking ...
39 switch (op)
40 {
41 case OpShiftRightLogical:
42 case OpUGreaterThan:
43 case OpUGreaterThanEqual:
44 case OpULessThan:
45 case OpULessThanEqual:
46 case OpUConvert:
47 case OpUDiv:
48 case OpUMod:
49 case OpUMulExtended:
50 case OpConvertUToF:
51 case OpConvertFToU:
52 return true;
53
54 default:
55 return false;
56 }
57 }
58
59 static bool is_unsigned_glsl_opcode(GLSLstd450 op)
60 {
61 // Don't have to be exhaustive, only relevant for legacy target checking ...
62 switch (op)
63 {
64 case GLSLstd450UClamp:
65 case GLSLstd450UMin:
66 case GLSLstd450UMax:
67 case GLSLstd450FindUMsb:
68 return true;
69
70 default:
71 return false;
72 }
73 }
74
75 static bool packing_is_vec4_padded(BufferPackingStandard packing)
76 {
77 switch (packing)
78 {
79 case BufferPackingHLSLCbuffer:
80 case BufferPackingHLSLCbufferPackOffset:
81 case BufferPackingStd140:
82 case BufferPackingStd140EnhancedLayout:
83 return true;
84
85 default:
86 return false;
87 }
88 }
89
90 static bool packing_is_hlsl(BufferPackingStandard packing)
91 {
92 switch (packing)
93 {
94 case BufferPackingHLSLCbuffer:
95 case BufferPackingHLSLCbufferPackOffset:
96 return true;
97
98 default:
99 return false;
100 }
101 }
102
103 static bool packing_has_flexible_offset(BufferPackingStandard packing)
104 {
105 switch (packing)
106 {
107 case BufferPackingStd140:
108 case BufferPackingStd430:
109 case BufferPackingScalar:
110 case BufferPackingHLSLCbuffer:
111 return false;
112
113 default:
114 return true;
115 }
116 }
117
118 static bool packing_is_scalar(BufferPackingStandard packing)
119 {
120 switch (packing)
121 {
122 case BufferPackingScalar:
123 case BufferPackingScalarEnhancedLayout:
124 return true;
125
126 default:
127 return false;
128 }
129 }
130
131 static BufferPackingStandard packing_to_substruct_packing(BufferPackingStandard packing)
132 {
133 switch (packing)
134 {
135 case BufferPackingStd140EnhancedLayout:
136 return BufferPackingStd140;
137 case BufferPackingStd430EnhancedLayout:
138 return BufferPackingStd430;
139 case BufferPackingHLSLCbufferPackOffset:
140 return BufferPackingHLSLCbuffer;
141 case BufferPackingScalarEnhancedLayout:
142 return BufferPackingScalar;
143 default:
144 return packing;
145 }
146 }
147
148 // Sanitizes underscores for GLSL where multiple underscores in a row are not allowed.
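// For example, an input such as "foo__bar___baz" collapses to "foo_bar_baz"; single
// underscores, including leading or trailing ones, are left untouched.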
149 string CompilerGLSL::sanitize_underscores(const string &str)
150 {
151 string res;
152 res.reserve(str.size());
153
154 bool last_underscore = false;
155 for (auto c : str)
156 {
157 if (c == '_')
158 {
159 if (last_underscore)
160 continue;
161
162 res += c;
163 last_underscore = true;
164 }
165 else
166 {
167 res += c;
168 last_underscore = false;
169 }
170 }
171 return res;
172 }
173
174 void CompilerGLSL::init()
175 {
176 if (ir.source.known)
177 {
178 options.es = ir.source.es;
179 options.version = ir.source.version;
180 }
181
182 // Query the locale to see what the decimal point is.
183 // We'll rely on fixing it up ourselves in the rare case we have a comma-as-decimal locale
184 // rather than setting locales ourselves. Setting locales in a safe and isolated way is rather
185 // tricky.
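// For example, in a de_DE locale the radix character is ',' and naively converted float
// literals would come out as "1,5" rather than "1.5", which is not valid GLSL, so the
// captured radix character is used later to patch such literals back up.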
186 #ifdef _WIN32
187 // On Windows, localeconv uses thread-local storage, so it should be fine.
188 const struct lconv *conv = localeconv();
189 if (conv && conv->decimal_point)
190 current_locale_radix_character = *conv->decimal_point;
191 #elif defined(__ANDROID__) && __ANDROID_API__ < 26
192 // nl_langinfo is not supported on this platform, so fall back to the worse alternative.
193 const struct lconv *conv = localeconv();
194 if (conv && conv->decimal_point)
195 current_locale_radix_character = *conv->decimal_point;
196 #else
197 // localeconv, the portable function, is not MT safe ...
198 const char *decimal_point = nl_langinfo(RADIXCHAR);
199 if (decimal_point && *decimal_point != '\0')
200 current_locale_radix_character = *decimal_point;
201 #endif
202 }
203
204 static const char *to_pls_layout(PlsFormat format)
205 {
206 switch (format)
207 {
208 case PlsR11FG11FB10F:
209 return "layout(r11f_g11f_b10f) ";
210 case PlsR32F:
211 return "layout(r32f) ";
212 case PlsRG16F:
213 return "layout(rg16f) ";
214 case PlsRGB10A2:
215 return "layout(rgb10_a2) ";
216 case PlsRGBA8:
217 return "layout(rgba8) ";
218 case PlsRG16:
219 return "layout(rg16) ";
220 case PlsRGBA8I:
221 return "layout(rgba8i) ";
222 case PlsRG16I:
223 return "layout(rg16i) ";
224 case PlsRGB10A2UI:
225 return "layout(rgb10_a2ui) ";
226 case PlsRGBA8UI:
227 return "layout(rgba8ui) ";
228 case PlsRG16UI:
229 return "layout(rg16ui) ";
230 case PlsR32UI:
231 return "layout(r32ui) ";
232 default:
233 return "";
234 }
235 }
236
237 static SPIRType::BaseType pls_format_to_basetype(PlsFormat format)
238 {
239 switch (format)
240 {
241 default:
242 case PlsR11FG11FB10F:
243 case PlsR32F:
244 case PlsRG16F:
245 case PlsRGB10A2:
246 case PlsRGBA8:
247 case PlsRG16:
248 return SPIRType::Float;
249
250 case PlsRGBA8I:
251 case PlsRG16I:
252 return SPIRType::Int;
253
254 case PlsRGB10A2UI:
255 case PlsRGBA8UI:
256 case PlsRG16UI:
257 case PlsR32UI:
258 return SPIRType::UInt;
259 }
260 }
261
262 static uint32_t pls_format_to_components(PlsFormat format)
263 {
264 switch (format)
265 {
266 default:
267 case PlsR32F:
268 case PlsR32UI:
269 return 1;
270
271 case PlsRG16F:
272 case PlsRG16:
273 case PlsRG16UI:
274 case PlsRG16I:
275 return 2;
276
277 case PlsR11FG11FB10F:
278 return 3;
279
280 case PlsRGB10A2:
281 case PlsRGBA8:
282 case PlsRGBA8I:
283 case PlsRGB10A2UI:
284 case PlsRGBA8UI:
285 return 4;
286 }
287 }
288
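// Returns a swizzle suffix selecting `vecsize` consecutive components starting at `index`,
// e.g. vector_swizzle(2, 1) yields ".yz", vector_swizzle(1, 3) yields ".w", and
// vector_swizzle(4, 0) yields "" since a full vector needs no swizzle.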
289 static const char *vector_swizzle(int vecsize, int index)
290 {
291 static const char *const swizzle[4][4] = {
292 { ".x", ".y", ".z", ".w" },
293 { ".xy", ".yz", ".zw", nullptr },
294 { ".xyz", ".yzw", nullptr, nullptr },
295 #if defined(__GNUC__) && (__GNUC__ == 9)
296 // This works around a GCC 9 bug, see details in https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90947.
297 // This array ends up being compiled as all nullptrs, tripping the assertions below.
298 { "", nullptr, nullptr, "$" },
299 #else
300 { "", nullptr, nullptr, nullptr },
301 #endif
302 };
303
304 assert(vecsize >= 1 && vecsize <= 4);
305 assert(index >= 0 && index < 4);
306 assert(swizzle[vecsize - 1][index]);
307
308 return swizzle[vecsize - 1][index];
309 }
310
311 void CompilerGLSL::reset()
312 {
313 // We do some speculative optimizations which should pretty much always work out,
314 // but just in case the SPIR-V is rather weird, recompile until it's happy.
315 // This typically only means one extra pass.
316 clear_force_recompile();
317
318 // Clear invalid expression tracking.
319 invalid_expressions.clear();
320 current_function = nullptr;
321
322 // Clear temporary usage tracking.
323 expression_usage_counts.clear();
324 forwarded_temporaries.clear();
325 suppressed_usage_tracking.clear();
326
327 // Ensure that we declare phi-variable copies even if the original declaration isn't deferred
328 flushed_phi_variables.clear();
329
330 reset_name_caches();
331
332 ir.for_each_typed_id<SPIRFunction>([&](uint32_t, SPIRFunction &func) {
333 func.active = false;
334 func.flush_undeclared = true;
335 });
336
337 ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { var.dependees.clear(); });
338
339 ir.reset_all_of_type<SPIRExpression>();
340 ir.reset_all_of_type<SPIRAccessChain>();
341
342 statement_count = 0;
343 indent = 0;
344 }
345
346 void CompilerGLSL::remap_pls_variables()
347 {
348 for (auto &input : pls_inputs)
349 {
350 auto &var = get<SPIRVariable>(input.id);
351
352 bool input_is_target = false;
353 if (var.storage == StorageClassUniformConstant)
354 {
355 auto &type = get<SPIRType>(var.basetype);
356 input_is_target = type.image.dim == DimSubpassData;
357 }
358
359 if (var.storage != StorageClassInput && !input_is_target)
360 SPIRV_CROSS_THROW("Can only use in and target variables for PLS inputs.");
361 var.remapped_variable = true;
362 }
363
364 for (auto &output : pls_outputs)
365 {
366 auto &var = get<SPIRVariable>(output.id);
367 if (var.storage != StorageClassOutput)
368 SPIRV_CROSS_THROW("Can only use out variables for PLS outputs.");
369 var.remapped_variable = true;
370 }
371 }
372
373 void CompilerGLSL::find_static_extensions()
374 {
375 ir.for_each_typed_id<SPIRType>([&](uint32_t, const SPIRType &type) {
376 if (type.basetype == SPIRType::Double)
377 {
378 if (options.es)
379 SPIRV_CROSS_THROW("FP64 not supported in ES profile.");
380 if (!options.es && options.version < 400)
381 require_extension_internal("GL_ARB_gpu_shader_fp64");
382 }
383 else if (type.basetype == SPIRType::Int64 || type.basetype == SPIRType::UInt64)
384 {
385 if (options.es)
386 SPIRV_CROSS_THROW("64-bit integers not supported in ES profile.");
387 if (!options.es)
388 require_extension_internal("GL_ARB_gpu_shader_int64");
389 }
390 else if (type.basetype == SPIRType::Half)
391 {
392 require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_float16");
393 if (options.vulkan_semantics)
394 require_extension_internal("GL_EXT_shader_16bit_storage");
395 }
396 else if (type.basetype == SPIRType::SByte || type.basetype == SPIRType::UByte)
397 {
398 require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_int8");
399 if (options.vulkan_semantics)
400 require_extension_internal("GL_EXT_shader_8bit_storage");
401 }
402 else if (type.basetype == SPIRType::Short || type.basetype == SPIRType::UShort)
403 {
404 require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_int16");
405 if (options.vulkan_semantics)
406 require_extension_internal("GL_EXT_shader_16bit_storage");
407 }
408 });
409
410 auto &execution = get_entry_point();
411 switch (execution.model)
412 {
413 case ExecutionModelGLCompute:
414 if (!options.es && options.version < 430)
415 require_extension_internal("GL_ARB_compute_shader");
416 if (options.es && options.version < 310)
417 SPIRV_CROSS_THROW("At least ESSL 3.10 required for compute shaders.");
418 break;
419
420 case ExecutionModelGeometry:
421 if (options.es && options.version < 320)
422 require_extension_internal("GL_EXT_geometry_shader");
423 if (!options.es && options.version < 150)
424 require_extension_internal("GL_ARB_geometry_shader4");
425
426 if (execution.flags.get(ExecutionModeInvocations) && execution.invocations != 1)
427 {
428 // Instanced GS is part of 400 core or this extension.
429 if (!options.es && options.version < 400)
430 require_extension_internal("GL_ARB_gpu_shader5");
431 }
432 break;
433
434 case ExecutionModelTessellationEvaluation:
435 case ExecutionModelTessellationControl:
436 if (options.es && options.version < 320)
437 require_extension_internal("GL_EXT_tessellation_shader");
438 if (!options.es && options.version < 400)
439 require_extension_internal("GL_ARB_tessellation_shader");
440 break;
441
442 case ExecutionModelRayGenerationNV:
443 case ExecutionModelIntersectionNV:
444 case ExecutionModelAnyHitNV:
445 case ExecutionModelClosestHitNV:
446 case ExecutionModelMissNV:
447 case ExecutionModelCallableNV:
448 if (options.es || options.version < 460)
449 SPIRV_CROSS_THROW("Ray tracing shaders require non-es profile with version 460 or above.");
450 require_extension_internal("GL_NV_ray_tracing");
451 break;
452
453 default:
454 break;
455 }
456
457 if (!pls_inputs.empty() || !pls_outputs.empty())
458 require_extension_internal("GL_EXT_shader_pixel_local_storage");
459
460 if (options.separate_shader_objects && !options.es && options.version < 410)
461 require_extension_internal("GL_ARB_separate_shader_objects");
462
463 if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
464 {
465 if (!options.vulkan_semantics)
466 SPIRV_CROSS_THROW("GL_EXT_buffer_reference is only supported in Vulkan GLSL.");
467 if (options.es && options.version < 320)
468 SPIRV_CROSS_THROW("GL_EXT_buffer_reference requires ESSL 320.");
469 else if (!options.es && options.version < 450)
470 SPIRV_CROSS_THROW("GL_EXT_buffer_reference requires GLSL 450.");
471 require_extension_internal("GL_EXT_buffer_reference");
472 }
473 else if (ir.addressing_model != AddressingModelLogical)
474 {
475 SPIRV_CROSS_THROW("Only Logical and PhysicalStorageBuffer64EXT addressing models are supported.");
476 }
477
478 // Check for nonuniform qualifier.
479 // Instead of looping over all decorations to find this, just look at capabilities.
480 for (auto &cap : ir.declared_capabilities)
481 {
482 bool nonuniform_indexing = false;
483 switch (cap)
484 {
485 case CapabilityShaderNonUniformEXT:
486 case CapabilityRuntimeDescriptorArrayEXT:
487 if (!options.vulkan_semantics)
488 SPIRV_CROSS_THROW("GL_EXT_nonuniform_qualifier is only supported in Vulkan GLSL.");
489 require_extension_internal("GL_EXT_nonuniform_qualifier");
490 nonuniform_indexing = true;
491 break;
492
493 default:
494 break;
495 }
496
497 if (nonuniform_indexing)
498 break;
499 }
500 }
501
502 string CompilerGLSL::compile()
503 {
504 if (options.vulkan_semantics)
505 backend.allow_precision_qualifiers = true;
506 backend.force_gl_in_out_block = true;
507 backend.supports_extensions = true;
508 backend.use_array_constructor = true;
509
510 // Scan the SPIR-V to find trivial uses of extensions.
511 fixup_type_alias();
512 reorder_type_alias();
513 build_function_control_flow_graphs_and_analyze();
514 find_static_extensions();
515 fixup_image_load_store_access();
516 update_active_builtins();
517 analyze_image_and_sampler_usage();
518 analyze_interlocked_resource_usage();
519
520 // Shaders might cast unrelated data to pointers of non-block types.
521 // Find all such instances and make sure we can cast the pointers to a synthesized block type.
522 if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
523 analyze_non_block_pointer_types();
524
525 uint32_t pass_count = 0;
526 do
527 {
528 if (pass_count >= 3)
529 SPIRV_CROSS_THROW("Over 3 compilation loops detected. Must be a bug!");
530
531 reset();
532
533 buffer.reset();
534
535 emit_header();
536 emit_resources();
537
538 emit_function(get<SPIRFunction>(ir.default_entry_point), Bitset());
539
540 pass_count++;
541 } while (is_forcing_recompilation());
542
543 // Implement the interlocked wrapper function at the end.
544 // The body was implemented in lieu of main().
545 if (interlocked_is_complex)
546 {
547 statement("void main()");
548 begin_scope();
549 statement("// Interlocks were used in a way not compatible with GLSL, this is very slow.");
550 if (options.es)
551 statement("beginInvocationInterlockNV();");
552 else
553 statement("beginInvocationInterlockARB();");
554 statement("spvMainInterlockedBody();");
555 if (options.es)
556 statement("endInvocationInterlockNV();");
557 else
558 statement("endInvocationInterlockARB();");
559 end_scope();
560 }
561
562 // Entry point in GLSL is always main().
563 get_entry_point().name = "main";
564
565 return buffer.str();
566 }
567
568 std::string CompilerGLSL::get_partial_source()
569 {
570 return buffer.str();
571 }
572
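// Builds the layout(local_size_*) arguments for the compute shader header. For a fixed
// 8x8x1 workgroup this yields "local_size_x = 8", "local_size_y = 8" and "local_size_z = 1";
// when a dimension is a specialization constant it becomes "local_size_x_id = N" in Vulkan
// GLSL, or a reference to the generated constant macro name in plain GLSL.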
573 void CompilerGLSL::build_workgroup_size(SmallVector<string> &arguments, const SpecializationConstant &wg_x,
574 const SpecializationConstant &wg_y, const SpecializationConstant &wg_z)
575 {
576 auto &execution = get_entry_point();
577
578 if (wg_x.id)
579 {
580 if (options.vulkan_semantics)
581 arguments.push_back(join("local_size_x_id = ", wg_x.constant_id));
582 else
583 arguments.push_back(join("local_size_x = ", get<SPIRConstant>(wg_x.id).specialization_constant_macro_name));
584 }
585 else
586 arguments.push_back(join("local_size_x = ", execution.workgroup_size.x));
587
588 if (wg_y.id)
589 {
590 if (options.vulkan_semantics)
591 arguments.push_back(join("local_size_y_id = ", wg_y.constant_id));
592 else
593 arguments.push_back(join("local_size_y = ", get<SPIRConstant>(wg_y.id).specialization_constant_macro_name));
594 }
595 else
596 arguments.push_back(join("local_size_y = ", execution.workgroup_size.y));
597
598 if (wg_z.id)
599 {
600 if (options.vulkan_semantics)
601 arguments.push_back(join("local_size_z_id = ", wg_z.constant_id));
602 else
603 arguments.push_back(join("local_size_z = ", get<SPIRConstant>(wg_z.id).specialization_constant_macro_name));
604 }
605 else
606 arguments.push_back(join("local_size_z = ", execution.workgroup_size.z));
607 }
608
609 void CompilerGLSL::emit_header()
610 {
611 auto &execution = get_entry_point();
612 statement("#version ", options.version, options.es && options.version > 100 ? " es" : "");
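// For example, the statement above emits "#version 450" for desktop GLSL 4.50 and
// "#version 310 es" for an OpenGL ES 3.1 target.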
613
614 if (!options.es && options.version < 420)
615 {
616 // Needed for binding = # on UBOs, etc.
617 if (options.enable_420pack_extension)
618 {
619 statement("#ifdef GL_ARB_shading_language_420pack");
620 statement("#extension GL_ARB_shading_language_420pack : require");
621 statement("#endif");
622 }
623 // Needed for: layout(early_fragment_tests) in;
624 if (execution.flags.get(ExecutionModeEarlyFragmentTests))
625 require_extension_internal("GL_ARB_shader_image_load_store");
626 }
627
628 // Needed for: layout(post_depth_coverage) in;
629 if (execution.flags.get(ExecutionModePostDepthCoverage))
630 require_extension_internal("GL_ARB_post_depth_coverage");
631
632 // Needed for: layout({pixel,sample}_interlock_[un]ordered) in;
633 if (execution.flags.get(ExecutionModePixelInterlockOrderedEXT) ||
634 execution.flags.get(ExecutionModePixelInterlockUnorderedEXT) ||
635 execution.flags.get(ExecutionModeSampleInterlockOrderedEXT) ||
636 execution.flags.get(ExecutionModeSampleInterlockUnorderedEXT))
637 {
638 if (options.es)
639 {
640 if (options.version < 310)
641 SPIRV_CROSS_THROW("At least ESSL 3.10 required for fragment shader interlock.");
642 require_extension_internal("GL_NV_fragment_shader_interlock");
643 }
644 else
645 {
646 if (options.version < 420)
647 require_extension_internal("GL_ARB_shader_image_load_store");
648 require_extension_internal("GL_ARB_fragment_shader_interlock");
649 }
650 }
651
652 for (auto &ext : forced_extensions)
653 {
654 if (ext == "GL_EXT_shader_explicit_arithmetic_types_float16")
655 {
656 // Special case, this extension has a potential fallback to another vendor extension in normal GLSL.
657 // GL_AMD_gpu_shader_half_float is a superset, so try that first.
658 statement("#if defined(GL_AMD_gpu_shader_half_float)");
659 statement("#extension GL_AMD_gpu_shader_half_float : require");
660 if (!options.vulkan_semantics)
661 {
662 statement("#elif defined(GL_NV_gpu_shader5)");
663 statement("#extension GL_NV_gpu_shader5 : require");
664 }
665 else
666 {
667 statement("#elif defined(GL_EXT_shader_explicit_arithmetic_types_float16)");
668 statement("#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require");
669 }
670 statement("#else");
671 statement("#error No extension available for FP16.");
672 statement("#endif");
673 }
674 else if (ext == "GL_EXT_shader_explicit_arithmetic_types_int16")
675 {
676 if (options.vulkan_semantics)
677 statement("#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require");
678 else
679 {
680 statement("#if defined(GL_AMD_gpu_shader_int16)");
681 statement("#extension GL_AMD_gpu_shader_int16 : require");
682 statement("#else");
683 statement("#error No extension available for Int16.");
684 statement("#endif");
685 }
686 }
687 else if (ext == "GL_ARB_post_depth_coverage")
688 {
689 if (options.es)
690 statement("#extension GL_EXT_post_depth_coverage : require");
691 else
692 {
693 statement("#if defined(GL_ARB_post_depth_coverage)");
694 statement("#extension GL_ARB_post_depth_coverage : require");
695 statement("#else");
696 statement("#extension GL_EXT_post_depth_coverage : require");
697 statement("#endif");
698 }
699 }
700 else
701 statement("#extension ", ext, " : require");
702 }
703
704 for (auto &header : header_lines)
705 statement(header);
706
707 SmallVector<string> inputs;
708 SmallVector<string> outputs;
709
710 switch (execution.model)
711 {
712 case ExecutionModelGeometry:
713 outputs.push_back(join("max_vertices = ", execution.output_vertices));
714 if ((execution.flags.get(ExecutionModeInvocations)) && execution.invocations != 1)
715 inputs.push_back(join("invocations = ", execution.invocations));
716 if (execution.flags.get(ExecutionModeInputPoints))
717 inputs.push_back("points");
718 if (execution.flags.get(ExecutionModeInputLines))
719 inputs.push_back("lines");
720 if (execution.flags.get(ExecutionModeInputLinesAdjacency))
721 inputs.push_back("lines_adjacency");
722 if (execution.flags.get(ExecutionModeTriangles))
723 inputs.push_back("triangles");
724 if (execution.flags.get(ExecutionModeInputTrianglesAdjacency))
725 inputs.push_back("triangles_adjacency");
726 if (execution.flags.get(ExecutionModeOutputTriangleStrip))
727 outputs.push_back("triangle_strip");
728 if (execution.flags.get(ExecutionModeOutputPoints))
729 outputs.push_back("points");
730 if (execution.flags.get(ExecutionModeOutputLineStrip))
731 outputs.push_back("line_strip");
732 break;
733
734 case ExecutionModelTessellationControl:
735 if (execution.flags.get(ExecutionModeOutputVertices))
736 outputs.push_back(join("vertices = ", execution.output_vertices));
737 break;
738
739 case ExecutionModelTessellationEvaluation:
740 if (execution.flags.get(ExecutionModeQuads))
741 inputs.push_back("quads");
742 if (execution.flags.get(ExecutionModeTriangles))
743 inputs.push_back("triangles");
744 if (execution.flags.get(ExecutionModeIsolines))
745 inputs.push_back("isolines");
746 if (execution.flags.get(ExecutionModePointMode))
747 inputs.push_back("point_mode");
748
749 if (!execution.flags.get(ExecutionModeIsolines))
750 {
751 if (execution.flags.get(ExecutionModeVertexOrderCw))
752 inputs.push_back("cw");
753 if (execution.flags.get(ExecutionModeVertexOrderCcw))
754 inputs.push_back("ccw");
755 }
756
757 if (execution.flags.get(ExecutionModeSpacingFractionalEven))
758 inputs.push_back("fractional_even_spacing");
759 if (execution.flags.get(ExecutionModeSpacingFractionalOdd))
760 inputs.push_back("fractional_odd_spacing");
761 if (execution.flags.get(ExecutionModeSpacingEqual))
762 inputs.push_back("equal_spacing");
763 break;
764
765 case ExecutionModelGLCompute:
766 {
767 if (execution.workgroup_size.constant != 0)
768 {
769 SpecializationConstant wg_x, wg_y, wg_z;
770 get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);
771
772 // If there are any spec constants on legacy GLSL, defer declaration, we need to set up macro
773 // declarations before we can emit the work group size.
774 if (options.vulkan_semantics ||
775 ((wg_x.id == ConstantID(0)) && (wg_y.id == ConstantID(0)) && (wg_z.id == ConstantID(0))))
776 build_workgroup_size(inputs, wg_x, wg_y, wg_z);
777 }
778 else
779 {
780 inputs.push_back(join("local_size_x = ", execution.workgroup_size.x));
781 inputs.push_back(join("local_size_y = ", execution.workgroup_size.y));
782 inputs.push_back(join("local_size_z = ", execution.workgroup_size.z));
783 }
784 break;
785 }
786
787 case ExecutionModelFragment:
788 if (options.es)
789 {
790 switch (options.fragment.default_float_precision)
791 {
792 case Options::Lowp:
793 statement("precision lowp float;");
794 break;
795
796 case Options::Mediump:
797 statement("precision mediump float;");
798 break;
799
800 case Options::Highp:
801 statement("precision highp float;");
802 break;
803
804 default:
805 break;
806 }
807
808 switch (options.fragment.default_int_precision)
809 {
810 case Options::Lowp:
811 statement("precision lowp int;");
812 break;
813
814 case Options::Mediump:
815 statement("precision mediump int;");
816 break;
817
818 case Options::Highp:
819 statement("precision highp int;");
820 break;
821
822 default:
823 break;
824 }
825 }
826
827 if (execution.flags.get(ExecutionModeEarlyFragmentTests))
828 inputs.push_back("early_fragment_tests");
829 if (execution.flags.get(ExecutionModePostDepthCoverage))
830 inputs.push_back("post_depth_coverage");
831
832 if (execution.flags.get(ExecutionModePixelInterlockOrderedEXT))
833 inputs.push_back("pixel_interlock_ordered");
834 else if (execution.flags.get(ExecutionModePixelInterlockUnorderedEXT))
835 inputs.push_back("pixel_interlock_unordered");
836 else if (execution.flags.get(ExecutionModeSampleInterlockOrderedEXT))
837 inputs.push_back("sample_interlock_ordered");
838 else if (execution.flags.get(ExecutionModeSampleInterlockUnorderedEXT))
839 inputs.push_back("sample_interlock_unordered");
840
841 if (!options.es && execution.flags.get(ExecutionModeDepthGreater))
842 statement("layout(depth_greater) out float gl_FragDepth;");
843 else if (!options.es && execution.flags.get(ExecutionModeDepthLess))
844 statement("layout(depth_less) out float gl_FragDepth;");
845
846 break;
847
848 default:
849 break;
850 }
851
852 if (!inputs.empty())
853 statement("layout(", merge(inputs), ") in;");
854 if (!outputs.empty())
855 statement("layout(", merge(outputs), ") out;");
856
857 statement("");
858 }
859
860 bool CompilerGLSL::type_is_empty(const SPIRType &type)
861 {
862 return type.basetype == SPIRType::Struct && type.member_types.empty();
863 }
864
865 void CompilerGLSL::emit_struct(SPIRType &type)
866 {
867 // Struct types can be stamped out multiple times
868 // with just different offsets, matrix layouts, etc ...
869 // Type-punning with these types is legal, which complicates things
870 // when we are storing struct and array types in an SSBO for example.
871 // If the type master is packed however, we can no longer assume that the struct declaration will be redundant.
872 if (type.type_alias != TypeID(0) &&
873 !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked))
874 return;
875
876 add_resource_name(type.self);
877 auto name = type_to_glsl(type);
878
879 statement(!backend.explicit_struct_type ? "struct " : "", name);
880 begin_scope();
881
882 type.member_name_cache.clear();
883
884 uint32_t i = 0;
885 bool emitted = false;
886 for (auto &member : type.member_types)
887 {
888 add_member_name(type, i);
889 emit_struct_member(type, member, i);
890 i++;
891 emitted = true;
892 }
893
894 // Don't declare empty structs in GLSL, this is not allowed.
895 if (type_is_empty(type) && !backend.supports_empty_struct)
896 {
897 statement("int empty_struct_member;");
898 emitted = true;
899 }
900
901 if (has_extended_decoration(type.self, SPIRVCrossDecorationPaddingTarget))
902 emit_struct_padding_target(type);
903
904 end_scope_decl();
905
906 if (emitted)
907 statement("");
908 }
909
910 string CompilerGLSL::to_interpolation_qualifiers(const Bitset &flags)
911 {
912 string res;
913 //if (flags & (1ull << DecorationSmooth))
914 // res += "smooth ";
915 if (flags.get(DecorationFlat))
916 res += "flat ";
917 if (flags.get(DecorationNoPerspective))
918 res += "noperspective ";
919 if (flags.get(DecorationCentroid))
920 res += "centroid ";
921 if (flags.get(DecorationPatch))
922 res += "patch ";
923 if (flags.get(DecorationSample))
924 res += "sample ";
925 if (flags.get(DecorationInvariant))
926 res += "invariant ";
927 if (flags.get(DecorationExplicitInterpAMD))
928 res += "__explicitInterpAMD ";
929
930 return res;
931 }
932
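// Produces the per-member layout() qualifier used inside block declarations, typically
// something like "layout(row_major) " or "layout(location = 1, component = 2) ", or an
// empty string when no qualifier is required (e.g. on legacy targets).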
933 string CompilerGLSL::layout_for_member(const SPIRType &type, uint32_t index)
934 {
935 if (is_legacy())
936 return "";
937
938 bool is_block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) ||
939 ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
940 if (!is_block)
941 return "";
942
943 auto &memb = ir.meta[type.self].members;
944 if (index >= memb.size())
945 return "";
946 auto &dec = memb[index];
947
948 SmallVector<string> attr;
949
950 // We can only apply layouts on members in block interfaces.
951 // This is a bit problematic because in SPIR-V decorations are applied on the struct types directly.
952 // This is not supported on GLSL, so we have to make the assumption that if a struct within our buffer block struct
953 // has a decoration, it was originally caused by a top-level layout() qualifier in GLSL.
954 //
955 // We would like to go from (SPIR-V style):
956 //
957 // struct Foo { layout(row_major) mat4 matrix; };
958 // buffer UBO { Foo foo; };
959 //
960 // to
961 //
962 // struct Foo { mat4 matrix; }; // GLSL doesn't support any layout shenanigans in raw struct declarations.
963 // buffer UBO { layout(row_major) Foo foo; }; // Apply the layout on top-level.
964 auto flags = combined_decoration_for_member(type, index);
965
966 if (flags.get(DecorationRowMajor))
967 attr.push_back("row_major");
968 // We don't emit any global layouts, so column_major is default.
969 //if (flags & (1ull << DecorationColMajor))
970 // attr.push_back("column_major");
971
972 if (dec.decoration_flags.get(DecorationLocation) && can_use_io_location(type.storage, true))
973 attr.push_back(join("location = ", dec.location));
974
975 // Can only declare component if we can declare location.
976 if (dec.decoration_flags.get(DecorationComponent) && can_use_io_location(type.storage, true))
977 {
978 if (!options.es)
979 {
980 if (options.version < 440 && options.version >= 140)
981 require_extension_internal("GL_ARB_enhanced_layouts");
982 else if (options.version < 140)
983 SPIRV_CROSS_THROW("Component decoration is not supported in targets below GLSL 1.40.");
984 attr.push_back(join("component = ", dec.component));
985 }
986 else
987 SPIRV_CROSS_THROW("Component decoration is not supported in ES targets.");
988 }
989
990 // SPIRVCrossDecorationExplicitOffset is set by layout_for_variable earlier to mark that we need to emit offset qualifiers.
991 // This is only done selectively in GLSL as needed.
992 if (has_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset) &&
993 dec.decoration_flags.get(DecorationOffset))
994 attr.push_back(join("offset = ", dec.offset));
995
996 if (attr.empty())
997 return "";
998
999 string res = "layout(";
1000 res += merge(attr);
1001 res += ") ";
1002 return res;
1003 }
1004
1005 const char *CompilerGLSL::format_to_glsl(spv::ImageFormat format)
1006 {
1007 if (options.es && is_desktop_only_format(format))
1008 SPIRV_CROSS_THROW("Attempting to use image format not supported in ES profile.");
1009
1010 switch (format)
1011 {
1012 case ImageFormatRgba32f:
1013 return "rgba32f";
1014 case ImageFormatRgba16f:
1015 return "rgba16f";
1016 case ImageFormatR32f:
1017 return "r32f";
1018 case ImageFormatRgba8:
1019 return "rgba8";
1020 case ImageFormatRgba8Snorm:
1021 return "rgba8_snorm";
1022 case ImageFormatRg32f:
1023 return "rg32f";
1024 case ImageFormatRg16f:
1025 return "rg16f";
1026 case ImageFormatRgba32i:
1027 return "rgba32i";
1028 case ImageFormatRgba16i:
1029 return "rgba16i";
1030 case ImageFormatR32i:
1031 return "r32i";
1032 case ImageFormatRgba8i:
1033 return "rgba8i";
1034 case ImageFormatRg32i:
1035 return "rg32i";
1036 case ImageFormatRg16i:
1037 return "rg16i";
1038 case ImageFormatRgba32ui:
1039 return "rgba32ui";
1040 case ImageFormatRgba16ui:
1041 return "rgba16ui";
1042 case ImageFormatR32ui:
1043 return "r32ui";
1044 case ImageFormatRgba8ui:
1045 return "rgba8ui";
1046 case ImageFormatRg32ui:
1047 return "rg32ui";
1048 case ImageFormatRg16ui:
1049 return "rg16ui";
1050 case ImageFormatR11fG11fB10f:
1051 return "r11f_g11f_b10f";
1052 case ImageFormatR16f:
1053 return "r16f";
1054 case ImageFormatRgb10A2:
1055 return "rgb10_a2";
1056 case ImageFormatR8:
1057 return "r8";
1058 case ImageFormatRg8:
1059 return "rg8";
1060 case ImageFormatR16:
1061 return "r16";
1062 case ImageFormatRg16:
1063 return "rg16";
1064 case ImageFormatRgba16:
1065 return "rgba16";
1066 case ImageFormatR16Snorm:
1067 return "r16_snorm";
1068 case ImageFormatRg16Snorm:
1069 return "rg16_snorm";
1070 case ImageFormatRgba16Snorm:
1071 return "rgba16_snorm";
1072 case ImageFormatR8Snorm:
1073 return "r8_snorm";
1074 case ImageFormatRg8Snorm:
1075 return "rg8_snorm";
1076 case ImageFormatR8ui:
1077 return "r8ui";
1078 case ImageFormatRg8ui:
1079 return "rg8ui";
1080 case ImageFormatR16ui:
1081 return "r16ui";
1082 case ImageFormatRgb10a2ui:
1083 return "rgb10_a2ui";
1084 case ImageFormatR8i:
1085 return "r8i";
1086 case ImageFormatRg8i:
1087 return "rg8i";
1088 case ImageFormatR16i:
1089 return "r16i";
1090 default:
1091 case ImageFormatUnknown:
1092 return nullptr;
1093 }
1094 }
1095
1096 uint32_t CompilerGLSL::type_to_packed_base_size(const SPIRType &type, BufferPackingStandard)
1097 {
1098 switch (type.basetype)
1099 {
1100 case SPIRType::Double:
1101 case SPIRType::Int64:
1102 case SPIRType::UInt64:
1103 return 8;
1104 case SPIRType::Float:
1105 case SPIRType::Int:
1106 case SPIRType::UInt:
1107 return 4;
1108 case SPIRType::Half:
1109 case SPIRType::Short:
1110 case SPIRType::UShort:
1111 return 2;
1112 case SPIRType::SByte:
1113 case SPIRType::UByte:
1114 return 1;
1115
1116 default:
1117 SPIRV_CROSS_THROW("Unrecognized type in type_to_packed_base_size.");
1118 }
1119 }
1120
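// For reference, the classic std140 cases this computes: a float has alignment 4, a vec2
// has 8, and both vec3 and vec4 have 16 (rule 3 rounds three components up to four). A
// column-major mat3 behaves like an array of three vec3 columns, so each column gets
// 16-byte alignment. std430 keeps the same vector rules but drops the round-up-to-vec4
// requirement for array strides and struct alignment.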
1121 uint32_t CompilerGLSL::type_to_packed_alignment(const SPIRType &type, const Bitset &flags,
1122 BufferPackingStandard packing)
1123 {
1124 // If using PhysicalStorageBufferEXT storage class, this is a pointer,
1125 // and is 64-bit.
1126 if (type.storage == StorageClassPhysicalStorageBufferEXT)
1127 {
1128 if (!type.pointer)
1129 SPIRV_CROSS_THROW("Types in PhysicalStorageBufferEXT must be pointers.");
1130
1131 if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
1132 {
1133 if (packing_is_vec4_padded(packing) && type_is_array_of_pointers(type))
1134 return 16;
1135 else
1136 return 8;
1137 }
1138 else
1139 SPIRV_CROSS_THROW("AddressingModelPhysicalStorageBuffer64EXT must be used for PhysicalStorageBufferEXT.");
1140 }
1141
1142 if (!type.array.empty())
1143 {
1144 uint32_t minimum_alignment = 1;
1145 if (packing_is_vec4_padded(packing))
1146 minimum_alignment = 16;
1147
1148 auto *tmp = &get<SPIRType>(type.parent_type);
1149 while (!tmp->array.empty())
1150 tmp = &get<SPIRType>(tmp->parent_type);
1151
1152 // Get the alignment of the base type, then maybe round up.
1153 return max(minimum_alignment, type_to_packed_alignment(*tmp, flags, packing));
1154 }
1155
1156 if (type.basetype == SPIRType::Struct)
1157 {
1158 // Rule 9. A struct's alignment is the maximum alignment of its members.
1159 uint32_t alignment = 1;
1160 for (uint32_t i = 0; i < type.member_types.size(); i++)
1161 {
1162 auto member_flags = ir.meta[type.self].members[i].decoration_flags;
1163 alignment =
1164 max(alignment, type_to_packed_alignment(get<SPIRType>(type.member_types[i]), member_flags, packing));
1165 }
1166
1167 // In std140, struct alignment is rounded up to 16.
1168 if (packing_is_vec4_padded(packing))
1169 alignment = max(alignment, 16u);
1170
1171 return alignment;
1172 }
1173 else
1174 {
1175 const uint32_t base_alignment = type_to_packed_base_size(type, packing);
1176
1177 // Alignment requirement for scalar block layout is always the alignment for the most basic component.
1178 if (packing_is_scalar(packing))
1179 return base_alignment;
1180
1181 // Vectors are *not* aligned in HLSL, but there's an extra rule where vectors cannot straddle
1182 // a vec4. This is handled outside since that part knows our current offset.
1183 if (type.columns == 1 && packing_is_hlsl(packing))
1184 return base_alignment;
1185
1186 // From 7.6.2.2 in GL 4.5 core spec.
1187 // Rule 1
1188 if (type.vecsize == 1 && type.columns == 1)
1189 return base_alignment;
1190
1191 // Rule 2
1192 if ((type.vecsize == 2 || type.vecsize == 4) && type.columns == 1)
1193 return type.vecsize * base_alignment;
1194
1195 // Rule 3
1196 if (type.vecsize == 3 && type.columns == 1)
1197 return 4 * base_alignment;
1198
1199 // Rule 4 implied. Alignment does not change in std430.
1200
1201 // Rule 5. Column-major matrices are stored as arrays of
1202 // vectors.
1203 if (flags.get(DecorationColMajor) && type.columns > 1)
1204 {
1205 if (packing_is_vec4_padded(packing))
1206 return 4 * base_alignment;
1207 else if (type.vecsize == 3)
1208 return 4 * base_alignment;
1209 else
1210 return type.vecsize * base_alignment;
1211 }
1212
1213 // Rule 6 implied.
1214
1215 // Rule 7.
1216 if (flags.get(DecorationRowMajor) && type.vecsize > 1)
1217 {
1218 if (packing_is_vec4_padded(packing))
1219 return 4 * base_alignment;
1220 else if (type.columns == 3)
1221 return 4 * base_alignment;
1222 else
1223 return type.columns * base_alignment;
1224 }
1225
1226 // Rule 8 implied.
1227 }
1228
1229 SPIRV_CROSS_THROW("Did not find suitable rule for type. Bogus decorations?");
1230 }
1231
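// Example: "float foo[8];" has an array stride of 16 in std140 (each element padded to a
// vec4) but only 4 in std430 and scalar layouts; this difference is the main signal
// buffer_is_packing_standard() uses below when inferring which layout a block was written in.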
1232 uint32_t CompilerGLSL::type_to_packed_array_stride(const SPIRType &type, const Bitset &flags,
1233 BufferPackingStandard packing)
1234 {
1235 // Array stride is equal to aligned size of the underlying type.
1236 uint32_t parent = type.parent_type;
1237 assert(parent);
1238
1239 auto &tmp = get<SPIRType>(parent);
1240
1241 uint32_t size = type_to_packed_size(tmp, flags, packing);
1242 if (tmp.array.empty())
1243 {
1244 uint32_t alignment = type_to_packed_alignment(type, flags, packing);
1245 return (size + alignment - 1) & ~(alignment - 1);
1246 }
1247 else
1248 {
1249 // For multidimensional arrays, array stride always matches size of subtype.
1250 // The alignment cannot change because multidimensional arrays are basically N * M array elements.
1251 return size;
1252 }
1253 }
1254
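// Computes the packed size of a type under the given rules; note that e.g. a vec3 reports
// 12 bytes here even though its alignment is 16, and any trailing padding is accounted for
// by the alignment logic in the callers.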
1255 uint32_t CompilerGLSL::type_to_packed_size(const SPIRType &type, const Bitset &flags, BufferPackingStandard packing)
1256 {
1257 if (!type.array.empty())
1258 {
1259 return to_array_size_literal(type) * type_to_packed_array_stride(type, flags, packing);
1260 }
1261
1262 // If using PhysicalStorageBufferEXT storage class, this is a pointer,
1263 // and is 64-bit.
1264 if (type.storage == StorageClassPhysicalStorageBufferEXT)
1265 {
1266 if (!type.pointer)
1267 SPIRV_CROSS_THROW("Types in PhysicalStorageBufferEXT must be pointers.");
1268
1269 if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
1270 return 8;
1271 else
1272 SPIRV_CROSS_THROW("AddressingModelPhysicalStorageBuffer64EXT must be used for PhysicalStorageBufferEXT.");
1273 }
1274
1275 uint32_t size = 0;
1276
1277 if (type.basetype == SPIRType::Struct)
1278 {
1279 uint32_t pad_alignment = 1;
1280
1281 for (uint32_t i = 0; i < type.member_types.size(); i++)
1282 {
1283 auto member_flags = ir.meta[type.self].members[i].decoration_flags;
1284 auto &member_type = get<SPIRType>(type.member_types[i]);
1285
1286 uint32_t packed_alignment = type_to_packed_alignment(member_type, member_flags, packing);
1287 uint32_t alignment = max(packed_alignment, pad_alignment);
1288
1289 // The next member following a struct member is aligned to the base alignment of the struct that came before.
1290 // GL 4.5 spec, 7.6.2.2.
1291 if (member_type.basetype == SPIRType::Struct)
1292 pad_alignment = packed_alignment;
1293 else
1294 pad_alignment = 1;
1295
1296 size = (size + alignment - 1) & ~(alignment - 1);
1297 size += type_to_packed_size(member_type, member_flags, packing);
1298 }
1299 }
1300 else
1301 {
1302 const uint32_t base_alignment = type_to_packed_base_size(type, packing);
1303
1304 if (packing_is_scalar(packing))
1305 {
1306 size = type.vecsize * type.columns * base_alignment;
1307 }
1308 else
1309 {
1310 if (type.columns == 1)
1311 size = type.vecsize * base_alignment;
1312
1313 if (flags.get(DecorationColMajor) && type.columns > 1)
1314 {
1315 if (packing_is_vec4_padded(packing))
1316 size = type.columns * 4 * base_alignment;
1317 else if (type.vecsize == 3)
1318 size = type.columns * 4 * base_alignment;
1319 else
1320 size = type.columns * type.vecsize * base_alignment;
1321 }
1322
1323 if (flags.get(DecorationRowMajor) && type.vecsize > 1)
1324 {
1325 if (packing_is_vec4_padded(packing))
1326 size = type.vecsize * 4 * base_alignment;
1327 else if (type.columns == 3)
1328 size = type.vecsize * 4 * base_alignment;
1329 else
1330 size = type.vecsize * type.columns * base_alignment;
1331 }
1332 }
1333 }
1334
1335 return size;
1336 }
1337
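// A concrete example of the inference performed below: for "buffer Foo { float a[2]; float b; }",
// SPIR-V offsets a = 0, b = 8 with ArrayStride 4 can only be std430 (or scalar), whereas
// a = 0, b = 32 with ArrayStride 16 matches std140.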
1338 bool CompilerGLSL::buffer_is_packing_standard(const SPIRType &type, BufferPackingStandard packing,
1339 uint32_t start_offset, uint32_t end_offset)
1340 {
1341 // This is very tricky and error prone, but try to be exhaustive and correct here.
1342 // SPIR-V doesn't directly say if we're using std430 or std140.
1343 // SPIR-V communicates this using Offset and ArrayStride decorations (which is what really matters),
1344 // so we have to try to infer whether or not the original GLSL source was std140 or std430 based on this information.
1345 // We do not have to consider shared or packed since these layouts are not allowed in Vulkan SPIR-V (they are useless anyways, and custom offsets would do the same thing).
1346 //
1347 // It is almost certain that we're using std430, but it gets tricky with arrays in particular.
1348 // We will assume std430, but infer std140 if we can prove the struct is not compliant with std430.
1349 //
1350 // The only two differences between std140 and std430 are related to padding alignment/array stride
1351 // in arrays and structs. In std140 they take minimum vec4 alignment.
1352 // std430 only removes the vec4 requirement.
1353
1354 uint32_t offset = 0;
1355 uint32_t pad_alignment = 1;
1356
1357 bool is_top_level_block =
1358 has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock);
1359
1360 for (uint32_t i = 0; i < type.member_types.size(); i++)
1361 {
1362 auto &memb_type = get<SPIRType>(type.member_types[i]);
1363 auto member_flags = ir.meta[type.self].members[i].decoration_flags;
1364
1365 // Verify alignment rules.
1366 uint32_t packed_alignment = type_to_packed_alignment(memb_type, member_flags, packing);
1367
1368 // This is a rather dirty workaround to deal with some cases of OpSpecConstantOp used as array size, e.g:
1369 // layout(constant_id = 0) const int s = 10;
1370 // const int S = s + 5; // SpecConstantOp
1371 // buffer Foo { int data[S]; }; // <-- Very hard for us to deduce a fixed value here,
1372 // we would need full implementation of compile-time constant folding. :(
1373 // If we are the last member of a struct, there might be cases where the actual size of that member is irrelevant
1374 // for our analysis (e.g. unsized arrays).
1375 // This lets us simply ignore that there are spec constant op sized arrays in our buffers.
1376 // Querying size of this member will fail, so just don't call it unless we have to.
1377 //
1378 // This is likely "best effort" we can support without going into unacceptably complicated workarounds.
1379 bool member_can_be_unsized =
1380 is_top_level_block && size_t(i + 1) == type.member_types.size() && !memb_type.array.empty();
1381
1382 uint32_t packed_size = 0;
1383 if (!member_can_be_unsized)
1384 packed_size = type_to_packed_size(memb_type, member_flags, packing);
1385
1386 // We only need to care about this if we have non-array types which can straddle the vec4 boundary.
1387 if (packing_is_hlsl(packing))
1388 {
1389 // If a member straddles across a vec4 boundary, alignment is actually vec4.
1390 uint32_t begin_word = offset / 16;
1391 uint32_t end_word = (offset + packed_size - 1) / 16;
1392 if (begin_word != end_word)
1393 packed_alignment = max(packed_alignment, 16u);
1394 }
1395
1396 uint32_t alignment = max(packed_alignment, pad_alignment);
1397 offset = (offset + alignment - 1) & ~(alignment - 1);
1398
1399 // Field is not in the specified range anymore and we can ignore any further fields.
1400 if (offset >= end_offset)
1401 break;
1402
1403 // The next member following a struct member is aligned to the base alignment of the struct that came before.
1404 // GL 4.5 spec, 7.6.2.2.
1405 if (memb_type.basetype == SPIRType::Struct && !memb_type.pointer)
1406 pad_alignment = packed_alignment;
1407 else
1408 pad_alignment = 1;
1409
1410 // Only care about packing if we are in the given range
1411 if (offset >= start_offset)
1412 {
1413 uint32_t actual_offset = type_struct_member_offset(type, i);
1414
1415 // We only care about offsets in std140, std430, etc ...
1416 // For EnhancedLayout variants, we have the flexibility to choose our own offsets.
1417 if (!packing_has_flexible_offset(packing))
1418 {
1419 if (actual_offset != offset) // This cannot be the packing we're looking for.
1420 return false;
1421 }
1422 else if ((actual_offset & (alignment - 1)) != 0)
1423 {
1424 // We still need to verify that alignment rules are observed, even if we have explicit offset.
1425 return false;
1426 }
1427
1428 // Verify array stride rules.
1429 if (!memb_type.array.empty() && type_to_packed_array_stride(memb_type, member_flags, packing) !=
1430 type_struct_member_array_stride(type, i))
1431 return false;
1432
1433 // Verify that sub-structs also follow packing rules.
1434 // We cannot use enhanced layouts on substructs, so they better be up to spec.
1435 auto substruct_packing = packing_to_substruct_packing(packing);
1436
1437 if (!memb_type.pointer && !memb_type.member_types.empty() &&
1438 !buffer_is_packing_standard(memb_type, substruct_packing))
1439 {
1440 return false;
1441 }
1442 }
1443
1444 // Bump size.
1445 offset += packed_size;
1446 }
1447
1448 return true;
1449 }
1450
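// Determines whether a location specifier can legally be emitted for this storage class on
// the current target, e.g. ESSL before 3.10 cannot place locations on in/out interface
// blocks between stages, and desktop GLSL needs 3.30 for vertex-input/fragment-output locations.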
1451 bool CompilerGLSL::can_use_io_location(StorageClass storage, bool block)
1452 {
1453 // Location specifiers are mandatory in SPIR-V, but they aren't really supported in earlier versions of GLSL.
1454 // Be very explicit here about how to solve the issue.
1455 if ((get_execution_model() != ExecutionModelVertex && storage == StorageClassInput) ||
1456 (get_execution_model() != ExecutionModelFragment && storage == StorageClassOutput))
1457 {
1458 uint32_t minimum_desktop_version = block ? 440 : 410;
1459 // ARB_enhanced_layouts vs ARB_separate_shader_objects ...
1460
1461 if (!options.es && options.version < minimum_desktop_version && !options.separate_shader_objects)
1462 return false;
1463 else if (options.es && options.version < 310)
1464 return false;
1465 }
1466
1467 if ((get_execution_model() == ExecutionModelVertex && storage == StorageClassInput) ||
1468 (get_execution_model() == ExecutionModelFragment && storage == StorageClassOutput))
1469 {
1470 if (options.es && options.version < 300)
1471 return false;
1472 else if (!options.es && options.version < 330)
1473 return false;
1474 }
1475
1476 if (storage == StorageClassUniform || storage == StorageClassUniformConstant || storage == StorageClassPushConstant)
1477 {
1478 if (options.es && options.version < 310)
1479 return false;
1480 else if (!options.es && options.version < 430)
1481 return false;
1482 }
1483
1484 return true;
1485 }
1486
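// Emits the layout() qualifier for a resource variable. For a Vulkan GLSL UBO this is
// typically something like "layout(set = 0, binding = 1, std140) ", and for a storage
// image something like "layout(binding = 0, rgba8) "; legacy targets get no layout at all.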
1487 string CompilerGLSL::layout_for_variable(const SPIRVariable &var)
1488 {
1489 // FIXME: Come up with a better solution for when to disable layouts.
1490 // Layout support should depend on the available extensions as well as which types
1491 // of layouts are used. For now, the simple solution is to just disable
1492 // layouts for legacy versions.
1493 if (is_legacy())
1494 return "";
1495
1496 SmallVector<string> attr;
1497
1498 auto &dec = ir.meta[var.self].decoration;
1499 auto &type = get<SPIRType>(var.basetype);
1500 auto &flags = dec.decoration_flags;
1501 auto typeflags = ir.meta[type.self].decoration.decoration_flags;
1502
1503 if (options.vulkan_semantics && var.storage == StorageClassPushConstant)
1504 attr.push_back("push_constant");
1505 else if (var.storage == StorageClassShaderRecordBufferNV)
1506 attr.push_back("shaderRecordNV");
1507
1508 if (flags.get(DecorationRowMajor))
1509 attr.push_back("row_major");
1510 if (flags.get(DecorationColMajor))
1511 attr.push_back("column_major");
1512
1513 if (options.vulkan_semantics)
1514 {
1515 if (flags.get(DecorationInputAttachmentIndex))
1516 attr.push_back(join("input_attachment_index = ", dec.input_attachment));
1517 }
1518
1519 bool is_block = has_decoration(type.self, DecorationBlock);
1520 if (flags.get(DecorationLocation) && can_use_io_location(var.storage, is_block))
1521 {
1522 Bitset combined_decoration;
1523 for (uint32_t i = 0; i < ir.meta[type.self].members.size(); i++)
1524 combined_decoration.merge_or(combined_decoration_for_member(type, i));
1525
1526 // If our members have location decorations, we don't need to
1527 // emit location decorations at the top as well (looks weird).
1528 if (!combined_decoration.get(DecorationLocation))
1529 attr.push_back(join("location = ", dec.location));
1530 }
1531
1532 // Can only declare Component if we can declare location.
1533 if (flags.get(DecorationComponent) && can_use_io_location(var.storage, is_block))
1534 {
1535 if (!options.es)
1536 {
1537 if (options.version < 440 && options.version >= 140)
1538 require_extension_internal("GL_ARB_enhanced_layouts");
1539 else if (options.version < 140)
1540 SPIRV_CROSS_THROW("Component decoration is not supported in targets below GLSL 1.40.");
1541 attr.push_back(join("component = ", dec.component));
1542 }
1543 else
1544 SPIRV_CROSS_THROW("Component decoration is not supported in ES targets.");
1545 }
1546
1547 if (flags.get(DecorationIndex))
1548 attr.push_back(join("index = ", dec.index));
1549
1550 // Do not emit set = decoration in regular GLSL output, but
1551 // we need to preserve it in Vulkan GLSL mode.
1552 if (var.storage != StorageClassPushConstant && var.storage != StorageClassShaderRecordBufferNV)
1553 {
1554 if (flags.get(DecorationDescriptorSet) && options.vulkan_semantics)
1555 attr.push_back(join("set = ", dec.set));
1556 }
1557
1558 bool push_constant_block = options.vulkan_semantics && var.storage == StorageClassPushConstant;
1559 bool ssbo_block = var.storage == StorageClassStorageBuffer || var.storage == StorageClassShaderRecordBufferNV ||
1560 (var.storage == StorageClassUniform && typeflags.get(DecorationBufferBlock));
1561 bool emulated_ubo = var.storage == StorageClassPushConstant && options.emit_push_constant_as_uniform_buffer;
1562 bool ubo_block = var.storage == StorageClassUniform && typeflags.get(DecorationBlock);
1563
1564 // GL 3.0/GLSL 1.30 is not considered legacy, but it doesn't have UBOs ...
1565 bool can_use_buffer_blocks = (options.es && options.version >= 300) || (!options.es && options.version >= 140);
1566
1567 // Pretend there are no UBOs when the options say so.
1568 if (ubo_block && options.emit_uniform_buffer_as_plain_uniforms)
1569 can_use_buffer_blocks = false;
1570
1571 bool can_use_binding;
1572 if (options.es)
1573 can_use_binding = options.version >= 310;
1574 else
1575 can_use_binding = options.enable_420pack_extension || (options.version >= 420);
1576
1577 // Make sure we don't emit binding layout for a classic uniform on GLSL 1.30.
1578 if (!can_use_buffer_blocks && var.storage == StorageClassUniform)
1579 can_use_binding = false;
1580
1581 if (var.storage == StorageClassShaderRecordBufferNV)
1582 can_use_binding = false;
1583
1584 if (can_use_binding && flags.get(DecorationBinding))
1585 attr.push_back(join("binding = ", dec.binding));
1586
1587 if (flags.get(DecorationOffset))
1588 attr.push_back(join("offset = ", dec.offset));
1589
1590 // Instead of adding explicit offsets for every element here, just assume we're using std140 or std430.
1591 // If SPIR-V does not comply with either layout, we cannot really work around it.
1592 if (can_use_buffer_blocks && (ubo_block || emulated_ubo))
1593 {
1594 attr.push_back(buffer_to_packing_standard(type, false));
1595 }
1596 else if (can_use_buffer_blocks && (push_constant_block || ssbo_block))
1597 {
1598 attr.push_back(buffer_to_packing_standard(type, true));
1599 }
1600
1601 // For images, the type itself adds a layout qualifier.
1602 // Only emit the format for storage images.
1603 if (type.basetype == SPIRType::Image && type.image.sampled == 2)
1604 {
1605 const char *fmt = format_to_glsl(type.image.format);
1606 if (fmt)
1607 attr.push_back(fmt);
1608 }
1609
1610 if (attr.empty())
1611 return "";
1612
1613 string res = "layout(";
1614 res += merge(attr);
1615 res += ") ";
1616 return res;
1617 }
1618
1619 string CompilerGLSL::buffer_to_packing_standard(const SPIRType &type, bool support_std430_without_scalar_layout)
1620 {
1621 if (support_std430_without_scalar_layout && buffer_is_packing_standard(type, BufferPackingStd430))
1622 return "std430";
1623 else if (buffer_is_packing_standard(type, BufferPackingStd140))
1624 return "std140";
1625 else if (options.vulkan_semantics && buffer_is_packing_standard(type, BufferPackingScalar))
1626 {
1627 require_extension_internal("GL_EXT_scalar_block_layout");
1628 return "scalar";
1629 }
1630 else if (support_std430_without_scalar_layout &&
1631 buffer_is_packing_standard(type, BufferPackingStd430EnhancedLayout))
1632 {
1633 if (options.es && !options.vulkan_semantics)
1634 SPIRV_CROSS_THROW("Push constant block cannot be expressed as either std430 or std140. ES-targets do "
1635 "not support GL_ARB_enhanced_layouts.");
1636 if (!options.es && !options.vulkan_semantics && options.version < 440)
1637 require_extension_internal("GL_ARB_enhanced_layouts");
1638
1639 set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
1640 return "std430";
1641 }
1642 else if (buffer_is_packing_standard(type, BufferPackingStd140EnhancedLayout))
1643 {
1644 // Fallback time. We might be able to use the ARB_enhanced_layouts to deal with this difference,
1645 // however, we can only use layout(offset) on the block itself, not any substructs, so the substructs better be the appropriate layout.
1646 // Enhanced layouts seem to always work in Vulkan GLSL, so no need for extensions there.
1647 if (options.es && !options.vulkan_semantics)
1648 SPIRV_CROSS_THROW("Push constant block cannot be expressed as either std430 or std140. ES-targets do "
1649 "not support GL_ARB_enhanced_layouts.");
1650 if (!options.es && !options.vulkan_semantics && options.version < 440)
1651 require_extension_internal("GL_ARB_enhanced_layouts");
1652
1653 set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
1654 return "std140";
1655 }
1656 else if (options.vulkan_semantics && buffer_is_packing_standard(type, BufferPackingScalarEnhancedLayout))
1657 {
1658 set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
1659 require_extension_internal("GL_EXT_scalar_block_layout");
1660 return "scalar";
1661 }
1662 else if (!support_std430_without_scalar_layout && options.vulkan_semantics &&
1663 buffer_is_packing_standard(type, BufferPackingStd430))
1664 {
1665 // UBOs can support std430 with GL_EXT_scalar_block_layout.
1666 require_extension_internal("GL_EXT_scalar_block_layout");
1667 return "std430";
1668 }
1669 else if (!support_std430_without_scalar_layout && options.vulkan_semantics &&
1670 buffer_is_packing_standard(type, BufferPackingStd430EnhancedLayout))
1671 {
1672 // UBOs can support std430 with GL_EXT_scalar_block_layout.
1673 set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
1674 require_extension_internal("GL_EXT_scalar_block_layout");
1675 return "std430";
1676 }
1677 else
1678 {
1679 SPIRV_CROSS_THROW("Buffer block cannot be expressed as any of std430, std140, scalar, even with enhanced "
1680 "layouts. You can try flattening this block to support a more flexible layout.");
1681 }
1682 }
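// Sketch of the effect of buffer_to_packing_standard() above (names are illustrative): an SSBO whose
// members already satisfy std430 rules is declared roughly as
//   layout(std430) buffer SSBO { ... };
// while a UBO that only fits scalar packing under Vulkan semantics would additionally require
//   #extension GL_EXT_scalar_block_layout : require
// and use layout(scalar) instead. The "EnhancedLayout" fallbacks keep the std140/std430 keyword but
// force explicit member offsets via SPIRVCrossDecorationExplicitOffset.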
1683
1684 void CompilerGLSL::emit_push_constant_block(const SPIRVariable &var)
1685 {
1686 if (flattened_buffer_blocks.count(var.self))
1687 emit_buffer_block_flattened(var);
1688 else if (options.vulkan_semantics)
1689 emit_push_constant_block_vulkan(var);
1690 else if (options.emit_push_constant_as_uniform_buffer)
1691 emit_buffer_block_native(var);
1692 else
1693 emit_push_constant_block_glsl(var);
1694 }
1695
1696 void CompilerGLSL::emit_push_constant_block_vulkan(const SPIRVariable &var)
1697 {
1698 emit_buffer_block(var);
1699 }
1700
1701 void CompilerGLSL::emit_push_constant_block_glsl(const SPIRVariable &var)
1702 {
1703 // OpenGL has no concept of push constant blocks, so implement it as a plain uniform struct.
1704 auto &type = get<SPIRType>(var.basetype);
1705
1706 auto &flags = ir.meta[var.self].decoration.decoration_flags;
1707 flags.clear(DecorationBinding);
1708 flags.clear(DecorationDescriptorSet);
1709
1710 #if 0
1711 if (flags & ((1ull << DecorationBinding) | (1ull << DecorationDescriptorSet)))
1712 SPIRV_CROSS_THROW("Push constant blocks cannot be compiled to GLSL with Binding or Set syntax. "
1713 "Remap to location with reflection API first or disable these decorations.");
1714 #endif
1715
1716 // We're emitting the push constant block as a regular struct, so disable the block qualifier temporarily.
1717 // Otherwise, we will end up emitting layout() qualifiers on naked structs, which is not allowed.
1718 auto &block_flags = ir.meta[type.self].decoration.decoration_flags;
1719 bool block_flag = block_flags.get(DecorationBlock);
1720 block_flags.clear(DecorationBlock);
1721
1722 emit_struct(type);
1723
1724 if (block_flag)
1725 block_flags.set(DecorationBlock);
1726
1727 emit_uniform(var);
1728 statement("");
1729 }
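// Rough illustration of the fallback above (block and instance names are hypothetical):
//   layout(push_constant) uniform Push { mat4 mvp; } pc;   // Vulkan GLSL view of the input
// is emitted for plain GLSL as a struct plus a uniform of that struct type:
//   struct Push { mat4 mvp; };
//   uniform Push pc;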
1730
1731 void CompilerGLSL::emit_buffer_block(const SPIRVariable &var)
1732 {
1733 auto &type = get<SPIRType>(var.basetype);
1734 bool ubo_block = var.storage == StorageClassUniform && has_decoration(type.self, DecorationBlock);
1735
1736 if (flattened_buffer_blocks.count(var.self))
1737 emit_buffer_block_flattened(var);
1738 else if (is_legacy() || (!options.es && options.version == 130) ||
1739 (ubo_block && options.emit_uniform_buffer_as_plain_uniforms))
1740 emit_buffer_block_legacy(var);
1741 else
1742 emit_buffer_block_native(var);
1743 }
1744
1745 void CompilerGLSL::emit_buffer_block_legacy(const SPIRVariable &var)
1746 {
1747 auto &type = get<SPIRType>(var.basetype);
1748 bool ssbo = var.storage == StorageClassStorageBuffer ||
1749 ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
1750 if (ssbo)
1751 SPIRV_CROSS_THROW("SSBOs not supported in legacy targets.");
1752
1753 // We're emitting the buffer block as a regular struct, so disable the block qualifier temporarily.
1754 // Otherwise, we will end up emitting layout() qualifiers on naked structs, which is not allowed.
1755 auto &block_flags = ir.meta[type.self].decoration.decoration_flags;
1756 bool block_flag = block_flags.get(DecorationBlock);
1757 block_flags.clear(DecorationBlock);
1758 emit_struct(type);
1759 if (block_flag)
1760 block_flags.set(DecorationBlock);
1761 emit_uniform(var);
1762 statement("");
1763 }
1764
1765 void CompilerGLSL::emit_buffer_reference_block(SPIRType &type, bool forward_declaration)
1766 {
1767 string buffer_name;
1768
1769 if (forward_declaration)
1770 {
1771 // Block names should never alias, but from HLSL input they can, because block types are reused for UAVs ...
1772 // Allow an aliased name here since we might be declaring the block twice: once forward-declared as a buffer reference, and once as the proper declaration.
1773 // The names must match up.
1774 buffer_name = to_name(type.self, false);
1775
1776 // Shaders never use the block by interface name, so we don't
1777 // have to track this other than updating name caches.
1778 // If we have a collision for any reason, just fall back immediately.
1779 if (ir.meta[type.self].decoration.alias.empty() ||
1780 block_ssbo_names.find(buffer_name) != end(block_ssbo_names) ||
1781 resource_names.find(buffer_name) != end(resource_names))
1782 {
1783 buffer_name = join("_", type.self);
1784 }
1785
1786 // Make sure we get something unique for both global name scope and block name scope.
1787 // See GLSL 4.5 spec: section 4.3.9 for details.
1788 add_variable(block_ssbo_names, resource_names, buffer_name);
1789
1790 // If for some reason buffer_name is an illegal name, make a final fallback to a workaround name.
1791 // This cannot conflict with anything else, so we're safe now.
1792 // We cannot reuse this fallback name in either the global scope (blocked by block_names) or the block name scope.
1793 if (buffer_name.empty())
1794 buffer_name = join("_", type.self);
1795
1796 block_names.insert(buffer_name);
1797 block_ssbo_names.insert(buffer_name);
1798 }
1799 else if (type.basetype != SPIRType::Struct)
1800 buffer_name = type_to_glsl(type);
1801 else
1802 buffer_name = to_name(type.self, false);
1803
1804 if (!forward_declaration)
1805 {
1806 if (type.basetype == SPIRType::Struct)
1807 statement("layout(buffer_reference, ", buffer_to_packing_standard(type, true), ") buffer ", buffer_name);
1808 else
1809 statement("layout(buffer_reference) buffer ", buffer_name);
1810
1811 begin_scope();
1812
1813 if (type.basetype == SPIRType::Struct)
1814 {
1815 type.member_name_cache.clear();
1816
1817 uint32_t i = 0;
1818 for (auto &member : type.member_types)
1819 {
1820 add_member_name(type, i);
1821 emit_struct_member(type, member, i);
1822 i++;
1823 }
1824 }
1825 else
1826 {
1827 auto &pointee_type = get_pointee_type(type);
1828 statement(type_to_glsl(pointee_type), " value", type_to_array_glsl(pointee_type), ";");
1829 }
1830
1831 end_scope_decl();
1832 statement("");
1833 }
1834 else
1835 {
1836 statement("layout(buffer_reference) buffer ", buffer_name, ";");
1837 }
1838 }
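// Illustrative output of the two passes above for a struct pointee named "Node" (hypothetical name):
//   layout(buffer_reference) buffer Node;                      // forward declaration pass
//   layout(buffer_reference, std430) buffer Node { ... };      // full declaration pass
// Non-struct pointees are wrapped in a block with a single member called "value" instead.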
1839
1840 void CompilerGLSL::emit_buffer_block_native(const SPIRVariable &var)
1841 {
1842 auto &type = get<SPIRType>(var.basetype);
1843
1844 Bitset flags = ir.get_buffer_block_flags(var);
1845 bool ssbo = var.storage == StorageClassStorageBuffer || var.storage == StorageClassShaderRecordBufferNV ||
1846 ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
1847 bool is_restrict = ssbo && flags.get(DecorationRestrict);
1848 bool is_writeonly = ssbo && flags.get(DecorationNonReadable);
1849 bool is_readonly = ssbo && flags.get(DecorationNonWritable);
1850 bool is_coherent = ssbo && flags.get(DecorationCoherent);
1851
1852 // Block names should never alias, but from HLSL input they can, because block types are reused for UAVs ...
1853 auto buffer_name = to_name(type.self, false);
1854
1855 auto &block_namespace = ssbo ? block_ssbo_names : block_ubo_names;
1856
1857 // Shaders never use the block by interface name, so we don't
1858 // have to track this other than updating name caches.
1859 // If we have a collision for any reason, just fall back immediately.
1860 if (ir.meta[type.self].decoration.alias.empty() || block_namespace.find(buffer_name) != end(block_namespace) ||
1861 resource_names.find(buffer_name) != end(resource_names))
1862 {
1863 buffer_name = get_block_fallback_name(var.self);
1864 }
1865
1866 // Make sure we get something unique for both global name scope and block name scope.
1867 // See GLSL 4.5 spec: section 4.3.9 for details.
1868 add_variable(block_namespace, resource_names, buffer_name);
1869
1870 // If for some reason buffer_name is an illegal name, make a final fallback to a workaround name.
1871 // This cannot conflict with anything else, so we're safe now.
1872 // We cannot reuse this fallback name in either the global scope (blocked by block_names) or the block name scope.
1873 if (buffer_name.empty())
1874 buffer_name = join("_", get<SPIRType>(var.basetype).self, "_", var.self);
1875
1876 block_names.insert(buffer_name);
1877 block_namespace.insert(buffer_name);
1878
1879 // Save for post-reflection later.
1880 declared_block_names[var.self] = buffer_name;
1881
1882 statement(layout_for_variable(var), is_coherent ? "coherent " : "", is_restrict ? "restrict " : "",
1883 is_writeonly ? "writeonly " : "", is_readonly ? "readonly " : "", ssbo ? "buffer " : "uniform ",
1884 buffer_name);
1885
1886 begin_scope();
1887
1888 type.member_name_cache.clear();
1889
1890 uint32_t i = 0;
1891 for (auto &member : type.member_types)
1892 {
1893 add_member_name(type, i);
1894 emit_struct_member(type, member, i);
1895 i++;
1896 }
1897
1898 // var.self can be used as a backup name for the block name,
1899 // so we need to make sure we don't disturb the name here on a recompile.
1900 // It will need to be reset if we have to recompile.
1901 preserve_alias_on_reset(var.self);
1902 add_resource_name(var.self);
1903 end_scope_decl(to_name(var.self) + type_to_array_glsl(type));
1904 statement("");
1905 }
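// Example of the kind of block this emits (qualifiers depend on the variable's decorations and on
// layout_for_variable(); names are illustrative):
//   layout(std430, binding = 0) restrict readonly buffer SSBO
//   {
//       vec4 data[];
//   } ssbo;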
1906
1907 void CompilerGLSL::emit_buffer_block_flattened(const SPIRVariable &var)
1908 {
1909 auto &type = get<SPIRType>(var.basetype);
1910
1911 // Block names should never alias.
1912 auto buffer_name = to_name(type.self, false);
1913 size_t buffer_size = (get_declared_struct_size(type) + 15) / 16;
1914
1915 SPIRType::BaseType basic_type;
1916 if (get_common_basic_type(type, basic_type))
1917 {
1918 SPIRType tmp;
1919 tmp.basetype = basic_type;
1920 tmp.vecsize = 4;
1921 if (basic_type != SPIRType::Float && basic_type != SPIRType::Int && basic_type != SPIRType::UInt)
1922 SPIRV_CROSS_THROW("Basic types in a flattened UBO must be float, int or uint.");
1923
1924 auto flags = ir.get_buffer_block_flags(var);
1925 statement("uniform ", flags_to_qualifiers_glsl(tmp, flags), type_to_glsl(tmp), " ", buffer_name, "[",
1926 buffer_size, "];");
1927 }
1928 else
1929 SPIRV_CROSS_THROW("All basic types in a flattened block must be the same.");
1930 }
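// A flattened block is emitted as a plain array of 4-component elements, e.g. (illustrative):
//   uniform vec4 UBO[16];
// where 16 is the declared struct size rounded up to 16-byte units, and the element type is
// vec4/ivec4/uvec4 depending on the common basic type of the members.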
1931
1932 const char *CompilerGLSL::to_storage_qualifiers_glsl(const SPIRVariable &var)
1933 {
1934 auto &execution = get_entry_point();
1935
1936 if (var.storage == StorageClassInput || var.storage == StorageClassOutput)
1937 {
1938 if (is_legacy() && execution.model == ExecutionModelVertex)
1939 return var.storage == StorageClassInput ? "attribute " : "varying ";
1940 else if (is_legacy() && execution.model == ExecutionModelFragment)
1941 return "varying "; // Fragment outputs are renamed so they never hit this case.
1942 else
1943 return var.storage == StorageClassInput ? "in " : "out ";
1944 }
1945 else if (var.storage == StorageClassUniformConstant || var.storage == StorageClassUniform ||
1946 var.storage == StorageClassPushConstant)
1947 {
1948 return "uniform ";
1949 }
1950 else if (var.storage == StorageClassRayPayloadNV)
1951 {
1952 return "rayPayloadNV ";
1953 }
1954 else if (var.storage == StorageClassIncomingRayPayloadNV)
1955 {
1956 return "rayPayloadInNV ";
1957 }
1958 else if (var.storage == StorageClassHitAttributeNV)
1959 {
1960 return "hitAttributeNV ";
1961 }
1962 else if (var.storage == StorageClassCallableDataNV)
1963 {
1964 return "callableDataNV ";
1965 }
1966 else if (var.storage == StorageClassIncomingCallableDataNV)
1967 {
1968 return "callableDataInNV ";
1969 }
1970
1971 return "";
1972 }
1973
1974 void CompilerGLSL::emit_flattened_io_block(const SPIRVariable &var, const char *qual)
1975 {
1976 auto &type = get<SPIRType>(var.basetype);
1977 if (!type.array.empty())
1978 SPIRV_CROSS_THROW("Array of varying structs cannot be flattened to legacy-compatible varyings.");
1979
1980 auto old_flags = ir.meta[type.self].decoration.decoration_flags;
1981 // Emit the members as if they are part of a block to get all qualifiers.
1982 ir.meta[type.self].decoration.decoration_flags.set(DecorationBlock);
1983
1984 type.member_name_cache.clear();
1985
1986 uint32_t i = 0;
1987 for (auto &member : type.member_types)
1988 {
1989 add_member_name(type, i);
1990 auto &membertype = get<SPIRType>(member);
1991
1992 if (membertype.basetype == SPIRType::Struct)
1993 SPIRV_CROSS_THROW("Cannot flatten struct inside structs in I/O variables.");
1994
1995 // Pass in the varying qualifier here so it will appear in the correct declaration order.
1996 // Replace member name while emitting it so it encodes both struct name and member name.
1997 // Sanitize underscores because joining the two identifiers might create more than 1 underscore in a row,
1998 // which is not allowed.
1999 auto backup_name = get_member_name(type.self, i);
2000 auto member_name = to_member_name(type, i);
2001 set_member_name(type.self, i, sanitize_underscores(join(to_name(var.self), "_", member_name)));
2002 emit_struct_member(type, member, i, qual);
2003 // Restore member name.
2004 set_member_name(type.self, i, member_name);
2005 i++;
2006 }
2007
2008 ir.meta[type.self].decoration.decoration_flags = old_flags;
2009
2010 // Treat this variable as flattened from now on.
2011 flattened_structs.insert(var.self);
2012 }
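// Sketch of the flattening above (names hypothetical): a block such as
//   out VertexOut { vec3 color; } vout;
// is emitted as individual varyings whose names join the instance and member names, roughly:
//   out vec3 vout_color;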
2013
2014 void CompilerGLSL::emit_interface_block(const SPIRVariable &var)
2015 {
2016 auto &type = get<SPIRType>(var.basetype);
2017
2018 // Either make it plain in/out or in/out blocks depending on what shader is doing ...
2019 bool block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock);
2020 const char *qual = to_storage_qualifiers_glsl(var);
2021
2022 if (block)
2023 {
2024 // ESSL earlier than 310 and GLSL earlier than 150 did not support
2025 // I/O variables which are struct types.
2026 // To support this, flatten the struct into separate varyings instead.
2027 if ((options.es && options.version < 310) || (!options.es && options.version < 150))
2028 {
2029 // I/O blocks on ES require version 310 with Android Extension Pack extensions, or core version 320.
2030 // On desktop, I/O blocks were introduced with geometry shaders in GL 3.2 (GLSL 150).
2031 emit_flattened_io_block(var, qual);
2032 }
2033 else
2034 {
2035 if (options.es && options.version < 320)
2036 {
2037 // Geometry and tessellation extensions imply this extension.
2038 if (!has_extension("GL_EXT_geometry_shader") && !has_extension("GL_EXT_tessellation_shader"))
2039 require_extension_internal("GL_EXT_shader_io_blocks");
2040 }
2041
2042 // Block names should never alias.
2043 auto block_name = to_name(type.self, false);
2044
2045 // The namespace for I/O blocks is separate from other variables in GLSL.
2046 auto &block_namespace = type.storage == StorageClassInput ? block_input_names : block_output_names;
2047
2048 // Shaders never use the block by interface name, so we don't
2049 // have to track this other than updating name caches.
2050 if (block_name.empty() || block_namespace.find(block_name) != end(block_namespace))
2051 block_name = get_fallback_name(type.self);
2052 else
2053 block_namespace.insert(block_name);
2054
2055 // If for some reason block_name is an illegal name, make a final fallback to a workaround name.
2056 // This cannot conflict with anything else, so we're safe now.
2057 if (block_name.empty())
2058 block_name = join("_", get<SPIRType>(var.basetype).self, "_", var.self);
2059
2060 // Instance names cannot alias block names.
2061 resource_names.insert(block_name);
2062
2063 statement(layout_for_variable(var), qual, block_name);
2064 begin_scope();
2065
2066 type.member_name_cache.clear();
2067
2068 uint32_t i = 0;
2069 for (auto &member : type.member_types)
2070 {
2071 add_member_name(type, i);
2072 emit_struct_member(type, member, i);
2073 i++;
2074 }
2075
2076 add_resource_name(var.self);
2077 end_scope_decl(join(to_name(var.self), type_to_array_glsl(type)));
2078 statement("");
2079 }
2080 }
2081 else
2082 {
2083 // ESSL earlier than 310 and GLSL earlier than 150 did not support
2084 // I/O variables which are struct types.
2085 // To support this, flatten the struct into separate varyings instead.
2086 if (type.basetype == SPIRType::Struct &&
2087 ((options.es && options.version < 310) || (!options.es && options.version < 150)))
2088 {
2089 emit_flattened_io_block(var, qual);
2090 }
2091 else
2092 {
2093 add_resource_name(var.self);
2094 statement(layout_for_variable(var), to_qualifiers_glsl(var.self),
2095 variable_decl(type, to_name(var.self), var.self), ";");
2096
2097 // If a StorageClassOutput variable has an initializer, we need to initialize it in main().
2098 if (var.storage == StorageClassOutput && var.initializer)
2099 {
2100 auto &entry_func = this->get<SPIRFunction>(ir.default_entry_point);
2101 entry_func.fixup_hooks_in.push_back(
2102 [&]() { statement(to_name(var.self), " = ", to_expression(var.initializer), ";"); });
2103 }
2104 }
2105 }
2106 }
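// For non-legacy targets the block path above produces something along the lines of (illustrative):
//   layout(location = 0) out VertexData
//   {
//       vec4 color;
//   } vertex_out;
// while the pre-GLSL-150 / pre-ESSL-310 path falls back to emit_flattened_io_block() instead.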
2107
2108 void CompilerGLSL::emit_uniform(const SPIRVariable &var)
2109 {
2110 auto &type = get<SPIRType>(var.basetype);
2111 if (type.basetype == SPIRType::Image && type.image.sampled == 2)
2112 {
2113 if (!options.es && options.version < 420)
2114 require_extension_internal("GL_ARB_shader_image_load_store");
2115 else if (options.es && options.version < 310)
2116 SPIRV_CROSS_THROW("At least ESSL 3.10 required for shader image load store.");
2117 }
2118
2119 add_resource_name(var.self);
2120 statement(layout_for_variable(var), variable_decl(var), ";");
2121 }
2122
2123 string CompilerGLSL::constant_value_macro_name(uint32_t id)
2124 {
2125 return join("SPIRV_CROSS_CONSTANT_ID_", id);
2126 }
2127
2128 void CompilerGLSL::emit_specialization_constant_op(const SPIRConstantOp &constant)
2129 {
2130 auto &type = get<SPIRType>(constant.basetype);
2131 auto name = to_name(constant.self);
2132 statement("const ", variable_decl(type, name), " = ", constant_op_expression(constant), ";");
2133 }
2134
2135 void CompilerGLSL::emit_constant(const SPIRConstant &constant)
2136 {
2137 auto &type = get<SPIRType>(constant.constant_type);
2138 auto name = to_name(constant.self);
2139
2140 SpecializationConstant wg_x, wg_y, wg_z;
2141 ID workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);
2142
2143 // This specialization constant is implicitly declared by emitting layout() in;
2144 if (constant.self == workgroup_size_id)
2145 return;
2146
2147 // These specialization constants are implicitly declared by emitting layout() in;
2148 // In legacy GLSL, we will still need to emit macros for these, so a layout() in; declaration
2149 // later can use macro overrides for work group size.
2150 bool is_workgroup_size_constant = ConstantID(constant.self) == wg_x.id || ConstantID(constant.self) == wg_y.id ||
2151 ConstantID(constant.self) == wg_z.id;
2152
2153 if (options.vulkan_semantics && is_workgroup_size_constant)
2154 {
2155 // Vulkan GLSL does not need to declare workgroup spec constants explicitly, it is handled in layout().
2156 return;
2157 }
2158 else if (!options.vulkan_semantics && is_workgroup_size_constant &&
2159 !has_decoration(constant.self, DecorationSpecId))
2160 {
2161 // Only bother declaring a workgroup size if it is actually a specialization constant, because we need macros.
2162 return;
2163 }
2164
2165 // Only scalars have constant IDs.
2166 if (has_decoration(constant.self, DecorationSpecId))
2167 {
2168 if (options.vulkan_semantics)
2169 {
2170 statement("layout(constant_id = ", get_decoration(constant.self, DecorationSpecId), ") const ",
2171 variable_decl(type, name), " = ", constant_expression(constant), ";");
2172 }
2173 else
2174 {
2175 const string &macro_name = constant.specialization_constant_macro_name;
2176 statement("#ifndef ", macro_name);
2177 statement("#define ", macro_name, " ", constant_expression(constant));
2178 statement("#endif");
2179
2180 // For workgroup size constants, only emit the macros.
2181 if (!is_workgroup_size_constant)
2182 statement("const ", variable_decl(type, name), " = ", macro_name, ";");
2183 }
2184 }
2185 else
2186 {
2187 statement("const ", variable_decl(type, name), " = ", constant_expression(constant), ";");
2188 }
2189 }
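// Example of the two specialization-constant paths above, assuming SpecId = 10 and an int constant:
//   Vulkan GLSL:  layout(constant_id = 10) const int FOO = 1;
//   Plain GLSL:   #ifndef SPIRV_CROSS_CONSTANT_ID_10
//                 #define SPIRV_CROSS_CONSTANT_ID_10 1
//                 #endif
//                 const int FOO = SPIRV_CROSS_CONSTANT_ID_10;
// The constant name and default value here are hypothetical.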
2190
2191 void CompilerGLSL::emit_entry_point_declarations()
2192 {
2193 }
2194
2195 void CompilerGLSL::replace_illegal_names()
2196 {
2197 // clang-format off
2198 static const unordered_set<string> keywords = {
2199 "abs", "acos", "acosh", "all", "any", "asin", "asinh", "atan", "atanh",
2200 "atomicAdd", "atomicCompSwap", "atomicCounter", "atomicCounterDecrement", "atomicCounterIncrement",
2201 "atomicExchange", "atomicMax", "atomicMin", "atomicOr", "atomicXor",
2202 "bitCount", "bitfieldExtract", "bitfieldInsert", "bitfieldReverse",
2203 "ceil", "cos", "cosh", "cross", "degrees",
2204 "dFdx", "dFdxCoarse", "dFdxFine",
2205 "dFdy", "dFdyCoarse", "dFdyFine",
2206 "distance", "dot", "EmitStreamVertex", "EmitVertex", "EndPrimitive", "EndStreamPrimitive", "equal", "exp", "exp2",
2207 "faceforward", "findLSB", "findMSB", "float16BitsToInt16", "float16BitsToUint16", "floatBitsToInt", "floatBitsToUint", "floor", "fma", "fract",
2208 "frexp", "fwidth", "fwidthCoarse", "fwidthFine",
2209 "greaterThan", "greaterThanEqual", "groupMemoryBarrier",
2210 "imageAtomicAdd", "imageAtomicAnd", "imageAtomicCompSwap", "imageAtomicExchange", "imageAtomicMax", "imageAtomicMin", "imageAtomicOr", "imageAtomicXor",
2211 "imageLoad", "imageSamples", "imageSize", "imageStore", "imulExtended", "int16BitsToFloat16", "intBitsToFloat", "interpolateAtOffset", "interpolateAtCentroid", "interpolateAtSample",
2212 "inverse", "inversesqrt", "isinf", "isnan", "ldexp", "length", "lessThan", "lessThanEqual", "log", "log2",
2213 "matrixCompMult", "max", "memoryBarrier", "memoryBarrierAtomicCounter", "memoryBarrierBuffer", "memoryBarrierImage", "memoryBarrierShared",
2214 "min", "mix", "mod", "modf", "noise", "noise1", "noise2", "noise3", "noise4", "normalize", "not", "notEqual",
2215 "outerProduct", "packDouble2x32", "packHalf2x16", "packInt2x16", "packInt4x16", "packSnorm2x16", "packSnorm4x8",
2216 "packUint2x16", "packUint4x16", "packUnorm2x16", "packUnorm4x8", "pow",
2217 "radians", "reflect", "refract", "round", "roundEven", "sign", "sin", "sinh", "smoothstep", "sqrt", "step",
2218 "tan", "tanh", "texelFetch", "texelFetchOffset", "texture", "textureGather", "textureGatherOffset", "textureGatherOffsets",
2219 "textureGrad", "textureGradOffset", "textureLod", "textureLodOffset", "textureOffset", "textureProj", "textureProjGrad",
2220 "textureProjGradOffset", "textureProjLod", "textureProjLodOffset", "textureProjOffset", "textureQueryLevels", "textureQueryLod", "textureSamples", "textureSize",
2221 "transpose", "trunc", "uaddCarry", "uint16BitsToFloat16", "uintBitsToFloat", "umulExtended", "unpackDouble2x32", "unpackHalf2x16", "unpackInt2x16", "unpackInt4x16",
2222 "unpackSnorm2x16", "unpackSnorm4x8", "unpackUint2x16", "unpackUint4x16", "unpackUnorm2x16", "unpackUnorm4x8", "usubBorrow",
2223
2224 "active", "asm", "atomic_uint", "attribute", "bool", "break", "buffer",
2225 "bvec2", "bvec3", "bvec4", "case", "cast", "centroid", "class", "coherent", "common", "const", "continue", "default", "discard",
2226 "dmat2", "dmat2x2", "dmat2x3", "dmat2x4", "dmat3", "dmat3x2", "dmat3x3", "dmat3x4", "dmat4", "dmat4x2", "dmat4x3", "dmat4x4",
2227 "do", "double", "dvec2", "dvec3", "dvec4", "else", "enum", "extern", "external", "false", "filter", "fixed", "flat", "float",
2228 "for", "fvec2", "fvec3", "fvec4", "goto", "half", "highp", "hvec2", "hvec3", "hvec4", "if", "iimage1D", "iimage1DArray",
2229 "iimage2D", "iimage2DArray", "iimage2DMS", "iimage2DMSArray", "iimage2DRect", "iimage3D", "iimageBuffer", "iimageCube",
2230 "iimageCubeArray", "image1D", "image1DArray", "image2D", "image2DArray", "image2DMS", "image2DMSArray", "image2DRect",
2231 "image3D", "imageBuffer", "imageCube", "imageCubeArray", "in", "inline", "inout", "input", "int", "interface", "invariant",
2232 "isampler1D", "isampler1DArray", "isampler2D", "isampler2DArray", "isampler2DMS", "isampler2DMSArray", "isampler2DRect",
2233 "isampler3D", "isamplerBuffer", "isamplerCube", "isamplerCubeArray", "ivec2", "ivec3", "ivec4", "layout", "long", "lowp",
2234 "mat2", "mat2x2", "mat2x3", "mat2x4", "mat3", "mat3x2", "mat3x3", "mat3x4", "mat4", "mat4x2", "mat4x3", "mat4x4", "mediump",
2235 "namespace", "noinline", "noperspective", "out", "output", "packed", "partition", "patch", "precise", "precision", "public", "readonly",
2236 "resource", "restrict", "return", "sample", "sampler1D", "sampler1DArray", "sampler1DArrayShadow",
2237 "sampler1DShadow", "sampler2D", "sampler2DArray", "sampler2DArrayShadow", "sampler2DMS", "sampler2DMSArray",
2238 "sampler2DRect", "sampler2DRectShadow", "sampler2DShadow", "sampler3D", "sampler3DRect", "samplerBuffer",
2239 "samplerCube", "samplerCubeArray", "samplerCubeArrayShadow", "samplerCubeShadow", "shared", "short", "sizeof", "smooth", "static",
2240 "struct", "subroutine", "superp", "switch", "template", "this", "true", "typedef", "uimage1D", "uimage1DArray", "uimage2D",
2241 "uimage2DArray", "uimage2DMS", "uimage2DMSArray", "uimage2DRect", "uimage3D", "uimageBuffer", "uimageCube",
2242 "uimageCubeArray", "uint", "uniform", "union", "unsigned", "usampler1D", "usampler1DArray", "usampler2D", "usampler2DArray",
2243 "usampler2DMS", "usampler2DMSArray", "usampler2DRect", "usampler3D", "usamplerBuffer", "usamplerCube",
2244 "usamplerCubeArray", "using", "uvec2", "uvec3", "uvec4", "varying", "vec2", "vec3", "vec4", "void", "volatile",
2245 "while", "writeonly",
2246 };
2247 // clang-format on
2248
2249 ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
2250 if (!is_hidden_variable(var))
2251 {
2252 auto &m = ir.meta[var.self].decoration;
2253 if (m.alias.compare(0, 3, "gl_") == 0 || keywords.find(m.alias) != end(keywords))
2254 m.alias = join("_", m.alias);
2255 }
2256 });
2257 }
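// Example: a shader variable named "texture" or "gl_Foo" would be renamed to "_texture" / "_gl_Foo"
// by the loop above so it cannot collide with GLSL keywords or built-in functions.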
2258
2259 void CompilerGLSL::replace_fragment_output(SPIRVariable &var)
2260 {
2261 auto &m = ir.meta[var.self].decoration;
2262 uint32_t location = 0;
2263 if (m.decoration_flags.get(DecorationLocation))
2264 location = m.location;
2265
2266 // If our variable is arrayed, we must not emit the array part of this as the SPIR-V will
2267 // do the access chain part of this for us.
2268 auto &type = get<SPIRType>(var.basetype);
2269
2270 if (type.array.empty())
2271 {
2272 // Redirect the write to a specific render target in legacy GLSL.
2273 m.alias = join("gl_FragData[", location, "]");
2274
2275 if (is_legacy_es() && location != 0)
2276 require_extension_internal("GL_EXT_draw_buffers");
2277 }
2278 else if (type.array.size() == 1)
2279 {
2280 // If location is non-zero, we probably have to add an offset.
2281 // This gets really tricky since we'd have to inject an offset in the access chain.
2282 // FIXME: This seems like an extremely odd-ball case, so it's probably fine to leave it like this for now.
2283 m.alias = "gl_FragData";
2284 if (location != 0)
2285 SPIRV_CROSS_THROW("Arrayed output variable used, but location is not 0. "
2286 "This is unimplemented in SPIRV-Cross.");
2287
2288 if (is_legacy_es())
2289 require_extension_internal("GL_EXT_draw_buffers");
2290 }
2291 else
2292 SPIRV_CROSS_THROW("Array-of-array output variable used. This cannot be implemented in legacy GLSL.");
2293
2294 var.compat_builtin = true; // We don't want to declare this variable, but use the name as-is.
2295 }
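// Example: a non-arrayed fragment output decorated with Location = 1 is simply renamed to
// "gl_FragData[1]" for legacy targets; legacy ES additionally requires GL_EXT_draw_buffers
// when the location is non-zero.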
2296
2297 void CompilerGLSL::replace_fragment_outputs()
2298 {
2299 ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
2300 auto &type = this->get<SPIRType>(var.basetype);
2301
2302 if (!is_builtin_variable(var) && !var.remapped_variable && type.pointer && var.storage == StorageClassOutput)
2303 replace_fragment_output(var);
2304 });
2305 }
2306
2307 string CompilerGLSL::remap_swizzle(const SPIRType &out_type, uint32_t input_components, const string &expr)
2308 {
2309 if (out_type.vecsize == input_components)
2310 return expr;
2311 else if (input_components == 1 && !backend.can_swizzle_scalar)
2312 return join(type_to_glsl(out_type), "(", expr, ")");
2313 else
2314 {
2315 // FIXME: This will not work with packed expressions.
2316 auto e = enclose_expression(expr) + ".";
2317 // Just clamp the swizzle index if we have more outputs than inputs.
2318 for (uint32_t c = 0; c < out_type.vecsize; c++)
2319 e += index_to_swizzle(min(c, input_components - 1));
2320 if (backend.swizzle_is_function && out_type.vecsize > 1)
2321 e += "()";
2322
2323 remove_duplicate_swizzle(e);
2324 return e;
2325 }
2326 }
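// Example: widening a 2-component expression "v" to a vec4 clamps the swizzle indices, producing
// roughly "v.xyyy" (before remove_duplicate_swizzle() gets a chance to simplify it).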
2327
2328 void CompilerGLSL::emit_pls()
2329 {
2330 auto &execution = get_entry_point();
2331 if (execution.model != ExecutionModelFragment)
2332 SPIRV_CROSS_THROW("Pixel local storage only supported in fragment shaders.");
2333
2334 if (!options.es)
2335 SPIRV_CROSS_THROW("Pixel local storage only supported in OpenGL ES.");
2336
2337 if (options.version < 300)
2338 SPIRV_CROSS_THROW("Pixel local storage only supported in ESSL 3.0 and above.");
2339
2340 if (!pls_inputs.empty())
2341 {
2342 statement("__pixel_local_inEXT _PLSIn");
2343 begin_scope();
2344 for (auto &input : pls_inputs)
2345 statement(pls_decl(input), ";");
2346 end_scope_decl();
2347 statement("");
2348 }
2349
2350 if (!pls_outputs.empty())
2351 {
2352 statement("__pixel_local_outEXT _PLSOut");
2353 begin_scope();
2354 for (auto &output : pls_outputs)
2355 statement(pls_decl(output), ";");
2356 end_scope_decl();
2357 statement("");
2358 }
2359 }
2360
2361 void CompilerGLSL::fixup_image_load_store_access()
2362 {
2363 ir.for_each_typed_id<SPIRVariable>([&](uint32_t var, const SPIRVariable &) {
2364 auto &vartype = expression_type(var);
2365 if (vartype.basetype == SPIRType::Image)
2366 {
2367 // Older glslangValidator does not emit required qualifiers here.
2368 // Solve this by making the image access as restricted as possible and loosen up if we need to.
2369 // If any no-read/no-write flags are actually set, assume that the compiler knows what it's doing.
2370
2371 auto &flags = ir.meta[var].decoration.decoration_flags;
2372 if (!flags.get(DecorationNonWritable) && !flags.get(DecorationNonReadable))
2373 {
2374 flags.set(DecorationNonWritable);
2375 flags.set(DecorationNonReadable);
2376 }
2377 }
2378 });
2379 }
2380
2381 void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionModel model)
2382 {
2383 Bitset emitted_builtins;
2384 Bitset global_builtins;
2385 const SPIRVariable *block_var = nullptr;
2386 bool emitted_block = false;
2387 bool builtin_array = false;
2388
2389 // Need to use declared size in the type.
2390 // These variables might have been declared, but not statically used, so we haven't deduced their size yet.
2391 uint32_t cull_distance_size = 0;
2392 uint32_t clip_distance_size = 0;
2393
2394 ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
2395 auto &type = this->get<SPIRType>(var.basetype);
2396 bool block = has_decoration(type.self, DecorationBlock);
2397 Bitset builtins;
2398
2399 if (var.storage == storage && block && is_builtin_variable(var))
2400 {
2401 uint32_t index = 0;
2402 for (auto &m : ir.meta[type.self].members)
2403 {
2404 if (m.builtin)
2405 {
2406 builtins.set(m.builtin_type);
2407 if (m.builtin_type == BuiltInCullDistance)
2408 cull_distance_size = this->get<SPIRType>(type.member_types[index]).array.front();
2409 else if (m.builtin_type == BuiltInClipDistance)
2410 clip_distance_size = this->get<SPIRType>(type.member_types[index]).array.front();
2411 }
2412 index++;
2413 }
2414 }
2415 else if (var.storage == storage && !block && is_builtin_variable(var))
2416 {
2417 // While we're at it, collect all declared global builtins (HLSL mostly ...).
2418 auto &m = ir.meta[var.self].decoration;
2419 if (m.builtin)
2420 {
2421 global_builtins.set(m.builtin_type);
2422 if (m.builtin_type == BuiltInCullDistance)
2423 cull_distance_size = type.array.front();
2424 else if (m.builtin_type == BuiltInClipDistance)
2425 clip_distance_size = type.array.front();
2426 }
2427 }
2428
2429 if (builtins.empty())
2430 return;
2431
2432 if (emitted_block)
2433 SPIRV_CROSS_THROW("Cannot use more than one builtin I/O block.");
2434
2435 emitted_builtins = builtins;
2436 emitted_block = true;
2437 builtin_array = !type.array.empty();
2438 block_var = &var;
2439 });
2440
2441 global_builtins =
2442 Bitset(global_builtins.get_lower() & ((1ull << BuiltInPosition) | (1ull << BuiltInPointSize) |
2443 (1ull << BuiltInClipDistance) | (1ull << BuiltInCullDistance)));
2444
2445 // Try to collect all other declared builtins.
2446 if (!emitted_block)
2447 emitted_builtins = global_builtins;
2448
2449 // Can't declare an empty interface block.
2450 if (emitted_builtins.empty())
2451 return;
2452
2453 if (storage == StorageClassOutput)
2454 statement("out gl_PerVertex");
2455 else
2456 statement("in gl_PerVertex");
2457
2458 begin_scope();
2459 if (emitted_builtins.get(BuiltInPosition))
2460 statement("vec4 gl_Position;");
2461 if (emitted_builtins.get(BuiltInPointSize))
2462 statement("float gl_PointSize;");
2463 if (emitted_builtins.get(BuiltInClipDistance))
2464 statement("float gl_ClipDistance[", clip_distance_size, "];");
2465 if (emitted_builtins.get(BuiltInCullDistance))
2466 statement("float gl_CullDistance[", cull_distance_size, "];");
2467
2468 bool tessellation = model == ExecutionModelTessellationEvaluation || model == ExecutionModelTessellationControl;
2469 if (builtin_array)
2470 {
2471 // Make sure the array has a supported name in the code.
2472 if (storage == StorageClassOutput)
2473 set_name(block_var->self, "gl_out");
2474 else if (storage == StorageClassInput)
2475 set_name(block_var->self, "gl_in");
2476
2477 if (model == ExecutionModelTessellationControl && storage == StorageClassOutput)
2478 end_scope_decl(join(to_name(block_var->self), "[", get_entry_point().output_vertices, "]"));
2479 else
2480 end_scope_decl(join(to_name(block_var->self), tessellation ? "[gl_MaxPatchVertices]" : "[]"));
2481 }
2482 else
2483 end_scope_decl();
2484 statement("");
2485 }
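// Typical output of this function for a vertex shader output block (array sizes are examples only):
//   out gl_PerVertex
//   {
//       vec4 gl_Position;
//       float gl_ClipDistance[1];
//   };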
2486
2487 void CompilerGLSL::declare_undefined_values()
2488 {
2489 bool emitted = false;
2490 ir.for_each_typed_id<SPIRUndef>([&](uint32_t, const SPIRUndef &undef) {
2491 statement(variable_decl(this->get<SPIRType>(undef.basetype), to_name(undef.self), undef.self), ";");
2492 emitted = true;
2493 });
2494
2495 if (emitted)
2496 statement("");
2497 }
2498
2499 bool CompilerGLSL::variable_is_lut(const SPIRVariable &var) const
2500 {
2501 bool statically_assigned = var.statically_assigned && var.static_expression != ID(0) && var.remapped_variable;
2502
2503 if (statically_assigned)
2504 {
2505 auto *constant = maybe_get<SPIRConstant>(var.static_expression);
2506 if (constant && constant->is_used_as_lut)
2507 return true;
2508 }
2509
2510 return false;
2511 }
2512
2513 void CompilerGLSL::emit_resources()
2514 {
2515 auto &execution = get_entry_point();
2516
2517 replace_illegal_names();
2518
2519 // Legacy GL uses gl_FragData[], so redeclare all fragment outputs
2520 // with builtins.
2521 if (execution.model == ExecutionModelFragment && is_legacy())
2522 replace_fragment_outputs();
2523
2524 // Emit PLS blocks if we have such variables.
2525 if (!pls_inputs.empty() || !pls_outputs.empty())
2526 emit_pls();
2527
2528 // Emit custom gl_PerVertex for SSO compatibility.
2529 if (options.separate_shader_objects && !options.es && execution.model != ExecutionModelFragment)
2530 {
2531 switch (execution.model)
2532 {
2533 case ExecutionModelGeometry:
2534 case ExecutionModelTessellationControl:
2535 case ExecutionModelTessellationEvaluation:
2536 emit_declared_builtin_block(StorageClassInput, execution.model);
2537 emit_declared_builtin_block(StorageClassOutput, execution.model);
2538 break;
2539
2540 case ExecutionModelVertex:
2541 emit_declared_builtin_block(StorageClassOutput, execution.model);
2542 break;
2543
2544 default:
2545 break;
2546 }
2547 }
2548 else
2549 {
2550 // Need to redeclare clip/cull distance with explicit size to use them.
2551 // SPIR-V mandates these builtins have a size declared.
2552 const char *storage = execution.model == ExecutionModelFragment ? "in" : "out";
2553 if (clip_distance_count != 0)
2554 statement(storage, " float gl_ClipDistance[", clip_distance_count, "];");
2555 if (cull_distance_count != 0)
2556 statement(storage, " float gl_CullDistance[", cull_distance_count, "];");
2557 if (clip_distance_count != 0 || cull_distance_count != 0)
2558 statement("");
2559 }
2560
2561 if (position_invariant)
2562 {
2563 statement("invariant gl_Position;");
2564 statement("");
2565 }
2566
2567 bool emitted = false;
2568
2569 // When emitting Vulkan GLSL,
2570 // emit specialization constants as actual constants;
2571 // spec op expressions will redirect to the constant name.
2572 //
2573 {
2574 auto loop_lock = ir.create_loop_hard_lock();
2575 for (auto &id_ : ir.ids_for_constant_or_type)
2576 {
2577 auto &id = ir.ids[id_];
2578
2579 if (id.get_type() == TypeConstant)
2580 {
2581 auto &c = id.get<SPIRConstant>();
2582
2583 bool needs_declaration = c.specialization || c.is_used_as_lut;
2584
2585 if (needs_declaration)
2586 {
2587 if (!options.vulkan_semantics && c.specialization)
2588 {
2589 c.specialization_constant_macro_name =
2590 constant_value_macro_name(get_decoration(c.self, DecorationSpecId));
2591 }
2592 emit_constant(c);
2593 emitted = true;
2594 }
2595 }
2596 else if (id.get_type() == TypeConstantOp)
2597 {
2598 emit_specialization_constant_op(id.get<SPIRConstantOp>());
2599 emitted = true;
2600 }
2601 else if (id.get_type() == TypeType)
2602 {
2603 auto &type = id.get<SPIRType>();
2604 if (type.basetype == SPIRType::Struct && type.array.empty() && !type.pointer &&
2605 (!ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) &&
2606 !ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock)))
2607 {
2608 if (emitted)
2609 statement("");
2610 emitted = false;
2611
2612 emit_struct(type);
2613 }
2614 }
2615 }
2616 }
2617
2618 if (emitted)
2619 statement("");
2620
2621 // If we needed to declare work group size late, check here.
2622 // If the work group size depends on a specialization constant, we need to declare the layout() block
2623 // after constants (and their macros) have been declared.
2624 if (execution.model == ExecutionModelGLCompute && !options.vulkan_semantics &&
2625 execution.workgroup_size.constant != 0)
2626 {
2627 SpecializationConstant wg_x, wg_y, wg_z;
2628 get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);
2629
2630 if ((wg_x.id != ConstantID(0)) || (wg_y.id != ConstantID(0)) || (wg_z.id != ConstantID(0)))
2631 {
2632 SmallVector<string> inputs;
2633 build_workgroup_size(inputs, wg_x, wg_y, wg_z);
2634 statement("layout(", merge(inputs), ") in;");
2635 statement("");
2636 }
2637 }
2638
2639 emitted = false;
2640
2641 if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
2642 {
2643 for (auto type : physical_storage_non_block_pointer_types)
2644 {
2645 emit_buffer_reference_block(get<SPIRType>(type), false);
2646 }
2647
2648 // Output buffer reference blocks.
2649 // Do this in two stages, one with forward declaration,
2650 // and one without. Buffer reference blocks can reference themselves
2651 // to support things like linked lists.
2652 ir.for_each_typed_id<SPIRType>([&](uint32_t, SPIRType &type) {
2653 bool has_block_flags = has_decoration(type.self, DecorationBlock);
2654 if (has_block_flags && type.pointer && type.pointer_depth == 1 && !type_is_array_of_pointers(type) &&
2655 type.storage == StorageClassPhysicalStorageBufferEXT)
2656 {
2657 emit_buffer_reference_block(type, true);
2658 }
2659 });
2660
2661 ir.for_each_typed_id<SPIRType>([&](uint32_t, SPIRType &type) {
2662 bool has_block_flags = has_decoration(type.self, DecorationBlock);
2663 if (has_block_flags && type.pointer && type.pointer_depth == 1 && !type_is_array_of_pointers(type) &&
2664 type.storage == StorageClassPhysicalStorageBufferEXT)
2665 {
2666 emit_buffer_reference_block(type, false);
2667 }
2668 });
2669 }
2670
2671 // Output UBOs and SSBOs
2672 ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
2673 auto &type = this->get<SPIRType>(var.basetype);
2674
2675 bool is_block_storage = type.storage == StorageClassStorageBuffer || type.storage == StorageClassUniform ||
2676 type.storage == StorageClassShaderRecordBufferNV;
2677 bool has_block_flags = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) ||
2678 ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
2679
2680 if (var.storage != StorageClassFunction && type.pointer && is_block_storage && !is_hidden_variable(var) &&
2681 has_block_flags)
2682 {
2683 emit_buffer_block(var);
2684 }
2685 });
2686
2687 // Output push constant blocks
2688 ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
2689 auto &type = this->get<SPIRType>(var.basetype);
2690 if (var.storage != StorageClassFunction && type.pointer && type.storage == StorageClassPushConstant &&
2691 !is_hidden_variable(var))
2692 {
2693 emit_push_constant_block(var);
2694 }
2695 });
2696
2697 bool skip_separate_image_sampler = !combined_image_samplers.empty() || !options.vulkan_semantics;
2698
2699 // Output Uniform Constants (values, samplers, images, etc).
2700 ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
2701 auto &type = this->get<SPIRType>(var.basetype);
2702
2703 // If we're remapping separate samplers and images, only emit the combined samplers.
2704 if (skip_separate_image_sampler)
2705 {
2706 // Sampler buffers are always used without a sampler, and they will also work in regular GL.
2707 bool sampler_buffer = type.basetype == SPIRType::Image && type.image.dim == DimBuffer;
2708 bool separate_image = type.basetype == SPIRType::Image && type.image.sampled == 1;
2709 bool separate_sampler = type.basetype == SPIRType::Sampler;
2710 if (!sampler_buffer && (separate_image || separate_sampler))
2711 return;
2712 }
2713
2714 if (var.storage != StorageClassFunction && type.pointer &&
2715 (type.storage == StorageClassUniformConstant || type.storage == StorageClassAtomicCounter ||
2716 type.storage == StorageClassRayPayloadNV || type.storage == StorageClassIncomingRayPayloadNV ||
2717 type.storage == StorageClassCallableDataNV || type.storage == StorageClassIncomingCallableDataNV ||
2718 type.storage == StorageClassHitAttributeNV) &&
2719 !is_hidden_variable(var))
2720 {
2721 emit_uniform(var);
2722 emitted = true;
2723 }
2724 });
2725
2726 if (emitted)
2727 statement("");
2728 emitted = false;
2729
2730 // Output in/out interfaces.
2731 ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
2732 auto &type = this->get<SPIRType>(var.basetype);
2733
2734 if (var.storage != StorageClassFunction && type.pointer &&
2735 (var.storage == StorageClassInput || var.storage == StorageClassOutput) &&
2736 interface_variable_exists_in_entry_point(var.self) && !is_hidden_variable(var))
2737 {
2738 emit_interface_block(var);
2739 emitted = true;
2740 }
2741 else if (is_builtin_variable(var))
2742 {
2743 // For gl_InstanceIndex emulation on GLES, the API user needs to
2744 // supply this uniform.
2745 if (options.vertex.support_nonzero_base_instance &&
2746 ir.meta[var.self].decoration.builtin_type == BuiltInInstanceIndex && !options.vulkan_semantics)
2747 {
2748 statement("uniform int SPIRV_Cross_BaseInstance;");
2749 emitted = true;
2750 }
2751 }
2752 });
2753
2754 // Global variables.
2755 for (auto global : global_variables)
2756 {
2757 auto &var = get<SPIRVariable>(global);
2758 if (var.storage != StorageClassOutput)
2759 {
2760 if (!variable_is_lut(var))
2761 {
2762 add_resource_name(var.self);
2763 statement(variable_decl(var), ";");
2764 emitted = true;
2765 }
2766 }
2767 }
2768
2769 if (emitted)
2770 statement("");
2771
2772 declare_undefined_values();
2773 }
2774
2775 // Returns a string representation of the ID, usable as a function arg.
2776 // Default is to simply return the expression representation of the arg ID.
2777 // Subclasses may override to modify the return value.
2778 string CompilerGLSL::to_func_call_arg(const SPIRFunction::Parameter &, uint32_t id)
2779 {
2780 // Make sure that we use the name of the original variable, and not the parameter alias.
2781 uint32_t name_id = id;
2782 auto *var = maybe_get<SPIRVariable>(id);
2783 if (var && var->basevariable)
2784 name_id = var->basevariable;
2785 return to_expression(name_id);
2786 }
2787
2788 void CompilerGLSL::handle_invalid_expression(uint32_t id)
2789 {
2790 // We tried to read an invalidated expression.
2791 // This means we need another pass at compilation, but next time, force temporary variables so that they cannot be invalidated.
2792 forced_temporaries.insert(id);
2793 force_recompile();
2794 }
2795
2796 // Converts the format of the current expression from packed to unpacked,
2797 // by wrapping the expression in a constructor of the appropriate type.
2798 // GLSL does not support packed formats, so simply return the expression.
2799 // Subclasses that do will override.
2800 string CompilerGLSL::unpack_expression_type(string expr_str, const SPIRType &, uint32_t, bool, bool)
2801 {
2802 return expr_str;
2803 }
2804
2805 // Sometimes we proactively enclose an expression, only to find that we did not need it after all.
2806 void CompilerGLSL::strip_enclosed_expression(string &expr)
2807 {
2808 if (expr.size() < 2 || expr.front() != '(' || expr.back() != ')')
2809 return;
2810
2811 // Have to make sure that our first and last parens actually enclose everything inside it.
2812 uint32_t paren_count = 0;
2813 for (auto &c : expr)
2814 {
2815 if (c == '(')
2816 paren_count++;
2817 else if (c == ')')
2818 {
2819 paren_count--;
2820
2821 // If we hit 0 and this is not the final char, our first and final parens actually don't
2822 // enclose the expression, and we cannot strip, e.g.: (a + b) * (c + d).
2823 if (paren_count == 0 && &c != &expr.back())
2824 return;
2825 }
2826 }
2827 expr.erase(expr.size() - 1, 1);
2828 expr.erase(begin(expr));
2829 }
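// Examples: "(a + b)" is stripped down to "a + b", while "(a + b) * (c + d)" is left untouched
// because its first '(' does not enclose the whole expression.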
2830
2831 string CompilerGLSL::enclose_expression(const string &expr)
2832 {
2833 bool need_parens = false;
2834
2835 // If the expression starts with a unary we need to enclose to deal with cases where we have back-to-back
2836 // unary expressions.
2837 if (!expr.empty())
2838 {
2839 auto c = expr.front();
2840 if (c == '-' || c == '+' || c == '!' || c == '~' || c == '&' || c == '*')
2841 need_parens = true;
2842 }
2843
2844 if (!need_parens)
2845 {
2846 uint32_t paren_count = 0;
2847 for (auto c : expr)
2848 {
2849 if (c == '(' || c == '[')
2850 paren_count++;
2851 else if (c == ')' || c == ']')
2852 {
2853 assert(paren_count);
2854 paren_count--;
2855 }
2856 else if (c == ' ' && paren_count == 0)
2857 {
2858 need_parens = true;
2859 break;
2860 }
2861 }
2862 assert(paren_count == 0);
2863 }
2864
2865 // If this expression contains any spaces which are not enclosed by parentheses,
2866 // we need to enclose it so we can treat the whole string as an expression.
2867 // This happens when two expressions have been part of a binary op earlier.
2868 if (need_parens)
2869 return join('(', expr, ')');
2870 else
2871 return expr;
2872 }
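// Examples: "a + b" becomes "(a + b)" and "-x" becomes "(-x)", while "texture(s, uv)" is returned
// unchanged since any spaces it contains are already inside parentheses.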
2873
2874 string CompilerGLSL::dereference_expression(const SPIRType &expr_type, const std::string &expr)
2875 {
2876 // If this expression starts with an address-of operator ('&'), then
2877 // just return the part after the operator.
2878 // TODO: Strip parens if unnecessary?
2879 if (expr.front() == '&')
2880 return expr.substr(1);
2881 else if (backend.native_pointers)
2882 return join('*', expr);
2883 else if (expr_type.storage == StorageClassPhysicalStorageBufferEXT && expr_type.basetype != SPIRType::Struct &&
2884 expr_type.pointer_depth == 1)
2885 {
2886 return join(enclose_expression(expr), ".value");
2887 }
2888 else
2889 return expr;
2890 }
2891
2892 string CompilerGLSL::address_of_expression(const std::string &expr)
2893 {
2894 if (expr.size() > 3 && expr[0] == '(' && expr[1] == '*' && expr.back() == ')')
2895 {
2896 // If we have an expression which looks like (*foo), taking the address of it is the same as stripping
2897 // the first two and last characters. We might have to enclose the expression.
2898 // This doesn't work for cases like (*foo + 10),
2899 // but this is an r-value expression which we cannot take the address of anyway.
2900 return enclose_expression(expr.substr(2, expr.size() - 3));
2901 }
2902 else if (expr.front() == '*')
2903 {
2904 // If this expression starts with a dereference operator ('*'), then
2905 // just return the part after the operator.
2906 return expr.substr(1);
2907 }
2908 else
2909 return join('&', enclose_expression(expr));
2910 }
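// Examples: "(*foo)" -> "foo", "*foo" -> "foo", and anything else such as "foo.bar" -> "&foo.bar".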
2911
2912 // Just like to_expression except that we enclose the expression inside parentheses if needed.
2913 string CompilerGLSL::to_enclosed_expression(uint32_t id, bool register_expression_read)
2914 {
2915 return enclose_expression(to_expression(id, register_expression_read));
2916 }
2917
2918 // Used explicitly when we want to read a row-major expression, but without any transpose shenanigans.
2919 // need_transpose must be forced to false.
2920 string CompilerGLSL::to_unpacked_row_major_matrix_expression(uint32_t id)
2921 {
2922 return unpack_expression_type(to_expression(id), expression_type(id),
2923 get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID),
2924 has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked), true);
2925 }
2926
2927 string CompilerGLSL::to_unpacked_expression(uint32_t id, bool register_expression_read)
2928 {
2929 // If we need to transpose, it will also take care of unpacking rules.
2930 auto *e = maybe_get<SPIRExpression>(id);
2931 bool need_transpose = e && e->need_transpose;
2932 bool is_remapped = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID);
2933 bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
2934
2935 if (!need_transpose && (is_remapped || is_packed))
2936 {
2937 return unpack_expression_type(to_expression(id, register_expression_read),
2938 get_pointee_type(expression_type_id(id)),
2939 get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID),
2940 has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked), false);
2941 }
2942 else
2943 return to_expression(id, register_expression_read);
2944 }
2945
2946 string CompilerGLSL::to_enclosed_unpacked_expression(uint32_t id, bool register_expression_read)
2947 {
2948 // If we need to transpose, it will also take care of unpacking rules.
2949 auto *e = maybe_get<SPIRExpression>(id);
2950 bool need_transpose = e && e->need_transpose;
2951 bool is_remapped = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID);
2952 bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
2953 if (!need_transpose && (is_remapped || is_packed))
2954 {
2955 return unpack_expression_type(to_expression(id, register_expression_read), expression_type(id),
2956 get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID),
2957 has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked), false);
2958 }
2959 else
2960 return to_enclosed_expression(id, register_expression_read);
2961 }
2962
2963 string CompilerGLSL::to_dereferenced_expression(uint32_t id, bool register_expression_read)
2964 {
2965 auto &type = expression_type(id);
2966 if (type.pointer && should_dereference(id))
2967 return dereference_expression(type, to_enclosed_expression(id, register_expression_read));
2968 else
2969 return to_expression(id, register_expression_read);
2970 }
2971
2972 string CompilerGLSL::to_pointer_expression(uint32_t id, bool register_expression_read)
2973 {
2974 auto &type = expression_type(id);
2975 if (type.pointer && expression_is_lvalue(id) && !should_dereference(id))
2976 return address_of_expression(to_enclosed_expression(id, register_expression_read));
2977 else
2978 return to_unpacked_expression(id, register_expression_read);
2979 }
2980
2981 string CompilerGLSL::to_enclosed_pointer_expression(uint32_t id, bool register_expression_read)
2982 {
2983 auto &type = expression_type(id);
2984 if (type.pointer && expression_is_lvalue(id) && !should_dereference(id))
2985 return address_of_expression(to_enclosed_expression(id, register_expression_read));
2986 else
2987 return to_enclosed_unpacked_expression(id, register_expression_read);
2988 }
2989
2990 string CompilerGLSL::to_extract_component_expression(uint32_t id, uint32_t index)
2991 {
2992 auto expr = to_enclosed_expression(id);
2993 if (has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked))
2994 return join(expr, "[", index, "]");
2995 else
2996 return join(expr, ".", index_to_swizzle(index));
2997 }
2998
2999 string CompilerGLSL::to_rerolled_array_expression(const string &base_expr, const SPIRType &type)
3000 {
3001 uint32_t size = to_array_size_literal(type);
3002 auto &parent = get<SPIRType>(type.parent_type);
3003 string expr = "{ ";
3004
3005 for (uint32_t i = 0; i < size; i++)
3006 {
3007 auto subexpr = join(base_expr, "[", convert_to_string(i), "]");
3008 if (parent.array.empty())
3009 expr += subexpr;
3010 else
3011 expr += to_rerolled_array_expression(subexpr, parent);
3012
3013 if (i + 1 < size)
3014 expr += ", ";
3015 }
3016
3017 expr += " }";
3018 return expr;
3019 }
3020
3021 string CompilerGLSL::to_composite_constructor_expression(uint32_t id)
3022 {
3023 auto &type = expression_type(id);
3024 if (!backend.array_is_value_type && !type.array.empty())
3025 {
3026 // For this case, we need to "re-roll" an array initializer from a temporary.
3027 // We cannot simply pass the array directly, since it decays to a pointer and it cannot
3028 // participate in a struct initializer. E.g.
3029 // float arr[2] = { 1.0, 2.0 };
3030 // Foo foo = { arr }; must be transformed to
3031 // Foo foo = { { arr[0], arr[1] } };
3032 // The array sizes cannot be deduced from specialization constants since we cannot use any loops.
3033
3034 // We're only triggering one read of the array expression, but this is fine since arrays have to be declared
3035 // as temporaries anyway.
3036 return to_rerolled_array_expression(to_enclosed_expression(id), type);
3037 }
3038 else
3039 return to_unpacked_expression(id);
3040 }
3041
3042 string CompilerGLSL::to_expression(uint32_t id, bool register_expression_read)
3043 {
3044 auto itr = invalid_expressions.find(id);
3045 if (itr != end(invalid_expressions))
3046 handle_invalid_expression(id);
3047
3048 if (ir.ids[id].get_type() == TypeExpression)
3049 {
3050 // We might have a more complex chain of dependencies.
3051 // A possible scenario is that we
3052 //
3053 // %1 = OpLoad
3054 // %2 = OpDoSomething %1 %1. Here %2 will have a dependency on %1.
3055 // %3 = OpDoSomethingAgain %2 %2. Here %3 will lose the link to %1 since we don't propagate the dependencies like that.
3056 // OpStore %1 %foo // Here we can invalidate %1, and hence all expressions which depend on %1. Only %2 will know since it's part of invalid_expressions.
3057 // %4 = OpDoSomethingAnotherTime %3 %3 // If we forward all expressions we will see %1 expression after store, not before.
3058 //
3059 // However, we can propagate up a list of depended expressions when we used %2, so we can check if %2 is invalid when reading %3 after the store,
3060 // and see that we should not forward reads of the original variable.
3061 auto &expr = get<SPIRExpression>(id);
3062 for (uint32_t dep : expr.expression_dependencies)
3063 if (invalid_expressions.find(dep) != end(invalid_expressions))
3064 handle_invalid_expression(dep);
3065 }
3066
3067 if (register_expression_read)
3068 track_expression_read(id);
3069
3070 switch (ir.ids[id].get_type())
3071 {
3072 case TypeExpression:
3073 {
3074 auto &e = get<SPIRExpression>(id);
3075 if (e.base_expression)
3076 return to_enclosed_expression(e.base_expression) + e.expression;
3077 else if (e.need_transpose)
3078 {
3079 // This should not be reached for access chains, since we always deal explicitly with transpose state
3080 // when consuming an access chain expression.
3081 uint32_t physical_type_id = get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID);
3082 bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
3083 return convert_row_major_matrix(e.expression, get<SPIRType>(e.expression_type), physical_type_id,
3084 is_packed);
3085 }
3086 else
3087 {
3088 if (is_forcing_recompilation())
3089 {
3090 // During first compilation phase, certain expression patterns can trigger exponential growth of memory.
3091 // Avoid this by returning dummy expressions during this phase.
3092 // Do not use empty expressions here, because those are sentinels for other cases.
3093 return "_";
3094 }
3095 else
3096 return e.expression;
3097 }
3098 }
3099
3100 case TypeConstant:
3101 {
3102 auto &c = get<SPIRConstant>(id);
3103 auto &type = get<SPIRType>(c.constant_type);
3104
3105 // WorkGroupSize may be a constant.
3106 auto &dec = ir.meta[c.self].decoration;
3107 if (dec.builtin)
3108 return builtin_to_glsl(dec.builtin_type, StorageClassGeneric);
3109 else if (c.specialization)
3110 return to_name(id);
3111 else if (c.is_used_as_lut)
3112 return to_name(id);
3113 else if (type.basetype == SPIRType::Struct && !backend.can_declare_struct_inline)
3114 return to_name(id);
3115 else if (!type.array.empty() && !backend.can_declare_arrays_inline)
3116 return to_name(id);
3117 else
3118 return constant_expression(c);
3119 }
3120
3121 case TypeConstantOp:
3122 return to_name(id);
3123
3124 case TypeVariable:
3125 {
3126 auto &var = get<SPIRVariable>(id);
3127 // If we try to use a loop variable before the loop header, we have to redirect it to the static expression,
3128 // the variable has not been declared yet.
3129 if (var.statically_assigned || (var.loop_variable && !var.loop_variable_enable))
3130 return to_expression(var.static_expression);
3131 else if (var.deferred_declaration)
3132 {
3133 var.deferred_declaration = false;
3134 return variable_decl(var);
3135 }
3136 else if (flattened_structs.count(id))
3137 {
3138 return load_flattened_struct(var);
3139 }
3140 else
3141 {
3142 auto &dec = ir.meta[var.self].decoration;
3143 if (dec.builtin)
3144 return builtin_to_glsl(dec.builtin_type, var.storage);
3145 else
3146 return to_name(id);
3147 }
3148 }
3149
3150 case TypeCombinedImageSampler:
3151 // An expression should never be taken of this type directly.
3152 // The intention is that texture sampling functions will extract the image and samplers
3153 // separately and take their expressions as needed.
3154 // GLSL does not use this type because OpSampledImage immediately creates a combined image sampler
3155 // expression a la sampler2D(texture, sampler).
3156 SPIRV_CROSS_THROW("Combined image samplers have no default expression representation.");
3157
3158 case TypeAccessChain:
3159 // We cannot express this type. They only have meaning in other OpAccessChains, OpStore or OpLoad.
3160 SPIRV_CROSS_THROW("Access chains have no default expression representation.");
3161
3162 default:
3163 return to_name(id);
3164 }
3165 }
3166
3167 string CompilerGLSL::constant_op_expression(const SPIRConstantOp &cop)
3168 {
3169 auto &type = get<SPIRType>(cop.basetype);
3170 bool binary = false;
3171 bool unary = false;
3172 string op;
3173
3174 if (is_legacy() && is_unsigned_opcode(cop.opcode))
3175 SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy targets.");
3176
3177 // TODO: Find a clean way to reuse emit_instruction.
3178 switch (cop.opcode)
3179 {
3180 case OpSConvert:
3181 case OpUConvert:
3182 case OpFConvert:
3183 op = type_to_glsl_constructor(type);
3184 break;
3185
3186 #define GLSL_BOP(opname, x) \
3187 case Op##opname: \
3188 binary = true; \
3189 op = x; \
3190 break
3191
3192 #define GLSL_UOP(opname, x) \
3193 case Op##opname: \
3194 unary = true; \
3195 op = x; \
3196 break
3197
3198 GLSL_UOP(SNegate, "-");
3199 GLSL_UOP(Not, "~");
3200 GLSL_BOP(IAdd, "+");
3201 GLSL_BOP(ISub, "-");
3202 GLSL_BOP(IMul, "*");
3203 GLSL_BOP(SDiv, "/");
3204 GLSL_BOP(UDiv, "/");
3205 GLSL_BOP(UMod, "%");
3206 GLSL_BOP(SMod, "%");
3207 GLSL_BOP(ShiftRightLogical, ">>");
3208 GLSL_BOP(ShiftRightArithmetic, ">>");
3209 GLSL_BOP(ShiftLeftLogical, "<<");
3210 GLSL_BOP(BitwiseOr, "|");
3211 GLSL_BOP(BitwiseXor, "^");
3212 GLSL_BOP(BitwiseAnd, "&");
3213 GLSL_BOP(LogicalOr, "||");
3214 GLSL_BOP(LogicalAnd, "&&");
3215 GLSL_UOP(LogicalNot, "!");
3216 GLSL_BOP(LogicalEqual, "==");
3217 GLSL_BOP(LogicalNotEqual, "!=");
3218 GLSL_BOP(IEqual, "==");
3219 GLSL_BOP(INotEqual, "!=");
3220 GLSL_BOP(ULessThan, "<");
3221 GLSL_BOP(SLessThan, "<");
3222 GLSL_BOP(ULessThanEqual, "<=");
3223 GLSL_BOP(SLessThanEqual, "<=");
3224 GLSL_BOP(UGreaterThan, ">");
3225 GLSL_BOP(SGreaterThan, ">");
3226 GLSL_BOP(UGreaterThanEqual, ">=");
3227 GLSL_BOP(SGreaterThanEqual, ">=");
3228
3229 case OpSelect:
3230 {
3231 if (cop.arguments.size() < 3)
3232 SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
3233
3234 // This one is pretty annoying. It's triggered from
3235 // uint(bool), int(bool) from spec constants.
3236 // In order to preserve its compile-time constness in Vulkan GLSL,
3237 // we need to reduce the OpSelect expression back to this simplified model.
3238 // If we cannot, fail.
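// For illustration (hypothetical spec constant %b of type bool):
//   %x = OpSpecConstantOp %int Select %b %int_1 %int_0
// reduces to a plain constructor cast, roughly "int(b)", which stays a
// compile-time constant in Vulkan GLSL.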
3239 if (to_trivial_mix_op(type, op, cop.arguments[2], cop.arguments[1], cop.arguments[0]))
3240 {
3241 // Implement as a simple cast down below.
3242 }
3243 else
3244 {
3245 // Implement a ternary and pray the compiler understands it :)
3246 return to_ternary_expression(type, cop.arguments[0], cop.arguments[1], cop.arguments[2]);
3247 }
3248 break;
3249 }
3250
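// Illustration for the shuffle case below (hypothetical vec4 inputs a, b):
//   OpSpecConstantOp %v2float VectorShuffle %a %b 1 4
// would be emitted roughly as "vec2(a.y, b.x)".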
3251 case OpVectorShuffle:
3252 {
3253 string expr = type_to_glsl_constructor(type);
3254 expr += "(";
3255
3256 uint32_t left_components = expression_type(cop.arguments[0]).vecsize;
3257 string left_arg = to_enclosed_expression(cop.arguments[0]);
3258 string right_arg = to_enclosed_expression(cop.arguments[1]);
3259
3260 for (uint32_t i = 2; i < uint32_t(cop.arguments.size()); i++)
3261 {
3262 uint32_t index = cop.arguments[i];
3263 if (index >= left_components)
3264 expr += right_arg + "." + "xyzw"[index - left_components];
3265 else
3266 expr += left_arg + "." + "xyzw"[index];
3267
3268 if (i + 1 < uint32_t(cop.arguments.size()))
3269 expr += ", ";
3270 }
3271
3272 expr += ")";
3273 return expr;
3274 }
3275
3276 case OpCompositeExtract:
3277 {
3278 auto expr = access_chain_internal(cop.arguments[0], &cop.arguments[1], uint32_t(cop.arguments.size() - 1),
3279 ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr);
3280 return expr;
3281 }
3282
3283 case OpCompositeInsert:
3284 SPIRV_CROSS_THROW("OpCompositeInsert spec constant op is not supported.");
3285
3286 default:
3287 // Some opcodes are unimplemented here, these are currently not possible to test from glslang.
3288 SPIRV_CROSS_THROW("Unimplemented spec constant op.");
3289 }
3290
3291 uint32_t bit_width = 0;
3292 if (unary || binary || cop.opcode == OpSConvert || cop.opcode == OpUConvert)
3293 bit_width = expression_type(cop.arguments[0]).width;
3294
3295 SPIRType::BaseType input_type;
3296 bool skip_cast_if_equal_type = opcode_is_sign_invariant(cop.opcode);
3297
3298 switch (cop.opcode)
3299 {
3300 case OpIEqual:
3301 case OpINotEqual:
3302 input_type = to_signed_basetype(bit_width);
3303 break;
3304
3305 case OpSLessThan:
3306 case OpSLessThanEqual:
3307 case OpSGreaterThan:
3308 case OpSGreaterThanEqual:
3309 case OpSMod:
3310 case OpSDiv:
3311 case OpShiftRightArithmetic:
3312 case OpSConvert:
3313 case OpSNegate:
3314 input_type = to_signed_basetype(bit_width);
3315 break;
3316
3317 case OpULessThan:
3318 case OpULessThanEqual:
3319 case OpUGreaterThan:
3320 case OpUGreaterThanEqual:
3321 case OpUMod:
3322 case OpUDiv:
3323 case OpShiftRightLogical:
3324 case OpUConvert:
3325 input_type = to_unsigned_basetype(bit_width);
3326 break;
3327
3328 default:
3329 input_type = type.basetype;
3330 break;
3331 }
3332
3333 #undef GLSL_BOP
3334 #undef GLSL_UOP
3335 if (binary)
3336 {
3337 if (cop.arguments.size() < 2)
3338 SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
3339
3340 string cast_op0;
3341 string cast_op1;
3342 auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, cop.arguments[0],
3343 cop.arguments[1], skip_cast_if_equal_type);
3344
3345 if (type.basetype != input_type && type.basetype != SPIRType::Boolean)
3346 {
3347 expected_type.basetype = input_type;
3348 auto expr = bitcast_glsl_op(type, expected_type);
3349 expr += '(';
3350 expr += join(cast_op0, " ", op, " ", cast_op1);
3351 expr += ')';
3352 return expr;
3353 }
3354 else
3355 return join("(", cast_op0, " ", op, " ", cast_op1, ")");
3356 }
3357 else if (unary)
3358 {
3359 if (cop.arguments.size() < 1)
3360 SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
3361
3362 // Auto-bitcast to result type as needed.
3363 // Works around various casting scenarios in glslang as there is no OpBitcast for specialization constants.
3364 return join("(", op, bitcast_glsl(type, cop.arguments[0]), ")");
3365 }
3366 else if (cop.opcode == OpSConvert || cop.opcode == OpUConvert)
3367 {
3368 if (cop.arguments.size() < 1)
3369 SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
3370
3371 auto &arg_type = expression_type(cop.arguments[0]);
3372 if (arg_type.width < type.width && input_type != arg_type.basetype)
3373 {
3374 auto expected = arg_type;
3375 expected.basetype = input_type;
3376 return join(op, "(", bitcast_glsl(expected, cop.arguments[0]), ")");
3377 }
3378 else
3379 return join(op, "(", to_expression(cop.arguments[0]), ")");
3380 }
3381 else
3382 {
3383 if (cop.arguments.size() < 1)
3384 SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
3385 return join(op, "(", to_expression(cop.arguments[0]), ")");
3386 }
3387 }
3388
3389 string CompilerGLSL::constant_expression(const SPIRConstant &c)
3390 {
3391 auto &type = get<SPIRType>(c.constant_type);
3392
3393 if (type.pointer)
3394 {
3395 return backend.null_pointer_literal;
3396 }
3397 else if (!c.subconstants.empty())
3398 {
3399 // Handles arrays and structures.
3400 string res;
3401
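// Depending on the backend flags checked below, this yields roughly one of
// "Type{ ... }", "Type({ ... })", "{ ... }" or "Type(...)".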
3402 // Allow Metal to use the array<T> template to make arrays a value type
3403 bool needs_trailing_bracket = false;
3404 if (backend.use_initializer_list && backend.use_typed_initializer_list && type.basetype == SPIRType::Struct &&
3405 type.array.empty())
3406 {
3407 res = type_to_glsl_constructor(type) + "{ ";
3408 }
3409 else if (backend.use_initializer_list && backend.use_typed_initializer_list && !type.array.empty())
3410 {
3411 res = type_to_glsl_constructor(type) + "({ ";
3412 needs_trailing_bracket = true;
3413 }
3414 else if (backend.use_initializer_list)
3415 {
3416 res = "{ ";
3417 }
3418 else
3419 {
3420 res = type_to_glsl_constructor(type) + "(";
3421 }
3422
3423 for (auto &elem : c.subconstants)
3424 {
3425 auto &subc = get<SPIRConstant>(elem);
3426 if (subc.specialization)
3427 res += to_name(elem);
3428 else
3429 res += constant_expression(subc);
3430
3431 if (&elem != &c.subconstants.back())
3432 res += ", ";
3433 }
3434
3435 res += backend.use_initializer_list ? " }" : ")";
3436 if (needs_trailing_bracket)
3437 res += ")";
3438
3439 return res;
3440 }
3441 else if (type.basetype == SPIRType::Struct && type.member_types.size() == 0)
3442 {
3443 // Metal tessellation likes empty structs which are then constant expressions.
3444 if (backend.supports_empty_struct)
3445 return "{ }";
3446 else if (backend.use_typed_initializer_list)
3447 return join(type_to_glsl(get<SPIRType>(c.constant_type)), "{ 0 }");
3448 else if (backend.use_initializer_list)
3449 return "{ 0 }";
3450 else
3451 return join(type_to_glsl(get<SPIRType>(c.constant_type)), "(0)");
3452 }
3453 else if (c.columns() == 1)
3454 {
3455 return constant_expression_vector(c, 0);
3456 }
3457 else
3458 {
3459 string res = type_to_glsl(get<SPIRType>(c.constant_type)) + "(";
3460 for (uint32_t col = 0; col < c.columns(); col++)
3461 {
3462 if (c.specialization_constant_id(col) != 0)
3463 res += to_name(c.specialization_constant_id(col));
3464 else
3465 res += constant_expression_vector(c, col);
3466
3467 if (col + 1 < c.columns())
3468 res += ", ";
3469 }
3470 res += ")";
3471 return res;
3472 }
3473 }
3474
3475 #ifdef _MSC_VER
3476 // sprintf warning.
3477 // We cannot rely on snprintf existing because, ..., MSVC.
3478 #pragma warning(push)
3479 #pragma warning(disable : 4996)
3480 #endif
3481
3482 string CompilerGLSL::convert_half_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
3483 {
3484 string res;
3485 float float_value = c.scalar_f16(col, row);
3486
3487 // There is no literal "hf" in GL_NV_gpu_shader5, so to avoid lots
3488 // of complicated workarounds, just value-cast to the half type always.
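// For finite values the emitted form is roughly "float16_t(1.5)" (the exact
// type name depends on the target); non-finite values use the divisions below.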
3489 if (std::isnan(float_value) || std::isinf(float_value))
3490 {
3491 SPIRType type;
3492 type.basetype = SPIRType::Half;
3493 type.vecsize = 1;
3494 type.columns = 1;
3495
3496 if (float_value == numeric_limits<float>::infinity())
3497 res = join(type_to_glsl(type), "(1.0 / 0.0)");
3498 else if (float_value == -numeric_limits<float>::infinity())
3499 res = join(type_to_glsl(type), "(-1.0 / 0.0)");
3500 else if (std::isnan(float_value))
3501 res = join(type_to_glsl(type), "(0.0 / 0.0)");
3502 else
3503 SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant.");
3504 }
3505 else
3506 {
3507 SPIRType type;
3508 type.basetype = SPIRType::Half;
3509 type.vecsize = 1;
3510 type.columns = 1;
3511 res = join(type_to_glsl(type), "(", convert_to_string(float_value, current_locale_radix_character), ")");
3512 }
3513
3514 return res;
3515 }
3516
3517 string CompilerGLSL::convert_float_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
3518 {
3519 string res;
3520 float float_value = c.scalar_f32(col, row);
3521
3522 if (std::isnan(float_value) || std::isinf(float_value))
3523 {
3524 // Use special representation.
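// e.g. a NaN with bit pattern 0x7fc00000 typically becomes something like
// "uintBitsToFloat(0x7fc00000u)" on non-legacy targets (illustrative only).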
3525 if (!is_legacy())
3526 {
3527 SPIRType out_type;
3528 SPIRType in_type;
3529 out_type.basetype = SPIRType::Float;
3530 in_type.basetype = SPIRType::UInt;
3531 out_type.vecsize = 1;
3532 in_type.vecsize = 1;
3533 out_type.width = 32;
3534 in_type.width = 32;
3535
3536 char print_buffer[32];
3537 sprintf(print_buffer, "0x%xu", c.scalar(col, row));
3538 res = join(bitcast_glsl_op(out_type, in_type), "(", print_buffer, ")");
3539 }
3540 else
3541 {
3542 if (float_value == numeric_limits<float>::infinity())
3543 {
3544 if (backend.float_literal_suffix)
3545 res = "(1.0f / 0.0f)";
3546 else
3547 res = "(1.0 / 0.0)";
3548 }
3549 else if (float_value == -numeric_limits<float>::infinity())
3550 {
3551 if (backend.float_literal_suffix)
3552 res = "(-1.0f / 0.0f)";
3553 else
3554 res = "(-1.0 / 0.0)";
3555 }
3556 else if (std::isnan(float_value))
3557 {
3558 if (backend.float_literal_suffix)
3559 res = "(0.0f / 0.0f)";
3560 else
3561 res = "(0.0 / 0.0)";
3562 }
3563 else
3564 SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant.");
3565 }
3566 }
3567 else
3568 {
3569 res = convert_to_string(float_value, current_locale_radix_character);
3570 if (backend.float_literal_suffix)
3571 res += "f";
3572 }
3573
3574 return res;
3575 }
3576
3577 std::string CompilerGLSL::convert_double_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
3578 {
3579 string res;
3580 double double_value = c.scalar_f64(col, row);
3581
3582 if (std::isnan(double_value) || std::isinf(double_value))
3583 {
3584 // Use special representation.
3585 if (!is_legacy())
3586 {
3587 SPIRType out_type;
3588 SPIRType in_type;
3589 out_type.basetype = SPIRType::Double;
3590 in_type.basetype = SPIRType::UInt64;
3591 out_type.vecsize = 1;
3592 in_type.vecsize = 1;
3593 out_type.width = 64;
3594 in_type.width = 64;
3595
3596 uint64_t u64_value = c.scalar_u64(col, row);
3597
3598 if (options.es)
3599 SPIRV_CROSS_THROW("64-bit integers/float not supported in ES profile.");
3600 require_extension_internal("GL_ARB_gpu_shader_int64");
3601
3602 char print_buffer[64];
3603 sprintf(print_buffer, "0x%llx%s", static_cast<unsigned long long>(u64_value),
3604 backend.long_long_literal_suffix ? "ull" : "ul");
3605 res = join(bitcast_glsl_op(out_type, in_type), "(", print_buffer, ")");
3606 }
3607 else
3608 {
3609 if (options.es)
3610 SPIRV_CROSS_THROW("FP64 not supported in ES profile.");
3611 if (options.version < 400)
3612 require_extension_internal("GL_ARB_gpu_shader_fp64");
3613
3614 if (double_value == numeric_limits<double>::infinity())
3615 {
3616 if (backend.double_literal_suffix)
3617 res = "(1.0lf / 0.0lf)";
3618 else
3619 res = "(1.0 / 0.0)";
3620 }
3621 else if (double_value == -numeric_limits<double>::infinity())
3622 {
3623 if (backend.double_literal_suffix)
3624 res = "(-1.0lf / 0.0lf)";
3625 else
3626 res = "(-1.0 / 0.0)";
3627 }
3628 else if (std::isnan(double_value))
3629 {
3630 if (backend.double_literal_suffix)
3631 res = "(0.0lf / 0.0lf)";
3632 else
3633 res = "(0.0 / 0.0)";
3634 }
3635 else
3636 SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant.");
3637 }
3638 }
3639 else
3640 {
3641 res = convert_to_string(double_value, current_locale_radix_character);
3642 if (backend.double_literal_suffix)
3643 res += "lf";
3644 }
3645
3646 return res;
3647 }
3648
3649 #ifdef _MSC_VER
3650 #pragma warning(pop)
3651 #endif
3652
3653 string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t vector)
3654 {
3655 auto type = get<SPIRType>(c.constant_type);
3656 type.columns = 1;
3657
3658 auto scalar_type = type;
3659 scalar_type.vecsize = 1;
3660
3661 string res;
3662 bool splat = backend.use_constructor_splatting && c.vector_size() > 1;
3663 bool swizzle_splat = backend.can_swizzle_scalar && c.vector_size() > 1;
3664
3665 if (!type_is_floating_point(type))
3666 {
3667 // Cannot swizzle literal integers as a special case.
3668 swizzle_splat = false;
3669 }
3670
3671 if (splat || swizzle_splat)
3672 {
3673 // Cannot use constant splatting if we have specialization constants somewhere in the vector.
3674 for (uint32_t i = 0; i < c.vector_size(); i++)
3675 {
3676 if (c.specialization_constant_id(vector, i) != 0)
3677 {
3678 splat = false;
3679 swizzle_splat = false;
3680 break;
3681 }
3682 }
3683 }
3684
3685 if (splat || swizzle_splat)
3686 {
3687 if (type.width == 64)
3688 {
3689 uint64_t ident = c.scalar_u64(vector, 0);
3690 for (uint32_t i = 1; i < c.vector_size(); i++)
3691 {
3692 if (ident != c.scalar_u64(vector, i))
3693 {
3694 splat = false;
3695 swizzle_splat = false;
3696 break;
3697 }
3698 }
3699 }
3700 else
3701 {
3702 uint32_t ident = c.scalar(vector, 0);
3703 for (uint32_t i = 1; i < c.vector_size(); i++)
3704 {
3705 if (ident != c.scalar(vector, i))
3706 {
3707 splat = false;
3708 swizzle_splat = false;
3709 }
3710 }
3711 }
3712 }
3713
3714 if (c.vector_size() > 1 && !swizzle_splat)
3715 res += type_to_glsl(type) + "(";
3716
3717 switch (type.basetype)
3718 {
3719 case SPIRType::Half:
3720 if (splat || swizzle_splat)
3721 {
3722 res += convert_half_to_string(c, vector, 0);
3723 if (swizzle_splat)
3724 res = remap_swizzle(get<SPIRType>(c.constant_type), 1, res);
3725 }
3726 else
3727 {
3728 for (uint32_t i = 0; i < c.vector_size(); i++)
3729 {
3730 if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
3731 res += to_name(c.specialization_constant_id(vector, i));
3732 else
3733 res += convert_half_to_string(c, vector, i);
3734
3735 if (i + 1 < c.vector_size())
3736 res += ", ";
3737 }
3738 }
3739 break;
3740
3741 case SPIRType::Float:
3742 if (splat || swizzle_splat)
3743 {
3744 res += convert_float_to_string(c, vector, 0);
3745 if (swizzle_splat)
3746 res = remap_swizzle(get<SPIRType>(c.constant_type), 1, res);
3747 }
3748 else
3749 {
3750 for (uint32_t i = 0; i < c.vector_size(); i++)
3751 {
3752 if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
3753 res += to_name(c.specialization_constant_id(vector, i));
3754 else
3755 res += convert_float_to_string(c, vector, i);
3756
3757 if (i + 1 < c.vector_size())
3758 res += ", ";
3759 }
3760 }
3761 break;
3762
3763 case SPIRType::Double:
3764 if (splat || swizzle_splat)
3765 {
3766 res += convert_double_to_string(c, vector, 0);
3767 if (swizzle_splat)
3768 res = remap_swizzle(get<SPIRType>(c.constant_type), 1, res);
3769 }
3770 else
3771 {
3772 for (uint32_t i = 0; i < c.vector_size(); i++)
3773 {
3774 if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
3775 res += to_name(c.specialization_constant_id(vector, i));
3776 else
3777 res += convert_double_to_string(c, vector, i);
3778
3779 if (i + 1 < c.vector_size())
3780 res += ", ";
3781 }
3782 }
3783 break;
3784
3785 case SPIRType::Int64:
3786 if (splat)
3787 {
3788 res += convert_to_string(c.scalar_i64(vector, 0));
3789 if (backend.long_long_literal_suffix)
3790 res += "ll";
3791 else
3792 res += "l";
3793 }
3794 else
3795 {
3796 for (uint32_t i = 0; i < c.vector_size(); i++)
3797 {
3798 if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
3799 res += to_name(c.specialization_constant_id(vector, i));
3800 else
3801 {
3802 res += convert_to_string(c.scalar_i64(vector, i));
3803 if (backend.long_long_literal_suffix)
3804 res += "ll";
3805 else
3806 res += "l";
3807 }
3808
3809 if (i + 1 < c.vector_size())
3810 res += ", ";
3811 }
3812 }
3813 break;
3814
3815 case SPIRType::UInt64:
3816 if (splat)
3817 {
3818 res += convert_to_string(c.scalar_u64(vector, 0));
3819 if (backend.long_long_literal_suffix)
3820 res += "ull";
3821 else
3822 res += "ul";
3823 }
3824 else
3825 {
3826 for (uint32_t i = 0; i < c.vector_size(); i++)
3827 {
3828 if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
3829 res += to_name(c.specialization_constant_id(vector, i));
3830 else
3831 {
3832 res += convert_to_string(c.scalar_u64(vector, i));
3833 if (backend.long_long_literal_suffix)
3834 res += "ull";
3835 else
3836 res += "ul";
3837 }
3838
3839 if (i + 1 < c.vector_size())
3840 res += ", ";
3841 }
3842 }
3843 break;
3844
3845 case SPIRType::UInt:
3846 if (splat)
3847 {
3848 res += convert_to_string(c.scalar(vector, 0));
3849 if (is_legacy())
3850 {
3851 // Fake unsigned constant literals with signed ones if possible.
3852 // Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed.
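// e.g. the constant 16u is emitted as plain "16" here.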
3853 if (c.scalar_i32(vector, 0) < 0)
3854 SPIRV_CROSS_THROW("Tried to convert uint literal into int, but this made the literal negative.");
3855 }
3856 else if (backend.uint32_t_literal_suffix)
3857 res += "u";
3858 }
3859 else
3860 {
3861 for (uint32_t i = 0; i < c.vector_size(); i++)
3862 {
3863 if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
3864 res += to_name(c.specialization_constant_id(vector, i));
3865 else
3866 {
3867 res += convert_to_string(c.scalar(vector, i));
3868 if (is_legacy())
3869 {
3870 // Fake unsigned constant literals with signed ones if possible.
3871 // Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed.
3872 if (c.scalar_i32(vector, i) < 0)
3873 SPIRV_CROSS_THROW(
3874 "Tried to convert uint literal into int, but this made the literal negative.");
3875 }
3876 else if (backend.uint32_t_literal_suffix)
3877 res += "u";
3878 }
3879
3880 if (i + 1 < c.vector_size())
3881 res += ", ";
3882 }
3883 }
3884 break;
3885
3886 case SPIRType::Int:
3887 if (splat)
3888 res += convert_to_string(c.scalar_i32(vector, 0));
3889 else
3890 {
3891 for (uint32_t i = 0; i < c.vector_size(); i++)
3892 {
3893 if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
3894 res += to_name(c.specialization_constant_id(vector, i));
3895 else
3896 res += convert_to_string(c.scalar_i32(vector, i));
3897 if (i + 1 < c.vector_size())
3898 res += ", ";
3899 }
3900 }
3901 break;
3902
3903 case SPIRType::UShort:
3904 if (splat)
3905 {
3906 res += convert_to_string(c.scalar(vector, 0));
3907 }
3908 else
3909 {
3910 for (uint32_t i = 0; i < c.vector_size(); i++)
3911 {
3912 if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
3913 res += to_name(c.specialization_constant_id(vector, i));
3914 else
3915 {
3916 if (*backend.uint16_t_literal_suffix)
3917 {
3918 res += convert_to_string(c.scalar_u16(vector, i));
3919 res += backend.uint16_t_literal_suffix;
3920 }
3921 else
3922 {
3923 // If backend doesn't have a literal suffix, we need to value cast.
3924 res += type_to_glsl(scalar_type);
3925 res += "(";
3926 res += convert_to_string(c.scalar_u16(vector, i));
3927 res += ")";
3928 }
3929 }
3930
3931 if (i + 1 < c.vector_size())
3932 res += ", ";
3933 }
3934 }
3935 break;
3936
3937 case SPIRType::Short:
3938 if (splat)
3939 {
3940 res += convert_to_string(c.scalar_i16(vector, 0));
3941 }
3942 else
3943 {
3944 for (uint32_t i = 0; i < c.vector_size(); i++)
3945 {
3946 if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
3947 res += to_name(c.specialization_constant_id(vector, i));
3948 else
3949 {
3950 if (*backend.int16_t_literal_suffix)
3951 {
3952 res += convert_to_string(c.scalar_i16(vector, i));
3953 res += backend.int16_t_literal_suffix;
3954 }
3955 else
3956 {
3957 // If backend doesn't have a literal suffix, we need to value cast.
3958 res += type_to_glsl(scalar_type);
3959 res += "(";
3960 res += convert_to_string(c.scalar_i16(vector, i));
3961 res += ")";
3962 }
3963 }
3964
3965 if (i + 1 < c.vector_size())
3966 res += ", ";
3967 }
3968 }
3969 break;
3970
3971 case SPIRType::UByte:
3972 if (splat)
3973 {
3974 res += convert_to_string(c.scalar_u8(vector, 0));
3975 }
3976 else
3977 {
3978 for (uint32_t i = 0; i < c.vector_size(); i++)
3979 {
3980 if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
3981 res += to_name(c.specialization_constant_id(vector, i));
3982 else
3983 {
3984 res += type_to_glsl(scalar_type);
3985 res += "(";
3986 res += convert_to_string(c.scalar_u8(vector, i));
3987 res += ")";
3988 }
3989
3990 if (i + 1 < c.vector_size())
3991 res += ", ";
3992 }
3993 }
3994 break;
3995
3996 case SPIRType::SByte:
3997 if (splat)
3998 {
3999 res += convert_to_string(c.scalar_i8(vector, 0));
4000 }
4001 else
4002 {
4003 for (uint32_t i = 0; i < c.vector_size(); i++)
4004 {
4005 if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
4006 res += to_name(c.specialization_constant_id(vector, i));
4007 else
4008 {
4009 res += type_to_glsl(scalar_type);
4010 res += "(";
4011 res += convert_to_string(c.scalar_i8(vector, i));
4012 res += ")";
4013 }
4014
4015 if (i + 1 < c.vector_size())
4016 res += ", ";
4017 }
4018 }
4019 break;
4020
4021 case SPIRType::Boolean:
4022 if (splat)
4023 res += c.scalar(vector, 0) ? "true" : "false";
4024 else
4025 {
4026 for (uint32_t i = 0; i < c.vector_size(); i++)
4027 {
4028 if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
4029 res += to_name(c.specialization_constant_id(vector, i));
4030 else
4031 res += c.scalar(vector, i) ? "true" : "false";
4032
4033 if (i + 1 < c.vector_size())
4034 res += ", ";
4035 }
4036 }
4037 break;
4038
4039 default:
4040 SPIRV_CROSS_THROW("Invalid constant expression basetype.");
4041 }
4042
4043 if (c.vector_size() > 1 && !swizzle_splat)
4044 res += ")";
4045
4046 return res;
4047 }
4048
4049 SPIRExpression &CompilerGLSL::emit_uninitialized_temporary_expression(uint32_t type, uint32_t id)
4050 {
4051 forced_temporaries.insert(id);
4052 emit_uninitialized_temporary(type, id);
4053 return set<SPIRExpression>(id, to_name(id), type, true);
4054 }
4055
4056 void CompilerGLSL::emit_uninitialized_temporary(uint32_t result_type, uint32_t result_id)
4057 {
4058 // If we're declaring temporaries inside continue blocks,
4059 // we must declare the temporary in the loop header so that the continue block can avoid declaring new variables.
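// (Roughly: continue blocks are emitted into the loop's continue slot where
// new declarations cannot appear, hence the hoist via header.declare_temporary.)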
4060 if (current_continue_block && !hoisted_temporaries.count(result_id))
4061 {
4062 auto &header = get<SPIRBlock>(current_continue_block->loop_dominator);
4063 if (find_if(begin(header.declare_temporary), end(header.declare_temporary),
4064 [result_type, result_id](const pair<uint32_t, uint32_t> &tmp) {
4065 return tmp.first == result_type && tmp.second == result_id;
4066 }) == end(header.declare_temporary))
4067 {
4068 header.declare_temporary.emplace_back(result_type, result_id);
4069 hoisted_temporaries.insert(result_id);
4070 force_recompile();
4071 }
4072 }
4073 else if (hoisted_temporaries.count(result_id) == 0)
4074 {
4075 auto &type = get<SPIRType>(result_type);
4076 auto &flags = ir.meta[result_id].decoration.decoration_flags;
4077
4078 // The result_id has not been made into an expression yet, so use flags interface.
4079 add_local_variable_name(result_id);
4080 statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(result_id)), ";");
4081 }
4082 }
4083
4084 string CompilerGLSL::declare_temporary(uint32_t result_type, uint32_t result_id)
4085 {
4086 auto &type = get<SPIRType>(result_type);
4087 auto &flags = ir.meta[result_id].decoration.decoration_flags;
4088
4089 // If we're declaring temporaries inside continue blocks,
4090 // we must declare the temporary in the loop header so that the continue block can avoid declaring new variables.
4091 if (current_continue_block && !hoisted_temporaries.count(result_id))
4092 {
4093 auto &header = get<SPIRBlock>(current_continue_block->loop_dominator);
4094 if (find_if(begin(header.declare_temporary), end(header.declare_temporary),
4095 [result_type, result_id](const pair<uint32_t, uint32_t> &tmp) {
4096 return tmp.first == result_type && tmp.second == result_id;
4097 }) == end(header.declare_temporary))
4098 {
4099 header.declare_temporary.emplace_back(result_type, result_id);
4100 hoisted_temporaries.insert(result_id);
4101 force_recompile();
4102 }
4103
4104 return join(to_name(result_id), " = ");
4105 }
4106 else if (hoisted_temporaries.count(result_id))
4107 {
4108 // The temporary has already been declared earlier, so just "declare" the temporary by writing to it.
4109 return join(to_name(result_id), " = ");
4110 }
4111 else
4112 {
4113 // The result_id has not been made into an expression yet, so use flags interface.
4114 add_local_variable_name(result_id);
4115 return join(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(result_id)), " = ");
4116 }
4117 }
4118
4119 bool CompilerGLSL::expression_is_forwarded(uint32_t id) const
4120 {
4121 return forwarded_temporaries.count(id) != 0;
4122 }
4123
4124 bool CompilerGLSL::expression_suppresses_usage_tracking(uint32_t id) const
4125 {
4126 return suppressed_usage_tracking.count(id) != 0;
4127 }
4128
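// Forwarding note (illustrative): when an expression is forwarded, the RHS
// string itself is bound to the result ID so later uses can fold it inline,
// e.g. "(a + b) * c"; otherwise it is flushed to a temporary first, roughly
//   float _42 = a + b;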
4129 SPIRExpression &CompilerGLSL::emit_op(uint32_t result_type, uint32_t result_id, const string &rhs, bool forwarding,
4130 bool suppress_usage_tracking)
4131 {
4132 if (forwarding && (forced_temporaries.find(result_id) == end(forced_temporaries)))
4133 {
4134 // Just forward it without temporary.
4135 // If the forward is trivial, we do not force flushing to temporary for this expression.
4136 forwarded_temporaries.insert(result_id);
4137 if (suppress_usage_tracking)
4138 suppressed_usage_tracking.insert(result_id);
4139
4140 return set<SPIRExpression>(result_id, rhs, result_type, true);
4141 }
4142 else
4143 {
4144 // If expression isn't immutable, bind it to a temporary and make the new temporary immutable (they always are).
4145 statement(declare_temporary(result_type, result_id), rhs, ";");
4146 return set<SPIRExpression>(result_id, to_name(result_id), result_type, true);
4147 }
4148 }
4149
4150 void CompilerGLSL::emit_unary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op)
4151 {
4152 bool forward = should_forward(op0);
4153 emit_op(result_type, result_id, join(op, to_enclosed_unpacked_expression(op0)), forward);
4154 inherit_expression_dependencies(result_id, op0);
4155 }
4156
4157 void CompilerGLSL::emit_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op)
4158 {
4159 bool forward = should_forward(op0) && should_forward(op1);
4160 emit_op(result_type, result_id,
4161 join(to_enclosed_unpacked_expression(op0), " ", op, " ", to_enclosed_unpacked_expression(op1)), forward);
4162
4163 inherit_expression_dependencies(result_id, op0);
4164 inherit_expression_dependencies(result_id, op1);
4165 }
4166
4167 void CompilerGLSL::emit_unrolled_unary_op(uint32_t result_type, uint32_t result_id, uint32_t operand, const char *op)
4168 {
4169 auto &type = get<SPIRType>(result_type);
4170 auto expr = type_to_glsl_constructor(type);
4171 expr += '(';
4172 for (uint32_t i = 0; i < type.vecsize; i++)
4173 {
4174 // Make sure to call to_expression multiple times to ensure
4175 // that these expressions are properly flushed to temporaries if needed.
4176 expr += op;
4177 expr += to_extract_component_expression(operand, i);
4178
4179 if (i + 1 < type.vecsize)
4180 expr += ", ";
4181 }
4182 expr += ')';
4183 emit_op(result_type, result_id, expr, should_forward(operand));
4184
4185 inherit_expression_dependencies(result_id, operand);
4186 }
4187
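// Illustrative output for the unrolled case below: a logical AND of two bvec2
// operands (which has no vector operator form in GLSL) comes out roughly as
//   bvec2(a.x && b.x, a.y && b.y)
// with per-component bitcasts and optional negation applied as requested.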
4188 void CompilerGLSL::emit_unrolled_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
4189 const char *op, bool negate, SPIRType::BaseType expected_type)
4190 {
4191 auto &type0 = expression_type(op0);
4192 auto &type1 = expression_type(op1);
4193
4194 SPIRType target_type0 = type0;
4195 SPIRType target_type1 = type1;
4196 target_type0.basetype = expected_type;
4197 target_type1.basetype = expected_type;
4198 target_type0.vecsize = 1;
4199 target_type1.vecsize = 1;
4200
4201 auto &type = get<SPIRType>(result_type);
4202 auto expr = type_to_glsl_constructor(type);
4203 expr += '(';
4204 for (uint32_t i = 0; i < type.vecsize; i++)
4205 {
4206 // Make sure to call to_expression multiple times to ensure
4207 // that these expressions are properly flushed to temporaries if needed.
4208 if (negate)
4209 expr += "!(";
4210
4211 if (expected_type != SPIRType::Unknown && type0.basetype != expected_type)
4212 expr += bitcast_expression(target_type0, type0.basetype, to_extract_component_expression(op0, i));
4213 else
4214 expr += to_extract_component_expression(op0, i);
4215
4216 expr += ' ';
4217 expr += op;
4218 expr += ' ';
4219
4220 if (expected_type != SPIRType::Unknown && type1.basetype != expected_type)
4221 expr += bitcast_expression(target_type1, type1.basetype, to_extract_component_expression(op1, i));
4222 else
4223 expr += to_extract_component_expression(op1, i);
4224
4225 if (negate)
4226 expr += ")";
4227
4228 if (i + 1 < type.vecsize)
4229 expr += ", ";
4230 }
4231 expr += ')';
4232 emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1));
4233
4234 inherit_expression_dependencies(result_id, op0);
4235 inherit_expression_dependencies(result_id, op1);
4236 }
4237
4238 SPIRType CompilerGLSL::binary_op_bitcast_helper(string &cast_op0, string &cast_op1, SPIRType::BaseType &input_type,
4239 uint32_t op0, uint32_t op1, bool skip_cast_if_equal_type)
4240 {
4241 auto &type0 = expression_type(op0);
4242 auto &type1 = expression_type(op1);
4243
4244 // We have to bitcast if our inputs are of different type, or if our types are not equal to expected inputs.
4245 // For some functions like OpIEqual and INotEqual, we don't care if inputs are of different types than expected
4246 // since equality test is exactly the same.
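// Example (assumed scenario): a signed comparison of two uint expressions
// bitcasts both operands, giving roughly "int(a) < int(b)", while a
// sign-invariant op such as OpIEqual skips the cast when the types match.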
4247 bool cast = (type0.basetype != type1.basetype) || (!skip_cast_if_equal_type && type0.basetype != input_type);
4248
4249 // Create a fake type so we can bitcast to it.
4250 // We only deal with regular arithmetic types here like int, uints and so on.
4251 SPIRType expected_type;
4252 expected_type.basetype = input_type;
4253 expected_type.vecsize = type0.vecsize;
4254 expected_type.columns = type0.columns;
4255 expected_type.width = type0.width;
4256
4257 if (cast)
4258 {
4259 cast_op0 = bitcast_glsl(expected_type, op0);
4260 cast_op1 = bitcast_glsl(expected_type, op1);
4261 }
4262 else
4263 {
4264 // If we don't cast, our actual input type is that of the first (or second) argument.
4265 cast_op0 = to_enclosed_unpacked_expression(op0);
4266 cast_op1 = to_enclosed_unpacked_expression(op1);
4267 input_type = type0.basetype;
4268 }
4269
4270 return expected_type;
4271 }
4272
4273 void CompilerGLSL::emit_binary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
4274 const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type)
4275 {
4276 string cast_op0, cast_op1;
4277 auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type);
4278 auto &out_type = get<SPIRType>(result_type);
4279
4280 // We might have casted away from the result type, so bitcast again.
4281 // For example, arithmetic right shift with uint inputs.
4282 // Special case boolean outputs since relational opcodes output booleans instead of int/uint.
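// e.g. an arithmetic right shift with uint operands and a uint result is
// emitted roughly as "uint(int(a) >> int(b))".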
4283 string expr;
4284 if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean)
4285 {
4286 expected_type.basetype = input_type;
4287 expr = bitcast_glsl_op(out_type, expected_type);
4288 expr += '(';
4289 expr += join(cast_op0, " ", op, " ", cast_op1);
4290 expr += ')';
4291 }
4292 else
4293 expr += join(cast_op0, " ", op, " ", cast_op1);
4294
4295 emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1));
4296 inherit_expression_dependencies(result_id, op0);
4297 inherit_expression_dependencies(result_id, op1);
4298 }
4299
4300 void CompilerGLSL::emit_unary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op)
4301 {
4302 bool forward = should_forward(op0);
4303 emit_op(result_type, result_id, join(op, "(", to_unpacked_expression(op0), ")"), forward);
4304 inherit_expression_dependencies(result_id, op0);
4305 }
4306
4307 void CompilerGLSL::emit_binary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
4308 const char *op)
4309 {
4310 bool forward = should_forward(op0) && should_forward(op1);
4311 emit_op(result_type, result_id, join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ")"),
4312 forward);
4313 inherit_expression_dependencies(result_id, op0);
4314 inherit_expression_dependencies(result_id, op1);
4315 }
4316
4317 void CompilerGLSL::emit_unary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op,
4318 SPIRType::BaseType input_type, SPIRType::BaseType expected_result_type)
4319 {
4320 auto &out_type = get<SPIRType>(result_type);
4321 auto &expr_type = expression_type(op0);
4322 auto expected_type = out_type;
4323
4324 // Bit-widths might be different in unary cases because we use it for SConvert/UConvert and friends.
4325 expected_type.basetype = input_type;
4326 expected_type.width = expr_type.width;
4327 string cast_op = expr_type.basetype != input_type ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0);
4328
4329 string expr;
4330 if (out_type.basetype != expected_result_type)
4331 {
4332 expected_type.basetype = expected_result_type;
4333 expected_type.width = out_type.width;
4334 expr = bitcast_glsl_op(out_type, expected_type);
4335 expr += '(';
4336 expr += join(op, "(", cast_op, ")");
4337 expr += ')';
4338 }
4339 else
4340 {
4341 expr += join(op, "(", cast_op, ")");
4342 }
4343
4344 emit_op(result_type, result_id, expr, should_forward(op0));
4345 inherit_expression_dependencies(result_id, op0);
4346 }
4347
4348 // Very special case. Handling bitfieldExtract requires us to deal with different bitcasts of different signs
4349 // and different vector sizes all at once. Need a special purpose method here.
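// For example, extracting from an ivec2 base with uint offset/count operands
// comes out roughly as "bitfieldExtract(v, int(offset), int(count))", with an
// extra outer bitcast only when the declared result type differs from the
// base type.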
4350 void CompilerGLSL::emit_trinary_func_op_bitextract(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
4351 uint32_t op2, const char *op,
4352 SPIRType::BaseType expected_result_type,
4353 SPIRType::BaseType input_type0, SPIRType::BaseType input_type1,
4354 SPIRType::BaseType input_type2)
4355 {
4356 auto &out_type = get<SPIRType>(result_type);
4357 auto expected_type = out_type;
4358 expected_type.basetype = input_type0;
4359
4360 string cast_op0 =
4361 expression_type(op0).basetype != input_type0 ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0);
4362
4363 auto op1_expr = to_unpacked_expression(op1);
4364 auto op2_expr = to_unpacked_expression(op2);
4365
4366 // Use value casts here instead. Input must be exactly int or uint, but SPIR-V might be 16-bit.
4367 expected_type.basetype = input_type1;
4368 expected_type.vecsize = 1;
4369 string cast_op1 = expression_type(op1).basetype != input_type1 ?
4370 join(type_to_glsl_constructor(expected_type), "(", op1_expr, ")") :
4371 op1_expr;
4372
4373 expected_type.basetype = input_type2;
4374 expected_type.vecsize = 1;
4375 string cast_op2 = expression_type(op2).basetype != input_type2 ?
4376 join(type_to_glsl_constructor(expected_type), "(", op2_expr, ")") :
4377 op2_expr;
4378
4379 string expr;
4380 if (out_type.basetype != expected_result_type)
4381 {
4382 expected_type.vecsize = out_type.vecsize;
4383 expected_type.basetype = expected_result_type;
4384 expr = bitcast_glsl_op(out_type, expected_type);
4385 expr += '(';
4386 expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")");
4387 expr += ')';
4388 }
4389 else
4390 {
4391 expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")");
4392 }
4393
4394 emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1) && should_forward(op2));
4395 inherit_expression_dependencies(result_id, op0);
4396 inherit_expression_dependencies(result_id, op1);
4397 inherit_expression_dependencies(result_id, op2);
4398 }
4399
4400 void CompilerGLSL::emit_trinary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
4401 uint32_t op2, const char *op, SPIRType::BaseType input_type)
4402 {
4403 auto &out_type = get<SPIRType>(result_type);
4404 auto expected_type = out_type;
4405 expected_type.basetype = input_type;
4406 string cast_op0 =
4407 expression_type(op0).basetype != input_type ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0);
4408 string cast_op1 =
4409 expression_type(op1).basetype != input_type ? bitcast_glsl(expected_type, op1) : to_unpacked_expression(op1);
4410 string cast_op2 =
4411 expression_type(op2).basetype != input_type ? bitcast_glsl(expected_type, op2) : to_unpacked_expression(op2);
4412
4413 string expr;
4414 if (out_type.basetype != input_type)
4415 {
4416 expr = bitcast_glsl_op(out_type, expected_type);
4417 expr += '(';
4418 expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")");
4419 expr += ')';
4420 }
4421 else
4422 {
4423 expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")");
4424 }
4425
4426 emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1) && should_forward(op2));
4427 inherit_expression_dependencies(result_id, op0);
4428 inherit_expression_dependencies(result_id, op1);
4429 inherit_expression_dependencies(result_id, op2);
4430 }
4431
4432 void CompilerGLSL::emit_binary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
4433 const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type)
4434 {
4435 string cast_op0, cast_op1;
4436 auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type);
4437 auto &out_type = get<SPIRType>(result_type);
4438
4439 // Special case boolean outputs since relational opcodes output booleans instead of int/uint.
4440 string expr;
4441 if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean)
4442 {
4443 expected_type.basetype = input_type;
4444 expr = bitcast_glsl_op(out_type, expected_type);
4445 expr += '(';
4446 expr += join(op, "(", cast_op0, ", ", cast_op1, ")");
4447 expr += ')';
4448 }
4449 else
4450 {
4451 expr += join(op, "(", cast_op0, ", ", cast_op1, ")");
4452 }
4453
4454 emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1));
4455 inherit_expression_dependencies(result_id, op0);
4456 inherit_expression_dependencies(result_id, op1);
4457 }
4458
4459 void CompilerGLSL::emit_trinary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
4460 uint32_t op2, const char *op)
4461 {
4462 bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2);
4463 emit_op(result_type, result_id,
4464 join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ", ",
4465 to_unpacked_expression(op2), ")"),
4466 forward);
4467
4468 inherit_expression_dependencies(result_id, op0);
4469 inherit_expression_dependencies(result_id, op1);
4470 inherit_expression_dependencies(result_id, op2);
4471 }
4472
4473 void CompilerGLSL::emit_quaternary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
4474 uint32_t op2, uint32_t op3, const char *op)
4475 {
4476 bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2) && should_forward(op3);
4477 emit_op(result_type, result_id,
4478 join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ", ",
4479 to_unpacked_expression(op2), ", ", to_unpacked_expression(op3), ")"),
4480 forward);
4481
4482 inherit_expression_dependencies(result_id, op0);
4483 inherit_expression_dependencies(result_id, op1);
4484 inherit_expression_dependencies(result_id, op2);
4485 inherit_expression_dependencies(result_id, op3);
4486 }
4487
4488 void CompilerGLSL::emit_bitfield_insert_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
4489 uint32_t op2, uint32_t op3, const char *op,
4490 SPIRType::BaseType offset_count_type)
4491 {
4492 // Only need to cast offset/count arguments. Types of base/insert must be same as result type,
4493 // and bitfieldInsert is sign invariant.
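// e.g. roughly "bitfieldInsert(base, insert, int(offset), int(count))" when
// the offset/count operands are not already of the required int type.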
4494 bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2) && should_forward(op3);
4495
4496 auto op0_expr = to_unpacked_expression(op0);
4497 auto op1_expr = to_unpacked_expression(op1);
4498 auto op2_expr = to_unpacked_expression(op2);
4499 auto op3_expr = to_unpacked_expression(op3);
4500
4501 SPIRType target_type;
4502 target_type.vecsize = 1;
4503 target_type.basetype = offset_count_type;
4504
4505 if (expression_type(op2).basetype != offset_count_type)
4506 {
4507 // Value-cast here. Input might be 16-bit. GLSL requires int.
4508 op2_expr = join(type_to_glsl_constructor(target_type), "(", op2_expr, ")");
4509 }
4510
4511 if (expression_type(op3).basetype != offset_count_type)
4512 {
4513 // Value-cast here. Input might be 16-bit. GLSL requires int.
4514 op3_expr = join(type_to_glsl_constructor(target_type), "(", op3_expr, ")");
4515 }
4516
4517 emit_op(result_type, result_id, join(op, "(", op0_expr, ", ", op1_expr, ", ", op2_expr, ", ", op3_expr, ")"),
4518 forward);
4519
4520 inherit_expression_dependencies(result_id, op0);
4521 inherit_expression_dependencies(result_id, op1);
4522 inherit_expression_dependencies(result_id, op2);
4523 inherit_expression_dependencies(result_id, op3);
4524 }
4525
4526 // EXT_shader_texture_lod only concerns fragment shaders so lod tex functions
4527 // are not allowed in ES 2 vertex shaders. But SPIR-V only supports lod tex
4528 // functions in vertex shaders so we revert those back to plain calls when
4529 // the lod is a constant value of zero.
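// Illustration: in a legacy ES vertex shader, textureLod() with a constant
// lod of 0.0 is lowered to a plain lookup, roughly "texture2D(s, uv)",
// rather than texture2DLodEXT(), since that extension is fragment-only.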
4530 bool CompilerGLSL::check_explicit_lod_allowed(uint32_t lod)
4531 {
4532 auto &execution = get_entry_point();
4533 bool allowed = !is_legacy_es() || execution.model == ExecutionModelFragment;
4534 if (!allowed && lod != 0)
4535 {
4536 auto *lod_constant = maybe_get<SPIRConstant>(lod);
4537 if (!lod_constant || lod_constant->scalar_f32() != 0.0f)
4538 {
4539 SPIRV_CROSS_THROW("Explicit lod not allowed in legacy ES non-fragment shaders.");
4540 }
4541 }
4542 return allowed;
4543 }
4544
4545 string CompilerGLSL::legacy_tex_op(const std::string &op, const SPIRType &imgtype, uint32_t lod, uint32_t tex)
4546 {
4547 const char *type;
4548 switch (imgtype.image.dim)
4549 {
4550 case spv::Dim1D:
4551 type = (imgtype.image.arrayed && !options.es) ? "1DArray" : "1D";
4552 break;
4553 case spv::Dim2D:
4554 type = (imgtype.image.arrayed && !options.es) ? "2DArray" : "2D";
4555 break;
4556 case spv::Dim3D:
4557 type = "3D";
4558 break;
4559 case spv::DimCube:
4560 type = "Cube";
4561 break;
4562 case spv::DimRect:
4563 type = "2DRect";
4564 break;
4565 case spv::DimBuffer:
4566 type = "Buffer";
4567 break;
4568 case spv::DimSubpassData:
4569 type = "2D";
4570 break;
4571 default:
4572 type = "";
4573 break;
4574 }
4575
4576 bool use_explicit_lod = check_explicit_lod_allowed(lod);
4577
4578 if (op == "textureLod" || op == "textureProjLod" || op == "textureGrad" || op == "textureProjGrad")
4579 {
4580 if (is_legacy_es())
4581 {
4582 if (use_explicit_lod)
4583 require_extension_internal("GL_EXT_shader_texture_lod");
4584 }
4585 else if (is_legacy())
4586 require_extension_internal("GL_ARB_shader_texture_lod");
4587 }
4588
4589 if (op == "textureLodOffset" || op == "textureProjLodOffset")
4590 {
4591 if (is_legacy_es())
4592 SPIRV_CROSS_THROW(join(op, " not allowed in legacy ES"));
4593
4594 require_extension_internal("GL_EXT_gpu_shader4");
4595 }
4596
4597 // GLES has very limited support for shadow samplers.
4598 // Basically shadow2D and shadow2DProj work through EXT_shadow_samplers,
4599 // everything else can just throw.
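// e.g. a depth compare through "texture" on a sampler2DShadow becomes
// roughly "shadow2DEXT(...)" once GL_EXT_shadow_samplers is enabled.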
4600 if (image_is_comparison(imgtype, tex) && is_legacy_es())
4601 {
4602 if (op == "texture" || op == "textureProj")
4603 require_extension_internal("GL_EXT_shadow_samplers");
4604 else
4605 SPIRV_CROSS_THROW(join(op, " not allowed on depth samplers in legacy ES"));
4606 }
4607
4608 bool is_es_and_depth = is_legacy_es() && image_is_comparison(imgtype, tex);
4609 std::string type_prefix = image_is_comparison(imgtype, tex) ? "shadow" : "texture";
4610
4611 if (op == "texture")
4612 return is_es_and_depth ? join(type_prefix, type, "EXT") : join(type_prefix, type);
4613 else if (op == "textureLod")
4614 {
4615 if (use_explicit_lod)
4616 return join(type_prefix, type, is_legacy_es() ? "LodEXT" : "Lod");
4617 else
4618 return join(type_prefix, type);
4619 }
4620 else if (op == "textureProj")
4621 return join(type_prefix, type, is_es_and_depth ? "ProjEXT" : "Proj");
4622 else if (op == "textureGrad")
4623 return join(type_prefix, type, is_legacy_es() ? "GradEXT" : is_legacy_desktop() ? "GradARB" : "Grad");
4624 else if (op == "textureProjLod")
4625 {
4626 if (use_explicit_lod)
4627 return join(type_prefix, type, is_legacy_es() ? "ProjLodEXT" : "ProjLod");
4628 else
4629 return join(type_prefix, type, "Proj");
4630 }
4631 else if (op == "textureLodOffset")
4632 {
4633 if (use_explicit_lod)
4634 return join(type_prefix, type, "LodOffset");
4635 else
4636 return join(type_prefix, type);
4637 }
4638 else if (op == "textureProjGrad")
4639 return join(type_prefix, type,
4640 is_legacy_es() ? "ProjGradEXT" : is_legacy_desktop() ? "ProjGradARB" : "ProjGrad");
4641 else if (op == "textureProjLodOffset")
4642 {
4643 if (use_explicit_lod)
4644 return join(type_prefix, type, "ProjLodOffset");
4645 else
4646 return join(type_prefix, type, "ProjOffset");
4647 }
4648 else
4649 {
4650 SPIRV_CROSS_THROW(join("Unsupported legacy texture op: ", op));
4651 }
4652 }
4653
4654 bool CompilerGLSL::to_trivial_mix_op(const SPIRType &type, string &op, uint32_t left, uint32_t right, uint32_t lerp)
4655 {
4656 auto *cleft = maybe_get<SPIRConstant>(left);
4657 auto *cright = maybe_get<SPIRConstant>(right);
4658 auto &lerptype = expression_type(lerp);
4659
4660 // If our targets aren't constants, we cannot use construction.
4661 if (!cleft || !cright)
4662 return false;
4663
4664 // If our targets are spec constants, we cannot use construction.
4665 if (cleft->specialization || cright->specialization)
4666 return false;
4667
4668 // We can only use trivial construction if we have a scalar
4669 // (should be possible to do it for vectors as well, but that is overkill for now).
4670 if (lerptype.basetype != SPIRType::Boolean || lerptype.vecsize > 1)
4671 return false;
4672
4673 // If our bool selects between 0 and 1, we can cast from bool instead, making our trivial constructor.
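// e.g. selecting between constants 1 and 0 with a scalar bool b reduces to
// roughly "int(b)", "uint(b)" or "float(b)" depending on the result type.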
4674 bool ret = false;
4675 switch (type.basetype)
4676 {
4677 case SPIRType::Short:
4678 case SPIRType::UShort:
4679 ret = cleft->scalar_u16() == 0 && cright->scalar_u16() == 1;
4680 break;
4681
4682 case SPIRType::Int:
4683 case SPIRType::UInt:
4684 ret = cleft->scalar() == 0 && cright->scalar() == 1;
4685 break;
4686
4687 case SPIRType::Half:
4688 ret = cleft->scalar_f16() == 0.0f && cright->scalar_f16() == 1.0f;
4689 break;
4690
4691 case SPIRType::Float:
4692 ret = cleft->scalar_f32() == 0.0f && cright->scalar_f32() == 1.0f;
4693 break;
4694
4695 case SPIRType::Double:
4696 ret = cleft->scalar_f64() == 0.0 && cright->scalar_f64() == 1.0;
4697 break;
4698
4699 case SPIRType::Int64:
4700 case SPIRType::UInt64:
4701 ret = cleft->scalar_u64() == 0 && cright->scalar_u64() == 1;
4702 break;
4703
4704 default:
4705 break;
4706 }
4707
4708 if (ret)
4709 op = type_to_glsl_constructor(type);
4710 return ret;
4711 }
4712
4713 string CompilerGLSL::to_ternary_expression(const SPIRType &restype, uint32_t select, uint32_t true_value,
4714 uint32_t false_value)
4715 {
4716 string expr;
4717 auto &lerptype = expression_type(select);
4718
4719 if (lerptype.vecsize == 1)
4720 expr = join(to_enclosed_expression(select), " ? ", to_enclosed_pointer_expression(true_value), " : ",
4721 to_enclosed_pointer_expression(false_value));
4722 else
4723 {
4724 auto swiz = [this](uint32_t expression, uint32_t i) { return to_extract_component_expression(expression, i); };
4725
4726 expr = type_to_glsl_constructor(restype);
4727 expr += "(";
4728 for (uint32_t i = 0; i < restype.vecsize; i++)
4729 {
4730 expr += swiz(select, i);
4731 expr += " ? ";
4732 expr += swiz(true_value, i);
4733 expr += " : ";
4734 expr += swiz(false_value, i);
4735 if (i + 1 < restype.vecsize)
4736 expr += ", ";
4737 }
4738 expr += ")";
4739 }
4740
4741 return expr;
4742 }
4743
4744 void CompilerGLSL::emit_mix_op(uint32_t result_type, uint32_t id, uint32_t left, uint32_t right, uint32_t lerp)
4745 {
4746 auto &lerptype = expression_type(lerp);
4747 auto &restype = get<SPIRType>(result_type);
4748
4749 // If this results in a variable pointer, assume it may be written through.
4750 if (restype.pointer)
4751 {
4752 register_write(left);
4753 register_write(right);
4754 }
4755
4756 string mix_op;
4757 bool has_boolean_mix = *backend.boolean_mix_function &&
4758 ((options.es && options.version >= 310) || (!options.es && options.version >= 450));
4759 bool trivial_mix = to_trivial_mix_op(restype, mix_op, left, right, lerp);
4760
4761 // Cannot use boolean mix when the lerp argument is just one boolean;
4762 // fall back to a regular ternary expression instead.
4763 if (lerptype.vecsize == 1)
4764 has_boolean_mix = false;
4765
4766 // If we can reduce the mix to a simple cast, do so.
4767 // This helps for cases like int(bool), uint(bool) which is implemented with
4768 // OpSelect bool 1 0.
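// For example, an OpSelect between constant int 1 and 0 driven by a scalar bool simply becomes int(cond)
// rather than a full mix()/ternary expansion.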
4769 if (trivial_mix)
4770 {
4771 emit_unary_func_op(result_type, id, lerp, mix_op.c_str());
4772 }
4773 else if (!has_boolean_mix && lerptype.basetype == SPIRType::Boolean)
4774 {
4775 // Boolean mix not supported on desktop without extension.
4776 // Was added in OpenGL 4.5 with ES 3.1 compat.
4777 //
4778 // Could use GL_EXT_shader_integer_mix on desktop at least,
4779 // but Apple doesn't support it. :(
4780 // Just implement it as ternary expressions.
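// E.g. a two-component select comes out roughly as vec2(sel.x ? b.x : a.x, sel.y ? b.y : a.y)
// via to_ternary_expression() below.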
4781 auto expr = to_ternary_expression(get<SPIRType>(result_type), lerp, right, left);
4782 emit_op(result_type, id, expr, should_forward(left) && should_forward(right) && should_forward(lerp));
4783 inherit_expression_dependencies(id, left);
4784 inherit_expression_dependencies(id, right);
4785 inherit_expression_dependencies(id, lerp);
4786 }
4787 else if (lerptype.basetype == SPIRType::Boolean)
4788 emit_trinary_func_op(result_type, id, left, right, lerp, backend.boolean_mix_function);
4789 else
4790 emit_trinary_func_op(result_type, id, left, right, lerp, "mix");
4791 }
4792
4793 string CompilerGLSL::to_combined_image_sampler(VariableID image_id, VariableID samp_id)
4794 {
4795 // Keep track of the array indices we have used to load the image.
4796 // We'll need to use the same array index into the combined image sampler array.
4797 auto image_expr = to_expression(image_id);
4798 string array_expr;
4799 auto array_index = image_expr.find_first_of('[');
4800 if (array_index != string::npos)
4801 array_expr = image_expr.substr(array_index, string::npos);
4802
4803 auto &args = current_function->arguments;
4804
4805 // For GLSL and ESSL targets, we must enumerate all possible combinations of sampler2D(texture2D, sampler) and redirect
4806 // each combination to a new sampler2D uniform.
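// E.g. a (texture2D, sampler) pair used together is redirected to one combined sampler2D uniform
// (typically named along the lines of SPIRV_Cross_Combined<texture><sampler>; the exact name comes from the remapping built by build_combined_image_samplers()).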
4807 auto *image = maybe_get_backing_variable(image_id);
4808 auto *samp = maybe_get_backing_variable(samp_id);
4809 if (image)
4810 image_id = image->self;
4811 if (samp)
4812 samp_id = samp->self;
4813
4814 auto image_itr = find_if(begin(args), end(args),
4815 [image_id](const SPIRFunction::Parameter &param) { return image_id == param.id; });
4816
4817 auto sampler_itr = find_if(begin(args), end(args),
4818 [samp_id](const SPIRFunction::Parameter &param) { return samp_id == param.id; });
4819
4820 if (image_itr != end(args) || sampler_itr != end(args))
4821 {
4822 // If either the image or the sampler originates from a function parameter, we will find it in our argument list.
4823 bool global_image = image_itr == end(args);
4824 bool global_sampler = sampler_itr == end(args);
4825 VariableID iid = global_image ? image_id : VariableID(uint32_t(image_itr - begin(args)));
4826 VariableID sid = global_sampler ? samp_id : VariableID(uint32_t(sampler_itr - begin(args)));
4827
4828 auto &combined = current_function->combined_parameters;
4829 auto itr = find_if(begin(combined), end(combined), [=](const SPIRFunction::CombinedImageSamplerParameter &p) {
4830 return p.global_image == global_image && p.global_sampler == global_sampler && p.image_id == iid &&
4831 p.sampler_id == sid;
4832 });
4833
4834 if (itr != end(combined))
4835 return to_expression(itr->id) + array_expr;
4836 else
4837 {
4838 SPIRV_CROSS_THROW(
4839 "Cannot find mapping for combined sampler parameter, was build_combined_image_samplers() used "
4840 "before compile() was called?");
4841 }
4842 }
4843 else
4844 {
4845 // For global sampler2D, look directly at the global remapping table.
4846 auto &mapping = combined_image_samplers;
4847 auto itr = find_if(begin(mapping), end(mapping), [image_id, samp_id](const CombinedImageSampler &combined) {
4848 return combined.image_id == image_id && combined.sampler_id == samp_id;
4849 });
4850
4851 if (itr != end(combined_image_samplers))
4852 return to_expression(itr->combined_id) + array_expr;
4853 else
4854 {
4855 SPIRV_CROSS_THROW("Cannot find mapping for combined sampler, was build_combined_image_samplers() used "
4856 "before compile() was called?");
4857 }
4858 }
4859 }
4860
4861 void CompilerGLSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id)
4862 {
4863 if (options.vulkan_semantics && combined_image_samplers.empty())
4864 {
4865 emit_binary_func_op(result_type, result_id, image_id, samp_id,
4866 type_to_glsl(get<SPIRType>(result_type), result_id).c_str());
4867 }
4868 else
4869 {
4870 // Make sure to suppress usage tracking. It is illegal to create temporaries of opaque types.
4871 emit_op(result_type, result_id, to_combined_image_sampler(image_id, samp_id), true, true);
4872 }
4873
4874 // Make sure to suppress usage tracking and any expression invalidation.
4875 // It is illegal to create temporaries of opaque types.
4876 forwarded_temporaries.erase(result_id);
4877 }
4878
4879 static inline bool image_opcode_is_sample_no_dref(Op op)
4880 {
4881 switch (op)
4882 {
4883 case OpImageSampleExplicitLod:
4884 case OpImageSampleImplicitLod:
4885 case OpImageSampleProjExplicitLod:
4886 case OpImageSampleProjImplicitLod:
4887 case OpImageFetch:
4888 case OpImageRead:
4889 case OpImageSparseSampleExplicitLod:
4890 case OpImageSparseSampleImplicitLod:
4891 case OpImageSparseSampleProjExplicitLod:
4892 case OpImageSparseSampleProjImplicitLod:
4893 case OpImageSparseFetch:
4894 case OpImageSparseRead:
4895 return true;
4896
4897 default:
4898 return false;
4899 }
4900 }
4901
4902 void CompilerGLSL::emit_texture_op(const Instruction &i)
4903 {
4904 auto *ops = stream(i);
4905 auto op = static_cast<Op>(i.op);
4906
4907 SmallVector<uint32_t> inherited_expressions;
4908
4909 uint32_t result_type_id = ops[0];
4910 uint32_t id = ops[1];
4911
4912 bool forward = false;
4913 string expr = to_texture_op(i, &forward, inherited_expressions);
4914 emit_op(result_type_id, id, expr, forward);
4915 for (auto &inherit : inherited_expressions)
4916 inherit_expression_dependencies(id, inherit);
4917
4918 switch (op)
4919 {
4920 case OpImageSampleDrefImplicitLod:
4921 case OpImageSampleImplicitLod:
4922 case OpImageSampleProjImplicitLod:
4923 case OpImageSampleProjDrefImplicitLod:
4924 register_control_dependent_expression(id);
4925 break;
4926
4927 default:
4928 break;
4929 }
4930 }
4931
4932 std::string CompilerGLSL::to_texture_op(const Instruction &i, bool *forward,
4933 SmallVector<uint32_t> &inherited_expressions)
4934 {
4935 auto *ops = stream(i);
4936 auto op = static_cast<Op>(i.op);
4937 uint32_t length = i.length;
4938
4939 uint32_t result_type_id = ops[0];
4940 VariableID img = ops[2];
4941 uint32_t coord = ops[3];
4942 uint32_t dref = 0;
4943 uint32_t comp = 0;
4944 bool gather = false;
4945 bool proj = false;
4946 bool fetch = false;
4947 const uint32_t *opt = nullptr;
4948
4949 auto &result_type = get<SPIRType>(result_type_id);
4950
4951 inherited_expressions.push_back(coord);
4952
4953 // Make sure non-uniform decoration is back-propagated to where it needs to be.
4954 if (has_decoration(img, DecorationNonUniformEXT))
4955 propagate_nonuniform_qualifier(img);
4956
4957 switch (op)
4958 {
4959 case OpImageSampleDrefImplicitLod:
4960 case OpImageSampleDrefExplicitLod:
4961 dref = ops[4];
4962 opt = &ops[5];
4963 length -= 5;
4964 break;
4965
4966 case OpImageSampleProjDrefImplicitLod:
4967 case OpImageSampleProjDrefExplicitLod:
4968 dref = ops[4];
4969 opt = &ops[5];
4970 length -= 5;
4971 proj = true;
4972 break;
4973
4974 case OpImageDrefGather:
4975 dref = ops[4];
4976 opt = &ops[5];
4977 length -= 5;
4978 gather = true;
4979 break;
4980
4981 case OpImageGather:
4982 comp = ops[4];
4983 opt = &ops[5];
4984 length -= 5;
4985 gather = true;
4986 break;
4987
4988 case OpImageFetch:
4989 case OpImageRead: // Reads == fetches in Metal (other langs will not get here)
4990 opt = &ops[4];
4991 length -= 4;
4992 fetch = true;
4993 break;
4994
4995 case OpImageSampleProjImplicitLod:
4996 case OpImageSampleProjExplicitLod:
4997 opt = &ops[4];
4998 length -= 4;
4999 proj = true;
5000 break;
5001
5002 default:
5003 opt = &ops[4];
5004 length -= 4;
5005 break;
5006 }
5007
5008 // Bypass pointers because we need the real image struct
5009 auto &type = expression_type(img);
5010 auto &imgtype = get<SPIRType>(type.self);
5011
5012 uint32_t coord_components = 0;
5013 switch (imgtype.image.dim)
5014 {
5015 case spv::Dim1D:
5016 coord_components = 1;
5017 break;
5018 case spv::Dim2D:
5019 coord_components = 2;
5020 break;
5021 case spv::Dim3D:
5022 coord_components = 3;
5023 break;
5024 case spv::DimCube:
5025 coord_components = 3;
5026 break;
5027 case spv::DimBuffer:
5028 coord_components = 1;
5029 break;
5030 default:
5031 coord_components = 2;
5032 break;
5033 }
5034
5035 if (dref)
5036 inherited_expressions.push_back(dref);
5037
5038 if (proj)
5039 coord_components++;
5040 if (imgtype.image.arrayed)
5041 coord_components++;
5042
5043 uint32_t bias = 0;
5044 uint32_t lod = 0;
5045 uint32_t grad_x = 0;
5046 uint32_t grad_y = 0;
5047 uint32_t coffset = 0;
5048 uint32_t offset = 0;
5049 uint32_t coffsets = 0;
5050 uint32_t sample = 0;
5051 uint32_t minlod = 0;
5052 uint32_t flags = 0;
5053
5054 if (length)
5055 {
5056 flags = *opt++;
5057 length--;
5058 }
5059
5060 auto test = [&](uint32_t &v, uint32_t flag) {
5061 if (length && (flags & flag))
5062 {
5063 v = *opt++;
5064 inherited_expressions.push_back(v);
5065 length--;
5066 }
5067 };
5068
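// The optional image operands are consumed in mask-bit order below; e.g. Bias | ConstOffset yields
// the bias operand first, then the constant offset.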
5069 test(bias, ImageOperandsBiasMask);
5070 test(lod, ImageOperandsLodMask);
5071 test(grad_x, ImageOperandsGradMask);
5072 test(grad_y, ImageOperandsGradMask);
5073 test(coffset, ImageOperandsConstOffsetMask);
5074 test(offset, ImageOperandsOffsetMask);
5075 test(coffsets, ImageOperandsConstOffsetsMask);
5076 test(sample, ImageOperandsSampleMask);
5077 test(minlod, ImageOperandsMinLodMask);
5078
5079 string expr;
5080 expr += to_function_name(img, imgtype, !!fetch, !!gather, !!proj, !!coffsets, (!!coffset || !!offset),
5081 (!!grad_x || !!grad_y), !!dref, lod, minlod);
5082 expr += "(";
5083 expr += to_function_args(img, imgtype, fetch, gather, proj, coord, coord_components, dref, grad_x, grad_y, lod,
5084 coffset, offset, bias, comp, sample, minlod, forward);
5085 expr += ")";
5086
5087 // texture(samplerXShadow) returns float. shadowX() returns vec4. Swizzle here.
5088 if (is_legacy() && image_is_comparison(imgtype, img))
5089 expr += ".r";
5090
5091 // Sampling from a texture which was deduced to be a depth image might actually return just 1 component here.
5092 // Remap back to 4 components as sampling opcodes expect.
5093 if (backend.comparison_image_samples_scalar && image_opcode_is_sample_no_dref(op))
5094 {
5095 bool image_is_depth = false;
5096 const auto *combined = maybe_get<SPIRCombinedImageSampler>(img);
5097 VariableID image_id = combined ? combined->image : img;
5098
5099 if (combined && image_is_comparison(imgtype, combined->image))
5100 image_is_depth = true;
5101 else if (image_is_comparison(imgtype, img))
5102 image_is_depth = true;
5103
5104 // We must also check the backing variable for the image.
5105 // We might have loaded an OpImage, and used that handle for two different purposes.
5106 // Once with comparison, once without.
5107 auto *image_variable = maybe_get_backing_variable(image_id);
5108 if (image_variable && image_is_comparison(get<SPIRType>(image_variable->basetype), image_variable->self))
5109 image_is_depth = true;
5110
5111 if (image_is_depth)
5112 expr = remap_swizzle(result_type, 1, expr);
5113 }
5114
5115 if (!backend.support_small_type_sampling_result && result_type.width < 32)
5116 {
5117 // Just value cast (narrowing) to expected type since we cannot rely on narrowing to work automatically.
5118 // Hopefully the compiler picks this up and converts the texturing instruction to the appropriate precision.
5119 expr = join(type_to_glsl_constructor(result_type), "(", expr, ")");
5120 }
5121
5122 // Deals with reads from MSL. We might need to downconvert to fewer components.
5123 if (op == OpImageRead)
5124 expr = remap_swizzle(result_type, 4, expr);
5125
5126 return expr;
5127 }
5128
5129 bool CompilerGLSL::expression_is_constant_null(uint32_t id) const
5130 {
5131 auto *c = maybe_get<SPIRConstant>(id);
5132 if (!c)
5133 return false;
5134 return c->constant_is_null();
5135 }
5136
5137 // Returns the function name for a texture sampling function for the specified image and sampling characteristics.
5138 // For some subclasses, the function is a method on the specified image.
5139 string CompilerGLSL::to_function_name(VariableID tex, const SPIRType &imgtype, bool is_fetch, bool is_gather,
5140 bool is_proj, bool has_array_offsets, bool has_offset, bool has_grad, bool,
5141 uint32_t lod, uint32_t minlod)
5142 {
5143 if (minlod != 0)
5144 SPIRV_CROSS_THROW("Sparse texturing not yet supported.");
5145
5146 string fname;
5147
5148 // textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason.
5149 // To emulate this, we will have to use textureGrad with a constant gradient of 0.
5150 // The workaround will assert that the LOD is in fact constant 0, or we cannot emit correct code.
5151 // This happens for HLSL SampleCmpLevelZero on Texture2DArray and TextureCube.
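// E.g. an explicit-LOD comparison sample on samplerCubeShadow is emitted as textureGrad(..., vec3(0.0), vec3(0.0))
// rather than textureLod(..., 0.0); to_function_args() supplies the matching zero gradients.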
5152 bool workaround_lod_array_shadow_as_grad = false;
5153 if (((imgtype.image.arrayed && imgtype.image.dim == Dim2D) || imgtype.image.dim == DimCube) &&
5154 image_is_comparison(imgtype, tex) && lod)
5155 {
5156 if (!expression_is_constant_null(lod))
5157 {
5158 SPIRV_CROSS_THROW(
5159 "textureLod on sampler2DArrayShadow is not constant 0.0. This cannot be expressed in GLSL.");
5160 }
5161 workaround_lod_array_shadow_as_grad = true;
5162 }
5163
5164 if (is_fetch)
5165 fname += "texelFetch";
5166 else
5167 {
5168 fname += "texture";
5169
5170 if (is_gather)
5171 fname += "Gather";
5172 if (has_array_offsets)
5173 fname += "Offsets";
5174 if (is_proj)
5175 fname += "Proj";
5176 if (has_grad || workaround_lod_array_shadow_as_grad)
5177 fname += "Grad";
5178 if (!!lod && !workaround_lod_array_shadow_as_grad)
5179 fname += "Lod";
5180 }
5181
5182 if (has_offset)
5183 fname += "Offset";
5184
5185 return is_legacy() ? legacy_tex_op(fname, imgtype, lod, tex) : fname;
5186 }
5187
5188 std::string CompilerGLSL::convert_separate_image_to_expression(uint32_t id)
5189 {
5190 auto *var = maybe_get_backing_variable(id);
5191
5192 // If we are fetching from a plain OpTypeImage, we must combine with a dummy sampler in GLSL.
5193 // In Vulkan GLSL, we can make use of the newer GL_EXT_samplerless_texture_functions.
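// With that extension, e.g. texelFetch() can be used directly on a texture2D without combining it with a sampler first.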
5194 if (var)
5195 {
5196 auto &type = get<SPIRType>(var->basetype);
5197 if (type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer)
5198 {
5199 if (options.vulkan_semantics)
5200 {
5201 if (dummy_sampler_id)
5202 {
5203 // Don't need to consider Shadow state since the dummy sampler is always non-shadow.
5204 auto sampled_type = type;
5205 sampled_type.basetype = SPIRType::SampledImage;
5206 return join(type_to_glsl(sampled_type), "(", to_expression(id), ", ",
5207 to_expression(dummy_sampler_id), ")");
5208 }
5209 else
5210 {
5211 // Newer glslang supports this extension to deal with texture2D as argument to texture functions.
5212 require_extension_internal("GL_EXT_samplerless_texture_functions");
5213 }
5214 }
5215 else
5216 {
5217 if (!dummy_sampler_id)
5218 SPIRV_CROSS_THROW(
5219 "Cannot find dummy sampler ID. Was build_dummy_sampler_for_combined_images() called?");
5220
5221 return to_combined_image_sampler(id, dummy_sampler_id);
5222 }
5223 }
5224 }
5225
5226 return to_expression(id);
5227 }
5228
5229 // Returns the function args for a texture sampling function for the specified image and sampling characteristics.
5230 string CompilerGLSL::to_function_args(VariableID img, const SPIRType &imgtype, bool is_fetch, bool is_gather,
5231 bool is_proj, uint32_t coord, uint32_t coord_components, uint32_t dref,
5232 uint32_t grad_x, uint32_t grad_y, uint32_t lod, uint32_t coffset, uint32_t offset,
5233 uint32_t bias, uint32_t comp, uint32_t sample, uint32_t /*minlod*/,
5234 bool *p_forward)
5235 {
5236 string farg_str;
5237 if (is_fetch)
5238 farg_str = convert_separate_image_to_expression(img);
5239 else
5240 farg_str = to_expression(img);
5241
5242 bool swizz_func = backend.swizzle_is_function;
5243 auto swizzle = [swizz_func](uint32_t comps, uint32_t in_comps) -> const char * {
5244 if (comps == in_comps)
5245 return "";
5246
5247 switch (comps)
5248 {
5249 case 1:
5250 return ".x";
5251 case 2:
5252 return swizz_func ? ".xy()" : ".xy";
5253 case 3:
5254 return swizz_func ? ".xyz()" : ".xyz";
5255 default:
5256 return "";
5257 }
5258 };
5259
5260 bool forward = should_forward(coord);
5261
5262 // The IR can give us more components than we need, so chop them off as needed.
5263 auto swizzle_expr = swizzle(coord_components, expression_type(coord).vecsize);
5264 // Only enclose the UV expression if needed.
5265 auto coord_expr = (*swizzle_expr == '\0') ? to_expression(coord) : (to_enclosed_expression(coord) + swizzle_expr);
5266
5267 // texelFetch only takes int, not uint.
5268 auto &coord_type = expression_type(coord);
5269 if (coord_type.basetype == SPIRType::UInt)
5270 {
5271 auto expected_type = coord_type;
5272 expected_type.vecsize = coord_components;
5273 expected_type.basetype = SPIRType::Int;
5274 coord_expr = bitcast_expression(expected_type, coord_type.basetype, coord_expr);
5275 }
5276
5277 // textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason.
5278 // To emulate this, we will have to use textureGrad with a constant gradient of 0.
5279 // The workaround will assert that the LOD is in fact constant 0, or we cannot emit correct code.
5280 // This happens for HLSL SampleCmpLevelZero on Texture2DArray and TextureCube.
5281 bool workaround_lod_array_shadow_as_grad =
5282 ((imgtype.image.arrayed && imgtype.image.dim == Dim2D) || imgtype.image.dim == DimCube) &&
5283 image_is_comparison(imgtype, img) && lod;
5284
5285 if (dref)
5286 {
5287 forward = forward && should_forward(dref);
5288
5289 // SPIR-V splits dref and coordinate.
5290 if (is_gather || coord_components == 4) // GLSL also splits the arguments in two. Same for textureGather.
5291 {
5292 farg_str += ", ";
5293 farg_str += to_expression(coord);
5294 farg_str += ", ";
5295 farg_str += to_expression(dref);
5296 }
5297 else if (is_proj)
5298 {
5299 // Have to reshuffle so we get vec4(coord, dref, proj), special case.
5300 // Other shading languages split up the arguments for coord and compare value like SPIR-V does.
5301 // The coordinate type for textureProj shadow is always vec4 even for sampler1DShadow.
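// E.g. for sampler1DShadow the call ends up as textureProj(s, vec4(coord.x, 0.0, dref, coord.y)), built below.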
5302 farg_str += ", vec4(";
5303
5304 if (imgtype.image.dim == Dim1D)
5305 {
5306 // Could reuse coord_expr, but we will mess up the temporary usage checking.
5307 farg_str += to_enclosed_expression(coord) + ".x";
5308 farg_str += ", ";
5309 farg_str += "0.0, ";
5310 farg_str += to_expression(dref);
5311 farg_str += ", ";
5312 farg_str += to_enclosed_expression(coord) + ".y)";
5313 }
5314 else if (imgtype.image.dim == Dim2D)
5315 {
5316 // Could reuse coord_expr, but we will mess up the temporary usage checking.
5317 farg_str += to_enclosed_expression(coord) + (swizz_func ? ".xy()" : ".xy");
5318 farg_str += ", ";
5319 farg_str += to_expression(dref);
5320 farg_str += ", ";
5321 farg_str += to_enclosed_expression(coord) + ".z)";
5322 }
5323 else
5324 SPIRV_CROSS_THROW("Invalid type for textureProj with shadow.");
5325 }
5326 else
5327 {
5328 // Create a composite which merges coord/dref into a single vector.
5329 auto type = expression_type(coord);
5330 type.vecsize = coord_components + 1;
5331 farg_str += ", ";
5332 farg_str += type_to_glsl_constructor(type);
5333 farg_str += "(";
5334 farg_str += coord_expr;
5335 farg_str += ", ";
5336 farg_str += to_expression(dref);
5337 farg_str += ")";
5338 }
5339 }
5340 else
5341 {
5342 farg_str += ", ";
5343 farg_str += coord_expr;
5344 }
5345
5346 if (grad_x || grad_y)
5347 {
5348 forward = forward && should_forward(grad_x);
5349 forward = forward && should_forward(grad_y);
5350 farg_str += ", ";
5351 farg_str += to_expression(grad_x);
5352 farg_str += ", ";
5353 farg_str += to_expression(grad_y);
5354 }
5355
5356 if (lod)
5357 {
5358 if (workaround_lod_array_shadow_as_grad)
5359 {
5360 // Implement textureGrad() instead. LOD == 0.0 is implemented as gradient of 0.0.
5361 // Implementing this as plain texture() is not safe on some implementations.
5362 if (imgtype.image.dim == Dim2D)
5363 farg_str += ", vec2(0.0), vec2(0.0)";
5364 else if (imgtype.image.dim == DimCube)
5365 farg_str += ", vec3(0.0), vec3(0.0)";
5366 }
5367 else
5368 {
5369 if (check_explicit_lod_allowed(lod))
5370 {
5371 forward = forward && should_forward(lod);
5372 farg_str += ", ";
5373
5374 auto &lod_expr_type = expression_type(lod);
5375
5376 // Lod expression for TexelFetch in GLSL must be int, and only int.
5377 if (is_fetch && imgtype.image.dim != DimBuffer && !imgtype.image.ms &&
5378 lod_expr_type.basetype != SPIRType::Int)
5379 {
5380 farg_str += join("int(", to_expression(lod), ")");
5381 }
5382 else
5383 {
5384 farg_str += to_expression(lod);
5385 }
5386 }
5387 }
5388 }
5389 else if (is_fetch && imgtype.image.dim != DimBuffer && !imgtype.image.ms)
5390 {
5391 // The Lod argument is optional in OpImageFetch, but we require a LOD value, so pick 0 as the default.
5392 farg_str += ", 0";
5393 }
5394
5395 if (coffset)
5396 {
5397 forward = forward && should_forward(coffset);
5398 farg_str += ", ";
5399 farg_str += to_expression(coffset);
5400 }
5401 else if (offset)
5402 {
5403 forward = forward && should_forward(offset);
5404 farg_str += ", ";
5405 farg_str += to_expression(offset);
5406 }
5407
5408 if (bias)
5409 {
5410 forward = forward && should_forward(bias);
5411 farg_str += ", ";
5412 farg_str += to_expression(bias);
5413 }
5414
5415 if (comp)
5416 {
5417 forward = forward && should_forward(comp);
5418 farg_str += ", ";
5419 farg_str += to_expression(comp);
5420 }
5421
5422 if (sample)
5423 {
5424 farg_str += ", ";
5425 farg_str += to_expression(sample);
5426 }
5427
5428 *p_forward = forward;
5429
5430 return farg_str;
5431 }
5432
5433 void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t length)
5434 {
5435 auto op = static_cast<GLSLstd450>(eop);
5436
5437 if (is_legacy() && is_unsigned_glsl_opcode(op))
5438 SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy GLSL targets.");
5439
5440 // If we need to do implicit bitcasts, make sure we do it with the correct type.
5441 uint32_t integer_width = get_integer_width_for_glsl_instruction(op, args, length);
5442 auto int_type = to_signed_basetype(integer_width);
5443 auto uint_type = to_unsigned_basetype(integer_width);
5444
5445 switch (op)
5446 {
5447 // FP fiddling
5448 case GLSLstd450Round:
5449 emit_unary_func_op(result_type, id, args[0], "round");
5450 break;
5451
5452 case GLSLstd450RoundEven:
5453 if ((options.es && options.version >= 300) || (!options.es && options.version >= 130))
5454 emit_unary_func_op(result_type, id, args[0], "roundEven");
5455 else
5456 SPIRV_CROSS_THROW("roundEven supported only in ESSL 300 and GLSL 130 and up.");
5457 break;
5458
5459 case GLSLstd450Trunc:
5460 emit_unary_func_op(result_type, id, args[0], "trunc");
5461 break;
5462 case GLSLstd450SAbs:
5463 emit_unary_func_op_cast(result_type, id, args[0], "abs", int_type, int_type);
5464 break;
5465 case GLSLstd450FAbs:
5466 emit_unary_func_op(result_type, id, args[0], "abs");
5467 break;
5468 case GLSLstd450SSign:
5469 emit_unary_func_op_cast(result_type, id, args[0], "sign", int_type, int_type);
5470 break;
5471 case GLSLstd450FSign:
5472 emit_unary_func_op(result_type, id, args[0], "sign");
5473 break;
5474 case GLSLstd450Floor:
5475 emit_unary_func_op(result_type, id, args[0], "floor");
5476 break;
5477 case GLSLstd450Ceil:
5478 emit_unary_func_op(result_type, id, args[0], "ceil");
5479 break;
5480 case GLSLstd450Fract:
5481 emit_unary_func_op(result_type, id, args[0], "fract");
5482 break;
5483 case GLSLstd450Radians:
5484 emit_unary_func_op(result_type, id, args[0], "radians");
5485 break;
5486 case GLSLstd450Degrees:
5487 emit_unary_func_op(result_type, id, args[0], "degrees");
5488 break;
5489 case GLSLstd450Fma:
5490 if ((!options.es && options.version < 400) || (options.es && options.version < 320))
5491 {
5492 auto expr = join(to_enclosed_expression(args[0]), " * ", to_enclosed_expression(args[1]), " + ",
5493 to_enclosed_expression(args[2]));
5494
5495 emit_op(result_type, id, expr,
5496 should_forward(args[0]) && should_forward(args[1]) && should_forward(args[2]));
5497 for (uint32_t i = 0; i < 3; i++)
5498 inherit_expression_dependencies(id, args[i]);
5499 }
5500 else
5501 emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "fma");
5502 break;
5503 case GLSLstd450Modf:
5504 register_call_out_argument(args[1]);
5505 forced_temporaries.insert(id);
5506 emit_binary_func_op(result_type, id, args[0], args[1], "modf");
5507 break;
5508
5509 case GLSLstd450ModfStruct:
5510 {
5511 auto &type = get<SPIRType>(result_type);
5512 emit_uninitialized_temporary_expression(result_type, id);
5513 statement(to_expression(id), ".", to_member_name(type, 0), " = ", "modf(", to_expression(args[0]), ", ",
5514 to_expression(id), ".", to_member_name(type, 1), ");");
5515 break;
5516 }
5517
5518 // Minmax
5519 case GLSLstd450UMin:
5520 emit_binary_func_op_cast(result_type, id, args[0], args[1], "min", uint_type, false);
5521 break;
5522
5523 case GLSLstd450SMin:
5524 emit_binary_func_op_cast(result_type, id, args[0], args[1], "min", int_type, false);
5525 break;
5526
5527 case GLSLstd450FMin:
5528 emit_binary_func_op(result_type, id, args[0], args[1], "min");
5529 break;
5530
5531 case GLSLstd450FMax:
5532 emit_binary_func_op(result_type, id, args[0], args[1], "max");
5533 break;
5534
5535 case GLSLstd450UMax:
5536 emit_binary_func_op_cast(result_type, id, args[0], args[1], "max", uint_type, false);
5537 break;
5538
5539 case GLSLstd450SMax:
5540 emit_binary_func_op_cast(result_type, id, args[0], args[1], "max", int_type, false);
5541 break;
5542
5543 case GLSLstd450FClamp:
5544 emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "clamp");
5545 break;
5546
5547 case GLSLstd450UClamp:
5548 emit_trinary_func_op_cast(result_type, id, args[0], args[1], args[2], "clamp", uint_type);
5549 break;
5550
5551 case GLSLstd450SClamp:
5552 emit_trinary_func_op_cast(result_type, id, args[0], args[1], args[2], "clamp", int_type);
5553 break;
5554
5555 // Trig
5556 case GLSLstd450Sin:
5557 emit_unary_func_op(result_type, id, args[0], "sin");
5558 break;
5559 case GLSLstd450Cos:
5560 emit_unary_func_op(result_type, id, args[0], "cos");
5561 break;
5562 case GLSLstd450Tan:
5563 emit_unary_func_op(result_type, id, args[0], "tan");
5564 break;
5565 case GLSLstd450Asin:
5566 emit_unary_func_op(result_type, id, args[0], "asin");
5567 break;
5568 case GLSLstd450Acos:
5569 emit_unary_func_op(result_type, id, args[0], "acos");
5570 break;
5571 case GLSLstd450Atan:
5572 emit_unary_func_op(result_type, id, args[0], "atan");
5573 break;
5574 case GLSLstd450Sinh:
5575 emit_unary_func_op(result_type, id, args[0], "sinh");
5576 break;
5577 case GLSLstd450Cosh:
5578 emit_unary_func_op(result_type, id, args[0], "cosh");
5579 break;
5580 case GLSLstd450Tanh:
5581 emit_unary_func_op(result_type, id, args[0], "tanh");
5582 break;
5583 case GLSLstd450Asinh:
5584 emit_unary_func_op(result_type, id, args[0], "asinh");
5585 break;
5586 case GLSLstd450Acosh:
5587 emit_unary_func_op(result_type, id, args[0], "acosh");
5588 break;
5589 case GLSLstd450Atanh:
5590 emit_unary_func_op(result_type, id, args[0], "atanh");
5591 break;
5592 case GLSLstd450Atan2:
5593 emit_binary_func_op(result_type, id, args[0], args[1], "atan");
5594 break;
5595
5596 // Exponentials
5597 case GLSLstd450Pow:
5598 emit_binary_func_op(result_type, id, args[0], args[1], "pow");
5599 break;
5600 case GLSLstd450Exp:
5601 emit_unary_func_op(result_type, id, args[0], "exp");
5602 break;
5603 case GLSLstd450Log:
5604 emit_unary_func_op(result_type, id, args[0], "log");
5605 break;
5606 case GLSLstd450Exp2:
5607 emit_unary_func_op(result_type, id, args[0], "exp2");
5608 break;
5609 case GLSLstd450Log2:
5610 emit_unary_func_op(result_type, id, args[0], "log2");
5611 break;
5612 case GLSLstd450Sqrt:
5613 emit_unary_func_op(result_type, id, args[0], "sqrt");
5614 break;
5615 case GLSLstd450InverseSqrt:
5616 emit_unary_func_op(result_type, id, args[0], "inversesqrt");
5617 break;
5618
5619 // Matrix math
5620 case GLSLstd450Determinant:
5621 emit_unary_func_op(result_type, id, args[0], "determinant");
5622 break;
5623 case GLSLstd450MatrixInverse:
5624 emit_unary_func_op(result_type, id, args[0], "inverse");
5625 break;
5626
5627 // Lerping
5628 case GLSLstd450FMix:
5629 case GLSLstd450IMix:
5630 {
5631 emit_mix_op(result_type, id, args[0], args[1], args[2]);
5632 break;
5633 }
5634 case GLSLstd450Step:
5635 emit_binary_func_op(result_type, id, args[0], args[1], "step");
5636 break;
5637 case GLSLstd450SmoothStep:
5638 emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "smoothstep");
5639 break;
5640
5641 // Packing
5642 case GLSLstd450Frexp:
5643 register_call_out_argument(args[1]);
5644 forced_temporaries.insert(id);
5645 emit_binary_func_op(result_type, id, args[0], args[1], "frexp");
5646 break;
5647
5648 case GLSLstd450FrexpStruct:
5649 {
5650 auto &type = get<SPIRType>(result_type);
5651 emit_uninitialized_temporary_expression(result_type, id);
5652 statement(to_expression(id), ".", to_member_name(type, 0), " = ", "frexp(", to_expression(args[0]), ", ",
5653 to_expression(id), ".", to_member_name(type, 1), ");");
5654 break;
5655 }
5656
5657 case GLSLstd450Ldexp:
5658 {
5659 bool forward = should_forward(args[0]) && should_forward(args[1]);
5660
5661 auto op0 = to_unpacked_expression(args[0]);
5662 auto op1 = to_unpacked_expression(args[1]);
5663 auto &op1_type = expression_type(args[1]);
5664 if (op1_type.basetype != SPIRType::Int)
5665 {
5666 // Need a value cast here.
5667 auto target_type = op1_type;
5668 target_type.basetype = SPIRType::Int;
5669 op1 = join(type_to_glsl_constructor(target_type), "(", op1, ")");
5670 }
5671
5672 auto expr = join("ldexp(", op0, ", ", op1, ")");
5673
5674 emit_op(result_type, id, expr, forward);
5675 inherit_expression_dependencies(id, args[0]);
5676 inherit_expression_dependencies(id, args[1]);
5677 break;
5678 }
5679
5680 case GLSLstd450PackSnorm4x8:
5681 emit_unary_func_op(result_type, id, args[0], "packSnorm4x8");
5682 break;
5683 case GLSLstd450PackUnorm4x8:
5684 emit_unary_func_op(result_type, id, args[0], "packUnorm4x8");
5685 break;
5686 case GLSLstd450PackSnorm2x16:
5687 emit_unary_func_op(result_type, id, args[0], "packSnorm2x16");
5688 break;
5689 case GLSLstd450PackUnorm2x16:
5690 emit_unary_func_op(result_type, id, args[0], "packUnorm2x16");
5691 break;
5692 case GLSLstd450PackHalf2x16:
5693 emit_unary_func_op(result_type, id, args[0], "packHalf2x16");
5694 break;
5695 case GLSLstd450UnpackSnorm4x8:
5696 emit_unary_func_op(result_type, id, args[0], "unpackSnorm4x8");
5697 break;
5698 case GLSLstd450UnpackUnorm4x8:
5699 emit_unary_func_op(result_type, id, args[0], "unpackUnorm4x8");
5700 break;
5701 case GLSLstd450UnpackSnorm2x16:
5702 emit_unary_func_op(result_type, id, args[0], "unpackSnorm2x16");
5703 break;
5704 case GLSLstd450UnpackUnorm2x16:
5705 emit_unary_func_op(result_type, id, args[0], "unpackUnorm2x16");
5706 break;
5707 case GLSLstd450UnpackHalf2x16:
5708 emit_unary_func_op(result_type, id, args[0], "unpackHalf2x16");
5709 break;
5710
5711 case GLSLstd450PackDouble2x32:
5712 emit_unary_func_op(result_type, id, args[0], "packDouble2x32");
5713 break;
5714 case GLSLstd450UnpackDouble2x32:
5715 emit_unary_func_op(result_type, id, args[0], "unpackDouble2x32");
5716 break;
5717
5718 // Vector math
5719 case GLSLstd450Length:
5720 emit_unary_func_op(result_type, id, args[0], "length");
5721 break;
5722 case GLSLstd450Distance:
5723 emit_binary_func_op(result_type, id, args[0], args[1], "distance");
5724 break;
5725 case GLSLstd450Cross:
5726 emit_binary_func_op(result_type, id, args[0], args[1], "cross");
5727 break;
5728 case GLSLstd450Normalize:
5729 emit_unary_func_op(result_type, id, args[0], "normalize");
5730 break;
5731 case GLSLstd450FaceForward:
5732 emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "faceforward");
5733 break;
5734 case GLSLstd450Reflect:
5735 emit_binary_func_op(result_type, id, args[0], args[1], "reflect");
5736 break;
5737 case GLSLstd450Refract:
5738 emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "refract");
5739 break;
5740
5741 // Bit-fiddling
5742 case GLSLstd450FindILsb:
5743 // findLSB always returns int.
5744 emit_unary_func_op_cast(result_type, id, args[0], "findLSB", expression_type(args[0]).basetype, int_type);
5745 break;
5746
5747 case GLSLstd450FindSMsb:
5748 emit_unary_func_op_cast(result_type, id, args[0], "findMSB", int_type, int_type);
5749 break;
5750
5751 case GLSLstd450FindUMsb:
5752 emit_unary_func_op_cast(result_type, id, args[0], "findMSB", uint_type,
5753 int_type); // findMSB always returns int.
5754 break;
5755
5756 // Multisampled varying
5757 case GLSLstd450InterpolateAtCentroid:
5758 emit_unary_func_op(result_type, id, args[0], "interpolateAtCentroid");
5759 break;
5760 case GLSLstd450InterpolateAtSample:
5761 emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtSample");
5762 break;
5763 case GLSLstd450InterpolateAtOffset:
5764 emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtOffset");
5765 break;
5766
5767 case GLSLstd450NMin:
5768 case GLSLstd450NMax:
5769 {
5770 emit_nminmax_op(result_type, id, args[0], args[1], op);
5771 break;
5772 }
5773
5774 case GLSLstd450NClamp:
5775 {
5776 // Make sure we have a unique ID here to avoid aliasing the extra sub-expressions between clamp and NMin sub-op.
5777 // IDs cannot exceed 24 bits, so we can make use of the higher bits for some unique flags.
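// E.g. (id | 0x80000000u) below keys the temporary that holds the intermediate NMax result.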
5778 uint32_t &max_id = extra_sub_expressions[id | 0x80000000u];
5779 if (!max_id)
5780 max_id = ir.increase_bound_by(1);
5781
5782 // Inherit precision qualifiers.
5783 ir.meta[max_id] = ir.meta[id];
5784
5785 emit_nminmax_op(result_type, max_id, args[0], args[1], GLSLstd450NMax);
5786 emit_nminmax_op(result_type, id, max_id, args[2], GLSLstd450NMin);
5787 break;
5788 }
5789
5790 default:
5791 statement("// unimplemented GLSL op ", eop);
5792 break;
5793 }
5794 }
5795
5796 void CompilerGLSL::emit_nminmax_op(uint32_t result_type, uint32_t id, uint32_t op0, uint32_t op1, GLSLstd450 op)
5797 {
5798 // Need to emulate this call.
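// Roughly: NMin(a, b) becomes isnan(b) ? a : (isnan(a) ? b : min(a, b)), built from the sub-expressions below.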
5799 uint32_t &ids = extra_sub_expressions[id];
5800 if (!ids)
5801 {
5802 ids = ir.increase_bound_by(5);
5803 auto btype = get<SPIRType>(result_type);
5804 btype.basetype = SPIRType::Boolean;
5805 set<SPIRType>(ids, btype);
5806 }
5807
5808 uint32_t btype_id = ids + 0;
5809 uint32_t left_nan_id = ids + 1;
5810 uint32_t right_nan_id = ids + 2;
5811 uint32_t tmp_id = ids + 3;
5812 uint32_t mixed_first_id = ids + 4;
5813
5814 // Inherit precision qualifiers.
5815 ir.meta[tmp_id] = ir.meta[id];
5816 ir.meta[mixed_first_id] = ir.meta[id];
5817
5818 emit_unary_func_op(btype_id, left_nan_id, op0, "isnan");
5819 emit_unary_func_op(btype_id, right_nan_id, op1, "isnan");
5820 emit_binary_func_op(result_type, tmp_id, op0, op1, op == GLSLstd450NMin ? "min" : "max");
5821 emit_mix_op(result_type, mixed_first_id, tmp_id, op1, left_nan_id);
5822 emit_mix_op(result_type, id, mixed_first_id, op0, right_nan_id);
5823 }
5824
5825 void CompilerGLSL::emit_spv_amd_shader_ballot_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args,
5826 uint32_t)
5827 {
5828 require_extension_internal("GL_AMD_shader_ballot");
5829
5830 enum AMDShaderBallot
5831 {
5832 SwizzleInvocationsAMD = 1,
5833 SwizzleInvocationsMaskedAMD = 2,
5834 WriteInvocationAMD = 3,
5835 MbcntAMD = 4
5836 };
5837
5838 auto op = static_cast<AMDShaderBallot>(eop);
5839
5840 switch (op)
5841 {
5842 case SwizzleInvocationsAMD:
5843 emit_binary_func_op(result_type, id, args[0], args[1], "swizzleInvocationsAMD");
5844 register_control_dependent_expression(id);
5845 break;
5846
5847 case SwizzleInvocationsMaskedAMD:
5848 emit_binary_func_op(result_type, id, args[0], args[1], "swizzleInvocationsMaskedAMD");
5849 register_control_dependent_expression(id);
5850 break;
5851
5852 case WriteInvocationAMD:
5853 emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "writeInvocationAMD");
5854 register_control_dependent_expression(id);
5855 break;
5856
5857 case MbcntAMD:
5858 emit_unary_func_op(result_type, id, args[0], "mbcntAMD");
5859 register_control_dependent_expression(id);
5860 break;
5861
5862 default:
5863 statement("// unimplemented SPV AMD shader ballot op ", eop);
5864 break;
5865 }
5866 }
5867
5868 void CompilerGLSL::emit_spv_amd_shader_explicit_vertex_parameter_op(uint32_t result_type, uint32_t id, uint32_t eop,
5869 const uint32_t *args, uint32_t)
5870 {
5871 require_extension_internal("GL_AMD_shader_explicit_vertex_parameter");
5872
5873 enum AMDShaderExplicitVertexParameter
5874 {
5875 InterpolateAtVertexAMD = 1
5876 };
5877
5878 auto op = static_cast<AMDShaderExplicitVertexParameter>(eop);
5879
5880 switch (op)
5881 {
5882 case InterpolateAtVertexAMD:
5883 emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtVertexAMD");
5884 break;
5885
5886 default:
5887 statement("// unimplemented SPV AMD shader explicit vertex parameter op ", eop);
5888 break;
5889 }
5890 }
5891
5892 void CompilerGLSL::emit_spv_amd_shader_trinary_minmax_op(uint32_t result_type, uint32_t id, uint32_t eop,
5893 const uint32_t *args, uint32_t)
5894 {
5895 require_extension_internal("GL_AMD_shader_trinary_minmax");
5896
5897 enum AMDShaderTrinaryMinMax
5898 {
5899 FMin3AMD = 1,
5900 UMin3AMD = 2,
5901 SMin3AMD = 3,
5902 FMax3AMD = 4,
5903 UMax3AMD = 5,
5904 SMax3AMD = 6,
5905 FMid3AMD = 7,
5906 UMid3AMD = 8,
5907 SMid3AMD = 9
5908 };
5909
5910 auto op = static_cast<AMDShaderTrinaryMinMax>(eop);
5911
5912 switch (op)
5913 {
5914 case FMin3AMD:
5915 case UMin3AMD:
5916 case SMin3AMD:
5917 emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "min3");
5918 break;
5919
5920 case FMax3AMD:
5921 case UMax3AMD:
5922 case SMax3AMD:
5923 emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "max3");
5924 break;
5925
5926 case FMid3AMD:
5927 case UMid3AMD:
5928 case SMid3AMD:
5929 emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "mid3");
5930 break;
5931
5932 default:
5933 statement("// unimplemented SPV AMD shader trinary minmax op ", eop);
5934 break;
5935 }
5936 }
5937
5938 void CompilerGLSL::emit_spv_amd_gcn_shader_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args,
5939 uint32_t)
5940 {
5941 require_extension_internal("GL_AMD_gcn_shader");
5942
5943 enum AMDGCNShader
5944 {
5945 CubeFaceIndexAMD = 1,
5946 CubeFaceCoordAMD = 2,
5947 TimeAMD = 3
5948 };
5949
5950 auto op = static_cast<AMDGCNShader>(eop);
5951
5952 switch (op)
5953 {
5954 case CubeFaceIndexAMD:
5955 emit_unary_func_op(result_type, id, args[0], "cubeFaceIndexAMD");
5956 break;
5957 case CubeFaceCoordAMD:
5958 emit_unary_func_op(result_type, id, args[0], "cubeFaceCoordAMD");
5959 break;
5960 case TimeAMD:
5961 {
5962 string expr = "timeAMD()";
5963 emit_op(result_type, id, expr, true);
5964 register_control_dependent_expression(id);
5965 break;
5966 }
5967
5968 default:
5969 statement("// unimplemented SPV AMD gcn shader op ", eop);
5970 break;
5971 }
5972 }
5973
5974 void CompilerGLSL::emit_subgroup_op(const Instruction &i)
5975 {
5976 const uint32_t *ops = stream(i);
5977 auto op = static_cast<Op>(i.op);
5978
5979 if (!options.vulkan_semantics)
5980 SPIRV_CROSS_THROW("Can only use subgroup operations in Vulkan semantics.");
5981
5982 switch (op)
5983 {
5984 case OpGroupNonUniformElect:
5985 require_extension_internal("GL_KHR_shader_subgroup_basic");
5986 break;
5987
5988 case OpGroupNonUniformBroadcast:
5989 case OpGroupNonUniformBroadcastFirst:
5990 case OpGroupNonUniformBallot:
5991 case OpGroupNonUniformInverseBallot:
5992 case OpGroupNonUniformBallotBitExtract:
5993 case OpGroupNonUniformBallotBitCount:
5994 case OpGroupNonUniformBallotFindLSB:
5995 case OpGroupNonUniformBallotFindMSB:
5996 require_extension_internal("GL_KHR_shader_subgroup_ballot");
5997 break;
5998
5999 case OpGroupNonUniformShuffle:
6000 case OpGroupNonUniformShuffleXor:
6001 require_extension_internal("GL_KHR_shader_subgroup_shuffle");
6002 break;
6003
6004 case OpGroupNonUniformShuffleUp:
6005 case OpGroupNonUniformShuffleDown:
6006 require_extension_internal("GL_KHR_shader_subgroup_shuffle_relative");
6007 break;
6008
6009 case OpGroupNonUniformAll:
6010 case OpGroupNonUniformAny:
6011 case OpGroupNonUniformAllEqual:
6012 require_extension_internal("GL_KHR_shader_subgroup_vote");
6013 break;
6014
6015 case OpGroupNonUniformFAdd:
6016 case OpGroupNonUniformFMul:
6017 case OpGroupNonUniformFMin:
6018 case OpGroupNonUniformFMax:
6019 case OpGroupNonUniformIAdd:
6020 case OpGroupNonUniformIMul:
6021 case OpGroupNonUniformSMin:
6022 case OpGroupNonUniformSMax:
6023 case OpGroupNonUniformUMin:
6024 case OpGroupNonUniformUMax:
6025 case OpGroupNonUniformBitwiseAnd:
6026 case OpGroupNonUniformBitwiseOr:
6027 case OpGroupNonUniformBitwiseXor:
6028 {
6029 auto operation = static_cast<GroupOperation>(ops[3]);
6030 if (operation == GroupOperationClusteredReduce)
6031 {
6032 require_extension_internal("GL_KHR_shader_subgroup_clustered");
6033 }
6034 else if (operation == GroupOperationExclusiveScan || operation == GroupOperationInclusiveScan ||
6035 operation == GroupOperationReduce)
6036 {
6037 require_extension_internal("GL_KHR_shader_subgroup_arithmetic");
6038 }
6039 else
6040 SPIRV_CROSS_THROW("Invalid group operation.");
6041 break;
6042 }
6043
6044 case OpGroupNonUniformQuadSwap:
6045 case OpGroupNonUniformQuadBroadcast:
6046 require_extension_internal("GL_KHR_shader_subgroup_quad");
6047 break;
6048
6049 default:
6050 SPIRV_CROSS_THROW("Invalid opcode for subgroup.");
6051 }
6052
6053 uint32_t result_type = ops[0];
6054 uint32_t id = ops[1];
6055
6056 auto scope = static_cast<Scope>(get<SPIRConstant>(ops[2]).scalar());
6057 if (scope != ScopeSubgroup)
6058 SPIRV_CROSS_THROW("Only subgroup scope is supported.");
6059
6060 switch (op)
6061 {
6062 case OpGroupNonUniformElect:
6063 emit_op(result_type, id, "subgroupElect()", true);
6064 break;
6065
6066 case OpGroupNonUniformBroadcast:
6067 emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupBroadcast");
6068 break;
6069
6070 case OpGroupNonUniformBroadcastFirst:
6071 emit_unary_func_op(result_type, id, ops[3], "subgroupBroadcastFirst");
6072 break;
6073
6074 case OpGroupNonUniformBallot:
6075 emit_unary_func_op(result_type, id, ops[3], "subgroupBallot");
6076 break;
6077
6078 case OpGroupNonUniformInverseBallot:
6079 emit_unary_func_op(result_type, id, ops[3], "subgroupInverseBallot");
6080 break;
6081
6082 case OpGroupNonUniformBallotBitExtract:
6083 emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupBallotBitExtract");
6084 break;
6085
6086 case OpGroupNonUniformBallotFindLSB:
6087 emit_unary_func_op(result_type, id, ops[3], "subgroupBallotFindLSB");
6088 break;
6089
6090 case OpGroupNonUniformBallotFindMSB:
6091 emit_unary_func_op(result_type, id, ops[3], "subgroupBallotFindMSB");
6092 break;
6093
6094 case OpGroupNonUniformBallotBitCount:
6095 {
6096 auto operation = static_cast<GroupOperation>(ops[3]);
6097 if (operation == GroupOperationReduce)
6098 emit_unary_func_op(result_type, id, ops[4], "subgroupBallotBitCount");
6099 else if (operation == GroupOperationInclusiveScan)
6100 emit_unary_func_op(result_type, id, ops[4], "subgroupBallotInclusiveBitCount");
6101 else if (operation == GroupOperationExclusiveScan)
6102 emit_unary_func_op(result_type, id, ops[4], "subgroupBallotExclusiveBitCount");
6103 else
6104 SPIRV_CROSS_THROW("Invalid BitCount operation.");
6105 break;
6106 }
6107
6108 case OpGroupNonUniformShuffle:
6109 emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffle");
6110 break;
6111
6112 case OpGroupNonUniformShuffleXor:
6113 emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleXor");
6114 break;
6115
6116 case OpGroupNonUniformShuffleUp:
6117 emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleUp");
6118 break;
6119
6120 case OpGroupNonUniformShuffleDown:
6121 emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleDown");
6122 break;
6123
6124 case OpGroupNonUniformAll:
6125 emit_unary_func_op(result_type, id, ops[3], "subgroupAll");
6126 break;
6127
6128 case OpGroupNonUniformAny:
6129 emit_unary_func_op(result_type, id, ops[3], "subgroupAny");
6130 break;
6131
6132 case OpGroupNonUniformAllEqual:
6133 emit_unary_func_op(result_type, id, ops[3], "subgroupAllEqual");
6134 break;
6135
6136 // clang-format off
6137 #define GLSL_GROUP_OP(op, glsl_op) \
6138 case OpGroupNonUniform##op: \
6139 { \
6140 auto operation = static_cast<GroupOperation>(ops[3]); \
6141 if (operation == GroupOperationReduce) \
6142 emit_unary_func_op(result_type, id, ops[4], "subgroup" #glsl_op); \
6143 else if (operation == GroupOperationInclusiveScan) \
6144 emit_unary_func_op(result_type, id, ops[4], "subgroupInclusive" #glsl_op); \
6145 else if (operation == GroupOperationExclusiveScan) \
6146 emit_unary_func_op(result_type, id, ops[4], "subgroupExclusive" #glsl_op); \
6147 else if (operation == GroupOperationClusteredReduce) \
6148 emit_binary_func_op(result_type, id, ops[4], ops[5], "subgroupClustered" #glsl_op); \
6149 else \
6150 SPIRV_CROSS_THROW("Invalid group operation."); \
6151 break; \
6152 }
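// E.g. GLSL_GROUP_OP(FAdd, Add) expands to a case dispatching to subgroupAdd / subgroupInclusiveAdd /
// subgroupExclusiveAdd / subgroupClusteredAdd depending on the group operation.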
6153 GLSL_GROUP_OP(FAdd, Add)
6154 GLSL_GROUP_OP(FMul, Mul)
6155 GLSL_GROUP_OP(FMin, Min)
6156 GLSL_GROUP_OP(FMax, Max)
6157 GLSL_GROUP_OP(IAdd, Add)
6158 GLSL_GROUP_OP(IMul, Mul)
6159 GLSL_GROUP_OP(SMin, Min)
6160 GLSL_GROUP_OP(SMax, Max)
6161 GLSL_GROUP_OP(UMin, Min)
6162 GLSL_GROUP_OP(UMax, Max)
6163 GLSL_GROUP_OP(BitwiseAnd, And)
6164 GLSL_GROUP_OP(BitwiseOr, Or)
6165 GLSL_GROUP_OP(BitwiseXor, Xor)
6166 #undef GLSL_GROUP_OP
6167 // clang-format on
6168
6169 case OpGroupNonUniformQuadSwap:
6170 {
6171 uint32_t direction = get<SPIRConstant>(ops[4]).scalar();
6172 if (direction == 0)
6173 emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapHorizontal");
6174 else if (direction == 1)
6175 emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapVertical");
6176 else if (direction == 2)
6177 emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapDiagonal");
6178 else
6179 SPIRV_CROSS_THROW("Invalid quad swap direction.");
6180 break;
6181 }
6182
6183 case OpGroupNonUniformQuadBroadcast:
6184 {
6185 emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupQuadBroadcast");
6186 break;
6187 }
6188
6189 default:
6190 SPIRV_CROSS_THROW("Invalid opcode for subgroup.");
6191 }
6192
6193 register_control_dependent_expression(id);
6194 }
6195
6196 string CompilerGLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type)
6197 {
6198 // OpBitcast can deal with pointers.
6199 if (out_type.pointer || in_type.pointer)
6200 return type_to_glsl(out_type);
6201
6202 if (out_type.basetype == in_type.basetype)
6203 return "";
6204
6205 assert(out_type.basetype != SPIRType::Boolean);
6206 assert(in_type.basetype != SPIRType::Boolean);
6207
6208 bool integral_cast = type_is_integral(out_type) && type_is_integral(in_type);
6209 bool same_size_cast = out_type.width == in_type.width;
6210
6211 // Trivial bitcast case, casts between integers.
6212 if (integral_cast && same_size_cast)
6213 return type_to_glsl(out_type);
6214
6215 // Catch-all 8-bit arithmetic casts (GL_EXT_shader_explicit_arithmetic_types).
6216 if (out_type.width == 8 && in_type.width >= 16 && integral_cast && in_type.vecsize == 1)
6217 return "unpack8";
6218 else if (in_type.width == 8 && out_type.width == 16 && integral_cast && out_type.vecsize == 1)
6219 return "pack16";
6220 else if (in_type.width == 8 && out_type.width == 32 && integral_cast && out_type.vecsize == 1)
6221 return "pack32";
6222
6223 // Floating <-> Integer special casts. Just have to enumerate all cases. :(
6224 // 16-bit, 32-bit and 64-bit floats.
6225 if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Float)
6226 {
6227 if (is_legacy_es())
6228 SPIRV_CROSS_THROW("Float -> Uint bitcast not supported on legacy ESSL.");
6229 else if (!options.es && options.version < 330)
6230 require_extension_internal("GL_ARB_shader_bit_encoding");
6231 return "floatBitsToUint";
6232 }
6233 else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Float)
6234 {
6235 if (is_legacy_es())
6236 SPIRV_CROSS_THROW("Float -> Int bitcast not supported on legacy ESSL.");
6237 else if (!options.es && options.version < 330)
6238 require_extension_internal("GL_ARB_shader_bit_encoding");
6239 return "floatBitsToInt";
6240 }
6241 else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::UInt)
6242 {
6243 if (is_legacy_es())
6244 SPIRV_CROSS_THROW("Uint -> Float bitcast not supported on legacy ESSL.");
6245 else if (!options.es && options.version < 330)
6246 require_extension_internal("GL_ARB_shader_bit_encoding");
6247 return "uintBitsToFloat";
6248 }
6249 else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::Int)
6250 {
6251 if (is_legacy_es())
6252 SPIRV_CROSS_THROW("Int -> Float bitcast not supported on legacy ESSL.");
6253 else if (!options.es && options.version < 330)
6254 require_extension_internal("GL_ARB_shader_bit_encoding");
6255 return "intBitsToFloat";
6256 }
6257
6258 else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Double)
6259 return "doubleBitsToInt64";
6260 else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::Double)
6261 return "doubleBitsToUint64";
6262 else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::Int64)
6263 return "int64BitsToDouble";
6264 else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::UInt64)
6265 return "uint64BitsToDouble";
6266 else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Half)
6267 return "float16BitsToInt16";
6268 else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::Half)
6269 return "float16BitsToUint16";
6270 else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::Short)
6271 return "int16BitsToFloat16";
6272 else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UShort)
6273 return "uint16BitsToFloat16";
6274
6275 // And finally, some even more special purpose casts.
6276 if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::UInt && in_type.vecsize == 2)
6277 return "packUint2x32";
6278 else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1)
6279 return "unpackFloat2x16";
6280 else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Half && in_type.vecsize == 2)
6281 return "packFloat2x16";
6282 else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Short && in_type.vecsize == 2)
6283 return "packInt2x16";
6284 else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Int && in_type.vecsize == 1)
6285 return "unpackInt2x16";
6286 else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::UShort && in_type.vecsize == 2)
6287 return "packUint2x16";
6288 else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1)
6289 return "unpackUint2x16";
6290 else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Short && in_type.vecsize == 4)
6291 return "packInt4x16";
6292 else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Int64 && in_type.vecsize == 1)
6293 return "unpackInt4x16";
6294 else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::UShort && in_type.vecsize == 4)
6295 return "packUint4x16";
6296 else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::UInt64 && in_type.vecsize == 1)
6297 return "unpackUint4x16";
6298
6299 return "";
6300 }
6301
6302 string CompilerGLSL::bitcast_glsl(const SPIRType &result_type, uint32_t argument)
6303 {
6304 auto op = bitcast_glsl_op(result_type, expression_type(argument));
6305 if (op.empty())
6306 return to_enclosed_unpacked_expression(argument);
6307 else
6308 return join(op, "(", to_unpacked_expression(argument), ")");
6309 }
6310
6311 std::string CompilerGLSL::bitcast_expression(SPIRType::BaseType target_type, uint32_t arg)
6312 {
6313 auto expr = to_expression(arg);
6314 auto &src_type = expression_type(arg);
6315 if (src_type.basetype != target_type)
6316 {
6317 auto target = src_type;
6318 target.basetype = target_type;
6319 expr = join(bitcast_glsl_op(target, src_type), "(", expr, ")");
6320 }
6321
6322 return expr;
6323 }
6324
6325 std::string CompilerGLSL::bitcast_expression(const SPIRType &target_type, SPIRType::BaseType expr_type,
6326 const std::string &expr)
6327 {
6328 if (target_type.basetype == expr_type)
6329 return expr;
6330
6331 auto src_type = target_type;
6332 src_type.basetype = expr_type;
6333 return join(bitcast_glsl_op(target_type, src_type), "(", expr, ")");
6334 }
6335
6336 string CompilerGLSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage)
6337 {
6338 switch (builtin)
6339 {
6340 case BuiltInPosition:
6341 return "gl_Position";
6342 case BuiltInPointSize:
6343 return "gl_PointSize";
6344 case BuiltInClipDistance:
6345 return "gl_ClipDistance";
6346 case BuiltInCullDistance:
6347 return "gl_CullDistance";
6348 case BuiltInVertexId:
6349 if (options.vulkan_semantics)
6350 SPIRV_CROSS_THROW(
6351 "Cannot implement gl_VertexID in Vulkan GLSL. This shader was created with GL semantics.");
6352 return "gl_VertexID";
6353 case BuiltInInstanceId:
6354 if (options.vulkan_semantics)
6355 SPIRV_CROSS_THROW(
6356 "Cannot implement gl_InstanceID in Vulkan GLSL. This shader was created with GL semantics.");
6357 return "gl_InstanceID";
6358 case BuiltInVertexIndex:
6359 if (options.vulkan_semantics)
6360 return "gl_VertexIndex";
6361 else
6362 return "gl_VertexID"; // gl_VertexID already has the base offset applied.
6363 case BuiltInInstanceIndex:
6364 if (options.vulkan_semantics)
6365 return "gl_InstanceIndex";
6366 else if (options.vertex.support_nonzero_base_instance)
6367 return "(gl_InstanceID + SPIRV_Cross_BaseInstance)"; // ... but not gl_InstanceID.
6368 else
6369 return "gl_InstanceID";
6370 case BuiltInPrimitiveId:
6371 if (storage == StorageClassInput && get_entry_point().model == ExecutionModelGeometry)
6372 return "gl_PrimitiveIDIn";
6373 else
6374 return "gl_PrimitiveID";
6375 case BuiltInInvocationId:
6376 return "gl_InvocationID";
6377 case BuiltInLayer:
6378 return "gl_Layer";
6379 case BuiltInViewportIndex:
6380 return "gl_ViewportIndex";
6381 case BuiltInTessLevelOuter:
6382 return "gl_TessLevelOuter";
6383 case BuiltInTessLevelInner:
6384 return "gl_TessLevelInner";
6385 case BuiltInTessCoord:
6386 return "gl_TessCoord";
6387 case BuiltInFragCoord:
6388 return "gl_FragCoord";
6389 case BuiltInPointCoord:
6390 return "gl_PointCoord";
6391 case BuiltInFrontFacing:
6392 return "gl_FrontFacing";
6393 case BuiltInFragDepth:
6394 return "gl_FragDepth";
6395 case BuiltInNumWorkgroups:
6396 return "gl_NumWorkGroups";
6397 case BuiltInWorkgroupSize:
6398 return "gl_WorkGroupSize";
6399 case BuiltInWorkgroupId:
6400 return "gl_WorkGroupID";
6401 case BuiltInLocalInvocationId:
6402 return "gl_LocalInvocationID";
6403 case BuiltInGlobalInvocationId:
6404 return "gl_GlobalInvocationID";
6405 case BuiltInLocalInvocationIndex:
6406 return "gl_LocalInvocationIndex";
6407 case BuiltInHelperInvocation:
6408 return "gl_HelperInvocation";
6409 case BuiltInBaseVertex:
6410 if (options.es)
6411 SPIRV_CROSS_THROW("BaseVertex not supported in ES profile.");
6412 if (options.version < 460)
6413 {
6414 require_extension_internal("GL_ARB_shader_draw_parameters");
6415 return "gl_BaseVertexARB";
6416 }
6417 return "gl_BaseVertex";
6418 case BuiltInBaseInstance:
6419 if (options.es)
6420 SPIRV_CROSS_THROW("BaseInstance not supported in ES profile.");
6421 if (options.version < 460)
6422 {
6423 require_extension_internal("GL_ARB_shader_draw_parameters");
6424 return "gl_BaseInstanceARB";
6425 }
6426 return "gl_BaseInstance";
6427 case BuiltInDrawIndex:
6428 if (options.es)
6429 SPIRV_CROSS_THROW("DrawIndex not supported in ES profile.");
6430 if (options.version < 460)
6431 {
6432 require_extension_internal("GL_ARB_shader_draw_parameters");
6433 return "gl_DrawIDARB";
6434 }
6435 return "gl_DrawID";
6436
6437 case BuiltInSampleId:
6438 if (options.es && options.version < 320)
6439 require_extension_internal("GL_OES_sample_variables");
6440 if (!options.es && options.version < 400)
6441 SPIRV_CROSS_THROW("gl_SampleID not supported before GLSL 400.");
6442 return "gl_SampleID";
6443
6444 case BuiltInSampleMask:
6445 if (options.es && options.version < 320)
6446 require_extension_internal("GL_OES_sample_variables");
6447 if (!options.es && options.version < 400)
6448 SPIRV_CROSS_THROW("gl_SampleMask/gl_SampleMaskIn not supported before GLSL 400.");
6449
6450 if (storage == StorageClassInput)
6451 return "gl_SampleMaskIn";
6452 else
6453 return "gl_SampleMask";
6454
6455 case BuiltInSamplePosition:
6456 if (options.es && options.version < 320)
6457 require_extension_internal("GL_OES_sample_variables");
6458 if (!options.es && options.version < 400)
6459 SPIRV_CROSS_THROW("gl_SamplePosition not supported before GLSL 400.");
6460 return "gl_SamplePosition";
6461
6462 case BuiltInViewIndex:
6463 if (options.vulkan_semantics)
6464 {
6465 require_extension_internal("GL_EXT_multiview");
6466 return "gl_ViewIndex";
6467 }
6468 else
6469 {
6470 require_extension_internal("GL_OVR_multiview2");
6471 return "gl_ViewID_OVR";
6472 }
6473
6474 case BuiltInNumSubgroups:
6475 if (!options.vulkan_semantics)
6476 SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup.");
6477 require_extension_internal("GL_KHR_shader_subgroup_basic");
6478 return "gl_NumSubgroups";
6479
6480 case BuiltInSubgroupId:
6481 if (!options.vulkan_semantics)
6482 SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup.");
6483 require_extension_internal("GL_KHR_shader_subgroup_basic");
6484 return "gl_SubgroupID";
6485
6486 case BuiltInSubgroupSize:
6487 if (!options.vulkan_semantics)
6488 SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup.");
6489 require_extension_internal("GL_KHR_shader_subgroup_basic");
6490 return "gl_SubgroupSize";
6491
6492 case BuiltInSubgroupLocalInvocationId:
6493 if (!options.vulkan_semantics)
6494 SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup.");
6495 require_extension_internal("GL_KHR_shader_subgroup_basic");
6496 return "gl_SubgroupInvocationID";
6497
6498 case BuiltInSubgroupEqMask:
6499 if (!options.vulkan_semantics)
6500 SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup.");
6501 require_extension_internal("GL_KHR_shader_subgroup_ballot");
6502 return "gl_SubgroupEqMask";
6503
6504 case BuiltInSubgroupGeMask:
6505 if (!options.vulkan_semantics)
6506 SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup.");
6507 require_extension_internal("GL_KHR_shader_subgroup_ballot");
6508 return "gl_SubgroupGeMask";
6509
6510 case BuiltInSubgroupGtMask:
6511 if (!options.vulkan_semantics)
6512 SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup.");
6513 require_extension_internal("GL_KHR_shader_subgroup_ballot");
6514 return "gl_SubgroupGtMask";
6515
6516 case BuiltInSubgroupLeMask:
6517 if (!options.vulkan_semantics)
6518 SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup.");
6519 require_extension_internal("GL_KHR_shader_subgroup_ballot");
6520 return "gl_SubgroupLeMask";
6521
6522 case BuiltInSubgroupLtMask:
6523 if (!options.vulkan_semantics)
6524 SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup.");
6525 require_extension_internal("GL_KHR_shader_subgroup_ballot");
6526 return "gl_SubgroupLtMask";
6527
6528 case BuiltInLaunchIdNV:
6529 return "gl_LaunchIDNV";
6530 case BuiltInLaunchSizeNV:
6531 return "gl_LaunchSizeNV";
6532 case BuiltInWorldRayOriginNV:
6533 return "gl_WorldRayOriginNV";
6534 case BuiltInWorldRayDirectionNV:
6535 return "gl_WorldRayDirectionNV";
6536 case BuiltInObjectRayOriginNV:
6537 return "gl_ObjectRayOriginNV";
6538 case BuiltInObjectRayDirectionNV:
6539 return "gl_ObjectRayDirectionNV";
6540 case BuiltInRayTminNV:
6541 return "gl_RayTminNV";
6542 case BuiltInRayTmaxNV:
6543 return "gl_RayTmaxNV";
6544 case BuiltInInstanceCustomIndexNV:
6545 return "gl_InstanceCustomIndexNV";
6546 case BuiltInObjectToWorldNV:
6547 return "gl_ObjectToWorldNV";
6548 case BuiltInWorldToObjectNV:
6549 return "gl_WorldToObjectNV";
6550 case BuiltInHitTNV:
6551 return "gl_HitTNV";
6552 case BuiltInHitKindNV:
6553 return "gl_HitKindNV";
6554 case BuiltInIncomingRayFlagsNV:
6555 return "gl_IncomingRayFlagsNV";
6556
6557 case BuiltInBaryCoordNV:
6558 {
6559 if (options.es && options.version < 320)
6560 SPIRV_CROSS_THROW("gl_BaryCoordNV requires ESSL 320.");
6561 else if (!options.es && options.version < 450)
6562 SPIRV_CROSS_THROW("gl_BaryCoordNV requires GLSL 450.");
6563 require_extension_internal("GL_NV_fragment_shader_barycentric");
6564 return "gl_BaryCoordNV";
6565 }
6566
6567 case BuiltInBaryCoordNoPerspNV:
6568 {
6569 if (options.es && options.version < 320)
6570 SPIRV_CROSS_THROW("gl_BaryCoordNoPerspNV requires ESSL 320.");
6571 else if (!options.es && options.version < 450)
6572 SPIRV_CROSS_THROW("gl_BaryCoordNoPerspNV requires GLSL 450.");
6573 require_extension_internal("GL_NV_fragment_shader_barycentric");
6574 return "gl_BaryCoordNoPerspNV";
6575 }
6576
6577 case BuiltInFragStencilRefEXT:
6578 {
6579 if (!options.es)
6580 {
6581 require_extension_internal("GL_ARB_shader_stencil_export");
6582 return "gl_FragStencilRefARB";
6583 }
6584 else
6585 SPIRV_CROSS_THROW("Stencil export not supported in GLES.");
6586 }
6587
6588 case BuiltInDeviceIndex:
6589 if (!options.vulkan_semantics)
6590 SPIRV_CROSS_THROW("Need Vulkan semantics for device group support.");
6591 require_extension_internal("GL_EXT_device_group");
6592 return "gl_DeviceIndex";
6593
6594 default:
6595 return join("gl_BuiltIn_", convert_to_string(builtin));
6596 }
6597 }
6598
6599 const char *CompilerGLSL::index_to_swizzle(uint32_t index)
6600 {
6601 switch (index)
6602 {
6603 case 0:
6604 return "x";
6605 case 1:
6606 return "y";
6607 case 2:
6608 return "z";
6609 case 3:
6610 return "w";
6611 default:
6612 SPIRV_CROSS_THROW("Swizzle index out of range");
6613 }
6614 }
6615
6616 void CompilerGLSL::access_chain_internal_append_index(std::string &expr, uint32_t /*base*/, const SPIRType *type,
6617 AccessChainFlags flags, bool & /*access_chain_is_arrayed*/,
6618 uint32_t index)
6619 {
6620 bool index_is_literal = (flags & ACCESS_CHAIN_INDEX_IS_LITERAL_BIT) != 0;
6621 bool register_expression_read = (flags & ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT) == 0;
6622
6623 expr += "[";
6624
6625 // If we are indexing into an array of SSBOs or UBOs, we need to index it with a non-uniform qualifier.
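	// For example (illustrative), with the backend's default "nonuniformEXT" qualifier, an access such as
	// buffers[i].data is emitted as buffers[nonuniformEXT(i)].data when i is decorated NonUniform.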
6626 bool nonuniform_index =
6627 has_decoration(index, DecorationNonUniformEXT) &&
6628 (has_decoration(type->self, DecorationBlock) || has_decoration(type->self, DecorationBufferBlock));
6629 if (nonuniform_index)
6630 {
6631 expr += backend.nonuniform_qualifier;
6632 expr += "(";
6633 }
6634
6635 if (index_is_literal)
6636 expr += convert_to_string(index);
6637 else
6638 expr += to_expression(index, register_expression_read);
6639
6640 if (nonuniform_index)
6641 expr += ")";
6642
6643 expr += "]";
6644 }
6645
6646 string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count,
6647 AccessChainFlags flags, AccessChainMeta *meta)
6648 {
6649 string expr;
6650
6651 bool index_is_literal = (flags & ACCESS_CHAIN_INDEX_IS_LITERAL_BIT) != 0;
6652 bool chain_only = (flags & ACCESS_CHAIN_CHAIN_ONLY_BIT) != 0;
6653 bool ptr_chain = (flags & ACCESS_CHAIN_PTR_CHAIN_BIT) != 0;
6654 bool register_expression_read = (flags & ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT) == 0;
6655
6656 if (!chain_only)
6657 {
6658 // We handle transpose explicitly, so don't resolve that here.
6659 auto *e = maybe_get<SPIRExpression>(base);
6660 bool old_transpose = e && e->need_transpose;
6661 if (e)
6662 e->need_transpose = false;
6663 expr = to_enclosed_expression(base, register_expression_read);
6664 if (e)
6665 e->need_transpose = old_transpose;
6666 }
6667
6668 // Start traversing type hierarchy at the proper non-pointer types,
6669 // but keep type_id referencing the original pointer for use below.
6670 uint32_t type_id = expression_type_id(base);
6671
6672 if (!backend.native_pointers)
6673 {
6674 if (ptr_chain)
6675 SPIRV_CROSS_THROW("Backend does not support native pointers and does not support OpPtrAccessChain.");
6676
6677 // Wrapped buffer reference pointer types will need to poke into the internal "value" member before
6678 // continuing the access chain.
6679 if (should_dereference(base))
6680 {
6681 auto &type = get<SPIRType>(type_id);
6682 expr = dereference_expression(type, expr);
6683 }
6684 }
6685
6686 const auto *type = &get_pointee_type(type_id);
6687
6688 bool access_chain_is_arrayed = expr.find_first_of('[') != string::npos;
6689 bool row_major_matrix_needs_conversion = is_non_native_row_major_matrix(base);
6690 bool is_packed = has_extended_decoration(base, SPIRVCrossDecorationPhysicalTypePacked);
6691 uint32_t physical_type = get_extended_decoration(base, SPIRVCrossDecorationPhysicalTypeID);
6692 bool is_invariant = has_decoration(base, DecorationInvariant);
6693 bool pending_array_enclose = false;
6694 bool dimension_flatten = false;
6695
6696 const auto append_index = [&](uint32_t index) {
6697 access_chain_internal_append_index(expr, base, type, flags, access_chain_is_arrayed, index);
6698 };
6699
6700 for (uint32_t i = 0; i < count; i++)
6701 {
6702 uint32_t index = indices[i];
6703
6704 // Pointer chains
6705 if (ptr_chain && i == 0)
6706 {
6707 // If we are flattening multidimensional arrays, only create opening bracket on first
6708 // array index.
6709 if (options.flatten_multidimensional_arrays)
6710 {
6711 dimension_flatten = type->array.size() >= 1;
6712 pending_array_enclose = dimension_flatten;
6713 if (pending_array_enclose)
6714 expr += "[";
6715 }
6716
6717 if (options.flatten_multidimensional_arrays && dimension_flatten)
6718 {
6719 // If we are flattening multidimensional arrays, do manual stride computation.
6720 if (index_is_literal)
6721 expr += convert_to_string(index);
6722 else
6723 expr += to_enclosed_expression(index, register_expression_read);
6724
6725 for (auto j = uint32_t(type->array.size()); j; j--)
6726 {
6727 expr += " * ";
6728 expr += enclose_expression(to_array_size(*type, j - 1));
6729 }
6730
6731 if (type->array.empty())
6732 pending_array_enclose = false;
6733 else
6734 expr += " + ";
6735
6736 if (!pending_array_enclose)
6737 expr += "]";
6738 }
6739 else
6740 {
6741 append_index(index);
6742 }
6743
6744 if (type->basetype == SPIRType::ControlPointArray)
6745 {
6746 type_id = type->parent_type;
6747 type = &get<SPIRType>(type_id);
6748 }
6749
6750 access_chain_is_arrayed = true;
6751 }
6752 // Arrays
6753 else if (!type->array.empty())
6754 {
6755 // If we are flattening multidimensional arrays, only create opening bracket on first
6756 // array index.
6757 if (options.flatten_multidimensional_arrays && !pending_array_enclose)
6758 {
6759 dimension_flatten = type->array.size() > 1;
6760 pending_array_enclose = dimension_flatten;
6761 if (pending_array_enclose)
6762 expr += "[";
6763 }
6764
6765 assert(type->parent_type);
6766
6767 auto *var = maybe_get<SPIRVariable>(base);
6768 if (backend.force_gl_in_out_block && i == 0 && var && is_builtin_variable(*var) &&
6769 !has_decoration(type->self, DecorationBlock))
6770 {
6771 // This deals with scenarios for tesc/geom where arrays of gl_Position[] are declared.
6772 // Normally, these variables live in blocks when compiled from GLSL,
6773 // but HLSL seems to just emit straight arrays here.
6774 // We must pretend this access goes through gl_in/gl_out arrays
6775 // to be able to access certain builtins as arrays.
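			// For example (illustrative): an access to gl_Position[2] in a tessellation control shader
			// is emitted as gl_in[2].gl_Position for inputs, or gl_out[2].gl_Position for outputs.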
6776 auto builtin = ir.meta[base].decoration.builtin_type;
6777 switch (builtin)
6778 {
6779 // case BuiltInCullDistance: // These are already arrays, need to figure out rules for these in tess/geom.
6780 // case BuiltInClipDistance:
6781 case BuiltInPosition:
6782 case BuiltInPointSize:
6783 if (var->storage == StorageClassInput)
6784 expr = join("gl_in[", to_expression(index, register_expression_read), "].", expr);
6785 else if (var->storage == StorageClassOutput)
6786 expr = join("gl_out[", to_expression(index, register_expression_read), "].", expr);
6787 else
6788 append_index(index);
6789 break;
6790
6791 default:
6792 append_index(index);
6793 break;
6794 }
6795 }
6796 else if (options.flatten_multidimensional_arrays && dimension_flatten)
6797 {
6798 // If we are flattening multidimensional arrays, do manual stride computation.
6799 auto &parent_type = get<SPIRType>(type->parent_type);
6800
6801 if (index_is_literal)
6802 expr += convert_to_string(index);
6803 else
6804 expr += to_enclosed_expression(index, register_expression_read);
6805
6806 for (auto j = uint32_t(parent_type.array.size()); j; j--)
6807 {
6808 expr += " * ";
6809 expr += enclose_expression(to_array_size(parent_type, j - 1));
6810 }
6811
6812 if (parent_type.array.empty())
6813 pending_array_enclose = false;
6814 else
6815 expr += " + ";
6816
6817 if (!pending_array_enclose)
6818 expr += "]";
6819 }
6820 // Some builtins are arrays in SPIR-V but not in other languages, e.g. gl_SampleMask[] is an array in SPIR-V but not in Metal.
6821 // By throwing away the index, we imply the index was 0, which it must be for gl_SampleMask.
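		// For example (illustrative): on such a backend, an access chain to gl_SampleMask[0] simply emits
		// the builtin expression itself, with no trailing [0].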
6822 else if (!builtin_translates_to_nonarray(BuiltIn(get_decoration(base, DecorationBuiltIn))))
6823 {
6824 append_index(index);
6825 }
6826
6827 type_id = type->parent_type;
6828 type = &get<SPIRType>(type_id);
6829
6830 access_chain_is_arrayed = true;
6831 }
6832 // For structs, the index refers to a constant, which indexes into the members.
6833 // We also check if this member is a builtin, since we then replace the entire expression with the builtin one.
6834 else if (type->basetype == SPIRType::Struct)
6835 {
6836 if (!index_is_literal)
6837 index = get<SPIRConstant>(index).scalar();
6838
6839 if (index >= type->member_types.size())
6840 SPIRV_CROSS_THROW("Member index is out of bounds!");
6841
6842 BuiltIn builtin;
6843 if (is_member_builtin(*type, index, &builtin))
6844 {
6845 if (access_chain_is_arrayed)
6846 {
6847 expr += ".";
6848 expr += builtin_to_glsl(builtin, type->storage);
6849 }
6850 else
6851 expr = builtin_to_glsl(builtin, type->storage);
6852 }
6853 else
6854 {
6855 // If the member has a qualified name, use it as the entire chain
6856 string qual_mbr_name = get_member_qualified_name(type_id, index);
6857 if (!qual_mbr_name.empty())
6858 expr = qual_mbr_name;
6859 else
6860 expr += to_member_reference(base, *type, index, ptr_chain);
6861 }
6862
6863 if (has_member_decoration(type->self, index, DecorationInvariant))
6864 is_invariant = true;
6865
6866 is_packed = member_is_packed_physical_type(*type, index);
6867 if (member_is_remapped_physical_type(*type, index))
6868 physical_type = get_extended_member_decoration(type->self, index, SPIRVCrossDecorationPhysicalTypeID);
6869 else
6870 physical_type = 0;
6871
6872 row_major_matrix_needs_conversion = member_is_non_native_row_major_matrix(*type, index);
6873 type = &get<SPIRType>(type->member_types[index]);
6874 }
6875 // Matrix -> Vector
6876 else if (type->columns > 1)
6877 {
6878 // If we have a row-major matrix here, we need to defer any transpose in case this access chain
6879 // is used to store a column. We can resolve it right here and now if we access a scalar directly,
6880 // by flipping indexing order of the matrix.
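			// For example (illustrative): a chain m[2][1] on a row-major matrix is emitted as m[1][2]
			// (indices flipped in the Vector -> Scalar case below); if only the column is accessed,
			// the transpose is deferred to the consumer of this expression.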
6881
6882 expr += "[";
6883 if (index_is_literal)
6884 expr += convert_to_string(index);
6885 else
6886 expr += to_expression(index, register_expression_read);
6887 expr += "]";
6888
6889 type_id = type->parent_type;
6890 type = &get<SPIRType>(type_id);
6891 }
6892 // Vector -> Scalar
6893 else if (type->vecsize > 1)
6894 {
6895 string deferred_index;
6896 if (row_major_matrix_needs_conversion)
6897 {
6898 // Flip indexing order.
6899 auto column_index = expr.find_last_of('[');
6900 if (column_index != string::npos)
6901 {
6902 deferred_index = expr.substr(column_index);
6903 expr.resize(column_index);
6904 }
6905 }
6906
6907 if (index_is_literal && !is_packed && !row_major_matrix_needs_conversion)
6908 {
6909 expr += ".";
6910 expr += index_to_swizzle(index);
6911 }
6912 else if (ir.ids[index].get_type() == TypeConstant && !is_packed && !row_major_matrix_needs_conversion)
6913 {
6914 auto &c = get<SPIRConstant>(index);
6915 if (c.specialization)
6916 {
6917 // If the index is a spec constant, we cannot turn extract into a swizzle.
6918 expr += join("[", to_expression(index), "]");
6919 }
6920 else
6921 {
6922 expr += ".";
6923 expr += index_to_swizzle(c.scalar());
6924 }
6925 }
6926 else if (index_is_literal)
6927 {
6928 // For packed vectors, we can only access them as an array, not by swizzle.
6929 expr += join("[", index, "]");
6930 }
6931 else
6932 {
6933 expr += "[";
6934 expr += to_expression(index, register_expression_read);
6935 expr += "]";
6936 }
6937
6938 expr += deferred_index;
6939 row_major_matrix_needs_conversion = false;
6940
6941 is_packed = false;
6942 physical_type = 0;
6943 type_id = type->parent_type;
6944 type = &get<SPIRType>(type_id);
6945 }
6946 else if (!backend.allow_truncated_access_chain)
6947 SPIRV_CROSS_THROW("Cannot subdivide a scalar value!");
6948 }
6949
6950 if (pending_array_enclose)
6951 {
6952 		SPIRV_CROSS_THROW("Flattening of multidimensional arrays was enabled, "
6953 "but the access chain was terminated in the middle of a multidimensional array. "
6954 "This is not supported.");
6955 }
6956
6957 if (meta)
6958 {
6959 meta->need_transpose = row_major_matrix_needs_conversion;
6960 meta->storage_is_packed = is_packed;
6961 meta->storage_is_invariant = is_invariant;
6962 meta->storage_physical_type = physical_type;
6963 }
6964
6965 return expr;
6966 }
6967
6968 string CompilerGLSL::to_flattened_struct_member(const SPIRVariable &var, uint32_t index)
6969 {
6970 auto &type = get<SPIRType>(var.basetype);
6971 return sanitize_underscores(join(to_name(var.self), "_", to_member_name(type, index)));
6972 }
6973
6974 string CompilerGLSL::access_chain(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type,
6975 AccessChainMeta *meta, bool ptr_chain)
6976 {
6977 if (flattened_buffer_blocks.count(base))
6978 {
6979 uint32_t matrix_stride = 0;
6980 bool need_transpose = false;
6981 flattened_access_chain_offset(expression_type(base), indices, count, 0, 16, &need_transpose, &matrix_stride,
6982 ptr_chain);
6983
6984 if (meta)
6985 {
6986 meta->need_transpose = target_type.columns > 1 && need_transpose;
6987 meta->storage_is_packed = false;
6988 }
6989
6990 return flattened_access_chain(base, indices, count, target_type, 0, matrix_stride, need_transpose);
6991 }
6992 else if (flattened_structs.count(base) && count > 0)
6993 {
6994 AccessChainFlags flags = ACCESS_CHAIN_CHAIN_ONLY_BIT | ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT;
6995 if (ptr_chain)
6996 flags |= ACCESS_CHAIN_PTR_CHAIN_BIT;
6997
6998 auto chain = access_chain_internal(base, indices, count, flags, nullptr).substr(1);
6999 if (meta)
7000 {
7001 meta->need_transpose = false;
7002 meta->storage_is_packed = false;
7003 }
7004 return sanitize_underscores(join(to_name(base), "_", chain));
7005 }
7006 else
7007 {
7008 AccessChainFlags flags = ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT;
7009 if (ptr_chain)
7010 flags |= ACCESS_CHAIN_PTR_CHAIN_BIT;
7011 return access_chain_internal(base, indices, count, flags, meta);
7012 }
7013 }
7014
7015 string CompilerGLSL::load_flattened_struct(SPIRVariable &var)
7016 {
7017 auto expr = type_to_glsl_constructor(get<SPIRType>(var.basetype));
7018 expr += '(';
7019
7020 auto &type = get<SPIRType>(var.basetype);
7021 for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
7022 {
7023 if (i)
7024 expr += ", ";
7025
7026 // Flatten the varyings.
7027 // Apply name transformation for flattened I/O blocks.
7028 expr += to_flattened_struct_member(var, i);
7029 }
7030 expr += ')';
7031 return expr;
7032 }
7033
7034 void CompilerGLSL::store_flattened_struct(SPIRVariable &var, uint32_t value)
7035 {
7036 // We're trying to store a structure which has been flattened.
7037 // Need to copy members one by one.
7038 auto rhs = to_expression(value);
7039
7040 // Store result locally.
7041 // Since we're declaring a variable potentially multiple times here,
7042 // store the variable in an isolated scope.
7043 begin_scope();
7044 statement(variable_decl_function_local(var), " = ", rhs, ";");
7045
7046 auto &type = get<SPIRType>(var.basetype);
7047 for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
7048 {
7049 // Flatten the varyings.
7050 // Apply name transformation for flattened I/O blocks.
7051
7052 auto lhs = sanitize_underscores(join(to_name(var.self), "_", to_member_name(type, i)));
7053 rhs = join(to_name(var.self), ".", to_member_name(type, i));
7054 statement(lhs, " = ", rhs, ";");
7055 }
7056 end_scope();
7057 }
7058
7059 std::string CompilerGLSL::flattened_access_chain(uint32_t base, const uint32_t *indices, uint32_t count,
7060 const SPIRType &target_type, uint32_t offset, uint32_t matrix_stride,
7061 bool need_transpose)
7062 {
7063 if (!target_type.array.empty())
7064 		SPIRV_CROSS_THROW("Access chains that result in an array cannot be flattened");
7065 else if (target_type.basetype == SPIRType::Struct)
7066 return flattened_access_chain_struct(base, indices, count, target_type, offset);
7067 else if (target_type.columns > 1)
7068 return flattened_access_chain_matrix(base, indices, count, target_type, offset, matrix_stride, need_transpose);
7069 else
7070 return flattened_access_chain_vector(base, indices, count, target_type, offset, matrix_stride, need_transpose);
7071 }
7072
7073 std::string CompilerGLSL::flattened_access_chain_struct(uint32_t base, const uint32_t *indices, uint32_t count,
7074 const SPIRType &target_type, uint32_t offset)
7075 {
7076 std::string expr;
7077
7078 expr += type_to_glsl_constructor(target_type);
7079 expr += "(";
7080
7081 for (uint32_t i = 0; i < uint32_t(target_type.member_types.size()); ++i)
7082 {
7083 if (i != 0)
7084 expr += ", ";
7085
7086 const SPIRType &member_type = get<SPIRType>(target_type.member_types[i]);
7087 uint32_t member_offset = type_struct_member_offset(target_type, i);
7088
7089 // The access chain terminates at the struct, so we need to find matrix strides and row-major information
7090 // ahead of time.
7091 bool need_transpose = false;
7092 uint32_t matrix_stride = 0;
7093 if (member_type.columns > 1)
7094 {
7095 need_transpose = combined_decoration_for_member(target_type, i).get(DecorationRowMajor);
7096 matrix_stride = type_struct_member_matrix_stride(target_type, i);
7097 }
7098
7099 auto tmp = flattened_access_chain(base, indices, count, member_type, offset + member_offset, matrix_stride,
7100 need_transpose);
7101
7102 // Cannot forward transpositions, so resolve them here.
7103 if (need_transpose)
7104 expr += convert_row_major_matrix(tmp, member_type, 0, false);
7105 else
7106 expr += tmp;
7107 }
7108
7109 expr += ")";
7110
7111 return expr;
7112 }
7113
7114 std::string CompilerGLSL::flattened_access_chain_matrix(uint32_t base, const uint32_t *indices, uint32_t count,
7115 const SPIRType &target_type, uint32_t offset,
7116 uint32_t matrix_stride, bool need_transpose)
7117 {
7118 assert(matrix_stride);
7119 SPIRType tmp_type = target_type;
7120 if (need_transpose)
7121 swap(tmp_type.vecsize, tmp_type.columns);
7122
7123 std::string expr;
7124
7125 expr += type_to_glsl_constructor(tmp_type);
7126 expr += "(";
7127
7128 for (uint32_t i = 0; i < tmp_type.columns; i++)
7129 {
7130 if (i != 0)
7131 expr += ", ";
7132
7133 expr += flattened_access_chain_vector(base, indices, count, tmp_type, offset + i * matrix_stride, matrix_stride,
7134 /* need_transpose= */ false);
7135 }
7136
7137 expr += ")";
7138
7139 return expr;
7140 }
7141
7142 std::string CompilerGLSL::flattened_access_chain_vector(uint32_t base, const uint32_t *indices, uint32_t count,
7143 const SPIRType &target_type, uint32_t offset,
7144 uint32_t matrix_stride, bool need_transpose)
7145 {
7146 auto result = flattened_access_chain_offset(expression_type(base), indices, count, offset, 16);
7147
7148 auto buffer_name = to_name(expression_type(base).self);
7149
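	// The flattened block is declared as an array of vec4-sized words, so the emitted expression has the form
	// BufferName[<dynamic terms> + constant_index].<swizzle>; e.g. (illustrative name) a float at byte offset 20
	// becomes BufferName[1].y.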
7150 if (need_transpose)
7151 {
7152 std::string expr;
7153
7154 if (target_type.vecsize > 1)
7155 {
7156 expr += type_to_glsl_constructor(target_type);
7157 expr += "(";
7158 }
7159
7160 for (uint32_t i = 0; i < target_type.vecsize; ++i)
7161 {
7162 if (i != 0)
7163 expr += ", ";
7164
7165 uint32_t component_offset = result.second + i * matrix_stride;
7166
7167 assert(component_offset % (target_type.width / 8) == 0);
7168 uint32_t index = component_offset / (target_type.width / 8);
7169
7170 expr += buffer_name;
7171 expr += "[";
7172 expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... that is either empty or ends with a +
7173 expr += convert_to_string(index / 4);
7174 expr += "]";
7175
7176 expr += vector_swizzle(1, index % 4);
7177 }
7178
7179 if (target_type.vecsize > 1)
7180 {
7181 expr += ")";
7182 }
7183
7184 return expr;
7185 }
7186 else
7187 {
7188 assert(result.second % (target_type.width / 8) == 0);
7189 uint32_t index = result.second / (target_type.width / 8);
7190
7191 std::string expr;
7192
7193 expr += buffer_name;
7194 expr += "[";
7195 expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... that is either empty or ends with a +
7196 expr += convert_to_string(index / 4);
7197 expr += "]";
7198
7199 expr += vector_swizzle(target_type.vecsize, index % 4);
7200
7201 return expr;
7202 }
7203 }
7204
7205 std::pair<std::string, uint32_t> CompilerGLSL::flattened_access_chain_offset(
7206 const SPIRType &basetype, const uint32_t *indices, uint32_t count, uint32_t offset, uint32_t word_stride,
7207 bool *need_transpose, uint32_t *out_matrix_stride, bool ptr_chain)
7208 {
7209 // Start traversing type hierarchy at the proper non-pointer types.
7210 const auto *type = &get_pointee_type(basetype);
7211
7212 // This holds the type of the current pointer which we are traversing through.
7213 // We always start out from a struct type which is the block.
7214 // This is primarily used to reflect the array strides and matrix strides later.
7215 	// For the first access chain index, type_id won't be needed, so just keep it as 0; it will be set
7216 	// accordingly as members of structs are accessed.
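	// The result is a pair of (dynamic index expression, constant byte offset). E.g. (illustrative), indexing
	// element i of a vec4 array placed at struct offset 16 could yield ("i * 1 + ", 16): the dynamic part is
	// expressed in word_stride units and either ends with " + " or is empty.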
7217 assert(type->basetype == SPIRType::Struct);
7218 uint32_t type_id = 0;
7219
7220 std::string expr;
7221
7222 // Inherit matrix information in case we are access chaining a vector which might have come from a row major layout.
7223 bool row_major_matrix_needs_conversion = need_transpose ? *need_transpose : false;
7224 uint32_t matrix_stride = out_matrix_stride ? *out_matrix_stride : 0;
7225
7226 for (uint32_t i = 0; i < count; i++)
7227 {
7228 uint32_t index = indices[i];
7229
7230 // Pointers
7231 if (ptr_chain && i == 0)
7232 {
7233 // Here, the pointer type will be decorated with an array stride.
7234 uint32_t array_stride = get_decoration(basetype.self, DecorationArrayStride);
7235 if (!array_stride)
7236 SPIRV_CROSS_THROW("SPIR-V does not define ArrayStride for buffer block.");
7237
7238 auto *constant = maybe_get<SPIRConstant>(index);
7239 if (constant)
7240 {
7241 // Constant array access.
7242 offset += constant->scalar() * array_stride;
7243 }
7244 else
7245 {
7246 // Dynamic array access.
7247 if (array_stride % word_stride)
7248 {
7249 SPIRV_CROSS_THROW(
7250 "Array stride for dynamic indexing must be divisible by the size of a 4-component vector. "
7251 "Likely culprit here is a float or vec2 array inside a push constant block which is std430. "
7252 "This cannot be flattened. Try using std140 layout instead.");
7253 }
7254
7255 expr += to_enclosed_expression(index);
7256 expr += " * ";
7257 expr += convert_to_string(array_stride / word_stride);
7258 expr += " + ";
7259 }
7260 // Type ID is unchanged.
7261 }
7262 // Arrays
7263 else if (!type->array.empty())
7264 {
7265 // Here, the type_id will be a type ID for the array type itself.
7266 uint32_t array_stride = get_decoration(type_id, DecorationArrayStride);
7267 if (!array_stride)
7268 SPIRV_CROSS_THROW("SPIR-V does not define ArrayStride for buffer block.");
7269
7270 auto *constant = maybe_get<SPIRConstant>(index);
7271 if (constant)
7272 {
7273 // Constant array access.
7274 offset += constant->scalar() * array_stride;
7275 }
7276 else
7277 {
7278 // Dynamic array access.
7279 if (array_stride % word_stride)
7280 {
7281 SPIRV_CROSS_THROW(
7282 "Array stride for dynamic indexing must be divisible by the size of a 4-component vector. "
7283 "Likely culprit here is a float or vec2 array inside a push constant block which is std430. "
7284 "This cannot be flattened. Try using std140 layout instead.");
7285 }
7286
7287 expr += to_enclosed_expression(index, false);
7288 expr += " * ";
7289 expr += convert_to_string(array_stride / word_stride);
7290 expr += " + ";
7291 }
7292
7293 uint32_t parent_type = type->parent_type;
7294 type = &get<SPIRType>(parent_type);
7295 type_id = parent_type;
7296
7297 // Type ID now refers to the array type with one less dimension.
7298 }
7299 // For structs, the index refers to a constant, which indexes into the members.
7300 // We also check if this member is a builtin, since we then replace the entire expression with the builtin one.
7301 else if (type->basetype == SPIRType::Struct)
7302 {
7303 index = get<SPIRConstant>(index).scalar();
7304
7305 if (index >= type->member_types.size())
7306 SPIRV_CROSS_THROW("Member index is out of bounds!");
7307
7308 offset += type_struct_member_offset(*type, index);
7309 type_id = type->member_types[index];
7310
7311 auto &struct_type = *type;
7312 type = &get<SPIRType>(type->member_types[index]);
7313
7314 if (type->columns > 1)
7315 {
7316 matrix_stride = type_struct_member_matrix_stride(struct_type, index);
7317 row_major_matrix_needs_conversion =
7318 combined_decoration_for_member(struct_type, index).get(DecorationRowMajor);
7319 }
7320 else
7321 row_major_matrix_needs_conversion = false;
7322 }
7323 // Matrix -> Vector
7324 else if (type->columns > 1)
7325 {
7326 auto *constant = maybe_get<SPIRConstant>(index);
7327 if (constant)
7328 {
7329 index = get<SPIRConstant>(index).scalar();
7330 offset += index * (row_major_matrix_needs_conversion ? (type->width / 8) : matrix_stride);
7331 }
7332 else
7333 {
7334 uint32_t indexing_stride = row_major_matrix_needs_conversion ? (type->width / 8) : matrix_stride;
7335 // Dynamic array access.
7336 if (indexing_stride % word_stride)
7337 {
7338 SPIRV_CROSS_THROW(
7339 "Matrix stride for dynamic indexing must be divisible by the size of a 4-component vector. "
7340 "Likely culprit here is a row-major matrix being accessed dynamically. "
7341 "This cannot be flattened. Try using std140 layout instead.");
7342 }
7343
7344 expr += to_enclosed_expression(index, false);
7345 expr += " * ";
7346 expr += convert_to_string(indexing_stride / word_stride);
7347 expr += " + ";
7348 }
7349
7350 uint32_t parent_type = type->parent_type;
7351 type = &get<SPIRType>(type->parent_type);
7352 type_id = parent_type;
7353 }
7354 // Vector -> Scalar
7355 else if (type->vecsize > 1)
7356 {
7357 auto *constant = maybe_get<SPIRConstant>(index);
7358 if (constant)
7359 {
7360 index = get<SPIRConstant>(index).scalar();
7361 offset += index * (row_major_matrix_needs_conversion ? matrix_stride : (type->width / 8));
7362 }
7363 else
7364 {
7365 uint32_t indexing_stride = row_major_matrix_needs_conversion ? matrix_stride : (type->width / 8);
7366
7367 // Dynamic array access.
7368 if (indexing_stride % word_stride)
7369 {
7370 SPIRV_CROSS_THROW(
7371 "Stride for dynamic vector indexing must be divisible by the size of a 4-component vector. "
7372 "This cannot be flattened in legacy targets.");
7373 }
7374
7375 expr += to_enclosed_expression(index, false);
7376 expr += " * ";
7377 expr += convert_to_string(indexing_stride / word_stride);
7378 expr += " + ";
7379 }
7380
7381 uint32_t parent_type = type->parent_type;
7382 type = &get<SPIRType>(type->parent_type);
7383 type_id = parent_type;
7384 }
7385 else
7386 SPIRV_CROSS_THROW("Cannot subdivide a scalar value!");
7387 }
7388
7389 if (need_transpose)
7390 *need_transpose = row_major_matrix_needs_conversion;
7391 if (out_matrix_stride)
7392 *out_matrix_stride = matrix_stride;
7393
7394 return std::make_pair(expr, offset);
7395 }
7396
7397 bool CompilerGLSL::should_dereference(uint32_t id)
7398 {
7399 const auto &type = expression_type(id);
7400 // Non-pointer expressions don't need to be dereferenced.
7401 if (!type.pointer)
7402 return false;
7403
7404 // Handles shouldn't be dereferenced either.
7405 if (!expression_is_lvalue(id))
7406 return false;
7407
7408 // If id is a variable but not a phi variable, we should not dereference it.
7409 if (auto *var = maybe_get<SPIRVariable>(id))
7410 return var->phi_variable;
7411
7412 // If id is an access chain, we should not dereference it.
7413 if (auto *expr = maybe_get<SPIRExpression>(id))
7414 return !expr->access_chain;
7415
7416 // Otherwise, we should dereference this pointer expression.
7417 return true;
7418 }
7419
7420 bool CompilerGLSL::should_forward(uint32_t id) const
7421 {
7422 	// If id is a variable, we will try to forward it regardless of the force_temporary check below.
7423 	// This is important because otherwise we would get local sampler copies (highp sampler2D foo = bar), which are invalid in OpenGL GLSL.
7424 auto *var = maybe_get<SPIRVariable>(id);
7425 if (var && var->forwardable)
7426 return true;
7427
7428 // For debugging emit temporary variables for all expressions
7429 if (options.force_temporary)
7430 return false;
7431
7432 // Immutable expression can always be forwarded.
7433 if (is_immutable(id))
7434 return true;
7435
7436 return false;
7437 }
7438
7439 bool CompilerGLSL::should_suppress_usage_tracking(uint32_t id) const
7440 {
7441 	// Used only by opcodes which don't do any real "work"; they just swizzle data in some fashion.
7442 return !expression_is_forwarded(id) || expression_suppresses_usage_tracking(id);
7443 }
7444
7445 void CompilerGLSL::track_expression_read(uint32_t id)
7446 {
7447 switch (ir.ids[id].get_type())
7448 {
7449 case TypeExpression:
7450 {
7451 auto &e = get<SPIRExpression>(id);
7452 for (auto implied_read : e.implied_read_expressions)
7453 track_expression_read(implied_read);
7454 break;
7455 }
7456
7457 case TypeAccessChain:
7458 {
7459 auto &e = get<SPIRAccessChain>(id);
7460 for (auto implied_read : e.implied_read_expressions)
7461 track_expression_read(implied_read);
7462 break;
7463 }
7464
7465 default:
7466 break;
7467 }
7468
7469 // If we try to read a forwarded temporary more than once we will stamp out possibly complex code twice.
7470 // In this case, it's better to just bind the complex expression to the temporary and read that temporary twice.
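	// E.g. if a forwarded expression such as (a * b + c) ends up being read twice, we hoist it into a
	// temporary rather than emitting the arithmetic twice at each use site.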
7471 if (expression_is_forwarded(id) && !expression_suppresses_usage_tracking(id))
7472 {
7473 auto &v = expression_usage_counts[id];
7474 v++;
7475
7476 if (v >= 2)
7477 {
7478 //if (v == 2)
7479 // fprintf(stderr, "ID %u was forced to temporary due to more than 1 expression use!\n", id);
7480
7481 forced_temporaries.insert(id);
7482 // Force a recompile after this pass to avoid forwarding this variable.
7483 force_recompile();
7484 }
7485 }
7486 }
7487
7488 bool CompilerGLSL::args_will_forward(uint32_t id, const uint32_t *args, uint32_t num_args, bool pure)
7489 {
7490 if (forced_temporaries.find(id) != end(forced_temporaries))
7491 return false;
7492
7493 for (uint32_t i = 0; i < num_args; i++)
7494 if (!should_forward(args[i]))
7495 return false;
7496
7497 // We need to forward globals as well.
7498 if (!pure)
7499 {
7500 for (auto global : global_variables)
7501 if (!should_forward(global))
7502 return false;
7503 for (auto aliased : aliased_variables)
7504 if (!should_forward(aliased))
7505 return false;
7506 }
7507
7508 return true;
7509 }
7510
7511 void CompilerGLSL::register_impure_function_call()
7512 {
7513 // Impure functions can modify globals and aliased variables, so invalidate them as well.
7514 for (auto global : global_variables)
7515 flush_dependees(get<SPIRVariable>(global));
7516 for (auto aliased : aliased_variables)
7517 flush_dependees(get<SPIRVariable>(aliased));
7518 }
7519
7520 void CompilerGLSL::register_call_out_argument(uint32_t id)
7521 {
7522 register_write(id);
7523
7524 auto *var = maybe_get<SPIRVariable>(id);
7525 if (var)
7526 flush_variable_declaration(var->self);
7527 }
7528
7529 string CompilerGLSL::variable_decl_function_local(SPIRVariable &var)
7530 {
7531 // These variables are always function local,
7532 // so make sure we emit the variable without storage qualifiers.
7533 // Some backends will inject custom variables locally in a function
7534 // with a storage qualifier which is not function-local.
7535 auto old_storage = var.storage;
7536 var.storage = StorageClassFunction;
7537 auto expr = variable_decl(var);
7538 var.storage = old_storage;
7539 return expr;
7540 }
7541
7542 void CompilerGLSL::emit_variable_temporary_copies(const SPIRVariable &var)
7543 {
7544 // Ensure that we declare phi-variable copies even if the original declaration isn't deferred
7545 if (var.allocate_temporary_copy && !flushed_phi_variables.count(var.self))
7546 {
7547 auto &type = get<SPIRType>(var.basetype);
7548 auto &flags = get_decoration_bitset(var.self);
7549 statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, join("_", var.self, "_copy")), ";");
7550 flushed_phi_variables.insert(var.self);
7551 }
7552 }
7553
7554 void CompilerGLSL::flush_variable_declaration(uint32_t id)
7555 {
7556 // Ensure that we declare phi-variable copies even if the original declaration isn't deferred
7557 auto *var = maybe_get<SPIRVariable>(id);
7558 if (var && var->deferred_declaration)
7559 {
7560 statement(variable_decl_function_local(*var), ";");
7561 var->deferred_declaration = false;
7562 }
7563 if (var)
7564 {
7565 emit_variable_temporary_copies(*var);
7566 }
7567 }
7568
7569 bool CompilerGLSL::remove_duplicate_swizzle(string &op)
7570 {
7571 auto pos = op.find_last_of('.');
7572 if (pos == string::npos || pos == 0)
7573 return false;
7574
7575 string final_swiz = op.substr(pos + 1, string::npos);
7576
7577 if (backend.swizzle_is_function)
7578 {
7579 if (final_swiz.size() < 2)
7580 return false;
7581
7582 if (final_swiz.substr(final_swiz.size() - 2, string::npos) == "()")
7583 final_swiz.erase(final_swiz.size() - 2, string::npos);
7584 else
7585 return false;
7586 }
7587
7588 // Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar.
7589 // If so, and previous swizzle is of same length,
7590 // we can drop the final swizzle altogether.
7591 for (uint32_t i = 0; i < final_swiz.size(); i++)
7592 {
7593 static const char expected[] = { 'x', 'y', 'z', 'w' };
7594 if (i >= 4 || final_swiz[i] != expected[i])
7595 return false;
7596 }
7597
7598 auto prevpos = op.find_last_of('.', pos - 1);
7599 if (prevpos == string::npos)
7600 return false;
7601
7602 prevpos++;
7603
7604 // Make sure there are only swizzles here ...
7605 for (auto i = prevpos; i < pos; i++)
7606 {
7607 if (op[i] < 'w' || op[i] > 'z')
7608 {
7609 // If swizzles are foo.xyz() like in C++ backend for example, check for that.
7610 if (backend.swizzle_is_function && i + 2 == pos && op[i] == '(' && op[i + 1] == ')')
7611 break;
7612 return false;
7613 }
7614 }
7615
7616 // If original swizzle is large enough, just carve out the components we need.
7617 // E.g. foobar.wyx.xy will turn into foobar.wy.
7618 if (pos - prevpos >= final_swiz.size())
7619 {
7620 op.erase(prevpos + final_swiz.size(), string::npos);
7621
7622 // Add back the function call ...
7623 if (backend.swizzle_is_function)
7624 op += "()";
7625 }
7626 return true;
7627 }
7628
7629 // Optimizes away vector swizzles where we have something like
7630 // vec3 foo;
7631 // foo.xyz <-- swizzle expression does nothing.
7632 // This is a very common pattern after OpCompositeConstruct.
7633 bool CompilerGLSL::remove_unity_swizzle(uint32_t base, string &op)
7634 {
7635 auto pos = op.find_last_of('.');
7636 if (pos == string::npos || pos == 0)
7637 return false;
7638
7639 string final_swiz = op.substr(pos + 1, string::npos);
7640
7641 if (backend.swizzle_is_function)
7642 {
7643 if (final_swiz.size() < 2)
7644 return false;
7645
7646 if (final_swiz.substr(final_swiz.size() - 2, string::npos) == "()")
7647 final_swiz.erase(final_swiz.size() - 2, string::npos);
7648 else
7649 return false;
7650 }
7651
7652 // Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar.
7653 // If so, and previous swizzle is of same length,
7654 // we can drop the final swizzle altogether.
7655 for (uint32_t i = 0; i < final_swiz.size(); i++)
7656 {
7657 static const char expected[] = { 'x', 'y', 'z', 'w' };
7658 if (i >= 4 || final_swiz[i] != expected[i])
7659 return false;
7660 }
7661
7662 auto &type = expression_type(base);
7663
7664 // Sanity checking ...
7665 assert(type.columns == 1 && type.array.empty());
7666
7667 if (type.vecsize == final_swiz.size())
7668 op.erase(pos, string::npos);
7669 return true;
7670 }
7671
7672 string CompilerGLSL::build_composite_combiner(uint32_t return_type, const uint32_t *elems, uint32_t length)
7673 {
7674 ID base = 0;
7675 string op;
7676 string subop;
7677
7678 // Can only merge swizzles for vectors.
7679 auto &type = get<SPIRType>(return_type);
7680 bool can_apply_swizzle_opt = type.basetype != SPIRType::Struct && type.array.empty() && type.columns == 1;
7681 bool swizzle_optimization = false;
7682
7683 for (uint32_t i = 0; i < length; i++)
7684 {
7685 auto *e = maybe_get<SPIRExpression>(elems[i]);
7686
7687 // If we're merging another scalar which belongs to the same base
7688 		// object, just merge the swizzles to avoid triggering more than one expression read where possible.
7689 if (can_apply_swizzle_opt && e && e->base_expression && e->base_expression == base)
7690 {
7691 // Only supposed to be used for vector swizzle -> scalar.
7692 assert(!e->expression.empty() && e->expression.front() == '.');
7693 subop += e->expression.substr(1, string::npos);
7694 swizzle_optimization = true;
7695 }
7696 else
7697 {
7698 // We'll likely end up with duplicated swizzles, e.g.
7699 // foobar.xyz.xyz from patterns like
7700 // OpVectorShuffle
7701 // OpCompositeExtract x 3
7702 // OpCompositeConstruct 3x + other scalar.
7703 // Just modify op in-place.
7704 if (swizzle_optimization)
7705 {
7706 if (backend.swizzle_is_function)
7707 subop += "()";
7708
7709 // Don't attempt to remove unity swizzling if we managed to remove duplicate swizzles.
7710 // The base "foo" might be vec4, while foo.xyz is vec3 (OpVectorShuffle) and looks like a vec3 due to the .xyz tacked on.
7711 // We only want to remove the swizzles if we're certain that the resulting base will be the same vecsize.
7712 // Essentially, we can only remove one set of swizzles, since that's what we have control over ...
7713 // Case 1:
7714 // foo.yxz.xyz: Duplicate swizzle kicks in, giving foo.yxz, we are done.
7715 // foo.yxz was the result of OpVectorShuffle and we don't know the type of foo.
7716 // Case 2:
7717 // foo.xyz: Duplicate swizzle won't kick in.
7718 // If foo is vec3, we can remove xyz, giving just foo.
7719 if (!remove_duplicate_swizzle(subop))
7720 remove_unity_swizzle(base, subop);
7721
7722 // Strips away redundant parens if we created them during component extraction.
7723 strip_enclosed_expression(subop);
7724 swizzle_optimization = false;
7725 op += subop;
7726 }
7727 else
7728 op += subop;
7729
7730 if (i)
7731 op += ", ";
7732 subop = to_composite_constructor_expression(elems[i]);
7733 }
7734
7735 base = e ? e->base_expression : ID(0);
7736 }
7737
7738 if (swizzle_optimization)
7739 {
7740 if (backend.swizzle_is_function)
7741 subop += "()";
7742
7743 if (!remove_duplicate_swizzle(subop))
7744 remove_unity_swizzle(base, subop);
7745 // Strips away redundant parens if we created them during component extraction.
7746 strip_enclosed_expression(subop);
7747 }
7748
7749 op += subop;
7750 return op;
7751 }
7752
7753 bool CompilerGLSL::skip_argument(uint32_t id) const
7754 {
7755 if (!combined_image_samplers.empty() || !options.vulkan_semantics)
7756 {
7757 auto &type = expression_type(id);
7758 if (type.basetype == SPIRType::Sampler || (type.basetype == SPIRType::Image && type.image.sampled == 1))
7759 return true;
7760 }
7761 return false;
7762 }
7763
7764 bool CompilerGLSL::optimize_read_modify_write(const SPIRType &type, const string &lhs, const string &rhs)
7765 {
7766 // Do this with strings because we have a very clear pattern we can check for and it avoids
7767 // adding lots of special cases to the code emission.
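	// E.g. "a = a + b;" becomes "a += b;", and "i = i + 1;" becomes "i++;" (illustrative).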
7768 if (rhs.size() < lhs.size() + 3)
7769 return false;
7770
7771 // Do not optimize matrices. They are a bit awkward to reason about in general
7772 	// (in which order do the operations happen?), and it does not work on MSL anyway.
7773 if (type.vecsize > 1 && type.columns > 1)
7774 return false;
7775
7776 auto index = rhs.find(lhs);
7777 if (index != 0)
7778 return false;
7779
7780 // TODO: Shift operators, but it's not important for now.
7781 auto op = rhs.find_first_of("+-/*%|&^", lhs.size() + 1);
7782 if (op != lhs.size() + 1)
7783 return false;
7784
7785 // Check that the op is followed by space. This excludes && and ||.
7786 if (rhs[op + 1] != ' ')
7787 return false;
7788
7789 char bop = rhs[op];
7790 auto expr = rhs.substr(lhs.size() + 3);
7791 	// Try to find increments and decrements. Makes it look neater, as += 1 / -= 1 is fairly rare to see in real code.
7792 // Find some common patterns which are equivalent.
7793 if ((bop == '+' || bop == '-') && (expr == "1" || expr == "uint(1)" || expr == "1u" || expr == "int(1u)"))
7794 statement(lhs, bop, bop, ";");
7795 else
7796 statement(lhs, " ", bop, "= ", expr, ";");
7797 return true;
7798 }
7799
7800 void CompilerGLSL::register_control_dependent_expression(uint32_t expr)
7801 {
7802 if (forwarded_temporaries.find(expr) == end(forwarded_temporaries))
7803 return;
7804
7805 assert(current_emitting_block);
7806 current_emitting_block->invalidate_expressions.push_back(expr);
7807 }
7808
7809 void CompilerGLSL::emit_block_instructions(SPIRBlock &block)
7810 {
7811 current_emitting_block = █
7812 for (auto &op : block.ops)
7813 emit_instruction(op);
7814 current_emitting_block = nullptr;
7815 }
7816
7817 void CompilerGLSL::disallow_forwarding_in_expression_chain(const SPIRExpression &expr)
7818 {
7819 // Allow trivially forwarded expressions like OpLoad or trivial shuffles,
7820 // these will be marked as having suppressed usage tracking.
7821 // Our only concern is to make sure arithmetic operations are done in similar ways.
7822 if (expression_is_forwarded(expr.self) && !expression_suppresses_usage_tracking(expr.self) &&
7823 forced_invariant_temporaries.count(expr.self) == 0)
7824 {
7825 forced_temporaries.insert(expr.self);
7826 forced_invariant_temporaries.insert(expr.self);
7827 force_recompile();
7828
7829 for (auto &dependent : expr.expression_dependencies)
7830 disallow_forwarding_in_expression_chain(get<SPIRExpression>(dependent));
7831 }
7832 }
7833
7834 void CompilerGLSL::handle_store_to_invariant_variable(uint32_t store_id, uint32_t value_id)
7835 {
7836 // Variables or access chains marked invariant are complicated. We will need to make sure the code-gen leading up to
7837 // this variable is consistent. The failure case for SPIRV-Cross is when an expression is forced to a temporary
7838 // in one translation unit, but not another, e.g. due to multiple use of an expression.
7839 // This causes variance despite the output variable being marked invariant, so the solution here is to force all dependent
7840 // expressions to be temporaries.
7841 // It is uncertain if this is enough to support invariant in all possible cases, but it should be good enough
7842 // for all reasonable uses of invariant.
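	// E.g. if a store target such as gl_Position is marked invariant, every forwarded expression feeding it is
	// forced into a temporary so that all translation units emit the same arithmetic for it.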
7843 if (!has_decoration(store_id, DecorationInvariant))
7844 return;
7845
7846 auto *expr = maybe_get<SPIRExpression>(value_id);
7847 if (!expr)
7848 return;
7849
7850 disallow_forwarding_in_expression_chain(*expr);
7851 }
7852
7853 void CompilerGLSL::emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression)
7854 {
7855 auto rhs = to_pointer_expression(rhs_expression);
7856
7857 // Statements to OpStore may be empty if it is a struct with zero members. Just forward the store to /dev/null.
7858 if (!rhs.empty())
7859 {
7860 handle_store_to_invariant_variable(lhs_expression, rhs_expression);
7861
7862 auto lhs = to_dereferenced_expression(lhs_expression);
7863
7864 // We might need to bitcast in order to store to a builtin.
7865 bitcast_to_builtin_store(lhs_expression, rhs, expression_type(rhs_expression));
7866
7867 // Tries to optimize assignments like "<lhs> = <lhs> op expr".
7868 // While this is purely cosmetic, this is important for legacy ESSL where loop
7869 // variable increments must be in either i++ or i += const-expr.
7870 // Without this, we end up with i = i + 1, which is correct GLSL, but not correct GLES 2.0.
7871 if (!optimize_read_modify_write(expression_type(rhs_expression), lhs, rhs))
7872 statement(lhs, " = ", rhs, ";");
7873 register_write(lhs_expression);
7874 }
7875 }
7876
7877 uint32_t CompilerGLSL::get_integer_width_for_instruction(const Instruction &instr) const
7878 {
7879 if (instr.length < 3)
7880 return 32;
7881
7882 auto *ops = stream(instr);
7883
7884 switch (instr.op)
7885 {
7886 case OpSConvert:
7887 case OpConvertSToF:
7888 case OpUConvert:
7889 case OpConvertUToF:
7890 case OpIEqual:
7891 case OpINotEqual:
7892 case OpSLessThan:
7893 case OpSLessThanEqual:
7894 case OpSGreaterThan:
7895 case OpSGreaterThanEqual:
7896 case OpULessThan:
7897 case OpULessThanEqual:
7898 case OpUGreaterThan:
7899 case OpUGreaterThanEqual:
7900 return expression_type(ops[2]).width;
7901
7902 default:
7903 {
7904 // We can look at result type which is more robust.
7905 auto *type = maybe_get<SPIRType>(ops[0]);
7906 if (type && type_is_integral(*type))
7907 return type->width;
7908 else
7909 return 32;
7910 }
7911 }
7912 }
7913
7914 uint32_t CompilerGLSL::get_integer_width_for_glsl_instruction(GLSLstd450 op, const uint32_t *ops, uint32_t length) const
7915 {
7916 if (length < 1)
7917 return 32;
7918
7919 switch (op)
7920 {
7921 case GLSLstd450SAbs:
7922 case GLSLstd450SSign:
7923 case GLSLstd450UMin:
7924 case GLSLstd450SMin:
7925 case GLSLstd450UMax:
7926 case GLSLstd450SMax:
7927 case GLSLstd450UClamp:
7928 case GLSLstd450SClamp:
7929 case GLSLstd450FindSMsb:
7930 case GLSLstd450FindUMsb:
7931 return expression_type(ops[0]).width;
7932
7933 default:
7934 {
7935 // We don't need to care about other opcodes, just return 32.
7936 return 32;
7937 }
7938 }
7939 }
7940
7941 void CompilerGLSL::emit_instruction(const Instruction &instruction)
7942 {
7943 auto ops = stream(instruction);
7944 auto opcode = static_cast<Op>(instruction.op);
7945 uint32_t length = instruction.length;
7946
7947 #define GLSL_BOP(op) emit_binary_op(ops[0], ops[1], ops[2], ops[3], #op)
7948 #define GLSL_BOP_CAST(op, type) \
7949 emit_binary_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode))
7950 #define GLSL_UOP(op) emit_unary_op(ops[0], ops[1], ops[2], #op)
7951 #define GLSL_QFOP(op) emit_quaternary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], #op)
7952 #define GLSL_TFOP(op) emit_trinary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], #op)
7953 #define GLSL_BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op)
7954 #define GLSL_BFOP_CAST(op, type) \
7955 emit_binary_func_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode))
7957 #define GLSL_UFOP(op) emit_unary_func_op(ops[0], ops[1], ops[2], #op)
7958
7959 // If we need to do implicit bitcasts, make sure we do it with the correct type.
7960 uint32_t integer_width = get_integer_width_for_instruction(instruction);
7961 auto int_type = to_signed_basetype(integer_width);
7962 auto uint_type = to_unsigned_basetype(integer_width);
7963
7964 switch (opcode)
7965 {
7966 // Dealing with memory
7967 case OpLoad:
7968 {
7969 uint32_t result_type = ops[0];
7970 uint32_t id = ops[1];
7971 uint32_t ptr = ops[2];
7972
7973 flush_variable_declaration(ptr);
7974
7975 // If we're loading from memory that cannot be changed by the shader,
7976 // just forward the expression directly to avoid needless temporaries.
7977 // If an expression is mutable and forwardable, we speculate that it is immutable.
7978 bool forward = should_forward(ptr) && forced_temporaries.find(id) == end(forced_temporaries);
7979
7980 // If loading a non-native row-major matrix, mark the expression as need_transpose.
7981 bool need_transpose = false;
7982 bool old_need_transpose = false;
7983
7984 auto *ptr_expression = maybe_get<SPIRExpression>(ptr);
7985
7986 if (forward)
7987 {
7988 // If we're forwarding the load, we're also going to forward transpose state, so don't transpose while
7989 // taking the expression.
7990 if (ptr_expression && ptr_expression->need_transpose)
7991 {
7992 old_need_transpose = true;
7993 ptr_expression->need_transpose = false;
7994 need_transpose = true;
7995 }
7996 else if (is_non_native_row_major_matrix(ptr))
7997 need_transpose = true;
7998 }
7999
8000 // If we are forwarding this load,
8001 // don't register the read to access chain here, defer that to when we actually use the expression,
8002 // using the add_implied_read_expression mechanism.
8003 string expr;
8004
8005 bool is_packed = has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypePacked);
8006 bool is_remapped = has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID);
8007 if (forward || (!is_packed && !is_remapped))
8008 {
8009 // For the simple case, we do not need to deal with repacking.
8010 expr = to_dereferenced_expression(ptr, false);
8011 }
8012 else
8013 {
8014 // If we are not forwarding the expression, we need to unpack and resolve any physical type remapping here before
8015 // storing the expression to a temporary.
8016 expr = to_unpacked_expression(ptr);
8017 }
8018
8019 // We might need to bitcast in order to load from a builtin.
8020 bitcast_from_builtin_load(ptr, expr, get<SPIRType>(result_type));
8021
8022 // We might be trying to load a gl_Position[N], where we should be
8023 // doing float4[](gl_in[i].gl_Position, ...) instead.
8024 // Similar workarounds are required for input arrays in tessellation.
8025 unroll_array_from_complex_load(id, ptr, expr);
8026
8027 auto &type = get<SPIRType>(result_type);
8028 // Shouldn't need to check for ID, but current glslang codegen requires it in some cases
8029 // when loading Image/Sampler descriptors. It does not hurt to check ID as well.
8030 if (has_decoration(id, DecorationNonUniformEXT) || has_decoration(ptr, DecorationNonUniformEXT))
8031 {
8032 propagate_nonuniform_qualifier(ptr);
8033 convert_non_uniform_expression(type, expr);
8034 }
8035
8036 if (forward && ptr_expression)
8037 ptr_expression->need_transpose = old_need_transpose;
8038
8039 // By default, suppress usage tracking since using same expression multiple times does not imply any extra work.
8040 // However, if we try to load a complex, composite object from a flattened buffer,
8041 // we should avoid emitting the same code over and over and lower the result to a temporary.
8042 bool usage_tracking = ptr_expression && flattened_buffer_blocks.count(ptr_expression->loaded_from) != 0 &&
8043 (type.basetype == SPIRType::Struct || (type.columns > 1));
8044
8045 SPIRExpression *e = nullptr;
8046 if (!backend.array_is_value_type && !type.array.empty() && !forward)
8047 {
8048 // Complicated load case where we need to make a copy of ptr, but we cannot, because
8049 // it is an array, and our backend does not support arrays as value types.
8050 // Emit the temporary, and copy it explicitly.
8051 e = &emit_uninitialized_temporary_expression(result_type, id);
8052 emit_array_copy(to_expression(id), ptr, StorageClassFunction, get_backing_variable_storage(ptr));
8053 }
8054 else
8055 e = &emit_op(result_type, id, expr, forward, !usage_tracking);
8056
8057 e->need_transpose = need_transpose;
8058 register_read(id, ptr, forward);
8059
8060 if (forward)
8061 {
8062 // Pass through whether the result is of a packed type and the physical type ID.
8063 if (has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypePacked))
8064 set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
8065 if (has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID))
8066 {
8067 set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID,
8068 get_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID));
8069 }
8070 }
8071 else
8072 {
8073 // This might have been set on an earlier compilation iteration, force it to be unset.
8074 unset_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
8075 unset_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID);
8076 }
8077
8078 inherit_expression_dependencies(id, ptr);
8079 if (forward)
8080 add_implied_read_expression(*e, ptr);
8081 break;
8082 }
8083
8084 case OpInBoundsAccessChain:
8085 case OpAccessChain:
8086 case OpPtrAccessChain:
8087 {
8088 auto *var = maybe_get<SPIRVariable>(ops[2]);
8089 if (var)
8090 flush_variable_declaration(var->self);
8091
8092 // If the base is immutable, the access chain pointer must also be.
8093 // If an expression is mutable and forwardable, we speculate that it is immutable.
8094 AccessChainMeta meta;
8095 bool ptr_chain = opcode == OpPtrAccessChain;
8096 auto e = access_chain(ops[2], &ops[3], length - 3, get<SPIRType>(ops[0]), &meta, ptr_chain);
8097
8098 auto &expr = set<SPIRExpression>(ops[1], move(e), ops[0], should_forward(ops[2]));
8099
8100 auto *backing_variable = maybe_get_backing_variable(ops[2]);
8101 expr.loaded_from = backing_variable ? backing_variable->self : ID(ops[2]);
8102 expr.need_transpose = meta.need_transpose;
8103 expr.access_chain = true;
8104
8105 		// Mark the result as being packed. Some platforms handle packed vectors differently from non-packed ones.
8106 if (meta.storage_is_packed)
8107 set_extended_decoration(ops[1], SPIRVCrossDecorationPhysicalTypePacked);
8108 if (meta.storage_physical_type != 0)
8109 set_extended_decoration(ops[1], SPIRVCrossDecorationPhysicalTypeID, meta.storage_physical_type);
8110 if (meta.storage_is_invariant)
8111 set_decoration(ops[1], DecorationInvariant);
8112
8113 // If we have some expression dependencies in our access chain, this access chain is technically a forwarded
8114 // temporary which could be subject to invalidation.
8115 		// Need to assume we're forwarded while calling inherit_expression_dependencies.
8116 forwarded_temporaries.insert(ops[1]);
8117 // The access chain itself is never forced to a temporary, but its dependencies might.
8118 suppressed_usage_tracking.insert(ops[1]);
8119
8120 for (uint32_t i = 2; i < length; i++)
8121 {
8122 inherit_expression_dependencies(ops[1], ops[i]);
8123 add_implied_read_expression(expr, ops[i]);
8124 }
8125
8126 // If we have no dependencies after all, i.e., all indices in the access chain are immutable temporaries,
8127 // we're not forwarded after all.
8128 if (expr.expression_dependencies.empty())
8129 forwarded_temporaries.erase(ops[1]);
8130
8131 break;
8132 }
8133
8134 case OpStore:
8135 {
8136 auto *var = maybe_get<SPIRVariable>(ops[0]);
8137
8138 if (has_decoration(ops[0], DecorationNonUniformEXT))
8139 propagate_nonuniform_qualifier(ops[0]);
8140
8141 if (var && var->statically_assigned)
8142 var->static_expression = ops[1];
8143 else if (var && var->loop_variable && !var->loop_variable_enable)
8144 var->static_expression = ops[1];
8145 else if (var && var->remapped_variable)
8146 {
8147 // Skip the write.
8148 }
8149 else if (var && flattened_structs.count(ops[0]))
8150 {
8151 store_flattened_struct(*var, ops[1]);
8152 register_write(ops[0]);
8153 }
8154 else
8155 {
8156 emit_store_statement(ops[0], ops[1]);
8157 }
8158
8159 // Storing a pointer results in a variable pointer, so we must conservatively assume
8160 // we can write through it.
8161 if (expression_type(ops[1]).pointer)
8162 register_write(ops[1]);
8163 break;
8164 }
8165
8166 case OpArrayLength:
8167 {
8168 uint32_t result_type = ops[0];
8169 uint32_t id = ops[1];
8170 auto e = access_chain_internal(ops[2], &ops[3], length - 3, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr);
8171 set<SPIRExpression>(id, join(type_to_glsl(get<SPIRType>(result_type)), "(", e, ".length())"), result_type,
8172 true);
8173 break;
8174 }
8175
8176 // Function calls
8177 case OpFunctionCall:
8178 {
8179 uint32_t result_type = ops[0];
8180 uint32_t id = ops[1];
8181 uint32_t func = ops[2];
8182 const auto *arg = &ops[3];
8183 length -= 3;
8184
8185 auto &callee = get<SPIRFunction>(func);
8186 auto &return_type = get<SPIRType>(callee.return_type);
8187 bool pure = function_is_pure(callee);
8188
8189 bool callee_has_out_variables = false;
8190 bool emit_return_value_as_argument = false;
8191
8192 // Invalidate out variables passed to functions since they can be OpStore'd to.
8193 for (uint32_t i = 0; i < length; i++)
8194 {
8195 if (callee.arguments[i].write_count)
8196 {
8197 register_call_out_argument(arg[i]);
8198 callee_has_out_variables = true;
8199 }
8200
8201 flush_variable_declaration(arg[i]);
8202 }
8203
8204 if (!return_type.array.empty() && !backend.can_return_array)
8205 {
8206 callee_has_out_variables = true;
8207 emit_return_value_as_argument = true;
8208 }
8209
8210 if (!pure)
8211 register_impure_function_call();
8212
8213 string funexpr;
8214 SmallVector<string> arglist;
8215 funexpr += to_name(func) + "(";
8216
8217 if (emit_return_value_as_argument)
8218 {
8219 statement(type_to_glsl(return_type), " ", to_name(id), type_to_array_glsl(return_type), ";");
8220 arglist.push_back(to_name(id));
8221 }
8222
8223 for (uint32_t i = 0; i < length; i++)
8224 {
8225 // Do not pass in separate images or samplers if we're remapping
8226 // to combined image samplers.
8227 if (skip_argument(arg[i]))
8228 continue;
8229
8230 arglist.push_back(to_func_call_arg(callee.arguments[i], arg[i]));
8231 }
8232
8233 for (auto &combined : callee.combined_parameters)
8234 {
8235 auto image_id = combined.global_image ? combined.image_id : VariableID(arg[combined.image_id]);
8236 auto sampler_id = combined.global_sampler ? combined.sampler_id : VariableID(arg[combined.sampler_id]);
8237 arglist.push_back(to_combined_image_sampler(image_id, sampler_id));
8238 }
8239
8240 append_global_func_args(callee, length, arglist);
8241
8242 funexpr += merge(arglist);
8243 funexpr += ")";
8244
8245 // Check for function call constraints.
8246 check_function_call_constraints(arg, length);
8247
8248 if (return_type.basetype != SPIRType::Void)
8249 {
8250 // If the function actually writes to an out variable,
8251 // take the conservative route and do not forward.
8252 // The problem is that we might not read the function
8253 // result (and emit the function) before an out variable
8254 		// is read (common case when the return value is ignored!).
8255 		// To avoid starting to track invalid variables,
8256 // just avoid the forwarding problem altogether.
8257 bool forward = args_will_forward(id, arg, length, pure) && !callee_has_out_variables && pure &&
8258 (forced_temporaries.find(id) == end(forced_temporaries));
8259
8260 if (emit_return_value_as_argument)
8261 {
8262 statement(funexpr, ";");
8263 set<SPIRExpression>(id, to_name(id), result_type, true);
8264 }
8265 else
8266 emit_op(result_type, id, funexpr, forward);
8267
8268 // Function calls are implicit loads from all variables in question.
8269 // Set dependencies for them.
8270 for (uint32_t i = 0; i < length; i++)
8271 register_read(id, arg[i], forward);
8272
8273 // If we're going to forward the temporary result,
8274 // put dependencies on every variable that must not change.
8275 if (forward)
8276 register_global_read_dependencies(callee, id);
8277 }
8278 else
8279 statement(funexpr, ";");
8280
8281 break;
8282 }
8283
8284 // Composite munging
8285 case OpCompositeConstruct:
8286 {
8287 uint32_t result_type = ops[0];
8288 uint32_t id = ops[1];
8289 const auto *const elems = &ops[2];
8290 length -= 2;
8291
8292 bool forward = true;
8293 for (uint32_t i = 0; i < length; i++)
8294 forward = forward && should_forward(elems[i]);
8295
8296 auto &out_type = get<SPIRType>(result_type);
8297 auto *in_type = length > 0 ? &expression_type(elems[0]) : nullptr;
8298
8299 // Only splat if we have vector constructors.
8300 // Arrays and structs must be initialized properly in full.
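		// E.g. a vec4 built from a single scalar may be splatted as vec4(x), but an array or struct
		// constructor must still list every element explicitly.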
8301 bool composite = !out_type.array.empty() || out_type.basetype == SPIRType::Struct;
8302
8303 bool splat = false;
8304 bool swizzle_splat = false;
8305
8306 if (in_type)
8307 {
8308 splat = in_type->vecsize == 1 && in_type->columns == 1 && !composite && backend.use_constructor_splatting;
8309 swizzle_splat = in_type->vecsize == 1 && in_type->columns == 1 && backend.can_swizzle_scalar;
8310
8311 if (ir.ids[elems[0]].get_type() == TypeConstant && !type_is_floating_point(*in_type))
8312 {
8313 // Cannot swizzle literal integers as a special case.
8314 swizzle_splat = false;
8315 }
8316 }
8317
8318 if (splat || swizzle_splat)
8319 {
8320 uint32_t input = elems[0];
8321 for (uint32_t i = 0; i < length; i++)
8322 {
8323 if (input != elems[i])
8324 {
8325 splat = false;
8326 swizzle_splat = false;
8327 }
8328 }
8329 }
8330
8331 if (out_type.basetype == SPIRType::Struct && !backend.can_declare_struct_inline)
8332 forward = false;
8333 if (!out_type.array.empty() && !backend.can_declare_arrays_inline)
8334 forward = false;
8335 if (type_is_empty(out_type) && !backend.supports_empty_struct)
8336 forward = false;
8337
8338 string constructor_op;
8339 if (backend.use_initializer_list && composite)
8340 {
8341 			bool needs_trailing_bracket = false;
8342 // Only use this path if we are building composites.
8343 // This path cannot be used for arithmetic.
8344 if (backend.use_typed_initializer_list && out_type.basetype == SPIRType::Struct && out_type.array.empty())
8345 constructor_op += type_to_glsl_constructor(get<SPIRType>(result_type));
8346 else if (backend.use_typed_initializer_list && !out_type.array.empty())
8347 {
8348 // MSL path. Array constructor is baked into type here, do not use _constructor variant.
8349 constructor_op += type_to_glsl_constructor(get<SPIRType>(result_type)) + "(";
8350 				needs_trailing_bracket = true;
8351 }
8352 constructor_op += "{ ";
8353
8354 if (type_is_empty(out_type) && !backend.supports_empty_struct)
8355 constructor_op += "0";
8356 else if (splat)
8357 constructor_op += to_unpacked_expression(elems[0]);
8358 else
8359 constructor_op += build_composite_combiner(result_type, elems, length);
8360 constructor_op += " }";
8361 			if (needs_trailing_bracket)
8362 constructor_op += ")";
8363 }
8364 else if (swizzle_splat && !composite)
8365 {
8366 constructor_op = remap_swizzle(get<SPIRType>(result_type), 1, to_unpacked_expression(elems[0]));
8367 }
8368 else
8369 {
8370 constructor_op = type_to_glsl_constructor(get<SPIRType>(result_type)) + "(";
8371 if (type_is_empty(out_type) && !backend.supports_empty_struct)
8372 constructor_op += "0";
8373 else if (splat)
8374 constructor_op += to_unpacked_expression(elems[0]);
8375 else
8376 constructor_op += build_composite_combiner(result_type, elems, length);
8377 constructor_op += ")";
8378 }
8379
8380 if (!constructor_op.empty())
8381 {
8382 emit_op(result_type, id, constructor_op, forward);
8383 for (uint32_t i = 0; i < length; i++)
8384 inherit_expression_dependencies(id, elems[i]);
8385 }
8386 break;
8387 }
8388
8389 case OpVectorInsertDynamic:
8390 {
8391 uint32_t result_type = ops[0];
8392 uint32_t id = ops[1];
8393 uint32_t vec = ops[2];
8394 uint32_t comp = ops[3];
8395 uint32_t index = ops[4];
8396
8397 flush_variable_declaration(vec);
8398
8399 // Make a copy, then use access chain to store the variable.
8400 statement(declare_temporary(result_type, id), to_expression(vec), ";");
8401 set<SPIRExpression>(id, to_name(id), result_type, true);
8402 auto chain = access_chain_internal(id, &index, 1, 0, nullptr);
8403 statement(chain, " = ", to_expression(comp), ";");
8404 break;
8405 }
8406
8407 case OpVectorExtractDynamic:
8408 {
8409 uint32_t result_type = ops[0];
8410 uint32_t id = ops[1];
8411
8412 auto expr = access_chain_internal(ops[2], &ops[3], 1, 0, nullptr);
8413 emit_op(result_type, id, expr, should_forward(ops[2]));
8414 inherit_expression_dependencies(id, ops[2]);
8415 inherit_expression_dependencies(id, ops[3]);
8416 break;
8417 }
8418
8419 case OpCompositeExtract:
8420 {
8421 uint32_t result_type = ops[0];
8422 uint32_t id = ops[1];
8423 length -= 3;
8424
8425 auto &type = get<SPIRType>(result_type);
8426
8427 // We can only split the expression here if our expression is forwarded as a temporary.
8428 bool allow_base_expression = forced_temporaries.find(id) == end(forced_temporaries);
8429
8430 // Do not allow base expression for struct members. We risk doing "swizzle" optimizations in this case.
8431 auto &composite_type = expression_type(ops[2]);
8432 if (composite_type.basetype == SPIRType::Struct || !composite_type.array.empty())
8433 allow_base_expression = false;
8434
8435 // Packed expressions cannot be split up.
8436 if (has_extended_decoration(ops[2], SPIRVCrossDecorationPhysicalTypePacked))
8437 allow_base_expression = false;
8438
8439 // Cannot use base expression for row-major matrix row-extraction since we need to interleave access pattern
8440 // into the base expression.
8441 if (is_non_native_row_major_matrix(ops[2]))
8442 allow_base_expression = false;
8443
8444 AccessChainMeta meta;
8445 SPIRExpression *e = nullptr;
8446
8447 // Only apply this optimization if result is scalar.
8448 if (allow_base_expression && should_forward(ops[2]) && type.vecsize == 1 && type.columns == 1 && length == 1)
8449 {
8450 // We want to split the access chain from the base.
8451 // This is so we can later combine different CompositeExtract results
8452 // with CompositeConstruct without emitting code like
8453 //
8454 // vec3 temp = texture(...).xyz
8455 // vec4(temp.x, temp.y, temp.z, 1.0).
8456 //
8457 // when we actually wanted to emit this
8458 // vec4(texture(...).xyz, 1.0).
8459 //
8460 // Including the base will prevent this and would trigger multiple reads
8461 // from expression causing it to be forced to an actual temporary in GLSL.
8462 auto expr = access_chain_internal(ops[2], &ops[3], length,
8463 ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_CHAIN_ONLY_BIT, &meta);
8464 e = &emit_op(result_type, id, expr, true, should_suppress_usage_tracking(ops[2]));
8465 inherit_expression_dependencies(id, ops[2]);
8466 e->base_expression = ops[2];
8467 }
8468 else
8469 {
8470 auto expr = access_chain_internal(ops[2], &ops[3], length, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &meta);
8471 e = &emit_op(result_type, id, expr, should_forward(ops[2]), should_suppress_usage_tracking(ops[2]));
8472 inherit_expression_dependencies(id, ops[2]);
8473 }
8474
8475 // Pass through some meta information to the loaded expression.
8476 // We can still end up loading a buffer type to a variable, then CompositeExtract from it
8477 // instead of loading everything through an access chain.
8478 e->need_transpose = meta.need_transpose;
8479 if (meta.storage_is_packed)
8480 set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
8481 if (meta.storage_physical_type != 0)
8482 set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID, meta.storage_physical_type);
8483 if (meta.storage_is_invariant)
8484 set_decoration(id, DecorationInvariant);
8485
8486 break;
8487 }
8488
8489 case OpCompositeInsert:
8490 {
8491 uint32_t result_type = ops[0];
8492 uint32_t id = ops[1];
8493 uint32_t obj = ops[2];
8494 uint32_t composite = ops[3];
8495 const auto *elems = &ops[4];
8496 length -= 4;
8497
8498 flush_variable_declaration(composite);
8499
8500 // Make a copy, then use access chain to store the variable.
8501 statement(declare_temporary(result_type, id), to_expression(composite), ";");
8502 set<SPIRExpression>(id, to_name(id), result_type, true);
8503 auto chain = access_chain_internal(id, elems, length, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr);
8504 statement(chain, " = ", to_expression(obj), ";");
8505
8506 break;
8507 }
8508
8509 case OpCopyMemory:
8510 {
8511 uint32_t lhs = ops[0];
8512 uint32_t rhs = ops[1];
8513 if (lhs != rhs)
8514 {
8515 flush_variable_declaration(lhs);
8516 flush_variable_declaration(rhs);
8517 statement(to_expression(lhs), " = ", to_expression(rhs), ";");
8518 register_write(lhs);
8519 }
8520 break;
8521 }
8522
8523 case OpCopyObject:
8524 {
8525 uint32_t result_type = ops[0];
8526 uint32_t id = ops[1];
8527 uint32_t rhs = ops[2];
8528 bool pointer = get<SPIRType>(result_type).pointer;
8529
8530 auto *chain = maybe_get<SPIRAccessChain>(rhs);
8531 if (chain)
8532 {
8533 // Cannot lower to a SPIRExpression, just copy the object.
8534 auto &e = set<SPIRAccessChain>(id, *chain);
8535 e.self = id;
8536 }
8537 else if (expression_is_lvalue(rhs) && !pointer)
8538 {
8539 // Need a copy.
8540 // For pointer types, we copy the pointer itself.
8541 statement(declare_temporary(result_type, id), to_unpacked_expression(rhs), ";");
8542 set<SPIRExpression>(id, to_name(id), result_type, true);
8543 }
8544 else
8545 {
8546 // RHS expression is immutable, so just forward it.
8547 // Copying these things really make no sense, but
8548 // seems to be allowed anyways.
8549 auto &e = set<SPIRExpression>(id, to_expression(rhs), result_type, true);
8550 if (pointer)
8551 {
8552 auto *var = maybe_get_backing_variable(rhs);
8553 e.loaded_from = var ? var->self : ID(0);
8554 }
8555
8556 // If we're copying an access chain, need to inherit the read expressions.
8557 auto *rhs_expr = maybe_get<SPIRExpression>(rhs);
8558 if (rhs_expr)
8559 {
8560 e.implied_read_expressions = rhs_expr->implied_read_expressions;
8561 e.expression_dependencies = rhs_expr->expression_dependencies;
8562 }
8563 }
8564 break;
8565 }
8566
8567 case OpVectorShuffle:
8568 {
8569 uint32_t result_type = ops[0];
8570 uint32_t id = ops[1];
8571 uint32_t vec0 = ops[2];
8572 uint32_t vec1 = ops[3];
8573 const auto *elems = &ops[4];
8574 length -= 4;
8575
8576 auto &type0 = expression_type(vec0);
8577
8578 // If we have the undefined swizzle index -1, we need to swizzle in undefined data,
8579 // or in our case, T(0).
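		// E.g. (illustrative) shuffling a vec2 v with indices (0, -1) ends up emitted as vec2(v.x, 0.0).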
8580 bool shuffle = false;
8581 for (uint32_t i = 0; i < length; i++)
8582 if (elems[i] >= type0.vecsize || elems[i] == 0xffffffffu)
8583 shuffle = true;
8584
8585 // Cannot use swizzles with packed expressions, force shuffle path.
8586 if (!shuffle && has_extended_decoration(vec0, SPIRVCrossDecorationPhysicalTypePacked))
8587 shuffle = true;
8588
8589 string expr;
8590 bool should_fwd, trivial_forward;
8591
8592 if (shuffle)
8593 {
8594 should_fwd = should_forward(vec0) && should_forward(vec1);
8595 trivial_forward = should_suppress_usage_tracking(vec0) && should_suppress_usage_tracking(vec1);
8596
8597 // Constructor style and shuffling from two different vectors.
8598 SmallVector<string> args;
8599 for (uint32_t i = 0; i < length; i++)
8600 {
8601 if (elems[i] == 0xffffffffu)
8602 {
8603 // Use a constant 0 here.
8604 // We could use the first component or similar, but then we risk propagating
8605 // a value we might not need, and bog down codegen.
8606 SPIRConstant c;
8607 c.constant_type = type0.parent_type;
8608 assert(type0.parent_type != ID(0));
8609 args.push_back(constant_expression(c));
8610 }
8611 else if (elems[i] >= type0.vecsize)
8612 args.push_back(to_extract_component_expression(vec1, elems[i] - type0.vecsize));
8613 else
8614 args.push_back(to_extract_component_expression(vec0, elems[i]));
8615 }
8616 expr += join(type_to_glsl_constructor(get<SPIRType>(result_type)), "(", merge(args), ")");
8617 }
8618 else
8619 {
8620 should_fwd = should_forward(vec0);
8621 trivial_forward = should_suppress_usage_tracking(vec0);
8622
8623 // We only source from first vector, so can use swizzle.
8624 // If the vector is packed, unpack it before applying a swizzle (needed for MSL)
8625 expr += to_enclosed_unpacked_expression(vec0);
8626 expr += ".";
8627 for (uint32_t i = 0; i < length; i++)
8628 {
8629 assert(elems[i] != 0xffffffffu);
8630 expr += index_to_swizzle(elems[i]);
8631 }
8632
8633 if (backend.swizzle_is_function && length > 1)
8634 expr += "()";
8635 }
8636
8637 // A shuffle is trivial in that it doesn't actually *do* anything.
8638 // We inherit the forwardedness from our arguments to avoid flushing out to temporaries when it's not really needed.
8639
8640 emit_op(result_type, id, expr, should_fwd, trivial_forward);
8641
8642 inherit_expression_dependencies(id, vec0);
8643 if (vec0 != vec1)
8644 inherit_expression_dependencies(id, vec1);
8645 break;
8646 }
8647
8648 // ALU
8649 case OpIsNan:
8650 GLSL_UFOP(isnan);
8651 break;
8652
8653 case OpIsInf:
8654 GLSL_UFOP(isinf);
8655 break;
8656
8657 case OpSNegate:
8658 case OpFNegate:
8659 GLSL_UOP(-);
8660 break;
8661
8662 case OpIAdd:
8663 {
8664 // For simple arith ops, prefer the output type if there's a mismatch to avoid extra bitcasts.
8665 auto type = get<SPIRType>(ops[0]).basetype;
8666 GLSL_BOP_CAST(+, type);
8667 break;
8668 }
8669
8670 case OpFAdd:
8671 GLSL_BOP(+);
8672 break;
8673
8674 case OpISub:
8675 {
8676 auto type = get<SPIRType>(ops[0]).basetype;
8677 GLSL_BOP_CAST(-, type);
8678 break;
8679 }
8680
8681 case OpFSub:
8682 GLSL_BOP(-);
8683 break;
8684
8685 case OpIMul:
8686 {
8687 auto type = get<SPIRType>(ops[0]).basetype;
8688 GLSL_BOP_CAST(*, type);
8689 break;
8690 }
8691
8692 case OpVectorTimesMatrix:
8693 case OpMatrixTimesVector:
8694 {
8695 // If the matrix needs transpose, just flip the multiply order.
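		// Roughly speaking, instead of emitting transpose(m) * v we emit v * m (and vice versa),
		// which avoids materializing a transpose() call.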
8696 auto *e = maybe_get<SPIRExpression>(ops[opcode == OpMatrixTimesVector ? 2 : 3]);
8697 if (e && e->need_transpose)
8698 {
8699 e->need_transpose = false;
8700 string expr;
8701
8702 if (opcode == OpMatrixTimesVector)
8703 expr = join(to_enclosed_unpacked_expression(ops[3]), " * ",
8704 enclose_expression(to_unpacked_row_major_matrix_expression(ops[2])));
8705 else
8706 expr = join(enclose_expression(to_unpacked_row_major_matrix_expression(ops[3])), " * ",
8707 to_enclosed_unpacked_expression(ops[2]));
8708
8709 bool forward = should_forward(ops[2]) && should_forward(ops[3]);
8710 emit_op(ops[0], ops[1], expr, forward);
8711 e->need_transpose = true;
8712 inherit_expression_dependencies(ops[1], ops[2]);
8713 inherit_expression_dependencies(ops[1], ops[3]);
8714 }
8715 else
8716 GLSL_BOP(*);
8717 break;
8718 }
8719
8720 case OpMatrixTimesMatrix:
8721 {
8722 auto *a = maybe_get<SPIRExpression>(ops[2]);
8723 auto *b = maybe_get<SPIRExpression>(ops[3]);
8724
8725 // If both matrices need transpose, we can multiply in flipped order and tag the expression as transposed.
8726 // a^T * b^T = (b * a)^T.
8727 if (a && b && a->need_transpose && b->need_transpose)
8728 {
8729 a->need_transpose = false;
8730 b->need_transpose = false;
8731 auto expr = join(enclose_expression(to_unpacked_row_major_matrix_expression(ops[3])), " * ",
8732 enclose_expression(to_unpacked_row_major_matrix_expression(ops[2])));
8733 bool forward = should_forward(ops[2]) && should_forward(ops[3]);
8734 auto &e = emit_op(ops[0], ops[1], expr, forward);
8735 e.need_transpose = true;
8736 a->need_transpose = true;
8737 b->need_transpose = true;
8738 inherit_expression_dependencies(ops[1], ops[2]);
8739 inherit_expression_dependencies(ops[1], ops[3]);
8740 }
8741 else
8742 GLSL_BOP(*);
8743
8744 break;
8745 }
8746
8747 case OpFMul:
8748 case OpMatrixTimesScalar:
8749 case OpVectorTimesScalar:
8750 GLSL_BOP(*);
8751 break;
8752
8753 case OpOuterProduct:
8754 GLSL_BFOP(outerProduct);
8755 break;
8756
8757 case OpDot:
8758 GLSL_BFOP(dot);
8759 break;
8760
8761 case OpTranspose:
8762 GLSL_UFOP(transpose);
8763 break;
8764
8765 case OpSRem:
8766 {
8767 uint32_t result_type = ops[0];
8768 uint32_t result_id = ops[1];
8769 uint32_t op0 = ops[2];
8770 uint32_t op1 = ops[3];
8771
8772 // Needs special handling.
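		// SPIR-V SRem takes the sign of the dividend (truncated division), which GLSL's % operator does not
		// guarantee for negative operands, so emulate it as a - b * (a / b) using integer division.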
8773 bool forward = should_forward(op0) && should_forward(op1);
8774 auto expr = join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ", "(",
8775 to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), ")");
8776
8777 emit_op(result_type, result_id, expr, forward);
8778 inherit_expression_dependencies(result_id, op0);
8779 inherit_expression_dependencies(result_id, op1);
8780 break;
8781 }
8782
8783 case OpSDiv:
8784 GLSL_BOP_CAST(/, int_type);
8785 break;
8786
8787 case OpUDiv:
8788 GLSL_BOP_CAST(/, uint_type);
8789 break;
8790
8791 case OpIAddCarry:
8792 case OpISubBorrow:
8793 {
8794 if (options.es && options.version < 310)
8795 SPIRV_CROSS_THROW("Extended arithmetic is only available from ESSL 310.");
8796 else if (!options.es && options.version < 400)
8797 SPIRV_CROSS_THROW("Extended arithmetic is only available from GLSL 400.");
8798
8799 uint32_t result_type = ops[0];
8800 uint32_t result_id = ops[1];
8801 uint32_t op0 = ops[2];
8802 uint32_t op1 = ops[3];
8803 auto &type = get<SPIRType>(result_type);
8804 emit_uninitialized_temporary_expression(result_type, result_id);
8805 const char *op = opcode == OpIAddCarry ? "uaddCarry" : "usubBorrow";
8806
8807 statement(to_expression(result_id), ".", to_member_name(type, 0), " = ", op, "(", to_expression(op0), ", ",
8808 to_expression(op1), ", ", to_expression(result_id), ".", to_member_name(type, 1), ");");
8809 break;
8810 }
8811
8812 case OpUMulExtended:
8813 case OpSMulExtended:
8814 {
8815 if (options.es && options.version < 310)
8816 SPIRV_CROSS_THROW("Extended arithmetic is only available from ESSL 310.");
8817 else if (!options.es && options.version < 400)
8818 			SPIRV_CROSS_THROW("Extended arithmetic is only available from GLSL 400.");
8819
8820 uint32_t result_type = ops[0];
8821 uint32_t result_id = ops[1];
8822 uint32_t op0 = ops[2];
8823 uint32_t op1 = ops[3];
8824 auto &type = get<SPIRType>(result_type);
8825 emit_uninitialized_temporary_expression(result_type, result_id);
8826 const char *op = opcode == OpUMulExtended ? "umulExtended" : "imulExtended";
8827
8828 statement(op, "(", to_expression(op0), ", ", to_expression(op1), ", ", to_expression(result_id), ".",
8829 to_member_name(type, 1), ", ", to_expression(result_id), ".", to_member_name(type, 0), ");");
8830 break;
8831 }
8832
8833 case OpFDiv:
8834 GLSL_BOP(/);
8835 break;
8836
8837 case OpShiftRightLogical:
8838 GLSL_BOP_CAST(>>, uint_type);
8839 break;
8840
8841 case OpShiftRightArithmetic:
8842 GLSL_BOP_CAST(>>, int_type);
8843 break;
8844
8845 case OpShiftLeftLogical:
8846 {
8847 auto type = get<SPIRType>(ops[0]).basetype;
8848 GLSL_BOP_CAST(<<, type);
8849 break;
8850 }
8851
8852 case OpBitwiseOr:
8853 {
8854 auto type = get<SPIRType>(ops[0]).basetype;
8855 GLSL_BOP_CAST(|, type);
8856 break;
8857 }
8858
8859 case OpBitwiseXor:
8860 {
8861 auto type = get<SPIRType>(ops[0]).basetype;
8862 GLSL_BOP_CAST(^, type);
8863 break;
8864 }
8865
8866 case OpBitwiseAnd:
8867 {
8868 auto type = get<SPIRType>(ops[0]).basetype;
8869 GLSL_BOP_CAST(&, type);
8870 break;
8871 }
8872
8873 case OpNot:
8874 GLSL_UOP(~);
8875 break;
8876
8877 case OpUMod:
8878 GLSL_BOP_CAST(%, uint_type);
8879 break;
8880
8881 case OpSMod:
8882 GLSL_BOP_CAST(%, int_type);
8883 break;
8884
8885 case OpFMod:
8886 GLSL_BFOP(mod);
8887 break;
8888
8889 case OpFRem:
8890 {
8891 if (is_legacy())
8892 SPIRV_CROSS_THROW("OpFRem requires trunc() and is only supported on non-legacy targets. A workaround is "
8893 "needed for legacy.");
8894
8895 uint32_t result_type = ops[0];
8896 uint32_t result_id = ops[1];
8897 uint32_t op0 = ops[2];
8898 uint32_t op1 = ops[3];
8899
8900 // Needs special handling.
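		// FRem's result takes the sign of the dividend, whereas GLSL's mod() follows the divisor,
		// so emulate it as a - b * trunc(a / b).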
8901 bool forward = should_forward(op0) && should_forward(op1);
8902 auto expr = join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ", "trunc(",
8903 to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), ")");
8904
8905 emit_op(result_type, result_id, expr, forward);
8906 inherit_expression_dependencies(result_id, op0);
8907 inherit_expression_dependencies(result_id, op1);
8908 break;
8909 }
8910
8911 // Relational
8912 case OpAny:
8913 GLSL_UFOP(any);
8914 break;
8915
8916 case OpAll:
8917 GLSL_UFOP(all);
8918 break;
8919
8920 case OpSelect:
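		// OpSelect's operands are (condition, true-case, false-case), while mix(x, y, a) picks y where a is true,
		// so the operands are passed here in reverse order (false-case, true-case, condition).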
8921 emit_mix_op(ops[0], ops[1], ops[4], ops[3], ops[2]);
8922 break;
8923
8924 case OpLogicalOr:
8925 {
8926 // No vector variant in GLSL for logical OR.
8927 auto result_type = ops[0];
8928 auto id = ops[1];
8929 auto &type = get<SPIRType>(result_type);
8930
8931 if (type.vecsize > 1)
8932 emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "||", false, SPIRType::Unknown);
8933 else
8934 GLSL_BOP(||);
8935 break;
8936 }
8937
8938 case OpLogicalAnd:
8939 {
8940 // No vector variant in GLSL for logical AND.
8941 auto result_type = ops[0];
8942 auto id = ops[1];
8943 auto &type = get<SPIRType>(result_type);
8944
8945 if (type.vecsize > 1)
8946 emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "&&", false, SPIRType::Unknown);
8947 else
8948 GLSL_BOP(&&);
8949 break;
8950 }
8951
8952 case OpLogicalNot:
8953 {
8954 auto &type = get<SPIRType>(ops[0]);
8955 if (type.vecsize > 1)
8956 GLSL_UFOP(not);
8957 else
8958 GLSL_UOP(!);
8959 break;
8960 }
8961
8962 case OpIEqual:
8963 {
8964 if (expression_type(ops[2]).vecsize > 1)
8965 GLSL_BFOP_CAST(equal, int_type);
8966 else
8967 GLSL_BOP_CAST(==, int_type);
8968 break;
8969 }
8970
8971 case OpLogicalEqual:
8972 case OpFOrdEqual:
8973 {
8974 if (expression_type(ops[2]).vecsize > 1)
8975 GLSL_BFOP(equal);
8976 else
8977 GLSL_BOP(==);
8978 break;
8979 }
8980
8981 case OpINotEqual:
8982 {
8983 if (expression_type(ops[2]).vecsize > 1)
8984 GLSL_BFOP_CAST(notEqual, int_type);
8985 else
8986 GLSL_BOP_CAST(!=, int_type);
8987 break;
8988 }
8989
8990 case OpLogicalNotEqual:
8991 case OpFOrdNotEqual:
8992 {
8993 if (expression_type(ops[2]).vecsize > 1)
8994 GLSL_BFOP(notEqual);
8995 else
8996 GLSL_BOP(!=);
8997 break;
8998 }
8999
9000 case OpUGreaterThan:
9001 case OpSGreaterThan:
9002 {
9003 auto type = opcode == OpUGreaterThan ? uint_type : int_type;
9004 if (expression_type(ops[2]).vecsize > 1)
9005 GLSL_BFOP_CAST(greaterThan, type);
9006 else
9007 GLSL_BOP_CAST(>, type);
9008 break;
9009 }
9010
9011 case OpFOrdGreaterThan:
9012 {
9013 if (expression_type(ops[2]).vecsize > 1)
9014 GLSL_BFOP(greaterThan);
9015 else
9016 GLSL_BOP(>);
9017 break;
9018 }
9019
9020 case OpUGreaterThanEqual:
9021 case OpSGreaterThanEqual:
9022 {
9023 auto type = opcode == OpUGreaterThanEqual ? uint_type : int_type;
9024 if (expression_type(ops[2]).vecsize > 1)
9025 GLSL_BFOP_CAST(greaterThanEqual, type);
9026 else
9027 GLSL_BOP_CAST(>=, type);
9028 break;
9029 }
9030
9031 case OpFOrdGreaterThanEqual:
9032 {
9033 if (expression_type(ops[2]).vecsize > 1)
9034 GLSL_BFOP(greaterThanEqual);
9035 else
9036 GLSL_BOP(>=);
9037 break;
9038 }
9039
9040 case OpULessThan:
9041 case OpSLessThan:
9042 {
9043 auto type = opcode == OpULessThan ? uint_type : int_type;
9044 if (expression_type(ops[2]).vecsize > 1)
9045 GLSL_BFOP_CAST(lessThan, type);
9046 else
9047 GLSL_BOP_CAST(<, type);
9048 break;
9049 }
9050
9051 case OpFOrdLessThan:
9052 {
9053 if (expression_type(ops[2]).vecsize > 1)
9054 GLSL_BFOP(lessThan);
9055 else
9056 GLSL_BOP(<);
9057 break;
9058 }
9059
9060 case OpULessThanEqual:
9061 case OpSLessThanEqual:
9062 {
9063 auto type = opcode == OpULessThanEqual ? uint_type : int_type;
9064 if (expression_type(ops[2]).vecsize > 1)
9065 GLSL_BFOP_CAST(lessThanEqual, type);
9066 else
9067 GLSL_BOP_CAST(<=, type);
9068 break;
9069 }
9070
9071 case OpFOrdLessThanEqual:
9072 {
9073 if (expression_type(ops[2]).vecsize > 1)
9074 GLSL_BFOP(lessThanEqual);
9075 else
9076 GLSL_BOP(<=);
9077 break;
9078 }
9079
9080 // Conversion
9081 case OpSConvert:
9082 case OpConvertSToF:
9083 case OpUConvert:
9084 case OpConvertUToF:
9085 {
9086 auto input_type = opcode == OpSConvert || opcode == OpConvertSToF ? int_type : uint_type;
9087 uint32_t result_type = ops[0];
9088 uint32_t id = ops[1];
9089
9090 auto &type = get<SPIRType>(result_type);
9091 auto &arg_type = expression_type(ops[2]);
9092 auto func = type_to_glsl_constructor(type);
9093
9094 // If we're sign-extending or zero-extending, we need to make sure we cast from the correct type.
9095 // For truncation, it does not matter, so don't emit useless casts.
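		// E.g. (illustrative) widening a 16-bit value to 32 bits must cast from a source typed with the
		// opcode's signedness so it zero- or sign-extends correctly, while a narrowing 32 -> 16 conversion
		// produces the same bits either way.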
9096 if (arg_type.width < type.width)
9097 emit_unary_func_op_cast(result_type, id, ops[2], func.c_str(), input_type, type.basetype);
9098 else
9099 emit_unary_func_op(result_type, id, ops[2], func.c_str());
9100 break;
9101 }
9102
9103 case OpConvertFToU:
9104 case OpConvertFToS:
9105 {
9106 // Cast to expected arithmetic type, then potentially bitcast away to desired signedness.
9107 uint32_t result_type = ops[0];
9108 uint32_t id = ops[1];
9109 auto &type = get<SPIRType>(result_type);
9110 auto expected_type = type;
9111 auto &float_type = expression_type(ops[2]);
9112 expected_type.basetype =
9113 opcode == OpConvertFToS ? to_signed_basetype(type.width) : to_unsigned_basetype(type.width);
9114
9115 auto func = type_to_glsl_constructor(expected_type);
9116 emit_unary_func_op_cast(result_type, id, ops[2], func.c_str(), float_type.basetype, expected_type.basetype);
9117 break;
9118 }
9119
9120 case OpFConvert:
9121 {
9122 uint32_t result_type = ops[0];
9123 uint32_t id = ops[1];
9124
9125 auto func = type_to_glsl_constructor(get<SPIRType>(result_type));
9126 emit_unary_func_op(result_type, id, ops[2], func.c_str());
9127 break;
9128 }
9129
9130 case OpBitcast:
9131 {
9132 uint32_t result_type = ops[0];
9133 uint32_t id = ops[1];
9134 uint32_t arg = ops[2];
9135
9136 auto op = bitcast_glsl_op(get<SPIRType>(result_type), expression_type(arg));
9137 emit_unary_func_op(result_type, id, arg, op.c_str());
9138 break;
9139 }
9140
9141 case OpQuantizeToF16:
9142 {
9143 uint32_t result_type = ops[0];
9144 uint32_t id = ops[1];
9145 uint32_t arg = ops[2];
9146
9147 string op;
9148 auto &type = get<SPIRType>(result_type);
9149
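		// GLSL has no direct quantize-to-FP16 operation, so round-trip the value through
		// packHalf2x16()/unpackHalf2x16(), two components at a time.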
9150 switch (type.vecsize)
9151 {
9152 case 1:
9153 op = join("unpackHalf2x16(packHalf2x16(vec2(", to_expression(arg), "))).x");
9154 break;
9155 case 2:
9156 op = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), "))");
9157 break;
9158 case 3:
9159 {
9160 auto op0 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".xy))");
9161 auto op1 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".zz)).x");
9162 op = join("vec3(", op0, ", ", op1, ")");
9163 break;
9164 }
9165 case 4:
9166 {
9167 auto op0 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".xy))");
9168 auto op1 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".zw))");
9169 op = join("vec4(", op0, ", ", op1, ")");
9170 break;
9171 }
9172 default:
9173 SPIRV_CROSS_THROW("Illegal argument to OpQuantizeToF16.");
9174 }
9175
9176 emit_op(result_type, id, op, should_forward(arg));
9177 inherit_expression_dependencies(id, arg);
9178 break;
9179 }
9180
9181 // Derivatives
9182 case OpDPdx:
9183 GLSL_UFOP(dFdx);
9184 if (is_legacy_es())
9185 require_extension_internal("GL_OES_standard_derivatives");
9186 register_control_dependent_expression(ops[1]);
9187 break;
9188
9189 case OpDPdy:
9190 GLSL_UFOP(dFdy);
9191 if (is_legacy_es())
9192 require_extension_internal("GL_OES_standard_derivatives");
9193 register_control_dependent_expression(ops[1]);
9194 break;
9195
9196 case OpDPdxFine:
9197 GLSL_UFOP(dFdxFine);
9198 if (options.es)
9199 {
9200 SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
9201 }
9202 if (options.version < 450)
9203 require_extension_internal("GL_ARB_derivative_control");
9204 register_control_dependent_expression(ops[1]);
9205 break;
9206
9207 case OpDPdyFine:
9208 GLSL_UFOP(dFdyFine);
9209 if (options.es)
9210 {
9211 SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
9212 }
9213 if (options.version < 450)
9214 require_extension_internal("GL_ARB_derivative_control");
9215 register_control_dependent_expression(ops[1]);
9216 break;
9217
9218 case OpDPdxCoarse:
9219 if (options.es)
9220 {
9221 SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
9222 }
9223 GLSL_UFOP(dFdxCoarse);
9224 if (options.version < 450)
9225 require_extension_internal("GL_ARB_derivative_control");
9226 register_control_dependent_expression(ops[1]);
9227 break;
9228
9229 case OpDPdyCoarse:
9230 GLSL_UFOP(dFdyCoarse);
9231 if (options.es)
9232 {
9233 SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
9234 }
9235 if (options.version < 450)
9236 require_extension_internal("GL_ARB_derivative_control");
9237 register_control_dependent_expression(ops[1]);
9238 break;
9239
9240 case OpFwidth:
9241 GLSL_UFOP(fwidth);
9242 if (is_legacy_es())
9243 require_extension_internal("GL_OES_standard_derivatives");
9244 register_control_dependent_expression(ops[1]);
9245 break;
9246
9247 case OpFwidthCoarse:
9248 GLSL_UFOP(fwidthCoarse);
9249 if (options.es)
9250 {
9251 SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
9252 }
9253 if (options.version < 450)
9254 require_extension_internal("GL_ARB_derivative_control");
9255 register_control_dependent_expression(ops[1]);
9256 break;
9257
9258 case OpFwidthFine:
9259 GLSL_UFOP(fwidthFine);
9260 if (options.es)
9261 {
9262 SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
9263 }
9264 if (options.version < 450)
9265 require_extension_internal("GL_ARB_derivative_control");
9266 register_control_dependent_expression(ops[1]);
9267 break;
9268
9269 // Bitfield
9270 case OpBitFieldInsert:
9271 {
9272 emit_bitfield_insert_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], "bitfieldInsert", SPIRType::Int);
9273 break;
9274 }
9275
9276 case OpBitFieldSExtract:
9277 {
9278 emit_trinary_func_op_bitextract(ops[0], ops[1], ops[2], ops[3], ops[4], "bitfieldExtract", int_type, int_type,
9279 SPIRType::Int, SPIRType::Int);
9280 break;
9281 }
9282
9283 case OpBitFieldUExtract:
9284 {
9285 emit_trinary_func_op_bitextract(ops[0], ops[1], ops[2], ops[3], ops[4], "bitfieldExtract", uint_type, uint_type,
9286 SPIRType::Int, SPIRType::Int);
9287 break;
9288 }
9289
9290 case OpBitReverse:
9291 // BitReverse does not have issues with sign since result type must match input type.
9292 GLSL_UFOP(bitfieldReverse);
9293 break;
9294
9295 case OpBitCount:
9296 {
9297 auto basetype = expression_type(ops[2]).basetype;
9298 emit_unary_func_op_cast(ops[0], ops[1], ops[2], "bitCount", basetype, int_type);
9299 break;
9300 }
9301
9302 // Atomics
9303 case OpAtomicExchange:
9304 {
9305 uint32_t result_type = ops[0];
9306 uint32_t id = ops[1];
9307 uint32_t ptr = ops[2];
9308 // Ignore semantics for now, probably only relevant to CL.
9309 uint32_t val = ops[5];
9310 const char *op = check_atomic_image(ptr) ? "imageAtomicExchange" : "atomicExchange";
9311 forced_temporaries.insert(id);
9312 emit_binary_func_op(result_type, id, ptr, val, op);
9313 flush_all_atomic_capable_variables();
9314 break;
9315 }
9316
9317 case OpAtomicCompareExchange:
9318 {
9319 uint32_t result_type = ops[0];
9320 uint32_t id = ops[1];
9321 uint32_t ptr = ops[2];
9322 uint32_t val = ops[6];
9323 uint32_t comp = ops[7];
9324 const char *op = check_atomic_image(ptr) ? "imageAtomicCompSwap" : "atomicCompSwap";
9325
9326 forced_temporaries.insert(id);
9327 emit_trinary_func_op(result_type, id, ptr, comp, val, op);
9328 flush_all_atomic_capable_variables();
9329 break;
9330 }
9331
9332 case OpAtomicLoad:
9333 flush_all_atomic_capable_variables();
9334 // FIXME: Image?
9335 // OpAtomicLoad seems to only be relevant for atomic counters.
9336 forced_temporaries.insert(ops[1]);
9337 GLSL_UFOP(atomicCounter);
9338 break;
9339
9340 case OpAtomicStore:
9341 SPIRV_CROSS_THROW("Unsupported opcode OpAtomicStore.");
9342
9343 case OpAtomicIIncrement:
9344 case OpAtomicIDecrement:
9345 {
9346 forced_temporaries.insert(ops[1]);
9347 auto &type = expression_type(ops[2]);
9348 if (type.storage == StorageClassAtomicCounter)
9349 {
9350 // Legacy GLSL stuff, not sure if this is relevant to support.
9351 if (opcode == OpAtomicIIncrement)
9352 GLSL_UFOP(atomicCounterIncrement);
9353 else
9354 GLSL_UFOP(atomicCounterDecrement);
9355 }
9356 else
9357 {
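			// GLSL has no dedicated increment/decrement for SSBO/image atomics, so emit
			// atomicAdd()/imageAtomicAdd() with a literal +1 or -1 (uint(-1) for the unsigned wrap-around case).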
9358 bool atomic_image = check_atomic_image(ops[2]);
9359 bool unsigned_type = (type.basetype == SPIRType::UInt) ||
9360 (atomic_image && get<SPIRType>(type.image.type).basetype == SPIRType::UInt);
9361 const char *op = atomic_image ? "imageAtomicAdd" : "atomicAdd";
9362
9363 const char *increment = nullptr;
9364 if (opcode == OpAtomicIIncrement && unsigned_type)
9365 increment = "1u";
9366 else if (opcode == OpAtomicIIncrement)
9367 increment = "1";
9368 else if (unsigned_type)
9369 increment = "uint(-1)";
9370 else
9371 increment = "-1";
9372
9373 emit_op(ops[0], ops[1], join(op, "(", to_expression(ops[2]), ", ", increment, ")"), false);
9374 }
9375
9376 flush_all_atomic_capable_variables();
9377 break;
9378 }
9379
9380 case OpAtomicIAdd:
9381 {
9382 const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd";
9383 forced_temporaries.insert(ops[1]);
9384 emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
9385 flush_all_atomic_capable_variables();
9386 break;
9387 }
9388
9389 case OpAtomicISub:
9390 {
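		// GLSL has no atomicSub(), so negate the value operand and reuse atomicAdd()/imageAtomicAdd().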
9391 const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd";
9392 forced_temporaries.insert(ops[1]);
9393 auto expr = join(op, "(", to_expression(ops[2]), ", -", to_enclosed_expression(ops[5]), ")");
9394 emit_op(ops[0], ops[1], expr, should_forward(ops[2]) && should_forward(ops[5]));
9395 flush_all_atomic_capable_variables();
9396 break;
9397 }
9398
9399 case OpAtomicSMin:
9400 case OpAtomicUMin:
9401 {
9402 const char *op = check_atomic_image(ops[2]) ? "imageAtomicMin" : "atomicMin";
9403 forced_temporaries.insert(ops[1]);
9404 emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
9405 flush_all_atomic_capable_variables();
9406 break;
9407 }
9408
9409 case OpAtomicSMax:
9410 case OpAtomicUMax:
9411 {
9412 const char *op = check_atomic_image(ops[2]) ? "imageAtomicMax" : "atomicMax";
9413 forced_temporaries.insert(ops[1]);
9414 emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
9415 flush_all_atomic_capable_variables();
9416 break;
9417 }
9418
9419 case OpAtomicAnd:
9420 {
9421 const char *op = check_atomic_image(ops[2]) ? "imageAtomicAnd" : "atomicAnd";
9422 forced_temporaries.insert(ops[1]);
9423 emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
9424 flush_all_atomic_capable_variables();
9425 break;
9426 }
9427
9428 case OpAtomicOr:
9429 {
9430 const char *op = check_atomic_image(ops[2]) ? "imageAtomicOr" : "atomicOr";
9431 forced_temporaries.insert(ops[1]);
9432 emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
9433 flush_all_atomic_capable_variables();
9434 break;
9435 }
9436
9437 case OpAtomicXor:
9438 {
9439 const char *op = check_atomic_image(ops[2]) ? "imageAtomicXor" : "atomicXor";
9440 forced_temporaries.insert(ops[1]);
9441 emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
9442 flush_all_atomic_capable_variables();
9443 break;
9444 }
9445
9446 // Geometry shaders
9447 case OpEmitVertex:
9448 statement("EmitVertex();");
9449 break;
9450
9451 case OpEndPrimitive:
9452 statement("EndPrimitive();");
9453 break;
9454
9455 case OpEmitStreamVertex:
9456 		statement("EmitStreamVertex(", to_expression(ops[0]), ");");
9457 break;
9458
9459 case OpEndStreamPrimitive:
9460 		statement("EndStreamPrimitive(", to_expression(ops[0]), ");");
9461 break;
9462
9463 // Textures
9464 case OpImageSampleExplicitLod:
9465 case OpImageSampleProjExplicitLod:
9466 case OpImageSampleDrefExplicitLod:
9467 case OpImageSampleProjDrefExplicitLod:
9468 case OpImageSampleImplicitLod:
9469 case OpImageSampleProjImplicitLod:
9470 case OpImageSampleDrefImplicitLod:
9471 case OpImageSampleProjDrefImplicitLod:
9472 case OpImageFetch:
9473 case OpImageGather:
9474 case OpImageDrefGather:
9475 // Gets a bit hairy, so move this to a separate instruction.
9476 emit_texture_op(instruction);
9477 break;
9478
9479 case OpImage:
9480 {
9481 uint32_t result_type = ops[0];
9482 uint32_t id = ops[1];
9483
9484 // Suppress usage tracking.
9485 auto &e = emit_op(result_type, id, to_expression(ops[2]), true, true);
9486
9487 // When using the image, we need to know which variable it is actually loaded from.
9488 auto *var = maybe_get_backing_variable(ops[2]);
9489 e.loaded_from = var ? var->self : ID(0);
9490 break;
9491 }
9492
9493 case OpImageQueryLod:
9494 {
9495 if (!options.es && options.version < 400)
9496 {
9497 require_extension_internal("GL_ARB_texture_query_lod");
9498 // For some reason, the ARB spec is all-caps.
9499 GLSL_BFOP(textureQueryLOD);
9500 }
9501 else if (options.es)
9502 SPIRV_CROSS_THROW("textureQueryLod not supported in ES profile.");
9503 else
9504 GLSL_BFOP(textureQueryLod);
9505 register_control_dependent_expression(ops[1]);
9506 break;
9507 }
9508
9509 case OpImageQueryLevels:
9510 {
9511 uint32_t result_type = ops[0];
9512 uint32_t id = ops[1];
9513
9514 if (!options.es && options.version < 430)
9515 require_extension_internal("GL_ARB_texture_query_levels");
9516 if (options.es)
9517 SPIRV_CROSS_THROW("textureQueryLevels not supported in ES profile.");
9518
9519 auto expr = join("textureQueryLevels(", convert_separate_image_to_expression(ops[2]), ")");
9520 auto &restype = get<SPIRType>(ops[0]);
9521 expr = bitcast_expression(restype, SPIRType::Int, expr);
9522 emit_op(result_type, id, expr, true);
9523 break;
9524 }
9525
9526 case OpImageQuerySamples:
9527 {
9528 auto &type = expression_type(ops[2]);
9529 uint32_t result_type = ops[0];
9530 uint32_t id = ops[1];
9531
9532 string expr;
9533 if (type.image.sampled == 2)
9534 expr = join("imageSamples(", to_expression(ops[2]), ")");
9535 else
9536 expr = join("textureSamples(", convert_separate_image_to_expression(ops[2]), ")");
9537
9538 auto &restype = get<SPIRType>(ops[0]);
9539 expr = bitcast_expression(restype, SPIRType::Int, expr);
9540 emit_op(result_type, id, expr, true);
9541 break;
9542 }
9543
9544 case OpSampledImage:
9545 {
9546 uint32_t result_type = ops[0];
9547 uint32_t id = ops[1];
9548 emit_sampled_image_op(result_type, id, ops[2], ops[3]);
9549 inherit_expression_dependencies(id, ops[2]);
9550 inherit_expression_dependencies(id, ops[3]);
9551 break;
9552 }
9553
9554 case OpImageQuerySizeLod:
9555 {
9556 uint32_t result_type = ops[0];
9557 uint32_t id = ops[1];
9558
9559 auto expr = join("textureSize(", convert_separate_image_to_expression(ops[2]), ", ",
9560 bitcast_expression(SPIRType::Int, ops[3]), ")");
9561 auto &restype = get<SPIRType>(ops[0]);
9562 expr = bitcast_expression(restype, SPIRType::Int, expr);
9563 emit_op(result_type, id, expr, true);
9564 break;
9565 }
9566
9567 // Image load/store
9568 case OpImageRead:
9569 {
9570 // We added Nonreadable speculatively to the OpImage variable due to glslangValidator
9571 // not adding the proper qualifiers.
9572 // If it turns out we need to read the image after all, remove the qualifier and recompile.
9573 auto *var = maybe_get_backing_variable(ops[2]);
9574 if (var)
9575 {
9576 auto &flags = ir.meta[var->self].decoration.decoration_flags;
9577 if (flags.get(DecorationNonReadable))
9578 {
9579 flags.clear(DecorationNonReadable);
9580 force_recompile();
9581 }
9582 }
9583
9584 uint32_t result_type = ops[0];
9585 uint32_t id = ops[1];
9586
9587 bool pure;
9588 string imgexpr;
9589 auto &type = expression_type(ops[2]);
9590
9591 if (var && var->remapped_variable) // Remapped input, just read as-is without any op-code
9592 {
9593 if (type.image.ms)
9594 SPIRV_CROSS_THROW("Trying to remap multisampled image to variable, this is not possible.");
9595
9596 auto itr =
9597 find_if(begin(pls_inputs), end(pls_inputs), [var](const PlsRemap &pls) { return pls.id == var->self; });
9598
9599 if (itr == end(pls_inputs))
9600 {
9601 // For non-PLS inputs, we rely on subpass type remapping information to get it right
9602 // since ImageRead always returns 4-component vectors and the backing type is opaque.
9603 if (!var->remapped_components)
9604 SPIRV_CROSS_THROW("subpassInput was remapped, but remap_components is not set correctly.");
9605 imgexpr = remap_swizzle(get<SPIRType>(result_type), var->remapped_components, to_expression(ops[2]));
9606 }
9607 else
9608 {
9609 // PLS input could have different number of components than what the SPIR expects, swizzle to
9610 // the appropriate vector size.
9611 uint32_t components = pls_format_to_components(itr->format);
9612 imgexpr = remap_swizzle(get<SPIRType>(result_type), components, to_expression(ops[2]));
9613 }
9614 pure = true;
9615 }
9616 else if (type.image.dim == DimSubpassData)
9617 {
9618 if (options.vulkan_semantics)
9619 {
9620 // With Vulkan semantics, use the proper Vulkan GLSL construct.
9621 if (type.image.ms)
9622 {
9623 uint32_t operands = ops[4];
9624 if (operands != ImageOperandsSampleMask || length != 6)
9625 SPIRV_CROSS_THROW(
9626 "Multisampled image used in OpImageRead, but unexpected operand mask was used.");
9627
9628 uint32_t samples = ops[5];
9629 imgexpr = join("subpassLoad(", to_expression(ops[2]), ", ", to_expression(samples), ")");
9630 }
9631 else
9632 imgexpr = join("subpassLoad(", to_expression(ops[2]), ")");
9633 }
9634 else
9635 {
9636 if (type.image.ms)
9637 {
9638 uint32_t operands = ops[4];
9639 if (operands != ImageOperandsSampleMask || length != 6)
9640 SPIRV_CROSS_THROW(
9641 "Multisampled image used in OpImageRead, but unexpected operand mask was used.");
9642
9643 uint32_t samples = ops[5];
9644 imgexpr = join("texelFetch(", to_expression(ops[2]), ", ivec2(gl_FragCoord.xy), ",
9645 to_expression(samples), ")");
9646 }
9647 else
9648 {
9649 // Implement subpass loads via texture barrier style sampling.
9650 imgexpr = join("texelFetch(", to_expression(ops[2]), ", ivec2(gl_FragCoord.xy), 0)");
9651 }
9652 }
9653 imgexpr = remap_swizzle(get<SPIRType>(result_type), 4, imgexpr);
9654 pure = true;
9655 }
9656 else
9657 {
9658 // imageLoad only accepts int coords, not uint.
9659 auto coord_expr = to_expression(ops[3]);
9660 auto target_coord_type = expression_type(ops[3]);
9661 target_coord_type.basetype = SPIRType::Int;
9662 coord_expr = bitcast_expression(target_coord_type, expression_type(ops[3]).basetype, coord_expr);
9663
9664 // Plain image load/store.
9665 if (type.image.ms)
9666 {
9667 uint32_t operands = ops[4];
9668 if (operands != ImageOperandsSampleMask || length != 6)
9669 SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected operand mask was used.");
9670
9671 uint32_t samples = ops[5];
9672 imgexpr =
9673 join("imageLoad(", to_expression(ops[2]), ", ", coord_expr, ", ", to_expression(samples), ")");
9674 }
9675 else
9676 imgexpr = join("imageLoad(", to_expression(ops[2]), ", ", coord_expr, ")");
9677
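			// imageLoad() always returns a 4-component vector, so narrow the expression to the SPIR-V
			// result type when it has fewer components (remap_swizzle() appends a swizzle such as .xy).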
9678 imgexpr = remap_swizzle(get<SPIRType>(result_type), 4, imgexpr);
9679 pure = false;
9680 }
9681
9682 if (var && var->forwardable)
9683 {
9684 bool forward = forced_temporaries.find(id) == end(forced_temporaries);
9685 auto &e = emit_op(result_type, id, imgexpr, forward);
9686
9687 // We only need to track dependencies if we're reading from image load/store.
9688 if (!pure)
9689 {
9690 e.loaded_from = var->self;
9691 if (forward)
9692 var->dependees.push_back(id);
9693 }
9694 }
9695 else
9696 emit_op(result_type, id, imgexpr, false);
9697
9698 inherit_expression_dependencies(id, ops[2]);
9699 if (type.image.ms)
9700 inherit_expression_dependencies(id, ops[5]);
9701 break;
9702 }
9703
9704 case OpImageTexelPointer:
9705 {
9706 uint32_t result_type = ops[0];
9707 uint32_t id = ops[1];
9708
9709 auto coord_expr = to_expression(ops[3]);
9710 auto target_coord_type = expression_type(ops[3]);
9711 target_coord_type.basetype = SPIRType::Int;
9712 coord_expr = bitcast_expression(target_coord_type, expression_type(ops[3]).basetype, coord_expr);
9713
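		// The texel pointer is represented as the expression "image, coord"; atomic operations that
		// consume the pointer later splice this text directly into their imageAtomic*() calls.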
9714 auto &e = set<SPIRExpression>(id, join(to_expression(ops[2]), ", ", coord_expr), result_type, true);
9715
9716 // When using the pointer, we need to know which variable it is actually loaded from.
9717 auto *var = maybe_get_backing_variable(ops[2]);
9718 e.loaded_from = var ? var->self : ID(0);
9719 inherit_expression_dependencies(id, ops[3]);
9720 break;
9721 }
9722
9723 case OpImageWrite:
9724 {
9725 // We added Nonwritable speculatively to the OpImage variable due to glslangValidator
9726 // not adding the proper qualifiers.
9727 // If it turns out we need to write to the image after all, remove the qualifier and recompile.
9728 auto *var = maybe_get_backing_variable(ops[0]);
9729 if (var)
9730 {
9731 auto &flags = ir.meta[var->self].decoration.decoration_flags;
9732 if (flags.get(DecorationNonWritable))
9733 {
9734 flags.clear(DecorationNonWritable);
9735 force_recompile();
9736 }
9737 }
9738
9739 auto &type = expression_type(ops[0]);
9740 auto &value_type = expression_type(ops[2]);
9741 auto store_type = value_type;
9742 store_type.vecsize = 4;
9743
9744 // imageStore only accepts int coords, not uint.
9745 auto coord_expr = to_expression(ops[1]);
9746 auto target_coord_type = expression_type(ops[1]);
9747 target_coord_type.basetype = SPIRType::Int;
9748 coord_expr = bitcast_expression(target_coord_type, expression_type(ops[1]).basetype, coord_expr);
9749
9750 if (type.image.ms)
9751 {
9752 uint32_t operands = ops[3];
9753 if (operands != ImageOperandsSampleMask || length != 5)
9754 SPIRV_CROSS_THROW("Multisampled image used in OpImageWrite, but unexpected operand mask was used.");
9755 uint32_t samples = ops[4];
9756 statement("imageStore(", to_expression(ops[0]), ", ", coord_expr, ", ", to_expression(samples), ", ",
9757 remap_swizzle(store_type, value_type.vecsize, to_expression(ops[2])), ");");
9758 }
9759 else
9760 statement("imageStore(", to_expression(ops[0]), ", ", coord_expr, ", ",
9761 remap_swizzle(store_type, value_type.vecsize, to_expression(ops[2])), ");");
9762
9763 if (var && variable_storage_is_aliased(*var))
9764 flush_all_aliased_variables();
9765 break;
9766 }
9767
9768 case OpImageQuerySize:
9769 {
9770 auto &type = expression_type(ops[2]);
9771 uint32_t result_type = ops[0];
9772 uint32_t id = ops[1];
9773
9774 if (type.basetype == SPIRType::Image)
9775 {
9776 string expr;
9777 if (type.image.sampled == 2)
9778 {
9779 // The size of an image is always constant.
9780 expr = join("imageSize(", to_expression(ops[2]), ")");
9781 }
9782 else
9783 {
9784 // This path is hit for samplerBuffers and multisampled images which do not have LOD.
9785 expr = join("textureSize(", convert_separate_image_to_expression(ops[2]), ")");
9786 }
9787
9788 auto &restype = get<SPIRType>(ops[0]);
9789 expr = bitcast_expression(restype, SPIRType::Int, expr);
9790 emit_op(result_type, id, expr, true);
9791 }
9792 else
9793 SPIRV_CROSS_THROW("Invalid type for OpImageQuerySize.");
9794 break;
9795 }
9796
9797 // Compute
9798 case OpControlBarrier:
9799 case OpMemoryBarrier:
9800 {
9801 uint32_t execution_scope = 0;
9802 uint32_t memory;
9803 uint32_t semantics;
9804
9805 if (opcode == OpMemoryBarrier)
9806 {
9807 memory = get<SPIRConstant>(ops[0]).scalar();
9808 semantics = get<SPIRConstant>(ops[1]).scalar();
9809 }
9810 else
9811 {
9812 execution_scope = get<SPIRConstant>(ops[0]).scalar();
9813 memory = get<SPIRConstant>(ops[1]).scalar();
9814 semantics = get<SPIRConstant>(ops[2]).scalar();
9815 }
9816
9817 if (execution_scope == ScopeSubgroup || memory == ScopeSubgroup)
9818 {
9819 if (!options.vulkan_semantics)
9820 SPIRV_CROSS_THROW("Can only use subgroup operations in Vulkan semantics.");
9821 require_extension_internal("GL_KHR_shader_subgroup_basic");
9822 }
9823
9824 if (execution_scope != ScopeSubgroup && get_entry_point().model == ExecutionModelTessellationControl)
9825 {
9826			// Tessellation control shaders only have barrier(), and it implies the necessary memory barriers.
9827 if (opcode == OpControlBarrier)
9828 statement("barrier();");
9829 break;
9830 }
9831
9832 // We only care about these flags, acquire/release and friends are not relevant to GLSL.
9833 semantics = mask_relevant_memory_semantics(semantics);
9834
9835 if (opcode == OpMemoryBarrier)
9836 {
9837			// If we are a memory barrier, and the next instruction is a control barrier, check whether that control
9838			// barrier's memory semantics already cover what we need, so we can avoid emitting a redundant memory barrier.
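			// Example: OpMemoryBarrier(Workgroup, WorkgroupMemory) immediately followed by
			// OpControlBarrier(Workgroup, Workgroup, WorkgroupMemory) emits nothing here, since the
			// control barrier's own semantics already cover the shared-memory synchronization.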
9839 const Instruction *next = get_next_instruction_in_block(instruction);
9840 if (next && next->op == OpControlBarrier)
9841 {
9842 auto *next_ops = stream(*next);
9843 uint32_t next_memory = get<SPIRConstant>(next_ops[1]).scalar();
9844 uint32_t next_semantics = get<SPIRConstant>(next_ops[2]).scalar();
9845 next_semantics = mask_relevant_memory_semantics(next_semantics);
9846
9847 bool memory_scope_covered = false;
9848 if (next_memory == memory)
9849 memory_scope_covered = true;
9850 else if (next_semantics == MemorySemanticsWorkgroupMemoryMask)
9851 {
9852 // If we only care about workgroup memory, either Device or Workgroup scope is fine,
9853 // scope does not have to match.
9854 if ((next_memory == ScopeDevice || next_memory == ScopeWorkgroup) &&
9855 (memory == ScopeDevice || memory == ScopeWorkgroup))
9856 {
9857 memory_scope_covered = true;
9858 }
9859 }
9860 else if (memory == ScopeWorkgroup && next_memory == ScopeDevice)
9861 {
9862 // The control barrier has device scope, but the memory barrier just has workgroup scope.
9863 memory_scope_covered = true;
9864 }
9865
9866 // If we have the same memory scope, and all memory types are covered, we're good.
9867 if (memory_scope_covered && (semantics & next_semantics) == semantics)
9868 break;
9869 }
9870 }
9871
9872 // We are synchronizing some memory or syncing execution,
9873 // so we cannot forward any loads beyond the memory barrier.
9874 if (semantics || opcode == OpControlBarrier)
9875 {
9876 assert(current_emitting_block);
9877 flush_control_dependent_expressions(current_emitting_block->self);
9878 flush_all_active_variables();
9879 }
9880
9881 if (memory == ScopeWorkgroup) // Only need to consider memory within a group
9882 {
9883 if (semantics == MemorySemanticsWorkgroupMemoryMask)
9884 statement("memoryBarrierShared();");
9885 else if (semantics != 0)
9886 statement("groupMemoryBarrier();");
9887 }
9888 else if (memory == ScopeSubgroup)
9889 {
9890 const uint32_t all_barriers =
9891 MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | MemorySemanticsImageMemoryMask;
9892
9893 if (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask))
9894 {
9895				// These are not directly expressible in GLSL, so conservatively emit subgroupMemoryBarrier().
9896				// subgroupMemoryBarrier() covers everything, so no need to test anything else.
9897 statement("subgroupMemoryBarrier();");
9898 }
9899 else if ((semantics & all_barriers) == all_barriers)
9900 {
9901 // Short-hand instead of emitting 3 barriers.
9902 statement("subgroupMemoryBarrier();");
9903 }
9904 else
9905 {
9906 // Pick out individual barriers.
9907 if (semantics & MemorySemanticsWorkgroupMemoryMask)
9908 statement("subgroupMemoryBarrierShared();");
9909 if (semantics & MemorySemanticsUniformMemoryMask)
9910 statement("subgroupMemoryBarrierBuffer();");
9911 if (semantics & MemorySemanticsImageMemoryMask)
9912 statement("subgroupMemoryBarrierImage();");
9913 }
9914 }
9915 else
9916 {
9917 const uint32_t all_barriers = MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask |
9918 MemorySemanticsImageMemoryMask | MemorySemanticsAtomicCounterMemoryMask;
9919
9920 if (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask))
9921 {
9922 // These are not relevant for GLSL, but assume it means memoryBarrier().
9923 // memoryBarrier() does everything, so no need to test anything else.
9924 statement("memoryBarrier();");
9925 }
9926 else if ((semantics & all_barriers) == all_barriers)
9927 {
9928 // Short-hand instead of emitting 4 barriers.
9929 statement("memoryBarrier();");
9930 }
9931 else
9932 {
9933 // Pick out individual barriers.
9934 if (semantics & MemorySemanticsWorkgroupMemoryMask)
9935 statement("memoryBarrierShared();");
9936 if (semantics & MemorySemanticsUniformMemoryMask)
9937 statement("memoryBarrierBuffer();");
9938 if (semantics & MemorySemanticsImageMemoryMask)
9939 statement("memoryBarrierImage();");
9940 if (semantics & MemorySemanticsAtomicCounterMemoryMask)
9941 statement("memoryBarrierAtomicCounter();");
9942 }
9943 }
9944
9945 if (opcode == OpControlBarrier)
9946 {
9947 if (execution_scope == ScopeSubgroup)
9948 statement("subgroupBarrier();");
9949 else
9950 statement("barrier();");
9951 }
9952 break;
9953 }
9954
9955 case OpExtInst:
9956 {
9957 uint32_t extension_set = ops[2];
9958
9959 if (get<SPIRExtension>(extension_set).ext == SPIRExtension::GLSL)
9960 {
9961 emit_glsl_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
9962 }
9963 else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_shader_ballot)
9964 {
9965 emit_spv_amd_shader_ballot_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
9966 }
9967 else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_shader_explicit_vertex_parameter)
9968 {
9969 emit_spv_amd_shader_explicit_vertex_parameter_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
9970 }
9971 else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_shader_trinary_minmax)
9972 {
9973 emit_spv_amd_shader_trinary_minmax_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
9974 }
9975 else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_gcn_shader)
9976 {
9977 emit_spv_amd_gcn_shader_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
9978 }
9979 else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_debug_info)
9980 {
9981 break; // Ignore SPIR-V debug information extended instructions.
9982 }
9983 else
9984 {
9985 statement("// unimplemented ext op ", instruction.op);
9986 break;
9987 }
9988
9989 break;
9990 }
9991
9992 // Legacy sub-group stuff ...
9993 case OpSubgroupBallotKHR:
9994 {
9995 uint32_t result_type = ops[0];
9996 uint32_t id = ops[1];
9997 string expr;
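		// ballotARB() returns a 64-bit mask (uint64_t), while OpSubgroupBallotKHR expects a uvec4;
		// unpack the mask into two 32-bit components and zero-pad the upper half.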
9998 expr = join("uvec4(unpackUint2x32(ballotARB(" + to_expression(ops[2]) + ")), 0u, 0u)");
9999 emit_op(result_type, id, expr, should_forward(ops[2]));
10000
10001 require_extension_internal("GL_ARB_shader_ballot");
10002 inherit_expression_dependencies(id, ops[2]);
10003 register_control_dependent_expression(ops[1]);
10004 break;
10005 }
10006
10007 case OpSubgroupFirstInvocationKHR:
10008 {
10009 uint32_t result_type = ops[0];
10010 uint32_t id = ops[1];
10011 emit_unary_func_op(result_type, id, ops[2], "readFirstInvocationARB");
10012
10013 require_extension_internal("GL_ARB_shader_ballot");
10014 register_control_dependent_expression(ops[1]);
10015 break;
10016 }
10017
10018 case OpSubgroupReadInvocationKHR:
10019 {
10020 uint32_t result_type = ops[0];
10021 uint32_t id = ops[1];
10022 emit_binary_func_op(result_type, id, ops[2], ops[3], "readInvocationARB");
10023
10024 require_extension_internal("GL_ARB_shader_ballot");
10025 register_control_dependent_expression(ops[1]);
10026 break;
10027 }
10028
10029 case OpSubgroupAllKHR:
10030 {
10031 uint32_t result_type = ops[0];
10032 uint32_t id = ops[1];
10033 emit_unary_func_op(result_type, id, ops[2], "allInvocationsARB");
10034
10035 require_extension_internal("GL_ARB_shader_group_vote");
10036 register_control_dependent_expression(ops[1]);
10037 break;
10038 }
10039
10040 case OpSubgroupAnyKHR:
10041 {
10042 uint32_t result_type = ops[0];
10043 uint32_t id = ops[1];
10044 emit_unary_func_op(result_type, id, ops[2], "anyInvocationARB");
10045
10046 require_extension_internal("GL_ARB_shader_group_vote");
10047 register_control_dependent_expression(ops[1]);
10048 break;
10049 }
10050
10051 case OpSubgroupAllEqualKHR:
10052 {
10053 uint32_t result_type = ops[0];
10054 uint32_t id = ops[1];
10055 emit_unary_func_op(result_type, id, ops[2], "allInvocationsEqualARB");
10056
10057 require_extension_internal("GL_ARB_shader_group_vote");
10058 register_control_dependent_expression(ops[1]);
10059 break;
10060 }
10061
10062 case OpGroupIAddNonUniformAMD:
10063 case OpGroupFAddNonUniformAMD:
10064 {
10065 uint32_t result_type = ops[0];
10066 uint32_t id = ops[1];
10067 emit_unary_func_op(result_type, id, ops[4], "addInvocationsNonUniformAMD");
10068
10069 require_extension_internal("GL_AMD_shader_ballot");
10070 register_control_dependent_expression(ops[1]);
10071 break;
10072 }
10073
10074 case OpGroupFMinNonUniformAMD:
10075 case OpGroupUMinNonUniformAMD:
10076 case OpGroupSMinNonUniformAMD:
10077 {
10078 uint32_t result_type = ops[0];
10079 uint32_t id = ops[1];
10080 emit_unary_func_op(result_type, id, ops[4], "minInvocationsNonUniformAMD");
10081
10082 require_extension_internal("GL_AMD_shader_ballot");
10083 register_control_dependent_expression(ops[1]);
10084 break;
10085 }
10086
10087 case OpGroupFMaxNonUniformAMD:
10088 case OpGroupUMaxNonUniformAMD:
10089 case OpGroupSMaxNonUniformAMD:
10090 {
10091 uint32_t result_type = ops[0];
10092 uint32_t id = ops[1];
10093 emit_unary_func_op(result_type, id, ops[4], "maxInvocationsNonUniformAMD");
10094
10095 require_extension_internal("GL_AMD_shader_ballot");
10096 register_control_dependent_expression(ops[1]);
10097 break;
10098 }
10099
10100 case OpFragmentMaskFetchAMD:
10101 {
10102 auto &type = expression_type(ops[2]);
10103 uint32_t result_type = ops[0];
10104 uint32_t id = ops[1];
10105
10106 if (type.image.dim == spv::DimSubpassData)
10107 {
10108 emit_unary_func_op(result_type, id, ops[2], "fragmentMaskFetchAMD");
10109 }
10110 else
10111 {
10112 emit_binary_func_op(result_type, id, ops[2], ops[3], "fragmentMaskFetchAMD");
10113 }
10114
10115 require_extension_internal("GL_AMD_shader_fragment_mask");
10116 break;
10117 }
10118
10119 case OpFragmentFetchAMD:
10120 {
10121 auto &type = expression_type(ops[2]);
10122 uint32_t result_type = ops[0];
10123 uint32_t id = ops[1];
10124
10125 if (type.image.dim == spv::DimSubpassData)
10126 {
10127 emit_binary_func_op(result_type, id, ops[2], ops[4], "fragmentFetchAMD");
10128 }
10129 else
10130 {
10131 emit_trinary_func_op(result_type, id, ops[2], ops[3], ops[4], "fragmentFetchAMD");
10132 }
10133
10134 require_extension_internal("GL_AMD_shader_fragment_mask");
10135 break;
10136 }
10137
10138 // Vulkan 1.1 sub-group stuff ...
10139 case OpGroupNonUniformElect:
10140 case OpGroupNonUniformBroadcast:
10141 case OpGroupNonUniformBroadcastFirst:
10142 case OpGroupNonUniformBallot:
10143 case OpGroupNonUniformInverseBallot:
10144 case OpGroupNonUniformBallotBitExtract:
10145 case OpGroupNonUniformBallotBitCount:
10146 case OpGroupNonUniformBallotFindLSB:
10147 case OpGroupNonUniformBallotFindMSB:
10148 case OpGroupNonUniformShuffle:
10149 case OpGroupNonUniformShuffleXor:
10150 case OpGroupNonUniformShuffleUp:
10151 case OpGroupNonUniformShuffleDown:
10152 case OpGroupNonUniformAll:
10153 case OpGroupNonUniformAny:
10154 case OpGroupNonUniformAllEqual:
10155 case OpGroupNonUniformFAdd:
10156 case OpGroupNonUniformIAdd:
10157 case OpGroupNonUniformFMul:
10158 case OpGroupNonUniformIMul:
10159 case OpGroupNonUniformFMin:
10160 case OpGroupNonUniformFMax:
10161 case OpGroupNonUniformSMin:
10162 case OpGroupNonUniformSMax:
10163 case OpGroupNonUniformUMin:
10164 case OpGroupNonUniformUMax:
10165 case OpGroupNonUniformBitwiseAnd:
10166 case OpGroupNonUniformBitwiseOr:
10167 case OpGroupNonUniformBitwiseXor:
10168 case OpGroupNonUniformQuadSwap:
10169 case OpGroupNonUniformQuadBroadcast:
10170 emit_subgroup_op(instruction);
10171 break;
10172
10173 case OpFUnordEqual:
10174 case OpFUnordNotEqual:
10175 case OpFUnordLessThan:
10176 case OpFUnordGreaterThan:
10177 case OpFUnordLessThanEqual:
10178 case OpFUnordGreaterThanEqual:
10179 {
10180 // GLSL doesn't specify if floating point comparisons are ordered or unordered,
10181 // but glslang always emits ordered floating point compares for GLSL.
10182 // To get unordered compares, we can test the opposite thing and invert the result.
10183 // This way, we force true when there is any NaN present.
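		// For example, OpFUnordLessThan(a, b) becomes !(a >= b), which is true when a < b or when
		// either operand is NaN.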
10184 uint32_t op0 = ops[2];
10185 uint32_t op1 = ops[3];
10186
10187 string expr;
10188 if (expression_type(op0).vecsize > 1)
10189 {
10190 const char *comp_op = nullptr;
10191 switch (opcode)
10192 {
10193 case OpFUnordEqual:
10194 comp_op = "notEqual";
10195 break;
10196
10197 case OpFUnordNotEqual:
10198 comp_op = "equal";
10199 break;
10200
10201 case OpFUnordLessThan:
10202 comp_op = "greaterThanEqual";
10203 break;
10204
10205 case OpFUnordLessThanEqual:
10206 comp_op = "greaterThan";
10207 break;
10208
10209 case OpFUnordGreaterThan:
10210 comp_op = "lessThanEqual";
10211 break;
10212
10213 case OpFUnordGreaterThanEqual:
10214 comp_op = "lessThan";
10215 break;
10216
10217 default:
10218 assert(0);
10219 break;
10220 }
10221
10222 expr = join("not(", comp_op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), "))");
10223 }
10224 else
10225 {
10226 const char *comp_op = nullptr;
10227 switch (opcode)
10228 {
10229 case OpFUnordEqual:
10230 comp_op = " != ";
10231 break;
10232
10233 case OpFUnordNotEqual:
10234 comp_op = " == ";
10235 break;
10236
10237 case OpFUnordLessThan:
10238 comp_op = " >= ";
10239 break;
10240
10241 case OpFUnordLessThanEqual:
10242 comp_op = " > ";
10243 break;
10244
10245 case OpFUnordGreaterThan:
10246 comp_op = " <= ";
10247 break;
10248
10249 case OpFUnordGreaterThanEqual:
10250 comp_op = " < ";
10251 break;
10252
10253 default:
10254 assert(0);
10255 break;
10256 }
10257
10258 expr = join("!(", to_enclosed_unpacked_expression(op0), comp_op, to_enclosed_unpacked_expression(op1), ")");
10259 }
10260
10261 emit_op(ops[0], ops[1], expr, should_forward(op0) && should_forward(op1));
10262 inherit_expression_dependencies(ops[1], op0);
10263 inherit_expression_dependencies(ops[1], op1);
10264 break;
10265 }
10266
10267 case OpReportIntersectionNV:
10268 statement("reportIntersectionNV(", to_expression(ops[0]), ", ", to_expression(ops[1]), ");");
10269 break;
10270 case OpIgnoreIntersectionNV:
10271 statement("ignoreIntersectionNV();");
10272 break;
10273 case OpTerminateRayNV:
10274 statement("terminateRayNV();");
10275 break;
10276 case OpTraceNV:
10277 statement("traceNV(", to_expression(ops[0]), ", ", to_expression(ops[1]), ", ", to_expression(ops[2]), ", ",
10278 to_expression(ops[3]), ", ", to_expression(ops[4]), ", ", to_expression(ops[5]), ", ",
10279 to_expression(ops[6]), ", ", to_expression(ops[7]), ", ", to_expression(ops[8]), ", ",
10280 to_expression(ops[9]), ", ", to_expression(ops[10]), ");");
10281 break;
10282 case OpExecuteCallableNV:
10283 statement("executeCallableNV(", to_expression(ops[0]), ", ", to_expression(ops[1]), ");");
10284 break;
10285
10286 case OpConvertUToPtr:
10287 {
10288 auto &type = get<SPIRType>(ops[0]);
10289 if (type.storage != StorageClassPhysicalStorageBufferEXT)
10290 SPIRV_CROSS_THROW("Only StorageClassPhysicalStorageBufferEXT is supported by OpConvertUToPtr.");
10291
10292 auto op = type_to_glsl(type);
10293 emit_unary_func_op(ops[0], ops[1], ops[2], op.c_str());
10294 break;
10295 }
10296
10297 case OpConvertPtrToU:
10298 {
10299 auto &type = get<SPIRType>(ops[0]);
10300 auto &ptr_type = expression_type(ops[2]);
10301 if (ptr_type.storage != StorageClassPhysicalStorageBufferEXT)
10302 SPIRV_CROSS_THROW("Only StorageClassPhysicalStorageBufferEXT is supported by OpConvertPtrToU.");
10303
10304 auto op = type_to_glsl(type);
10305 emit_unary_func_op(ops[0], ops[1], ops[2], op.c_str());
10306 break;
10307 }
10308
10309 case OpUndef:
10310 // Undefined value has been declared.
10311 break;
10312
10313 case OpLine:
10314 {
10315 emit_line_directive(ops[0], ops[1]);
10316 break;
10317 }
10318
10319 case OpNoLine:
10320 break;
10321
10322 case OpDemoteToHelperInvocationEXT:
10323 if (!options.vulkan_semantics)
10324 SPIRV_CROSS_THROW("GL_EXT_demote_to_helper_invocation is only supported in Vulkan GLSL.");
10325 require_extension_internal("GL_EXT_demote_to_helper_invocation");
10326 statement(backend.demote_literal, ";");
10327 break;
10328
10329 case OpIsHelperInvocationEXT:
10330 if (!options.vulkan_semantics)
10331 SPIRV_CROSS_THROW("GL_EXT_demote_to_helper_invocation is only supported in Vulkan GLSL.");
10332 require_extension_internal("GL_EXT_demote_to_helper_invocation");
10333 emit_op(ops[0], ops[1], "helperInvocationEXT()", false);
10334 break;
10335
10336 case OpBeginInvocationInterlockEXT:
10337 // If the interlock is complex, we emit this elsewhere.
10338 if (!interlocked_is_complex)
10339 {
10340 if (options.es)
10341 statement("beginInvocationInterlockNV();");
10342 else
10343 statement("beginInvocationInterlockARB();");
10344
10345 flush_all_active_variables();
10346 // Make sure forwarding doesn't propagate outside interlock region.
10347 }
10348 break;
10349
10350 case OpEndInvocationInterlockEXT:
10351 // If the interlock is complex, we emit this elsewhere.
10352 if (!interlocked_is_complex)
10353 {
10354 if (options.es)
10355 statement("endInvocationInterlockNV();");
10356 else
10357 statement("endInvocationInterlockARB();");
10358
10359 flush_all_active_variables();
10360 // Make sure forwarding doesn't propagate outside interlock region.
10361 }
10362 break;
10363
10364 default:
10365 statement("// unimplemented op ", instruction.op);
10366 break;
10367 }
10368 }
10369
10370 // Appends function arguments, mapped from global variables, beyond the specified arg index.
10371 // This is used when a function call uses fewer arguments than the function defines.
10372 // This situation may occur if the function signature has been dynamically modified to
10373 // extract global variables referenced from within the function, and convert them to
10374 // function arguments. This is necessary for shader languages that do not support global
10375 // access to shader input content from within a function (e.g. Metal). Each additional
10376 // function arg uses the name of the global variable. Function nesting will modify the
10377 // functions and function calls all the way up the nesting chain.
10378 void CompilerGLSL::append_global_func_args(const SPIRFunction &func, uint32_t index, SmallVector<string> &arglist)
10379 {
10380 auto &args = func.arguments;
10381 uint32_t arg_cnt = uint32_t(args.size());
10382 for (uint32_t arg_idx = index; arg_idx < arg_cnt; arg_idx++)
10383 {
10384 auto &arg = args[arg_idx];
10385 assert(arg.alias_global_variable);
10386
10387 // If the underlying variable needs to be declared
10388		// (i.e. a local variable with deferred declaration), do so now.
10389 uint32_t var_id = get<SPIRVariable>(arg.id).basevariable;
10390 if (var_id)
10391 flush_variable_declaration(var_id);
10392
10393 arglist.push_back(to_func_call_arg(arg, arg.id));
10394 }
10395 }
10396
10397 string CompilerGLSL::to_member_name(const SPIRType &type, uint32_t index)
10398 {
10399 if (type.type_alias != TypeID(0) &&
10400 !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked))
10401 {
10402 return to_member_name(get<SPIRType>(type.type_alias), index);
10403 }
10404
10405 auto &memb = ir.meta[type.self].members;
10406 if (index < memb.size() && !memb[index].alias.empty())
10407 return memb[index].alias;
10408 else
10409 return join("_m", index);
10410 }
10411
10412 string CompilerGLSL::to_member_reference(uint32_t, const SPIRType &type, uint32_t index, bool)
10413 {
10414 return join(".", to_member_name(type, index));
10415 }
10416
10417 void CompilerGLSL::add_member_name(SPIRType &type, uint32_t index)
10418 {
10419 auto &memb = ir.meta[type.self].members;
10420 if (index < memb.size() && !memb[index].alias.empty())
10421 {
10422 auto &name = memb[index].alias;
10423 if (name.empty())
10424 return;
10425
10426 // Reserved for temporaries.
10427 if (name[0] == '_' && name.size() >= 2 && isdigit(name[1]))
10428 {
10429 name.clear();
10430 return;
10431 }
10432
10433 update_name_cache(type.member_name_cache, name);
10434 }
10435 }
10436
10437 // Checks whether the ID is a row_major matrix that requires conversion before use
10438 bool CompilerGLSL::is_non_native_row_major_matrix(uint32_t id)
10439 {
10440 // Natively supported row-major matrices do not need to be converted.
10441 // Legacy targets do not support row major.
10442 if (backend.native_row_major_matrix && !is_legacy())
10443 return false;
10444
10445 // Non-matrix or column-major matrix types do not need to be converted.
10446 if (!has_decoration(id, DecorationRowMajor))
10447 return false;
10448
10449 // Only square row-major matrices can be converted at this time.
10450	// Converting non-square matrices will require defining a custom GLSL function that
10451 // swaps matrix elements while retaining the original dimensional form of the matrix.
10452 const auto type = expression_type(id);
10453 if (type.columns != type.vecsize)
10454 SPIRV_CROSS_THROW("Row-major matrices must be square on this platform.");
10455
10456 return true;
10457 }
10458
10459 // Checks whether the member is a row_major matrix that requires conversion before use
10460 bool CompilerGLSL::member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index)
10461 {
10462 // Natively supported row-major matrices do not need to be converted.
10463 if (backend.native_row_major_matrix && !is_legacy())
10464 return false;
10465
10466 // Non-matrix or column-major matrix types do not need to be converted.
10467 if (!has_member_decoration(type.self, index, DecorationRowMajor))
10468 return false;
10469
10470 // Only square row-major matrices can be converted at this time.
10471	// Converting non-square matrices will require defining a custom GLSL function that
10472 // swaps matrix elements while retaining the original dimensional form of the matrix.
10473 const auto mbr_type = get<SPIRType>(type.member_types[index]);
10474 if (mbr_type.columns != mbr_type.vecsize)
10475 SPIRV_CROSS_THROW("Row-major matrices must be square on this platform.");
10476
10477 return true;
10478 }
10479
10480 // Checks if we need to remap physical type IDs when declaring the type in a buffer.
10481 bool CompilerGLSL::member_is_remapped_physical_type(const SPIRType &type, uint32_t index) const
10482 {
10483 return has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPhysicalTypeID);
10484 }
10485
10486 // Checks whether the member is in packed data type, that might need to be unpacked.
10487 bool CompilerGLSL::member_is_packed_physical_type(const SPIRType &type, uint32_t index) const
10488 {
10489 return has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPhysicalTypePacked);
10490 }
10491
10492 // Wraps the expression string in a function call that converts the
10493 // row_major matrix result of the expression to a column_major matrix.
10494 // Base implementation uses the standard library transpose() function.
10495 // Subclasses may override to use a different function.
10496 string CompilerGLSL::convert_row_major_matrix(string exp_str, const SPIRType &exp_type, uint32_t /* physical_type_id */,
10497 bool /*is_packed*/)
10498 {
10499 strip_enclosed_expression(exp_str);
10500 if (!is_matrix(exp_type))
10501 {
10502 auto column_index = exp_str.find_last_of('[');
10503 if (column_index == string::npos)
10504 return exp_str;
10505
10506 auto column_expr = exp_str.substr(column_index);
10507 exp_str.resize(column_index);
10508
10509 auto transposed_expr = type_to_glsl_constructor(exp_type) + "(";
10510
10511 // Loading a column from a row-major matrix. Unroll the load.
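		// e.g. loading column 1 of a row-major mat3 M becomes vec3(M[0][1], M[1][1], M[2][1]).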
10512 for (uint32_t c = 0; c < exp_type.vecsize; c++)
10513 {
10514 transposed_expr += join(exp_str, '[', c, ']', column_expr);
10515 if (c + 1 < exp_type.vecsize)
10516 transposed_expr += ", ";
10517 }
10518
10519 transposed_expr += ")";
10520 return transposed_expr;
10521 }
10522 else
10523 return join("transpose(", exp_str, ")");
10524 }
10525
10526 string CompilerGLSL::variable_decl(const SPIRType &type, const string &name, uint32_t id)
10527 {
10528 string type_name = type_to_glsl(type, id);
10529 remap_variable_type_name(type, name, type_name);
10530 return join(type_name, " ", name, type_to_array_glsl(type));
10531 }
10532
10533 // Emit a structure member. Subclasses may override to modify output,
10534 // or to dynamically add a padding member if needed.
10535 void CompilerGLSL::emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index,
10536 const string &qualifier, uint32_t)
10537 {
10538 auto &membertype = get<SPIRType>(member_type_id);
10539
10540 Bitset memberflags;
10541 auto &memb = ir.meta[type.self].members;
10542 if (index < memb.size())
10543 memberflags = memb[index].decoration_flags;
10544
10545 string qualifiers;
10546 bool is_block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) ||
10547 ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
10548
10549 if (is_block)
10550 qualifiers = to_interpolation_qualifiers(memberflags);
10551
10552 statement(layout_for_member(type, index), qualifiers, qualifier, flags_to_qualifiers_glsl(membertype, memberflags),
10553 variable_decl(membertype, to_member_name(type, index)), ";");
10554 }
10555
10556 void CompilerGLSL::emit_struct_padding_target(const SPIRType &)
10557 {
10558 }
10559
10560 const char *CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, const Bitset &flags)
10561 {
10562 // GL_EXT_buffer_reference variables can be marked as restrict.
10563 if (flags.get(DecorationRestrictPointerEXT))
10564 return "restrict ";
10565
10566	// Structs do not have precision qualifiers, nor do doubles (desktop only anyway, so no mediump/highp).
10567 if (type.basetype != SPIRType::Float && type.basetype != SPIRType::Int && type.basetype != SPIRType::UInt &&
10568 type.basetype != SPIRType::Image && type.basetype != SPIRType::SampledImage &&
10569 type.basetype != SPIRType::Sampler)
10570 return "";
10571
10572 if (options.es)
10573 {
10574 auto &execution = get_entry_point();
10575
10576 if (flags.get(DecorationRelaxedPrecision))
10577 {
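			// If the stage's default precision already matches (e.g. mediump float in a fragment shader),
			// the qualifier is implied and nothing needs to be emitted.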
10578 bool implied_fmediump = type.basetype == SPIRType::Float &&
10579 options.fragment.default_float_precision == Options::Mediump &&
10580 execution.model == ExecutionModelFragment;
10581
10582 bool implied_imediump = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) &&
10583 options.fragment.default_int_precision == Options::Mediump &&
10584 execution.model == ExecutionModelFragment;
10585
10586 return implied_fmediump || implied_imediump ? "" : "mediump ";
10587 }
10588 else
10589 {
10590 bool implied_fhighp =
10591 type.basetype == SPIRType::Float && ((options.fragment.default_float_precision == Options::Highp &&
10592 execution.model == ExecutionModelFragment) ||
10593 (execution.model != ExecutionModelFragment));
10594
10595 bool implied_ihighp = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) &&
10596 ((options.fragment.default_int_precision == Options::Highp &&
10597 execution.model == ExecutionModelFragment) ||
10598 (execution.model != ExecutionModelFragment));
10599
10600 return implied_fhighp || implied_ihighp ? "" : "highp ";
10601 }
10602 }
10603 else if (backend.allow_precision_qualifiers)
10604 {
10605 // Vulkan GLSL supports precision qualifiers, even in desktop profiles, which is convenient.
10606 // The default is highp however, so only emit mediump in the rare case that a shader has these.
10607 if (flags.get(DecorationRelaxedPrecision))
10608 return "mediump ";
10609 else
10610 return "";
10611 }
10612 else
10613 return "";
10614 }
10615
10616 const char *CompilerGLSL::to_precision_qualifiers_glsl(uint32_t id)
10617 {
10618 auto &type = expression_type(id);
10619 bool use_precision_qualifiers = backend.allow_precision_qualifiers || options.es;
10620 if (use_precision_qualifiers && (type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage))
10621 {
10622 // Force mediump for the sampler type. We cannot declare 16-bit or smaller image types.
10623 auto &result_type = get<SPIRType>(type.image.type);
10624 if (result_type.width < 32)
10625 return "mediump ";
10626 }
10627 return flags_to_qualifiers_glsl(type, ir.meta[id].decoration.decoration_flags);
10628 }
10629
10630 string CompilerGLSL::to_qualifiers_glsl(uint32_t id)
10631 {
10632 auto &flags = ir.meta[id].decoration.decoration_flags;
10633 string res;
10634
10635 auto *var = maybe_get<SPIRVariable>(id);
10636
10637 if (var && var->storage == StorageClassWorkgroup && !backend.shared_is_implied)
10638 res += "shared ";
10639
10640 res += to_interpolation_qualifiers(flags);
10641 if (var)
10642 res += to_storage_qualifiers_glsl(*var);
10643
10644 auto &type = expression_type(id);
10645 if (type.image.dim != DimSubpassData && type.image.sampled == 2)
10646 {
10647 if (flags.get(DecorationCoherent))
10648 res += "coherent ";
10649 if (flags.get(DecorationRestrict))
10650 res += "restrict ";
10651 if (flags.get(DecorationNonWritable))
10652 res += "readonly ";
10653 if (flags.get(DecorationNonReadable))
10654 res += "writeonly ";
10655 }
10656
10657 res += to_precision_qualifiers_glsl(id);
10658
10659 return res;
10660 }
10661
10662 string CompilerGLSL::argument_decl(const SPIRFunction::Parameter &arg)
10663 {
10664	// glslangValidator seems to make all arguments pointers no matter what, which is rather bizarre ...
10665 auto &type = expression_type(arg.id);
10666 const char *direction = "";
10667
10668 if (type.pointer)
10669 {
10670 if (arg.write_count && arg.read_count)
10671 direction = "inout ";
10672 else if (arg.write_count)
10673 direction = "out ";
10674 }
10675
10676 return join(direction, to_qualifiers_glsl(arg.id), variable_decl(type, to_name(arg.id), arg.id));
10677 }
10678
10679 string CompilerGLSL::to_initializer_expression(const SPIRVariable &var)
10680 {
10681 return to_expression(var.initializer);
10682 }
10683
10684 string CompilerGLSL::variable_decl(const SPIRVariable &variable)
10685 {
10686 // Ignore the pointer type since GLSL doesn't have pointers.
10687 auto &type = get_variable_data_type(variable);
10688
10689 if (type.pointer_depth > 1)
10690 SPIRV_CROSS_THROW("Cannot declare pointer-to-pointer types.");
10691
10692 auto res = join(to_qualifiers_glsl(variable.self), variable_decl(type, to_name(variable.self), variable.self));
10693
10694 if (variable.loop_variable && variable.static_expression)
10695 {
10696 uint32_t expr = variable.static_expression;
10697 if (ir.ids[expr].get_type() != TypeUndef)
10698 res += join(" = ", to_expression(variable.static_expression));
10699 }
10700 else if (variable.initializer)
10701 {
10702 uint32_t expr = variable.initializer;
10703 if (ir.ids[expr].get_type() != TypeUndef)
10704 res += join(" = ", to_initializer_expression(variable));
10705 }
10706 return res;
10707 }
10708
10709 const char *CompilerGLSL::to_pls_qualifiers_glsl(const SPIRVariable &variable)
10710 {
10711 auto &flags = ir.meta[variable.self].decoration.decoration_flags;
10712 if (flags.get(DecorationRelaxedPrecision))
10713 return "mediump ";
10714 else
10715 return "highp ";
10716 }
10717
10718 string CompilerGLSL::pls_decl(const PlsRemap &var)
10719 {
10720 auto &variable = get<SPIRVariable>(var.id);
10721
10722 SPIRType type;
10723 type.vecsize = pls_format_to_components(var.format);
10724 type.basetype = pls_format_to_basetype(var.format);
10725
10726 return join(to_pls_layout(var.format), to_pls_qualifiers_glsl(variable), type_to_glsl(type), " ",
10727 to_name(variable.self));
10728 }
10729
10730 uint32_t CompilerGLSL::to_array_size_literal(const SPIRType &type) const
10731 {
10732 return to_array_size_literal(type, uint32_t(type.array.size() - 1));
10733 }
10734
10735 uint32_t CompilerGLSL::to_array_size_literal(const SPIRType &type, uint32_t index) const
10736 {
10737 assert(type.array.size() == type.array_size_literal.size());
10738
10739 if (type.array_size_literal[index])
10740 {
10741 return type.array[index];
10742 }
10743 else
10744 {
10745 // Use the default spec constant value.
10746 // This is the best we can do.
10747 uint32_t array_size_id = type.array[index];
10748
10749 // Explicitly check for this case. The error message you would get (bad cast) makes no sense otherwise.
10750 if (ir.ids[array_size_id].get_type() == TypeConstantOp)
10751 SPIRV_CROSS_THROW("An array size was found to be an OpSpecConstantOp. This is not supported since "
10752 "SPIRV-Cross cannot deduce the actual size here.");
10753
10754 uint32_t array_size = get<SPIRConstant>(array_size_id).scalar();
10755 return array_size;
10756 }
10757 }
10758
10759 string CompilerGLSL::to_array_size(const SPIRType &type, uint32_t index)
10760 {
10761 assert(type.array.size() == type.array_size_literal.size());
10762
10763 // Tessellation control and evaluation shaders must have either gl_MaxPatchVertices or unsized arrays for input arrays.
10764 // Opt for unsized as it's the more "correct" variant to use.
10765 if (type.storage == StorageClassInput &&
10766 (get_entry_point().model == ExecutionModelTessellationControl ||
10767 get_entry_point().model == ExecutionModelTessellationEvaluation) &&
10768 index == uint32_t(type.array.size() - 1))
10769 return "";
10770
10771 auto &size = type.array[index];
10772 if (!type.array_size_literal[index])
10773 return to_expression(size);
10774 else if (size)
10775 return convert_to_string(size);
10776 else if (!backend.unsized_array_supported)
10777 {
10778 // For runtime-sized arrays, we can work around
10779 // lack of standard support for this by simply having
10780 // a single element array.
10781 //
10782 // Runtime length arrays must always be the last element
10783 // in an interface block.
10784 return "1";
10785 }
10786 else
10787 return "";
10788 }
10789
10790 string CompilerGLSL::type_to_array_glsl(const SPIRType &type)
10791 {
10792 if (type.pointer && type.storage == StorageClassPhysicalStorageBufferEXT && type.basetype != SPIRType::Struct)
10793 {
10794 // We are using a wrapped pointer type, and we should not emit any array declarations here.
10795 return "";
10796 }
10797
10798 if (type.array.empty())
10799 return "";
10800
10801 if (options.flatten_multidimensional_arrays)
10802 {
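		// Collapse all dimensions into one size expression, so e.g. a float v[A][B] array is
		// declared with a single flattened size of (A) * (B) elements.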
10803 string res;
10804 res += "[";
10805 for (auto i = uint32_t(type.array.size()); i; i--)
10806 {
10807 res += enclose_expression(to_array_size(type, i - 1));
10808 if (i > 1)
10809 res += " * ";
10810 }
10811 res += "]";
10812 return res;
10813 }
10814 else
10815 {
10816 if (type.array.size() > 1)
10817 {
10818 if (!options.es && options.version < 430)
10819 require_extension_internal("GL_ARB_arrays_of_arrays");
10820 else if (options.es && options.version < 310)
10821 SPIRV_CROSS_THROW("Arrays of arrays not supported before ESSL version 310. "
10822 "Try using --flatten-multidimensional-arrays or set "
10823 "options.flatten_multidimensional_arrays to true.");
10824 }
10825
10826 string res;
10827 for (auto i = uint32_t(type.array.size()); i; i--)
10828 {
10829 res += "[";
10830 res += to_array_size(type, i - 1);
10831 res += "]";
10832 }
10833 return res;
10834 }
10835 }
10836
10837 string CompilerGLSL::image_type_glsl(const SPIRType &type, uint32_t id)
10838 {
10839 auto &imagetype = get<SPIRType>(type.image.type);
10840 string res;
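	// The type name is composed piecewise, e.g. an arrayed 2D storage image of ints becomes
	// "iimage2DArray", while a depth-comparison cube sampler becomes "samplerCubeShadow".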
10841
10842 switch (imagetype.basetype)
10843 {
10844 case SPIRType::Int:
10845 case SPIRType::Short:
10846 case SPIRType::SByte:
10847 res = "i";
10848 break;
10849 case SPIRType::UInt:
10850 case SPIRType::UShort:
10851 case SPIRType::UByte:
10852 res = "u";
10853 break;
10854 default:
10855 break;
10856 }
10857
10858 // For half image types, we will force mediump for the sampler, and cast to f16 after any sampling operation.
10859	// We cannot express a true half texture type in GLSL, nor short integer formats for that matter.
10860
10861 if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData && options.vulkan_semantics)
10862 return res + "subpassInput" + (type.image.ms ? "MS" : "");
10863
10864 // If we're emulating subpassInput with samplers, force sampler2D
10865 // so we don't have to specify format.
10866 if (type.basetype == SPIRType::Image && type.image.dim != DimSubpassData)
10867 {
10868 // Sampler buffers are always declared as samplerBuffer even though they might be separate images in the SPIR-V.
10869 if (type.image.dim == DimBuffer && type.image.sampled == 1)
10870 res += "sampler";
10871 else
10872 res += type.image.sampled == 2 ? "image" : "texture";
10873 }
10874 else
10875 res += "sampler";
10876
10877 switch (type.image.dim)
10878 {
10879 case Dim1D:
10880 res += "1D";
10881 break;
10882 case Dim2D:
10883 res += "2D";
10884 break;
10885 case Dim3D:
10886 res += "3D";
10887 break;
10888 case DimCube:
10889 res += "Cube";
10890 break;
10891 case DimRect:
10892 if (options.es)
10893 SPIRV_CROSS_THROW("Rectangle textures are not supported on OpenGL ES.");
10894
10895 if (is_legacy_desktop())
10896 require_extension_internal("GL_ARB_texture_rectangle");
10897
10898 res += "2DRect";
10899 break;
10900
10901 case DimBuffer:
10902 if (options.es && options.version < 320)
10903 require_extension_internal("GL_OES_texture_buffer");
10904 else if (!options.es && options.version < 300)
10905 require_extension_internal("GL_EXT_texture_buffer_object");
10906 res += "Buffer";
10907 break;
10908
10909 case DimSubpassData:
10910 res += "2D";
10911 break;
10912 default:
10913 SPIRV_CROSS_THROW("Only 1D, 2D, 2DRect, 3D, Buffer, InputTarget and Cube textures supported.");
10914 }
10915
10916 if (type.image.ms)
10917 res += "MS";
10918 if (type.image.arrayed)
10919 {
10920 if (is_legacy_desktop())
10921 require_extension_internal("GL_EXT_texture_array");
10922 res += "Array";
10923 }
10924
10925 // "Shadow" state in GLSL only exists for samplers and combined image samplers.
10926 if (((type.basetype == SPIRType::SampledImage) || (type.basetype == SPIRType::Sampler)) &&
10927 image_is_comparison(type, id))
10928 {
10929 res += "Shadow";
10930 }
10931
10932 return res;
10933 }
10934
10935 string CompilerGLSL::type_to_glsl_constructor(const SPIRType &type)
10936 {
10937 if (backend.use_array_constructor && type.array.size() > 1)
10938 {
10939 if (options.flatten_multidimensional_arrays)
10940 SPIRV_CROSS_THROW("Cannot flatten constructors of multidimensional array constructors, e.g. float[][]().");
10941 else if (!options.es && options.version < 430)
10942 require_extension_internal("GL_ARB_arrays_of_arrays");
10943 else if (options.es && options.version < 310)
10944 SPIRV_CROSS_THROW("Arrays of arrays not supported before ESSL version 310.");
10945 }
10946
10947 auto e = type_to_glsl(type);
10948 if (backend.use_array_constructor)
10949 {
10950 for (uint32_t i = 0; i < type.array.size(); i++)
10951 e += "[]";
10952 }
10953 return e;
10954 }
10955
10956 // The optional id parameter indicates the object whose type we are trying
10957 // to find the description for. It is optional. Most type descriptions do not
10958 // depend on a specific object's use of that type.
10959 string CompilerGLSL::type_to_glsl(const SPIRType &type, uint32_t id)
10960 {
10961 if (type.pointer && type.storage == StorageClassPhysicalStorageBufferEXT && type.basetype != SPIRType::Struct)
10962 {
10963 // Need to create a magic type name which compacts the entire type information.
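		// e.g. a buffer_reference pointer to vec4 would be named "vec4Pointer", with any array
		// dimensions folded into the name as well.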
10964 string name = type_to_glsl(get_pointee_type(type));
10965 for (size_t i = 0; i < type.array.size(); i++)
10966 {
10967 if (type.array_size_literal[i])
10968 name += join(type.array[i], "_");
10969 else
10970 name += join("id", type.array[i], "_");
10971 }
10972 name += "Pointer";
10973 return name;
10974 }
10975
10976 switch (type.basetype)
10977 {
10978 case SPIRType::Struct:
10979 // Need OpName lookup here to get a "sensible" name for a struct.
10980 if (backend.explicit_struct_type)
10981 return join("struct ", to_name(type.self));
10982 else
10983 return to_name(type.self);
10984
10985 case SPIRType::Image:
10986 case SPIRType::SampledImage:
10987 return image_type_glsl(type, id);
10988
10989 case SPIRType::Sampler:
10990 // The depth field is set by calling code based on the variable ID of the sampler, effectively reintroducing
10991 // this distinction into the type system.
10992 return comparison_ids.count(id) ? "samplerShadow" : "sampler";
10993
10994 case SPIRType::AccelerationStructureNV:
10995 return "accelerationStructureNV";
10996
10997 case SPIRType::Void:
10998 return "void";
10999
11000 default:
11001 break;
11002 }
11003
11004 if (type.basetype == SPIRType::UInt && is_legacy())
11005 SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy targets.");
11006
11007 if (type.vecsize == 1 && type.columns == 1) // Scalar builtin
11008 {
11009 switch (type.basetype)
11010 {
11011 case SPIRType::Boolean:
11012 return "bool";
11013 case SPIRType::SByte:
11014 return backend.basic_int8_type;
11015 case SPIRType::UByte:
11016 return backend.basic_uint8_type;
11017 case SPIRType::Short:
11018 return backend.basic_int16_type;
11019 case SPIRType::UShort:
11020 return backend.basic_uint16_type;
11021 case SPIRType::Int:
11022 return backend.basic_int_type;
11023 case SPIRType::UInt:
11024 return backend.basic_uint_type;
11025 case SPIRType::AtomicCounter:
11026 return "atomic_uint";
11027 case SPIRType::Half:
11028 return "float16_t";
11029 case SPIRType::Float:
11030 return "float";
11031 case SPIRType::Double:
11032 return "double";
11033 case SPIRType::Int64:
11034 return "int64_t";
11035 case SPIRType::UInt64:
11036 return "uint64_t";
11037 default:
11038 return "???";
11039 }
11040 }
11041 else if (type.vecsize > 1 && type.columns == 1) // Vector builtin
11042 {
11043 switch (type.basetype)
11044 {
11045 case SPIRType::Boolean:
11046 return join("bvec", type.vecsize);
11047 case SPIRType::SByte:
11048 return join("i8vec", type.vecsize);
11049 case SPIRType::UByte:
11050 return join("u8vec", type.vecsize);
11051 case SPIRType::Short:
11052 return join("i16vec", type.vecsize);
11053 case SPIRType::UShort:
11054 return join("u16vec", type.vecsize);
11055 case SPIRType::Int:
11056 return join("ivec", type.vecsize);
11057 case SPIRType::UInt:
11058 return join("uvec", type.vecsize);
11059 case SPIRType::Half:
11060 return join("f16vec", type.vecsize);
11061 case SPIRType::Float:
11062 return join("vec", type.vecsize);
11063 case SPIRType::Double:
11064 return join("dvec", type.vecsize);
11065 case SPIRType::Int64:
11066 return join("i64vec", type.vecsize);
11067 case SPIRType::UInt64:
11068 return join("u64vec", type.vecsize);
11069 default:
11070 return "???";
11071 }
11072 }
11073 else if (type.vecsize == type.columns) // Simple Matrix builtin
11074 {
11075 switch (type.basetype)
11076 {
11077 case SPIRType::Boolean:
11078 return join("bmat", type.vecsize);
11079 case SPIRType::Int:
11080 return join("imat", type.vecsize);
11081 case SPIRType::UInt:
11082 return join("umat", type.vecsize);
11083 case SPIRType::Half:
11084 return join("f16mat", type.vecsize);
11085 case SPIRType::Float:
11086 return join("mat", type.vecsize);
11087 case SPIRType::Double:
11088 return join("dmat", type.vecsize);
11089 // Matrix types not supported for int64/uint64.
11090 default:
11091 return "???";
11092 }
11093 }
11094 else
11095 {
11096 switch (type.basetype)
11097 {
11098 case SPIRType::Boolean:
11099 return join("bmat", type.columns, "x", type.vecsize);
11100 case SPIRType::Int:
11101 return join("imat", type.columns, "x", type.vecsize);
11102 case SPIRType::UInt:
11103 return join("umat", type.columns, "x", type.vecsize);
11104 case SPIRType::Half:
11105 return join("f16mat", type.columns, "x", type.vecsize);
11106 case SPIRType::Float:
11107 return join("mat", type.columns, "x", type.vecsize);
11108 case SPIRType::Double:
11109 return join("dmat", type.columns, "x", type.vecsize);
11110 // Matrix types not supported for int64/uint64.
11111 default:
11112 return "???";
11113 }
11114 }
11115 }
11116
11117 void CompilerGLSL::add_variable(unordered_set<string> &variables_primary,
11118 const unordered_set<string> &variables_secondary, string &name)
11119 {
11120 if (name.empty())
11121 return;
11122
11123 // Reserved for temporaries.
11124 if (name[0] == '_' && name.size() >= 2 && isdigit(name[1]))
11125 {
11126 name.clear();
11127 return;
11128 }
11129
11130 // Avoid double underscores.
11131 name = sanitize_underscores(name);
11132
11133 update_name_cache(variables_primary, variables_secondary, name);
11134 }
11135
11136 void CompilerGLSL::add_local_variable_name(uint32_t id)
11137 {
11138 add_variable(local_variable_names, block_names, ir.meta[id].decoration.alias);
11139 }
11140
11141 void CompilerGLSL::add_resource_name(uint32_t id)
11142 {
11143 add_variable(resource_names, block_names, ir.meta[id].decoration.alias);
11144 }
11145
11146 void CompilerGLSL::add_header_line(const std::string &line)
11147 {
11148 header_lines.push_back(line);
11149 }
11150
11151 bool CompilerGLSL::has_extension(const std::string &ext) const
11152 {
11153 auto itr = find(begin(forced_extensions), end(forced_extensions), ext);
11154 return itr != end(forced_extensions);
11155 }
11156
11157 void CompilerGLSL::require_extension(const std::string &ext)
11158 {
11159 if (!has_extension(ext))
11160 forced_extensions.push_back(ext);
11161 }
11162
11163 void CompilerGLSL::require_extension_internal(const string &ext)
11164 {
11165 if (backend.supports_extensions && !has_extension(ext))
11166 {
11167 forced_extensions.push_back(ext);
11168 force_recompile();
11169 }
11170 }
11171
11172 void CompilerGLSL::flatten_buffer_block(VariableID id)
11173 {
11174 auto &var = get<SPIRVariable>(id);
11175 auto &type = get<SPIRType>(var.basetype);
11176 auto name = to_name(type.self, false);
11177 auto &flags = ir.meta[type.self].decoration.decoration_flags;
11178
11179 if (!type.array.empty())
11180 SPIRV_CROSS_THROW(name + " is an array of UBOs.");
11181 if (type.basetype != SPIRType::Struct)
11182 SPIRV_CROSS_THROW(name + " is not a struct.");
11183 if (!flags.get(DecorationBlock))
11184 SPIRV_CROSS_THROW(name + " is not a block.");
11185 if (type.member_types.empty())
11186 SPIRV_CROSS_THROW(name + " is an empty struct.");
11187
11188 flattened_buffer_blocks.insert(id);
11189 }
11190
11191 bool CompilerGLSL::builtin_translates_to_nonarray(spv::BuiltIn /*builtin*/) const
11192 {
11193 return false; // GLSL itself does not need to translate array builtin types to non-array builtin types
11194 }
11195
11196 bool CompilerGLSL::check_atomic_image(uint32_t id)
11197 {
11198 auto &type = expression_type(id);
11199 if (type.storage == StorageClassImage)
11200 {
11201 if (options.es && options.version < 320)
11202 require_extension_internal("GL_OES_shader_image_atomic");
11203
11204 auto *var = maybe_get_backing_variable(id);
11205 if (var)
11206 {
11207 auto &flags = ir.meta[var->self].decoration.decoration_flags;
11208 if (flags.get(DecorationNonWritable) || flags.get(DecorationNonReadable))
11209 {
11210 flags.clear(DecorationNonWritable);
11211 flags.clear(DecorationNonReadable);
11212 force_recompile();
11213 }
11214 }
11215 return true;
11216 }
11217 else
11218 return false;
11219 }
11220
11221 void CompilerGLSL::add_function_overload(const SPIRFunction &func)
11222 {
11223 Hasher hasher;
11224 for (auto &arg : func.arguments)
11225 {
11226 // Parameters can vary with pointer type or not,
11227 // but that will not change the signature in GLSL/HLSL,
11228 // so strip the pointer type before hashing.
11229 uint32_t type_id = get_pointee_type_id(arg.type);
11230 auto &type = get<SPIRType>(type_id);
11231
11232 if (!combined_image_samplers.empty())
11233 {
11234 // If we have combined image samplers, we cannot really trust the image and sampler arguments
11235 // we pass down to callees, because they may be shuffled around.
11236 // Ignore these arguments, to make sure that functions need to differ in some other way
11237 // to be considered different overloads.
11238 if (type.basetype == SPIRType::SampledImage ||
11239 (type.basetype == SPIRType::Image && type.image.sampled == 1) || type.basetype == SPIRType::Sampler)
11240 {
11241 continue;
11242 }
11243 }
11244
11245 hasher.u32(type_id);
11246 }
11247 uint64_t types_hash = hasher.get();
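	// Two functions sharing a name but hashing to different argument type lists can coexist as
	// legal GLSL overloads; only an exact hash collision forces the rename below.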
11248
11249 auto function_name = to_name(func.self);
11250 auto itr = function_overloads.find(function_name);
11251 if (itr != end(function_overloads))
11252 {
11253 // There exists a function with this name already.
11254 auto &overloads = itr->second;
11255 if (overloads.count(types_hash) != 0)
11256 {
11257 // Overload conflict, assign a new name.
11258 add_resource_name(func.self);
11259 function_overloads[to_name(func.self)].insert(types_hash);
11260 }
11261 else
11262 {
11263 // Can reuse the name.
11264 overloads.insert(types_hash);
11265 }
11266 }
11267 else
11268 {
11269 // First time we see this function name.
11270 add_resource_name(func.self);
11271 function_overloads[to_name(func.self)].insert(types_hash);
11272 }
11273 }
11274
11275 void CompilerGLSL::emit_function_prototype(SPIRFunction &func, const Bitset &return_flags)
11276 {
11277 if (func.self != ir.default_entry_point)
11278 add_function_overload(func);
11279
11280 // Avoid shadow declarations.
11281 local_variable_names = resource_names;
11282
11283 string decl;
11284
11285 auto &type = get<SPIRType>(func.return_type);
11286 decl += flags_to_qualifiers_glsl(type, return_flags);
11287 decl += type_to_glsl(type);
11288 decl += type_to_array_glsl(type);
11289 decl += " ";
11290
11291 if (func.self == ir.default_entry_point)
11292 {
11293 // If we need complex fallback in GLSL, we just wrap main() in a function
11294 // and interlock the entire shader ...
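// For reference (sketch only, emitted elsewhere): the real main() then becomes a thin wrapper
// roughly of the form
//     void main()
//     {
//         beginInvocationInterlockARB();
//         spvMainInterlockedBody();
//         endInvocationInterlockARB();
//     }
// where the exact begin/end calls depend on the supported interlock extension.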
11295 if (interlocked_is_complex)
11296 decl += "spvMainInterlockedBody";
11297 else
11298 decl += "main";
11299
11300 processing_entry_point = true;
11301 }
11302 else
11303 decl += to_name(func.self);
11304
11305 decl += "(";
11306 SmallVector<string> arglist;
11307 for (auto &arg : func.arguments)
11308 {
11309 // Do not pass in separate images or samplers if we're remapping
11310 // to combined image samplers.
11311 if (skip_argument(arg.id))
11312 continue;
11313
11314 // Might change the variable name if it already exists in this function.
11315 // SPIRV OpName doesn't have any semantic effect, so it's valid for an implementation
11316 // to use the same name for multiple variables.
11317 // Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates.
11318 add_local_variable_name(arg.id);
11319
11320 arglist.push_back(argument_decl(arg));
11321
11322 // Hold a pointer to the parameter so we can invalidate the readonly field if needed.
11323 auto *var = maybe_get<SPIRVariable>(arg.id);
11324 if (var)
11325 var->parameter = &arg;
11326 }
11327
11328 for (auto &arg : func.shadow_arguments)
11329 {
11330 // Might change the variable name if it already exists in this function.
11331 // SPIRV OpName doesn't have any semantic effect, so it's valid for an implementation
11332 // to use the same name for multiple variables.
11333 // Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates.
11334 add_local_variable_name(arg.id);
11335
11336 arglist.push_back(argument_decl(arg));
11337
11338 // Hold a pointer to the parameter so we can invalidate the readonly field if needed.
11339 auto *var = maybe_get<SPIRVariable>(arg.id);
11340 if (var)
11341 var->parameter = &arg;
11342 }
11343
11344 decl += merge(arglist);
11345 decl += ")";
11346 statement(decl);
11347 }
11348
11349 void CompilerGLSL::emit_function(SPIRFunction &func, const Bitset &return_flags)
11350 {
11351 // Avoid potential cycles.
11352 if (func.active)
11353 return;
11354 func.active = true;
11355
11356 // If we depend on a function, emit that function before we emit our own function.
11357 for (auto block : func.blocks)
11358 {
11359 auto &b = get<SPIRBlock>(block);
11360 for (auto &i : b.ops)
11361 {
11362 auto ops = stream(i);
11363 auto op = static_cast<Op>(i.op);
11364
11365 if (op == OpFunctionCall)
11366 {
11367 // Recursively emit functions which are called.
11368 uint32_t id = ops[2];
11369 emit_function(get<SPIRFunction>(id), ir.meta[ops[1]].decoration.decoration_flags);
11370 }
11371 }
11372 }
11373
11374 if (func.entry_line.file_id != 0)
11375 emit_line_directive(func.entry_line.file_id, func.entry_line.line_literal);
11376 emit_function_prototype(func, return_flags);
11377 begin_scope();
11378
11379 if (func.self == ir.default_entry_point)
11380 emit_entry_point_declarations();
11381
11382 current_function = &func;
11383 auto &entry_block = get<SPIRBlock>(func.entry_block);
11384
11385 for (auto &v : func.local_variables)
11386 {
11387 auto &var = get<SPIRVariable>(v);
11388 var.deferred_declaration = false;
11389
11390 if (var.storage == StorageClassWorkgroup)
11391 {
11392 // Special variable type which cannot have an initializer;
11393 // it needs to be declared as a standalone variable.
11394 // Comes from MSL which can push global variables as local variables in main function.
11395 add_local_variable_name(var.self);
11396 statement(variable_decl(var), ";");
11397 var.deferred_declaration = false;
11398 }
11399 else if (var.storage == StorageClassPrivate)
11400 {
11401 // These variables will not have had their CFG usage analyzed, so move it to the entry block.
11402 // Comes from MSL which can push global variables as local variables in main function.
11403 // We could just declare them right now, but we would miss out on an important initialization case which is
11404 // LUT declaration in MSL.
11405 // If we don't declare the variable when it is assigned we're forced to go through a helper function
11406 // which copies elements one by one.
11407 add_local_variable_name(var.self);
11408 auto &dominated = entry_block.dominated_variables;
11409 if (find(begin(dominated), end(dominated), var.self) == end(dominated))
11410 entry_block.dominated_variables.push_back(var.self);
11411 var.deferred_declaration = true;
11412 }
11413 else if (var.storage == StorageClassFunction && var.remapped_variable && var.static_expression)
11414 {
11415 // No need to declare this variable, it has a static expression.
11416 var.deferred_declaration = false;
11417 }
11418 else if (expression_is_lvalue(v))
11419 {
11420 add_local_variable_name(var.self);
11421
11422 if (var.initializer)
11423 statement(variable_decl_function_local(var), ";");
11424 else
11425 {
11426 // Don't declare variable until first use to declutter the GLSL output quite a lot.
11427 // If we don't touch the variable before first branch,
11428 // declare it then since we need variable declaration to be in top scope.
11429 var.deferred_declaration = true;
11430 }
11431 }
11432 else
11433 {
11434 // HACK: SPIR-V in older glslang output likes to use samplers and images as local variables, but GLSL does not allow this.
11435 // For these types (non-lvalue), we enforce forwarding through a shadowed variable.
11436 // This means that when we OpStore to these variables, we just write in the expression ID directly.
11437 // This breaks any kind of branching, since the variable must be statically assigned.
11438 // Branching on samplers and images would be pretty much impossible to fake in GLSL.
11439 var.statically_assigned = true;
11440 }
11441
11442 var.loop_variable_enable = false;
11443
11444 // Loop variables are never declared outside their for-loop, so block any implicit declaration.
11445 if (var.loop_variable)
11446 var.deferred_declaration = false;
11447 }
11448
11449 // Enforce declaration order for regression testing purposes.
11450 for (auto &block_id : func.blocks)
11451 {
11452 auto &block = get<SPIRBlock>(block_id);
11453 sort(begin(block.dominated_variables), end(block.dominated_variables));
11454 }
11455
11456 for (auto &line : current_function->fixup_hooks_in)
11457 line();
11458
11459 emit_block_chain(entry_block);
11460
11461 end_scope();
11462 processing_entry_point = false;
11463 statement("");
11464
11465 // Make sure deferred declaration state for local variables is cleared when we are done with function.
11466 // We risk declaring Private/Workgroup variables in places we are not supposed to otherwise.
11467 for (auto &v : func.local_variables)
11468 {
11469 auto &var = get<SPIRVariable>(v);
11470 var.deferred_declaration = false;
11471 }
11472 }
11473
11474 void CompilerGLSL::emit_fixup()
11475 {
11476 auto &execution = get_entry_point();
11477 if (execution.model == ExecutionModelVertex)
11478 {
11479 if (options.vertex.fixup_clipspace)
11480 {
11481 const char *suffix = backend.float_literal_suffix ? "f" : "";
11482 statement("gl_Position.z = 2.0", suffix, " * gl_Position.z - gl_Position.w;");
11483 }
11484
11485 if (options.vertex.flip_vert_y)
11486 statement("gl_Position.y = -gl_Position.y;");
11487 }
11488 }
11489
11490 void CompilerGLSL::flush_phi(BlockID from, BlockID to)
11491 {
11492 auto &child = get<SPIRBlock>(to);
11493 if (child.ignore_phi_from_block == from)
11494 return;
11495
11496 unordered_set<uint32_t> temporary_phi_variables;
11497
11498 for (auto itr = begin(child.phi_variables); itr != end(child.phi_variables); ++itr)
11499 {
11500 auto &phi = *itr;
11501
11502 if (phi.parent == from)
11503 {
11504 auto &var = get<SPIRVariable>(phi.function_variable);
11505
11506 // A Phi variable might be a loop variable, so flush to static expression.
11507 if (var.loop_variable && !var.loop_variable_enable)
11508 var.static_expression = phi.local_variable;
11509 else
11510 {
11511 flush_variable_declaration(phi.function_variable);
11512
11513 // Check if we are going to write to a Phi variable that another statement will read from
11514 // as part of another Phi node in our target block.
11515 // For this case, we will need to copy phi.function_variable to a temporary, and use that for future reads.
11516 // This is judged to be extremely rare, so deal with it here using a simple, but suboptimal algorithm.
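// Illustrative sketch of the emitted pattern (names and IDs are made up):
//     _15_copy = phi_a;   // save the current value before it is overwritten below
//     phi_a = value_x;
//     phi_b = _15_copy;   // a later phi write in this block reads the saved copy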
11517 bool need_saved_temporary =
11518 find_if(itr + 1, end(child.phi_variables), [&](const SPIRBlock::Phi &future_phi) -> bool {
11519 return future_phi.local_variable == ID(phi.function_variable) && future_phi.parent == from;
11520 }) != end(child.phi_variables);
11521
11522 if (need_saved_temporary)
11523 {
11524 // Need to make sure we declare the phi variable with a copy at the right scope.
11525 // We cannot safely declare a temporary here since we might be inside a continue block.
11526 if (!var.allocate_temporary_copy)
11527 {
11528 var.allocate_temporary_copy = true;
11529 force_recompile();
11530 }
11531 statement("_", phi.function_variable, "_copy", " = ", to_name(phi.function_variable), ";");
11532 temporary_phi_variables.insert(phi.function_variable);
11533 }
11534
11535 // This might be called in continue block, so make sure we
11536 // use this to emit ESSL 1.0 compliant increments/decrements.
11537 auto lhs = to_expression(phi.function_variable);
11538
11539 string rhs;
11540 if (temporary_phi_variables.count(phi.local_variable))
11541 rhs = join("_", phi.local_variable, "_copy");
11542 else
11543 rhs = to_pointer_expression(phi.local_variable);
11544
11545 if (!optimize_read_modify_write(get<SPIRType>(var.basetype), lhs, rhs))
11546 statement(lhs, " = ", rhs, ";");
11547 }
11548
11549 register_write(phi.function_variable);
11550 }
11551 }
11552 }
11553
11554 void CompilerGLSL::branch_to_continue(BlockID from, BlockID to)
11555 {
11556 auto &to_block = get<SPIRBlock>(to);
11557 if (from == to)
11558 return;
11559
11560 assert(is_continue(to));
11561 if (to_block.complex_continue)
11562 {
11563 // Just emit the whole block chain as is.
11564 auto usage_counts = expression_usage_counts;
11565
11566 emit_block_chain(to_block);
11567
11568 // Expression usage counts are moot after returning from the continue block.
11569 expression_usage_counts = usage_counts;
11570 }
11571 else
11572 {
11573 auto &from_block = get<SPIRBlock>(from);
11574 bool outside_control_flow = false;
11575 uint32_t loop_dominator = 0;
11576
11577 // FIXME: Refactor this to not use the old loop_dominator tracking.
11578 if (from_block.merge_block)
11579 {
11580 // If we are a loop header, we don't set the loop dominator,
11581 // so just use "self" here.
11582 loop_dominator = from;
11583 }
11584 else if (from_block.loop_dominator != BlockID(SPIRBlock::NoDominator))
11585 {
11586 loop_dominator = from_block.loop_dominator;
11587 }
11588
11589 if (loop_dominator != 0)
11590 {
11591 auto &cfg = get_cfg_for_current_function();
11592
11593 // For non-complex continue blocks, we implicitly branch to the continue block
11594 // by having the continue block be part of the loop header in for (; ; continue-block).
11595 outside_control_flow = cfg.node_terminates_control_flow_in_sub_graph(loop_dominator, from);
11596 }
11597
11598 // Some simplification for for-loops. We always end up with a useless continue;
11599 // statement since we branch to a loop block.
11600 // Walk the CFG: if we unconditionally execute the block calling continue (assuming we're in the loop block),
11601 // we can avoid writing out an explicit continue statement.
11602 // Similar optimization to return statements if we know we're outside flow control.
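// Illustrative example: in "for (int i = 0; i < n; i++) { foo(i); }" the final branch back to
// the continue block would otherwise print a redundant trailing "continue;" inside the body.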
11603 if (!outside_control_flow)
11604 statement("continue;");
11605 }
11606 }
11607
11608 void CompilerGLSL::branch(BlockID from, BlockID to)
11609 {
11610 flush_phi(from, to);
11611 flush_control_dependent_expressions(from);
11612
11613 bool to_is_continue = is_continue(to);
11614
11615 // This is only a continue if we branch to our loop dominator.
11616 if ((ir.block_meta[to] & ParsedIR::BLOCK_META_LOOP_HEADER_BIT) != 0 && get<SPIRBlock>(from).loop_dominator == to)
11617 {
11618 // This can happen if we had a complex continue block which was emitted.
11619 // Once the continue block tries to branch to the loop header, just emit continue;
11620 // and end the chain here.
11621 statement("continue;");
11622 }
11623 else if (is_break(to))
11624 {
11625 // Very dirty workaround.
11626 // Switch constructs are able to break, but they cannot break out of a loop at the same time.
11627 // Only sensible solution is to make a ladder variable, which we declare at the top of the switch block,
11628 // write to the ladder here, and defer the break.
11629 // The loop we're breaking out of must dominate the switch block, or there is no ladder breaking case.
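// Illustrative sketch of the resulting GLSL inside the loop body (block IDs are made up):
//     bool _24_ladder_break = false;
//     switch (sel) { case 0: { _24_ladder_break = true; break; } ... }
//     if (_24_ladder_break) { break; }   // emitted after the switch, breaks the enclosing loop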
11630 if (current_emitting_switch && is_loop_break(to) &&
11631 current_emitting_switch->loop_dominator != BlockID(SPIRBlock::NoDominator) &&
11632 get<SPIRBlock>(current_emitting_switch->loop_dominator).merge_block == to)
11633 {
11634 if (!current_emitting_switch->need_ladder_break)
11635 {
11636 force_recompile();
11637 current_emitting_switch->need_ladder_break = true;
11638 }
11639
11640 statement("_", current_emitting_switch->self, "_ladder_break = true;");
11641 }
11642 statement("break;");
11643 }
11644 else if (to_is_continue || from == to)
11645 {
11646 // The from == to case can happen for a do-while loop which branches into itself.
11647 // We don't mark these cases as continue blocks, but the only possible way to branch into
11648 // ourselves is through means of continue blocks.
11649
11650 // If we are merging to a continue block, there is no need to emit the block chain for continue here.
11651 // We can branch to the continue block after we merge execution.
11652
11653 // Here we make use of structured control flow rules from spec:
11654 // 2.11: - the merge block declared by a header block cannot be a merge block declared by any other header block
11655 // - each header block must strictly dominate its merge block, unless the merge block is unreachable in the CFG
11656 // If we are branching to a merge block, we must be inside a construct which dominates the merge block.
11657 auto &block_meta = ir.block_meta[to];
11658 bool branching_to_merge =
11659 (block_meta & (ParsedIR::BLOCK_META_SELECTION_MERGE_BIT | ParsedIR::BLOCK_META_MULTISELECT_MERGE_BIT |
11660 ParsedIR::BLOCK_META_LOOP_MERGE_BIT)) != 0;
11661 if (!to_is_continue || !branching_to_merge)
11662 branch_to_continue(from, to);
11663 }
11664 else if (!is_conditional(to))
11665 emit_block_chain(get<SPIRBlock>(to));
11666
11667 // It is important that we check for break before continue.
11668 // A block might serve two purposes, a break block for the inner scope, and
11669 // a continue block in the outer scope.
11670 // Inner scope always takes precedence.
11671 }
11672
11673 void CompilerGLSL::branch(BlockID from, uint32_t cond, BlockID true_block, BlockID false_block)
11674 {
11675 auto &from_block = get<SPIRBlock>(from);
11676 BlockID merge_block = from_block.merge == SPIRBlock::MergeSelection ? from_block.next_block : BlockID(0);
11677
11678 // If we branch directly to a selection merge target, we don't need a code path.
11679 // This covers both merge out of if () / else () as well as a break for switch blocks.
11680 bool true_sub = !is_conditional(true_block);
11681 bool false_sub = !is_conditional(false_block);
11682
11683 bool true_block_is_selection_merge = true_block == merge_block;
11684 bool false_block_is_selection_merge = false_block == merge_block;
11685
11686 if (true_sub)
11687 {
11688 emit_block_hints(get<SPIRBlock>(from));
11689 statement("if (", to_expression(cond), ")");
11690 begin_scope();
11691 branch(from, true_block);
11692 end_scope();
11693
11694 // If we merge to continue, we handle that explicitly in emit_block_chain(),
11695 // so there is no need to branch to it directly here.
11696 // break; is required to handle ladder fallthrough cases, so keep that in for now, even
11697 // if we could potentially handle it in emit_block_chain().
11698 if (false_sub || (!false_block_is_selection_merge && is_continue(false_block)) || is_break(false_block))
11699 {
11700 statement("else");
11701 begin_scope();
11702 branch(from, false_block);
11703 end_scope();
11704 }
11705 else if (flush_phi_required(from, false_block))
11706 {
11707 statement("else");
11708 begin_scope();
11709 flush_phi(from, false_block);
11710 end_scope();
11711 }
11712 }
11713 else if (false_sub)
11714 {
11715 // Only need false path, use negative conditional.
11716 emit_block_hints(get<SPIRBlock>(from));
11717 statement("if (!", to_enclosed_expression(cond), ")");
11718 begin_scope();
11719 branch(from, false_block);
11720 end_scope();
11721
11722 if ((!true_block_is_selection_merge && is_continue(true_block)) || is_break(true_block))
11723 {
11724 statement("else");
11725 begin_scope();
11726 branch(from, true_block);
11727 end_scope();
11728 }
11729 else if (flush_phi_required(from, true_block))
11730 {
11731 statement("else");
11732 begin_scope();
11733 flush_phi(from, true_block);
11734 end_scope();
11735 }
11736 }
11737 }
11738
11739 // FIXME: This currently cannot handle complex continue blocks
11740 // as in do-while.
11741 // This should be seen as a "trivial" continue block.
11742 string CompilerGLSL::emit_continue_block(uint32_t continue_block, bool follow_true_block, bool follow_false_block)
11743 {
11744 auto *block = &get<SPIRBlock>(continue_block);
11745
11746 // While emitting the continue block, declare_temporary will check this
11747 // if we have to emit temporaries.
11748 current_continue_block = block;
11749
11750 SmallVector<string> statements;
11751
11752 // Capture all statements into our list.
11753 auto *old = redirect_statement;
11754 redirect_statement = &statements;
11755
11756 // Stamp out all blocks one after each other.
11757 while ((ir.block_meta[block->self] & ParsedIR::BLOCK_META_LOOP_HEADER_BIT) == 0)
11758 {
11759 // Write out all instructions we have in this block.
11760 emit_block_instructions(*block);
11761
11762 // For plain branchless for/while continue blocks.
11763 if (block->next_block)
11764 {
11765 flush_phi(continue_block, block->next_block);
11766 block = &get<SPIRBlock>(block->next_block);
11767 }
11768 // For do while blocks. The last block will be a select block.
11769 else if (block->true_block && follow_true_block)
11770 {
11771 flush_phi(continue_block, block->true_block);
11772 block = &get<SPIRBlock>(block->true_block);
11773 }
11774 else if (block->false_block && follow_false_block)
11775 {
11776 flush_phi(continue_block, block->false_block);
11777 block = &get<SPIRBlock>(block->false_block);
11778 }
11779 else
11780 {
11781 SPIRV_CROSS_THROW("Invalid continue block detected!");
11782 }
11783 }
11784
11785 // Restore old pointer.
11786 redirect_statement = old;
11787
11788 // Somewhat ugly, strip off the last ';' since we use ',' instead.
11789 // Ideally, we should select this behavior in statement().
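// Illustrative example: a continue block consisting of "i++;" and "j += 2;" ends up as
// "i++, j += 2" in the third slot of the for-header.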
11790 for (auto &s : statements)
11791 {
11792 if (!s.empty() && s.back() == ';')
11793 s.erase(s.size() - 1, 1);
11794 }
11795
11796 current_continue_block = nullptr;
11797 return merge(statements);
11798 }
11799
11800 void CompilerGLSL::emit_while_loop_initializers(const SPIRBlock &block)
11801 {
11802 // While loops do not take initializers, so declare all of them outside.
11803 for (auto &loop_var : block.loop_variables)
11804 {
11805 auto &var = get<SPIRVariable>(loop_var);
11806 statement(variable_decl(var), ";");
11807 }
11808 }
11809
11810 string CompilerGLSL::emit_for_loop_initializers(const SPIRBlock &block)
11811 {
11812 if (block.loop_variables.empty())
11813 return "";
11814
11815 bool same_types = for_loop_initializers_are_same_type(block);
11816 // We can only declare for loop initializers if all variables are of same type.
11817 // If we cannot do this, declare individual variables before the loop header.
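// Illustrative examples (variable names are made up):
//     for (int i = 0, j = 10; ...)    // all candidates share type and precision: merged
//     int i = 0; float f = 1.0;       // mixed types: declared before the loop,
//     for (; ...)                     // leaving the for-initializer empty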
11818
11819 // We might have a loop variable candidate which was not assigned to for some reason.
11820 uint32_t missing_initializers = 0;
11821 for (auto &variable : block.loop_variables)
11822 {
11823 uint32_t expr = get<SPIRVariable>(variable).static_expression;
11824
11825 // Sometimes loop variables are initialized with OpUndef, but we can just declare
11826 // a plain variable without initializer in this case.
11827 if (expr == 0 || ir.ids[expr].get_type() == TypeUndef)
11828 missing_initializers++;
11829 }
11830
11831 if (block.loop_variables.size() == 1 && missing_initializers == 0)
11832 {
11833 return variable_decl(get<SPIRVariable>(block.loop_variables.front()));
11834 }
11835 else if (!same_types || missing_initializers == uint32_t(block.loop_variables.size()))
11836 {
11837 for (auto &loop_var : block.loop_variables)
11838 statement(variable_decl(get<SPIRVariable>(loop_var)), ";");
11839 return "";
11840 }
11841 else
11842 {
11843 // We have a mix of loop variables, either ones with a clear initializer, or ones without.
11844 // Separate the two streams.
11845 string expr;
11846
11847 for (auto &loop_var : block.loop_variables)
11848 {
11849 uint32_t static_expr = get<SPIRVariable>(loop_var).static_expression;
11850 if (static_expr == 0 || ir.ids[static_expr].get_type() == TypeUndef)
11851 {
11852 statement(variable_decl(get<SPIRVariable>(loop_var)), ";");
11853 }
11854 else
11855 {
11856 auto &var = get<SPIRVariable>(loop_var);
11857 auto &type = get_variable_data_type(var);
11858 if (expr.empty())
11859 {
11860 // For loop initializers are of the form <type id = value, id = value, id = value, etc ...
11861 expr = join(to_qualifiers_glsl(var.self), type_to_glsl(type), " ");
11862 }
11863 else
11864 {
11865 expr += ", ";
11866 // In MSL, being based on C++, the asterisk marking a pointer
11867 // binds to the identifier, not the type.
11868 if (type.pointer)
11869 expr += "* ";
11870 }
11871
11872 expr += join(to_name(loop_var), " = ", to_pointer_expression(var.static_expression));
11873 }
11874 }
11875 return expr;
11876 }
11877 }
11878
11879 bool CompilerGLSL::for_loop_initializers_are_same_type(const SPIRBlock &block)
11880 {
11881 if (block.loop_variables.size() <= 1)
11882 return true;
11883
11884 uint32_t expected = 0;
11885 Bitset expected_flags;
11886 for (auto &var : block.loop_variables)
11887 {
11888 // Don't care about uninitialized variables as they will not be part of the initializers.
11889 uint32_t expr = get<SPIRVariable>(var).static_expression;
11890 if (expr == 0 || ir.ids[expr].get_type() == TypeUndef)
11891 continue;
11892
11893 if (expected == 0)
11894 {
11895 expected = get<SPIRVariable>(var).basetype;
11896 expected_flags = get_decoration_bitset(var);
11897 }
11898 else if (expected != get<SPIRVariable>(var).basetype)
11899 return false;
11900
11901 // Precision flags and things like that must also match.
11902 if (expected_flags != get_decoration_bitset(var))
11903 return false;
11904 }
11905
11906 return true;
11907 }
11908
11909 bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method method)
11910 {
11911 SPIRBlock::ContinueBlockType continue_type = continue_block_type(get<SPIRBlock>(block.continue_block));
11912
11913 if (method == SPIRBlock::MergeToSelectForLoop || method == SPIRBlock::MergeToSelectContinueForLoop)
11914 {
11915 uint32_t current_count = statement_count;
11916 // If we're trying to create a true for loop,
11917 // we need to make sure that all opcodes before branch statement do not actually emit any code.
11918 // We can then take the condition expression and create a for (; cond ; ) { body; } structure instead.
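// Illustrative target output (initializer/condition/continue text depends on the blocks):
//     for (int i = 0; i < count; i++) { body }
// or, for MergeToSelectContinueForLoop, "for (int i = 0; i < count; )" with the continue
// work emitted as part of the body.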
11919 emit_block_instructions(block);
11920
11921 bool condition_is_temporary = forced_temporaries.find(block.condition) == end(forced_temporaries);
11922
11923 // This can work! We only did trivial things which could be forwarded in block body!
11924 if (current_count == statement_count && condition_is_temporary)
11925 {
11926 switch (continue_type)
11927 {
11928 case SPIRBlock::ForLoop:
11929 {
11930 // This block may be a dominating block, so make sure we flush undeclared variables before building the for loop header.
11931 flush_undeclared_variables(block);
11932
11933 // Important that we do this in this order because
11934 // emitting the continue block can invalidate the condition expression.
11935 auto initializer = emit_for_loop_initializers(block);
11936 auto condition = to_expression(block.condition);
11937
11938 // Condition might have to be inverted.
11939 if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block)))
11940 condition = join("!", enclose_expression(condition));
11941
11942 emit_block_hints(block);
11943 if (method != SPIRBlock::MergeToSelectContinueForLoop)
11944 {
11945 auto continue_block = emit_continue_block(block.continue_block, false, false);
11946 statement("for (", initializer, "; ", condition, "; ", continue_block, ")");
11947 }
11948 else
11949 statement("for (", initializer, "; ", condition, "; )");
11950 break;
11951 }
11952
11953 case SPIRBlock::WhileLoop:
11954 {
11955 // This block may be a dominating block, so make sure we flush undeclared variables before building the while loop header.
11956 flush_undeclared_variables(block);
11957 emit_while_loop_initializers(block);
11958 emit_block_hints(block);
11959
11960 auto condition = to_expression(block.condition);
11961 // Condition might have to be inverted.
11962 if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block)))
11963 condition = join("!", enclose_expression(condition));
11964
11965 statement("while (", condition, ")");
11966 break;
11967 }
11968
11969 default:
11970 block.disable_block_optimization = true;
11971 force_recompile();
11972 begin_scope(); // We'll see an end_scope() later.
11973 return false;
11974 }
11975
11976 begin_scope();
11977 return true;
11978 }
11979 else
11980 {
11981 block.disable_block_optimization = true;
11982 force_recompile();
11983 begin_scope(); // We'll see an end_scope() later.
11984 return false;
11985 }
11986 }
11987 else if (method == SPIRBlock::MergeToDirectForLoop)
11988 {
11989 auto &child = get<SPIRBlock>(block.next_block);
11990
11991 // This block may be a dominating block, so make sure we flush undeclared variables before building the for loop header.
11992 flush_undeclared_variables(child);
11993
11994 uint32_t current_count = statement_count;
11995
11996 // If we're trying to create a true for loop,
11997 // we need to make sure that all opcodes before branch statement do not actually emit any code.
11998 // We can then take the condition expression and create a for (; cond ; ) { body; } structure instead.
11999 emit_block_instructions(child);
12000
12001 bool condition_is_temporary = forced_temporaries.find(child.condition) == end(forced_temporaries);
12002
12003 if (current_count == statement_count && condition_is_temporary)
12004 {
12005 uint32_t target_block = child.true_block;
12006
12007 switch (continue_type)
12008 {
12009 case SPIRBlock::ForLoop:
12010 {
12011 // Important that we do this in this order because
12012 // emitting the continue block can invalidate the condition expression.
12013 auto initializer = emit_for_loop_initializers(block);
12014 auto condition = to_expression(child.condition);
12015
12016 // Condition might have to be inverted.
12017 if (execution_is_noop(get<SPIRBlock>(child.true_block), get<SPIRBlock>(block.merge_block)))
12018 {
12019 condition = join("!", enclose_expression(condition));
12020 target_block = child.false_block;
12021 }
12022
12023 auto continue_block = emit_continue_block(block.continue_block, false, false);
12024 emit_block_hints(block);
12025 statement("for (", initializer, "; ", condition, "; ", continue_block, ")");
12026 break;
12027 }
12028
12029 case SPIRBlock::WhileLoop:
12030 {
12031 emit_while_loop_initializers(block);
12032 emit_block_hints(block);
12033
12034 auto condition = to_expression(child.condition);
12035 // Condition might have to be inverted.
12036 if (execution_is_noop(get<SPIRBlock>(child.true_block), get<SPIRBlock>(block.merge_block)))
12037 {
12038 condition = join("!", enclose_expression(condition));
12039 target_block = child.false_block;
12040 }
12041
12042 statement("while (", condition, ")");
12043 break;
12044 }
12045
12046 default:
12047 block.disable_block_optimization = true;
12048 force_recompile();
12049 begin_scope(); // We'll see an end_scope() later.
12050 return false;
12051 }
12052
12053 begin_scope();
12054 branch(child.self, target_block);
12055 return true;
12056 }
12057 else
12058 {
12059 block.disable_block_optimization = true;
12060 force_recompile();
12061 begin_scope(); // We'll see an end_scope() later.
12062 return false;
12063 }
12064 }
12065 else
12066 return false;
12067 }
12068
12069 void CompilerGLSL::flush_undeclared_variables(SPIRBlock &block)
12070 {
12071 for (auto &v : block.dominated_variables)
12072 flush_variable_declaration(v);
12073 }
12074
12075 void CompilerGLSL::emit_hoisted_temporaries(SmallVector<pair<TypeID, ID>> &temporaries)
12076 {
12077 // If we need to force temporaries for certain IDs due to continue blocks, do it before starting loop header.
12078 // Need to sort these to ensure that reference output is stable.
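// Illustrative example of what gets emitted here (type and ID are made up):
//     highp float _42;
// so the ID remains accessible outside the scope in which it is computed.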
12079 sort(begin(temporaries), end(temporaries),
12080 [](const pair<TypeID, ID> &a, const pair<TypeID, ID> &b) { return a.second < b.second; });
12081
12082 for (auto &tmp : temporaries)
12083 {
12084 add_local_variable_name(tmp.second);
12085 auto &flags = ir.meta[tmp.second].decoration.decoration_flags;
12086 auto &type = get<SPIRType>(tmp.first);
12087 statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(tmp.second)), ";");
12088
12089 hoisted_temporaries.insert(tmp.second);
12090 forced_temporaries.insert(tmp.second);
12091
12092 // The temporary might be read from before it's assigned, set up the expression now.
12093 set<SPIRExpression>(tmp.second, to_name(tmp.second), tmp.first, true);
12094 }
12095 }
12096
12097 void CompilerGLSL::emit_block_chain(SPIRBlock &block)
12098 {
12099 bool select_branch_to_true_block = false;
12100 bool select_branch_to_false_block = false;
12101 bool skip_direct_branch = false;
12102 bool emitted_loop_header_variables = false;
12103 bool force_complex_continue_block = false;
12104
12105 emit_hoisted_temporaries(block.declare_temporary);
12106
12107 SPIRBlock::ContinueBlockType continue_type = SPIRBlock::ContinueNone;
12108 if (block.continue_block)
12109 continue_type = continue_block_type(get<SPIRBlock>(block.continue_block));
12110
12111 // If we have loop variables, stop masking out access to the variable now.
12112 for (auto var_id : block.loop_variables)
12113 {
12114 auto &var = get<SPIRVariable>(var_id);
12115 var.loop_variable_enable = true;
12116 // We're not going to declare the variable directly, so emit a copy here.
12117 emit_variable_temporary_copies(var);
12118 }
12119
12120 // Remember deferred declaration state. We will restore it before returning.
12121 SmallVector<bool, 64> rearm_dominated_variables(block.dominated_variables.size());
12122 for (size_t i = 0; i < block.dominated_variables.size(); i++)
12123 {
12124 uint32_t var_id = block.dominated_variables[i];
12125 auto &var = get<SPIRVariable>(var_id);
12126 rearm_dominated_variables[i] = var.deferred_declaration;
12127 }
12128
12129 // This is the method often used by spirv-opt to implement loops.
12130 // The loop header goes straight into the continue block.
12131 // However, don't attempt this on ESSL 1.0, because if a loop variable is used in a continue block,
12132 // it *MUST* be used in the continue block. This loop method will not work.
12133 if (!is_legacy_es() && block_is_loop_candidate(block, SPIRBlock::MergeToSelectContinueForLoop))
12134 {
12135 flush_undeclared_variables(block);
12136 if (attempt_emit_loop_header(block, SPIRBlock::MergeToSelectContinueForLoop))
12137 {
12138 if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block)))
12139 select_branch_to_false_block = true;
12140 else
12141 select_branch_to_true_block = true;
12142
12143 emitted_loop_header_variables = true;
12144 force_complex_continue_block = true;
12145 }
12146 }
12147 // This is the older loop behavior in glslang which branches to loop body directly from the loop header.
12148 else if (block_is_loop_candidate(block, SPIRBlock::MergeToSelectForLoop))
12149 {
12150 flush_undeclared_variables(block);
12151 if (attempt_emit_loop_header(block, SPIRBlock::MergeToSelectForLoop))
12152 {
12153 // The body of the while loop is actually just the true (or false) block, so always branch there unconditionally.
12154 if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block)))
12155 select_branch_to_false_block = true;
12156 else
12157 select_branch_to_true_block = true;
12158
12159 emitted_loop_header_variables = true;
12160 }
12161 }
12162 // This is the newer loop behavior in glslang which branches from Loop header directly to
12163 // a new block, which in turn has a OpBranchSelection without a selection merge.
12164 else if (block_is_loop_candidate(block, SPIRBlock::MergeToDirectForLoop))
12165 {
12166 flush_undeclared_variables(block);
12167 if (attempt_emit_loop_header(block, SPIRBlock::MergeToDirectForLoop))
12168 {
12169 skip_direct_branch = true;
12170 emitted_loop_header_variables = true;
12171 }
12172 }
12173 else if (continue_type == SPIRBlock::DoWhileLoop)
12174 {
12175 flush_undeclared_variables(block);
12176 emit_while_loop_initializers(block);
12177 emitted_loop_header_variables = true;
12178 // We have some temporaries where the loop header is the dominator.
12179 // We risk a case where we have code like:
12180 // for (;;) { create-temporary; break; } consume-temporary;
12181 // so force-declare temporaries here.
12182 emit_hoisted_temporaries(block.potential_declare_temporary);
12183 statement("do");
12184 begin_scope();
12185
12186 emit_block_instructions(block);
12187 }
12188 else if (block.merge == SPIRBlock::MergeLoop)
12189 {
12190 flush_undeclared_variables(block);
12191 emit_while_loop_initializers(block);
12192 emitted_loop_header_variables = true;
12193
12194 // We have a generic loop without any distinguishable pattern like for, while or do while.
12195 get<SPIRBlock>(block.continue_block).complex_continue = true;
12196 continue_type = SPIRBlock::ComplexLoop;
12197
12198 // We have some temporaries where the loop header is the dominator.
12199 // We risk a case where we have code like:
12200 // for (;;) { create-temporary; break; } consume-temporary;
12201 // so force-declare temporaries here.
12202 emit_hoisted_temporaries(block.potential_declare_temporary);
12203 statement("for (;;)");
12204 begin_scope();
12205
12206 emit_block_instructions(block);
12207 }
12208 else
12209 {
12210 emit_block_instructions(block);
12211 }
12212
12213 // If we didn't successfully emit a loop header and we had loop variable candidates, we have a problem
12214 // as writes to said loop variables might have been masked out, so we need a recompile.
12215 if (!emitted_loop_header_variables && !block.loop_variables.empty())
12216 {
12217 force_recompile();
12218 for (auto var : block.loop_variables)
12219 get<SPIRVariable>(var).loop_variable = false;
12220 block.loop_variables.clear();
12221 }
12222
12223 flush_undeclared_variables(block);
12224 bool emit_next_block = true;
12225
12226 // Handle end of block.
12227 switch (block.terminator)
12228 {
12229 case SPIRBlock::Direct:
12230 // True when emitting complex continue block.
12231 if (block.loop_dominator == block.next_block)
12232 {
12233 branch(block.self, block.next_block);
12234 emit_next_block = false;
12235 }
12236 // True if MergeToDirectForLoop succeeded.
12237 else if (skip_direct_branch)
12238 emit_next_block = false;
12239 else if (is_continue(block.next_block) || is_break(block.next_block) || is_conditional(block.next_block))
12240 {
12241 branch(block.self, block.next_block);
12242 emit_next_block = false;
12243 }
12244 break;
12245
12246 case SPIRBlock::Select:
12247 // True if MergeToSelectForLoop or MergeToSelectContinueForLoop succeeded.
12248 if (select_branch_to_true_block)
12249 {
12250 if (force_complex_continue_block)
12251 {
12252 assert(block.true_block == block.continue_block);
12253
12254 // We're going to emit a continue block directly here, so make sure it's marked as complex.
12255 auto &complex_continue = get<SPIRBlock>(block.continue_block).complex_continue;
12256 bool old_complex = complex_continue;
12257 complex_continue = true;
12258 branch(block.self, block.true_block);
12259 complex_continue = old_complex;
12260 }
12261 else
12262 branch(block.self, block.true_block);
12263 }
12264 else if (select_branch_to_false_block)
12265 {
12266 if (force_complex_continue_block)
12267 {
12268 assert(block.false_block == block.continue_block);
12269
12270 // We're going to emit a continue block directly here, so make sure it's marked as complex.
12271 auto &complex_continue = get<SPIRBlock>(block.continue_block).complex_continue;
12272 bool old_complex = complex_continue;
12273 complex_continue = true;
12274 branch(block.self, block.false_block);
12275 complex_continue = old_complex;
12276 }
12277 else
12278 branch(block.self, block.false_block);
12279 }
12280 else
12281 branch(block.self, block.condition, block.true_block, block.false_block);
12282 break;
12283
12284 case SPIRBlock::MultiSelect:
12285 {
12286 auto &type = expression_type(block.condition);
12287 bool unsigned_case =
12288 type.basetype == SPIRType::UInt || type.basetype == SPIRType::UShort || type.basetype == SPIRType::UByte;
12289
12290 if (block.merge == SPIRBlock::MergeNone)
12291 SPIRV_CROSS_THROW("Switch statement is not structured");
12292
12293 if (type.basetype == SPIRType::UInt64 || type.basetype == SPIRType::Int64)
12294 {
12295 // SPIR-V spec suggests this is allowed, but we cannot support it in higher level languages.
12296 SPIRV_CROSS_THROW("Cannot use 64-bit switch selectors.");
12297 }
12298
12299 const char *label_suffix = "";
12300 if (type.basetype == SPIRType::UInt && backend.uint32_t_literal_suffix)
12301 label_suffix = "u";
12302 else if (type.basetype == SPIRType::UShort)
12303 label_suffix = backend.uint16_t_literal_suffix;
12304 else if (type.basetype == SPIRType::Short)
12305 label_suffix = backend.int16_t_literal_suffix;
12306
12307 SPIRBlock *old_emitting_switch = current_emitting_switch;
12308 current_emitting_switch = &block;
12309
12310 if (block.need_ladder_break)
12311 statement("bool _", block.self, "_ladder_break = false;");
12312
12313 // Find all unique case constructs.
12314 unordered_map<uint32_t, SmallVector<uint32_t>> case_constructs;
12315 SmallVector<uint32_t> block_declaration_order;
12316 SmallVector<uint32_t> literals_to_merge;
12317
12318 // If a switch case branches to the default block for some reason, we can just remove that literal from consideration
12319 // and let the default: block handle it.
12320 // 2.11 in SPIR-V spec states that for fall-through cases, there is a very strict declaration order which we can take advantage of here.
12321 // We only need to consider possible fallthrough if order[i] branches to order[i + 1].
12322 for (auto &c : block.cases)
12323 {
12324 if (c.block != block.next_block && c.block != block.default_block)
12325 {
12326 if (!case_constructs.count(c.block))
12327 block_declaration_order.push_back(c.block);
12328 case_constructs[c.block].push_back(c.value);
12329 }
12330 else if (c.block == block.next_block && block.default_block != block.next_block)
12331 {
12332 // We might have to flush phi inside specific case labels.
12333 // If we can piggyback on default:, do so instead.
12334 literals_to_merge.push_back(c.value);
12335 }
12336 }
12337
12338 // Empty literal array -> default.
12339 if (block.default_block != block.next_block)
12340 {
12341 auto &default_block = get<SPIRBlock>(block.default_block);
12342
12343 // We need to slide in the default block somewhere in this chain
12344 // if there are fall-through scenarios since the default is declared separately in OpSwitch.
12345 // Only consider trivial fall-through cases here.
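// Illustrative example: if "case 3:" falls straight into the default block, the default label
// must be emitted immediately after it so the fallthrough survives:
//     case 3: { ... }   // falls through
//     default: { ... }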
12346 size_t num_blocks = block_declaration_order.size();
12347 bool injected_block = false;
12348
12349 for (size_t i = 0; i < num_blocks; i++)
12350 {
12351 auto &case_block = get<SPIRBlock>(block_declaration_order[i]);
12352 if (execution_is_direct_branch(case_block, default_block))
12353 {
12354 // Fallthrough to default block, we must inject the default block here.
12355 block_declaration_order.insert(begin(block_declaration_order) + i + 1, block.default_block);
12356 injected_block = true;
12357 break;
12358 }
12359 else if (execution_is_direct_branch(default_block, case_block))
12360 {
12361 // Default case is falling through to another case label, we must inject the default block here.
12362 block_declaration_order.insert(begin(block_declaration_order) + i, block.default_block);
12363 injected_block = true;
12364 break;
12365 }
12366 }
12367
12368 // Order does not matter.
12369 if (!injected_block)
12370 block_declaration_order.push_back(block.default_block);
12371
12372 case_constructs[block.default_block] = {};
12373 }
12374
12375 size_t num_blocks = block_declaration_order.size();
12376
12377 const auto to_case_label = [](uint32_t literal, bool is_unsigned_case) -> string {
12378 return is_unsigned_case ? convert_to_string(literal) : convert_to_string(int32_t(literal));
12379 };
12380
12381 // We need to deal with a complex scenario for OpPhi. If we have case-fallthrough and Phi in the picture,
12382 // we need to flush phi nodes outside the switch block in a branch,
12383 // and skip any Phi handling inside the case label to make fall-through work as expected.
12384 // This kind of code-gen is super awkward and it's a last resort. Normally we would want to handle this
12385 // inside the case label if at all possible.
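// Illustrative sketch (labels and names are made up): with fallthrough from case 1 into case 2
// and a phi entering case 2, the phi store is hoisted in front of the switch as
//     if (sel == 2) { phi_var = ...; }
//     switch (sel) { case 1: ... /* falls through */ case 2: ... }
// and phi handling inside the case label itself is suppressed via ignore_phi_from_block.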
12386 for (size_t i = 1; i < num_blocks; i++)
12387 {
12388 if (flush_phi_required(block.self, block_declaration_order[i]) &&
12389 flush_phi_required(block_declaration_order[i - 1], block_declaration_order[i]))
12390 {
12391 uint32_t target_block = block_declaration_order[i];
12392
12393 // Make sure we flush Phi, it might have been marked to be ignored earlier.
12394 get<SPIRBlock>(target_block).ignore_phi_from_block = 0;
12395
12396 auto &literals = case_constructs[target_block];
12397
12398 if (literals.empty())
12399 {
12400 // Oh boy, gotta make a complete negative test instead! o.o
12401 // Find all possible literals that would *not* make us enter the default block.
12402 // If none of those literals match, we flush Phi ...
12403 SmallVector<string> conditions;
12404 for (size_t j = 0; j < num_blocks; j++)
12405 {
12406 auto &negative_literals = case_constructs[block_declaration_order[j]];
12407 for (auto &case_label : negative_literals)
12408 conditions.push_back(join(to_enclosed_expression(block.condition),
12409 " != ", to_case_label(case_label, unsigned_case)));
12410 }
12411
12412 statement("if (", merge(conditions, " && "), ")");
12413 begin_scope();
12414 flush_phi(block.self, target_block);
12415 end_scope();
12416 }
12417 else
12418 {
12419 SmallVector<string> conditions;
12420 conditions.reserve(literals.size());
12421 for (auto &case_label : literals)
12422 conditions.push_back(join(to_enclosed_expression(block.condition),
12423 " == ", to_case_label(case_label, unsigned_case)));
12424 statement("if (", merge(conditions, " || "), ")");
12425 begin_scope();
12426 flush_phi(block.self, target_block);
12427 end_scope();
12428 }
12429
12430 // Mark the block so that we don't flush Phi from header to case label.
12431 get<SPIRBlock>(target_block).ignore_phi_from_block = block.self;
12432 }
12433 }
12434
12435 emit_block_hints(block);
12436 statement("switch (", to_expression(block.condition), ")");
12437 begin_scope();
12438
12439 for (size_t i = 0; i < num_blocks; i++)
12440 {
12441 uint32_t target_block = block_declaration_order[i];
12442 auto &literals = case_constructs[target_block];
12443
12444 if (literals.empty())
12445 {
12446 // Default case.
12447 statement("default:");
12448 }
12449 else
12450 {
12451 for (auto &case_literal : literals)
12452 {
12453 // The case label value must be sign-extended properly in SPIR-V, so we can assume 32-bit values here.
12454 statement("case ", to_case_label(case_literal, unsigned_case), label_suffix, ":");
12455 }
12456 }
12457
12458 auto &case_block = get<SPIRBlock>(target_block);
12459 if (backend.support_case_fallthrough && i + 1 < num_blocks &&
12460 execution_is_direct_branch(case_block, get<SPIRBlock>(block_declaration_order[i + 1])))
12461 {
12462 // We will fall through here, so just terminate the block chain early.
12463 // We still need to deal with Phi potentially.
12464 // No need for a stack-like thing here since we only do fall-through when there is a
12465 // single trivial branch to the fall-through target.
12466 current_emitting_switch_fallthrough = true;
12467 }
12468 else
12469 current_emitting_switch_fallthrough = false;
12470
12471 begin_scope();
12472 branch(block.self, target_block);
12473 end_scope();
12474
12475 current_emitting_switch_fallthrough = false;
12476 }
12477
12478 // Might still have to flush phi variables if we branch from loop header directly to merge target.
12479 if (flush_phi_required(block.self, block.next_block))
12480 {
12481 if (block.default_block == block.next_block || !literals_to_merge.empty())
12482 {
12483 for (auto &case_literal : literals_to_merge)
12484 statement("case ", to_case_label(case_literal, unsigned_case), label_suffix, ":");
12485
12486 if (block.default_block == block.next_block)
12487 statement("default:");
12488
12489 begin_scope();
12490 flush_phi(block.self, block.next_block);
12491 statement("break;");
12492 end_scope();
12493 }
12494 }
12495
12496 end_scope();
12497
12498 if (block.need_ladder_break)
12499 {
12500 statement("if (_", block.self, "_ladder_break)");
12501 begin_scope();
12502 statement("break;");
12503 end_scope();
12504 }
12505
12506 current_emitting_switch = old_emitting_switch;
12507 break;
12508 }
12509
12510 case SPIRBlock::Return:
12511 {
12512 for (auto &line : current_function->fixup_hooks_out)
12513 line();
12514
12515 if (processing_entry_point)
12516 emit_fixup();
12517
12518 auto &cfg = get_cfg_for_current_function();
12519
12520 if (block.return_value)
12521 {
12522 auto &type = expression_type(block.return_value);
12523 if (!type.array.empty() && !backend.can_return_array)
12524 {
12525 // If we cannot return arrays, we will have a special out argument we can write to instead.
12526 // The backend is responsible for setting this up and redirecting the return values as appropriate.
12527 if (ir.ids[block.return_value].get_type() != TypeUndef)
12528 {
12529 emit_array_copy("SPIRV_Cross_return_value", block.return_value, StorageClassFunction,
12530 get_backing_variable_storage(block.return_value));
12531 }
12532
12533 if (!cfg.node_terminates_control_flow_in_sub_graph(current_function->entry_block, block.self) ||
12534 block.loop_dominator != BlockID(SPIRBlock::NoDominator))
12535 {
12536 statement("return;");
12537 }
12538 }
12539 else
12540 {
12541 // OpReturnValue can return Undef, so don't emit anything for this case.
12542 if (ir.ids[block.return_value].get_type() != TypeUndef)
12543 statement("return ", to_expression(block.return_value), ";");
12544 }
12545 }
12546 else if (!cfg.node_terminates_control_flow_in_sub_graph(current_function->entry_block, block.self) ||
12547 block.loop_dominator != BlockID(SPIRBlock::NoDominator))
12548 {
12549 // If this block is the very final block and not called from control flow,
12550 // we do not need an explicit return which looks out of place. Just end the function here.
12551 // In the very weird case of for(;;) { return; } executing return is unconditional,
12552 // but we actually need a return here ...
12553 statement("return;");
12554 }
12555 break;
12556 }
12557
12558 case SPIRBlock::Kill:
12559 statement(backend.discard_literal, ";");
12560 break;
12561
12562 case SPIRBlock::Unreachable:
12563 emit_next_block = false;
12564 break;
12565
12566 default:
12567 SPIRV_CROSS_THROW("Unimplemented block terminator.");
12568 }
12569
12570 if (block.next_block && emit_next_block)
12571 {
12572 // If we hit this case, we're dealing with an unconditional branch, which means we will output
12573 // that block after this. If we had selection merge, we already flushed phi variables.
12574 if (block.merge != SPIRBlock::MergeSelection)
12575 flush_phi(block.self, block.next_block);
12576
12577 // For switch fallthrough cases, we terminate the chain here, but we still need to handle Phi.
12578 if (!current_emitting_switch_fallthrough)
12579 {
12580 // For merge selects we might have ignored the fact that a merge target
12581 // could have been a break; or continue;
12582 // We will need to deal with it here.
12583 if (is_loop_break(block.next_block))
12584 {
12585 // Cannot check for just break, because switch statements will also use break.
12586 assert(block.merge == SPIRBlock::MergeSelection);
12587 statement("break;");
12588 }
12589 else if (is_continue(block.next_block))
12590 {
12591 assert(block.merge == SPIRBlock::MergeSelection);
12592 branch_to_continue(block.self, block.next_block);
12593 }
12594 else if (BlockID(block.self) != block.next_block)
12595 emit_block_chain(get<SPIRBlock>(block.next_block));
12596 }
12597 }
12598
12599 if (block.merge == SPIRBlock::MergeLoop)
12600 {
12601 if (continue_type == SPIRBlock::DoWhileLoop)
12602 {
12603 // Make sure that we run the continue block to get the expressions set, but this
12604 // should become an empty string.
12605 // We have no fallbacks if we cannot forward everything to temporaries ...
12606 const auto &continue_block = get<SPIRBlock>(block.continue_block);
12607 bool positive_test = execution_is_noop(get<SPIRBlock>(continue_block.true_block),
12608 get<SPIRBlock>(continue_block.loop_dominator));
12609
12610 uint32_t current_count = statement_count;
12611 auto statements = emit_continue_block(block.continue_block, positive_test, !positive_test);
12612 if (statement_count != current_count)
12613 {
12614 // The DoWhile block has side effects, force ComplexLoop pattern next pass.
12615 get<SPIRBlock>(block.continue_block).complex_continue = true;
12616 force_recompile();
12617 }
12618
12619 // Might have to invert the do-while test here.
12620 auto condition = to_expression(continue_block.condition);
12621 if (!positive_test)
12622 condition = join("!", enclose_expression(condition));
12623
12624 end_scope_decl(join("while (", condition, ")"));
12625 }
12626 else
12627 end_scope();
12628
12629 // We cannot break out of two loops at once, so don't check for break; here.
12630 // Using block.self as the "from" block isn't quite right, but it has the same scope
12631 // and dominance structure, so it's fine.
12632 if (is_continue(block.merge_block))
12633 branch_to_continue(block.self, block.merge_block);
12634 else
12635 emit_block_chain(get<SPIRBlock>(block.merge_block));
12636 }
12637
12638 // Forget about control dependent expressions now.
12639 block.invalidate_expressions.clear();
12640
12641 // After we return, we must be out of scope, so if we somehow have to re-emit this function,
12642 // re-declare variables if necessary.
12643 assert(rearm_dominated_variables.size() == block.dominated_variables.size());
12644 for (size_t i = 0; i < block.dominated_variables.size(); i++)
12645 {
12646 uint32_t var = block.dominated_variables[i];
12647 get<SPIRVariable>(var).deferred_declaration = rearm_dominated_variables[i];
12648 }
12649
12650 // Just like for deferred declaration, we need to forget about loop variable enable
12651 // if our block chain is reinstantiated later.
12652 for (auto &var_id : block.loop_variables)
12653 get<SPIRVariable>(var_id).loop_variable_enable = false;
12654 }
12655
12656 void CompilerGLSL::begin_scope()
12657 {
12658 statement("{");
12659 indent++;
12660 }
12661
12662 void CompilerGLSL::end_scope()
12663 {
12664 if (!indent)
12665 SPIRV_CROSS_THROW("Popping empty indent stack.");
12666 indent--;
12667 statement("}");
12668 }
12669
12670 void CompilerGLSL::end_scope(const string &trailer)
12671 {
12672 if (!indent)
12673 SPIRV_CROSS_THROW("Popping empty indent stack.");
12674 indent--;
12675 statement("}", trailer);
12676 }
12677
12678 void CompilerGLSL::end_scope_decl()
12679 {
12680 if (!indent)
12681 SPIRV_CROSS_THROW("Popping empty indent stack.");
12682 indent--;
12683 statement("};");
12684 }
12685
12686 void CompilerGLSL::end_scope_decl(const string &decl)
12687 {
12688 if (!indent)
12689 SPIRV_CROSS_THROW("Popping empty indent stack.");
12690 indent--;
12691 statement("} ", decl, ";");
12692 }
12693
12694 void CompilerGLSL::check_function_call_constraints(const uint32_t *args, uint32_t length)
12695 {
12696 // If our variable is remapped, and we rely on type-remapping information as
12697 // well, then we cannot pass the variable as a function parameter.
12698 // Fixing this is non-trivial without stamping out variants of the same function,
12699 // so for now warn about this and suggest workarounds instead.
12700 for (uint32_t i = 0; i < length; i++)
12701 {
12702 auto *var = maybe_get<SPIRVariable>(args[i]);
12703 if (!var || !var->remapped_variable)
12704 continue;
12705
12706 auto &type = get<SPIRType>(var->basetype);
12707 if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData)
12708 {
12709 SPIRV_CROSS_THROW("Tried passing a remapped subpassInput variable to a function. "
12710 "This will not work correctly because type-remapping information is lost. "
12711 "To workaround, please consider not passing the subpass input as a function parameter, "
12712 "or use in/out variables instead which do not need type remapping information.");
12713 }
12714 }
12715 }
12716
12717 const Instruction *CompilerGLSL::get_next_instruction_in_block(const Instruction &instr)
12718 {
12719 // FIXME: This is kind of hacky. There should be a cleaner way.
12720 auto offset = uint32_t(&instr - current_emitting_block->ops.data());
12721 if ((offset + 1) < current_emitting_block->ops.size())
12722 return &current_emitting_block->ops[offset + 1];
12723 else
12724 return nullptr;
12725 }
12726
12727 uint32_t CompilerGLSL::mask_relevant_memory_semantics(uint32_t semantics)
12728 {
12729 return semantics & (MemorySemanticsAtomicCounterMemoryMask | MemorySemanticsImageMemoryMask |
12730 MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask |
12731 MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask);
12732 }
12733
12734 void CompilerGLSL::emit_array_copy(const string &lhs, uint32_t rhs_id, StorageClass, StorageClass)
12735 {
12736 statement(lhs, " = ", to_expression(rhs_id), ";");
12737 }
12738
12739 void CompilerGLSL::unroll_array_from_complex_load(uint32_t target_id, uint32_t source_id, std::string &expr)
12740 {
12741 if (!backend.force_gl_in_out_block)
12742 return;
12743 // This path is only relevant for GL backends.
12744
12745 auto *var = maybe_get<SPIRVariable>(source_id);
12746 if (!var)
12747 return;
12748
12749 if (var->storage != StorageClassInput)
12750 return;
12751
12752 auto &type = get_variable_data_type(*var);
12753 if (type.array.empty())
12754 return;
12755
12756 auto builtin = BuiltIn(get_decoration(var->self, DecorationBuiltIn));
12757 bool is_builtin = is_builtin_variable(*var) && (builtin == BuiltInPointSize || builtin == BuiltInPosition);
12758 bool is_tess = is_tessellation_shader();
12759
12760 // Tessellation input arrays are special in that they are unsized, so we cannot directly copy from it.
12761 // We must unroll the array load.
12762 // For builtins, we couldn't catch this case normally,
12763 // because this is resolved in the OpAccessChain in most cases.
12764 // If we load the entire array, we have no choice but to unroll here.
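	// Illustrative result for a full gl_Position load from gl_in[] (names are schematic):
	//   vec4 _<target_id>_unrolled[N];
	//   for (int i = 0; i < int(N); i++)
	//       _<target_id>_unrolled[i] = gl_in[i].gl_Position;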
	if (is_builtin || is_tess)
	{
		auto new_expr = join("_", target_id, "_unrolled");
		statement(variable_decl(type, new_expr, target_id), ";");
		string array_expr;
		if (type.array_size_literal.back())
		{
			array_expr = convert_to_string(type.array.back());
			if (type.array.back() == 0)
				SPIRV_CROSS_THROW("Cannot unroll an array copy from unsized array.");
		}
		else
			array_expr = to_expression(type.array.back());

		// The array size might be a specialization constant, so use a for-loop instead.
		statement("for (int i = 0; i < int(", array_expr, "); i++)");
		begin_scope();
		if (is_builtin)
			statement(new_expr, "[i] = gl_in[i].", expr, ";");
		else
			statement(new_expr, "[i] = ", expr, "[i];");
		end_scope();

		expr = move(new_expr);
	}
}

void CompilerGLSL::bitcast_from_builtin_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type)
{
	auto *var = maybe_get_backing_variable(source_id);
	if (var)
		source_id = var->self;

	// Only interested in standalone builtin variables.
	if (!has_decoration(source_id, DecorationBuiltIn))
		return;

	auto builtin = static_cast<BuiltIn>(get_decoration(source_id, DecorationBuiltIn));
	auto expected_type = expr_type.basetype;

	// TODO: Fill in for more builtins.
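	// These cases cover builtins whose GLSL type has a fixed signedness:
	// e.g. gl_InstanceID/gl_VertexID are int, while gl_GlobalInvocationID is uvec3.
	// If the SPIR-V module declared the builtin with the opposite signedness,
	// bitcast the loaded expression to the type the module expects.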
	switch (builtin)
	{
	case BuiltInLayer:
	case BuiltInPrimitiveId:
	case BuiltInViewportIndex:
	case BuiltInInstanceId:
	case BuiltInInstanceIndex:
	case BuiltInVertexId:
	case BuiltInVertexIndex:
	case BuiltInSampleId:
	case BuiltInBaseVertex:
	case BuiltInBaseInstance:
	case BuiltInDrawIndex:
	case BuiltInFragStencilRefEXT:
		expected_type = SPIRType::Int;
		break;

	case BuiltInGlobalInvocationId:
	case BuiltInLocalInvocationId:
	case BuiltInWorkgroupId:
	case BuiltInLocalInvocationIndex:
	case BuiltInWorkgroupSize:
	case BuiltInNumWorkgroups:
		expected_type = SPIRType::UInt;
		break;

	default:
		break;
	}

	if (expected_type != expr_type.basetype)
		expr = bitcast_expression(expr_type, expected_type, expr);
}

void CompilerGLSL::bitcast_to_builtin_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type)
{
	// Only interested in standalone builtin variables.
	if (!has_decoration(target_id, DecorationBuiltIn))
		return;

	auto builtin = static_cast<BuiltIn>(get_decoration(target_id, DecorationBuiltIn));
	auto expected_type = expr_type.basetype;

	// TODO: Fill in for more builtins.
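	// Values stored to these builtins must be int in GLSL (e.g. gl_Layer, gl_PrimitiveID),
	// so a uint source expression is bitcast before the store.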
	switch (builtin)
	{
	case BuiltInLayer:
	case BuiltInPrimitiveId:
	case BuiltInViewportIndex:
	case BuiltInFragStencilRefEXT:
		expected_type = SPIRType::Int;
		break;

	default:
		break;
	}

	if (expected_type != expr_type.basetype)
	{
		auto type = expr_type;
		type.basetype = expected_type;
		expr = bitcast_expression(type, expr_type.basetype, expr);
	}
}

void CompilerGLSL::convert_non_uniform_expression(const SPIRType &type, std::string &expr)
{
	if (*backend.nonuniform_qualifier == '\0')
		return;

	// Handle SPV_EXT_descriptor_indexing.
	if (type.basetype == SPIRType::Sampler || type.basetype == SPIRType::SampledImage ||
	    type.basetype == SPIRType::Image)
	{
		// The image/sampler ID must be declared as non-uniform.
		// However, it is not legal GLSL to write
		// nonuniformEXT(samplers[index]), so we must move the nonuniform qualifier
		// to the array indexing, like
		// samplers[nonuniformEXT(index)].
		// While the access chain will generally be nonuniformEXT, it's not necessarily so,
		// so we might have to fix up the loaded expression late.

		auto start_array_index = expr.find_first_of('[');
		auto end_array_index = expr.find_last_of(']');
		// It doesn't really make sense to declare a non-arrayed image with nonuniformEXT, but there's
		// nothing we can do here to express that.
		if (start_array_index == string::npos || end_array_index == string::npos || end_array_index < start_array_index)
			return;

		start_array_index++;

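		// Splice the qualifier around everything between the first '[' and the last ']',
		// e.g. "samplers[idx]" becomes "samplers[nonuniformEXT(idx)]".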
		expr = join(expr.substr(0, start_array_index), backend.nonuniform_qualifier, "(",
		            expr.substr(start_array_index, end_array_index - start_array_index), ")",
		            expr.substr(end_array_index, string::npos));
	}
}

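// Intentionally a no-op for the GLSL backend. Subclasses may override this to emit
// branch/flatten style hints (e.g. HLSL attributes) ahead of a selection construct.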
void CompilerGLSL::emit_block_hints(const SPIRBlock &)
{
}

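// Names preserved here survive a reset triggered by force_recompile();
// reset_name_caches() restores them and clears the per-compile naming caches.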
void CompilerGLSL::preserve_alias_on_reset(uint32_t id)
{
	preserved_aliases[id] = get_name(id);
}

void CompilerGLSL::reset_name_caches()
{
	for (auto &preserved : preserved_aliases)
		set_name(preserved.first, preserved.second);

	preserved_aliases.clear();
	resource_names.clear();
	block_input_names.clear();
	block_output_names.clear();
	block_ubo_names.clear();
	block_ssbo_names.clear();
	block_names.clear();
	function_overloads.clear();
}

void CompilerGLSL::fixup_type_alias()
{
	// Due to how some backends work, the "master" type of type_alias must be a block-like type if it exists.
	// FIXME: Multiple alias types which are both block-like will be awkward; for now, it's best to just drop
	// the type alias if the slave type is a block type.
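	// (A "block-like" type here is a struct decorated as Block/BufferBlock, or one whose members carry
	// explicit Offset decorations, i.e. a struct used for UBO/SSBO/push-constant layouts.)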
	ir.for_each_typed_id<SPIRType>([&](uint32_t self, SPIRType &type) {
		if (type.type_alias && type_is_block_like(type))
		{
			// Become the master.
			ir.for_each_typed_id<SPIRType>([&](uint32_t other_id, SPIRType &other_type) {
				if (other_id == type.self)
					return;

				if (other_type.type_alias == type.type_alias)
					other_type.type_alias = type.self;
			});

			this->get<SPIRType>(type.type_alias).type_alias = self;
			type.type_alias = 0;
		}
	});

	ir.for_each_typed_id<SPIRType>([&](uint32_t, SPIRType &type) {
		if (type.type_alias && type_is_block_like(type))
		{
			// This is not allowed, drop the type_alias.
			type.type_alias = 0;
		}
		else if (type.type_alias && !type_is_block_like(this->get<SPIRType>(type.type_alias)))
		{
			// If the alias master is not a block-like type, there is no reason to use type aliasing.
			// This case can happen if two structs are declared with the same name, but they are unrelated.
			// Aliases are only used to deal with aliased types for structs which are used in different buffer types,
			// which all create a variant of the same struct with different DecorationOffset values.
			type.type_alias = 0;
		}
	});
}

void CompilerGLSL::reorder_type_alias()
{
	// Reorder declaration of types so that the master of the type alias is always emitted first.
	// We need this in case a type B depends on type A (A must come before it in the vector), but A is an alias
	// of a type ABuffer, which means declaration of A doesn't happen (yet), and the order would be B, ABuffer
	// and not ABuffer, B. Fix this up here.
	auto loop_lock = ir.create_loop_hard_lock();

	auto &type_ids = ir.ids_for_type[TypeType];
	for (auto alias_itr = begin(type_ids); alias_itr != end(type_ids); ++alias_itr)
	{
		auto &type = get<SPIRType>(*alias_itr);
		if (type.type_alias != TypeID(0) &&
		    !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked))
		{
			// We will skip declaring this type, so make sure the type_alias type comes before.
			auto master_itr = find(begin(type_ids), end(type_ids), ID(type.type_alias));
			assert(master_itr != end(type_ids));

			if (alias_itr < master_itr)
			{
				// Must also swap the type order for the constant-type joined array.
				auto &joined_types = ir.ids_for_constant_or_type;
				auto alt_alias_itr = find(begin(joined_types), end(joined_types), *alias_itr);
				auto alt_master_itr = find(begin(joined_types), end(joined_types), *master_itr);
				assert(alt_alias_itr != end(joined_types));
				assert(alt_master_itr != end(joined_types));

				swap(*alias_itr, *master_itr);
				swap(*alt_alias_itr, *alt_master_itr);
			}
		}
	}
}

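// Emits a directive of the form:  #line 42 "shader.frag"
// The string file name form requires GL_GOOGLE_cpp_style_line_directive.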
void CompilerGLSL::emit_line_directive(uint32_t file_id, uint32_t line_literal)
{
	// If we are redirecting statements, ignore the line directive.
	// Common case here is continue blocks.
	if (redirect_statement)
		return;

	if (options.emit_line_directives)
	{
		require_extension_internal("GL_GOOGLE_cpp_style_line_directive");
		statement_no_indent("#line ", line_literal, " \"", get<SPIRString>(file_id).str, "\"");
	}
}

void CompilerGLSL::propagate_nonuniform_qualifier(uint32_t id)
{
	// SPIR-V might only tag the very last ID with NonUniformEXT, but for codegen,
	// we need to know NonUniformEXT a little earlier, when the resource is actually loaded.
	// Back-propagate the qualifier based on the expression dependency chain.
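	// e.g. if only the sampled-image load is tagged NonUniform, the decoration is propagated back
	// through the access chain and its index expressions so the emitted indexing uses nonuniformEXT().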

	if (!has_decoration(id, DecorationNonUniformEXT))
	{
		set_decoration(id, DecorationNonUniformEXT);
		force_recompile();
	}

	auto *e = maybe_get<SPIRExpression>(id);
	auto *combined = maybe_get<SPIRCombinedImageSampler>(id);
	auto *chain = maybe_get<SPIRAccessChain>(id);
	if (e)
	{
		for (auto &expr : e->expression_dependencies)
			propagate_nonuniform_qualifier(expr);
		for (auto &expr : e->implied_read_expressions)
			propagate_nonuniform_qualifier(expr);
	}
	else if (combined)
	{
		propagate_nonuniform_qualifier(combined->image);
		propagate_nonuniform_qualifier(combined->sampler);
	}
	else if (chain)
	{
		for (auto &expr : chain->implied_read_expressions)
			propagate_nonuniform_qualifier(expr);
	}
}
