/*
 * Copyright 2015-2021 Arm Limited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * At your option, you may choose to accept this material under either:
 *  1. The Apache License, Version 2.0, found at <http://www.apache.org/licenses/LICENSE-2.0>, or
 *  2. The MIT License, found at <http://opensource.org/licenses/MIT>.
 * SPDX-License-Identifier: Apache-2.0 OR MIT.
 */

#include "spirv_glsl.hpp"
#include "GLSL.std.450.h"
#include "spirv_common.hpp"
#include <algorithm>
#include <assert.h>
#include <cmath>
#include <limits>
#include <locale.h>
#include <utility>

#ifndef _WIN32
#include <langinfo.h>
#endif
#include <locale.h>

using namespace spv;
using namespace SPIRV_CROSS_NAMESPACE;
using namespace std;

static bool is_unsigned_opcode(Op op)
{
	// Don't have to be exhaustive, only relevant for legacy target checking ...
	switch (op)
	{
	case OpShiftRightLogical:
	case OpUGreaterThan:
	case OpUGreaterThanEqual:
	case OpULessThan:
	case OpULessThanEqual:
	case OpUConvert:
	case OpUDiv:
	case OpUMod:
	case OpUMulExtended:
	case OpConvertUToF:
	case OpConvertFToU:
		return true;

	default:
		return false;
	}
}

static bool is_unsigned_glsl_opcode(GLSLstd450 op)
{
	// Don't have to be exhaustive, only relevant for legacy target checking ...
	switch (op)
	{
	case GLSLstd450UClamp:
	case GLSLstd450UMin:
	case GLSLstd450UMax:
	case GLSLstd450FindUMsb:
		return true;

	default:
		return false;
	}
}

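// std140 and HLSL cbuffer rules pad the alignment of structs and array elements
// up to a vec4 (16 byte) boundary; std430 and scalar layouts do not.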
static bool packing_is_vec4_padded(BufferPackingStandard packing)
{
	switch (packing)
	{
	case BufferPackingHLSLCbuffer:
	case BufferPackingHLSLCbufferPackOffset:
	case BufferPackingStd140:
	case BufferPackingStd140EnhancedLayout:
		return true;

	default:
		return false;
	}
}

static bool packing_is_hlsl(BufferPackingStandard packing)
{
	switch (packing)
	{
	case BufferPackingHLSLCbuffer:
	case BufferPackingHLSLCbufferPackOffset:
		return true;

	default:
		return false;
	}
}

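// The EnhancedLayout variants allow explicit offset qualifiers, so offsets are flexible;
// the plain std140/std430/scalar/cbuffer layouts compute every offset by rule.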
static bool packing_has_flexible_offset(BufferPackingStandard packing)
{
	switch (packing)
	{
	case BufferPackingStd140:
	case BufferPackingStd430:
	case BufferPackingScalar:
	case BufferPackingHLSLCbuffer:
		return false;

	default:
		return true;
	}
}

static bool packing_is_scalar(BufferPackingStandard packing)
{
	switch (packing)
	{
	case BufferPackingScalar:
	case BufferPackingScalarEnhancedLayout:
		return true;

	default:
		return false;
	}
}

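// Enhanced layouts (explicit offsets) can only be used on the top-level block,
// so when recursing into a nested struct we fall back to the plain packing standard.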
static BufferPackingStandard packing_to_substruct_packing(BufferPackingStandard packing)
{
	switch (packing)
	{
	case BufferPackingStd140EnhancedLayout:
		return BufferPackingStd140;
	case BufferPackingStd430EnhancedLayout:
		return BufferPackingStd430;
	case BufferPackingHLSLCbufferPackOffset:
		return BufferPackingHLSLCbuffer;
	case BufferPackingScalarEnhancedLayout:
		return BufferPackingScalar;
	default:
		return packing;
	}
}

void CompilerGLSL::init()
{
	if (ir.source.known)
	{
		options.es = ir.source.es;
		options.version = ir.source.version;
	}

	// Query the locale to see what the decimal point is.
	// We'll rely on fixing it up ourselves in the rare case we have a comma-as-decimal locale
	// rather than setting locales ourselves. Setting locales in a safe and isolated way is rather
	// tricky.
#ifdef _WIN32
	// On Windows, localeconv uses thread-local storage, so it should be fine.
	const struct lconv *conv = localeconv();
	if (conv && conv->decimal_point)
		current_locale_radix_character = *conv->decimal_point;
#elif defined(__ANDROID__) && __ANDROID_API__ < 26
	// nl_langinfo is not supported on this platform, fall back to the worse alternative.
	const struct lconv *conv = localeconv();
	if (conv && conv->decimal_point)
		current_locale_radix_character = *conv->decimal_point;
#else
	// localeconv, the portable function, is not MT safe ...
	const char *decimal_point = nl_langinfo(RADIXCHAR);
	if (decimal_point && *decimal_point != '\0')
		current_locale_radix_character = *decimal_point;
#endif
}

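// Returns the layout qualifier prefix used when declaring a pixel local storage member,
// e.g. "layout(rg16f) " for PlsRG16F.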
static const char *to_pls_layout(PlsFormat format)
{
	switch (format)
	{
	case PlsR11FG11FB10F:
		return "layout(r11f_g11f_b10f) ";
	case PlsR32F:
		return "layout(r32f) ";
	case PlsRG16F:
		return "layout(rg16f) ";
	case PlsRGB10A2:
		return "layout(rgb10_a2) ";
	case PlsRGBA8:
		return "layout(rgba8) ";
	case PlsRG16:
		return "layout(rg16) ";
	case PlsRGBA8I:
		return "layout(rgba8i) ";
	case PlsRG16I:
		return "layout(rg16i) ";
	case PlsRGB10A2UI:
		return "layout(rgb10_a2ui) ";
	case PlsRGBA8UI:
		return "layout(rgba8ui) ";
	case PlsRG16UI:
		return "layout(rg16ui) ";
	case PlsR32UI:
		return "layout(r32ui) ";
	default:
		return "";
	}
}

static SPIRType::BaseType pls_format_to_basetype(PlsFormat format)
{
	switch (format)
	{
	default:
	case PlsR11FG11FB10F:
	case PlsR32F:
	case PlsRG16F:
	case PlsRGB10A2:
	case PlsRGBA8:
	case PlsRG16:
		return SPIRType::Float;

	case PlsRGBA8I:
	case PlsRG16I:
		return SPIRType::Int;

	case PlsRGB10A2UI:
	case PlsRGBA8UI:
	case PlsRG16UI:
	case PlsR32UI:
		return SPIRType::UInt;
	}
}

static uint32_t pls_format_to_components(PlsFormat format)
{
	switch (format)
	{
	default:
	case PlsR32F:
	case PlsR32UI:
		return 1;

	case PlsRG16F:
	case PlsRG16:
	case PlsRG16UI:
	case PlsRG16I:
		return 2;

	case PlsR11FG11FB10F:
		return 3;

	case PlsRGB10A2:
	case PlsRGBA8:
	case PlsRGBA8I:
	case PlsRGB10A2UI:
	case PlsRGBA8UI:
		return 4;
	}
}

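// Returns a swizzle addressing a contiguous range of components, e.g. vector_swizzle(2, 1)
// yields ".yz" and vector_swizzle(4, 0) yields "" (a full vec4 needs no swizzle).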
const char *CompilerGLSL::vector_swizzle(int vecsize, int index)
{
	static const char *const swizzle[4][4] = {
		{ ".x", ".y", ".z", ".w" },
		{ ".xy", ".yz", ".zw", nullptr },
		{ ".xyz", ".yzw", nullptr, nullptr },
#if defined(__GNUC__) && (__GNUC__ == 9)
		// This works around a GCC 9 bug, see details in https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90947.
		// This array ends up being compiled as all nullptrs, tripping the assertions below.
		{ "", nullptr, nullptr, "$" },
#else
		{ "", nullptr, nullptr, nullptr },
#endif
	};

	assert(vecsize >= 1 && vecsize <= 4);
	assert(index >= 0 && index < 4);
	assert(swizzle[vecsize - 1][index]);

	return swizzle[vecsize - 1][index];
}

void CompilerGLSL::reset()
{
	// We do some speculative optimizations which should pretty much always work out,
	// but just in case the SPIR-V is rather weird, recompile until it's happy.
	// This typically only means one extra pass.
	clear_force_recompile();

	// Clear invalid expression tracking.
	invalid_expressions.clear();
	current_function = nullptr;

	// Clear temporary usage tracking.
	expression_usage_counts.clear();
	forwarded_temporaries.clear();
	suppressed_usage_tracking.clear();

	// Ensure that we declare phi-variable copies even if the original declaration isn't deferred
	flushed_phi_variables.clear();

	reset_name_caches();

	ir.for_each_typed_id<SPIRFunction>([&](uint32_t, SPIRFunction &func) {
		func.active = false;
		func.flush_undeclared = true;
	});

	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { var.dependees.clear(); });

	ir.reset_all_of_type<SPIRExpression>();
	ir.reset_all_of_type<SPIRAccessChain>();

	statement_count = 0;
	indent = 0;
	current_loop_level = 0;
}

void CompilerGLSL::remap_pls_variables()
{
	for (auto &input : pls_inputs)
	{
		auto &var = get<SPIRVariable>(input.id);

		bool input_is_target = false;
		if (var.storage == StorageClassUniformConstant)
		{
			auto &type = get<SPIRType>(var.basetype);
			input_is_target = type.image.dim == DimSubpassData;
		}

		if (var.storage != StorageClassInput && !input_is_target)
			SPIRV_CROSS_THROW("Can only use in and target variables for PLS inputs.");
		var.remapped_variable = true;
	}

	for (auto &output : pls_outputs)
	{
		auto &var = get<SPIRVariable>(output.id);
		if (var.storage != StorageClassOutput)
			SPIRV_CROSS_THROW("Can only use out variables for PLS outputs.");
		var.remapped_variable = true;
	}
}

void CompilerGLSL::remap_ext_framebuffer_fetch(uint32_t input_attachment_index, uint32_t color_location)
{
	subpass_to_framebuffer_fetch_attachment.push_back({ input_attachment_index, color_location });
	inout_color_attachments.insert(color_location);
}

void CompilerGLSL::find_static_extensions()
{
	ir.for_each_typed_id<SPIRType>([&](uint32_t, const SPIRType &type) {
		if (type.basetype == SPIRType::Double)
		{
			if (options.es)
				SPIRV_CROSS_THROW("FP64 not supported in ES profile.");
			if (!options.es && options.version < 400)
				require_extension_internal("GL_ARB_gpu_shader_fp64");
		}
		else if (type.basetype == SPIRType::Int64 || type.basetype == SPIRType::UInt64)
		{
			if (options.es)
				SPIRV_CROSS_THROW("64-bit integers not supported in ES profile.");
			if (!options.es)
				require_extension_internal("GL_ARB_gpu_shader_int64");
		}
		else if (type.basetype == SPIRType::Half)
		{
			require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_float16");
			if (options.vulkan_semantics)
				require_extension_internal("GL_EXT_shader_16bit_storage");
		}
		else if (type.basetype == SPIRType::SByte || type.basetype == SPIRType::UByte)
		{
			require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_int8");
			if (options.vulkan_semantics)
				require_extension_internal("GL_EXT_shader_8bit_storage");
		}
		else if (type.basetype == SPIRType::Short || type.basetype == SPIRType::UShort)
		{
			require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_int16");
			if (options.vulkan_semantics)
				require_extension_internal("GL_EXT_shader_16bit_storage");
		}
	});

	auto &execution = get_entry_point();
	switch (execution.model)
	{
	case ExecutionModelGLCompute:
		if (!options.es && options.version < 430)
			require_extension_internal("GL_ARB_compute_shader");
		if (options.es && options.version < 310)
			SPIRV_CROSS_THROW("At least ESSL 3.10 required for compute shaders.");
		break;

	case ExecutionModelGeometry:
		if (options.es && options.version < 320)
			require_extension_internal("GL_EXT_geometry_shader");
		if (!options.es && options.version < 150)
			require_extension_internal("GL_ARB_geometry_shader4");

		if (execution.flags.get(ExecutionModeInvocations) && execution.invocations != 1)
		{
			// Instanced GS is part of 400 core or this extension.
			if (!options.es && options.version < 400)
				require_extension_internal("GL_ARB_gpu_shader5");
		}
		break;

	case ExecutionModelTessellationEvaluation:
	case ExecutionModelTessellationControl:
		if (options.es && options.version < 320)
			require_extension_internal("GL_EXT_tessellation_shader");
		if (!options.es && options.version < 400)
			require_extension_internal("GL_ARB_tessellation_shader");
		break;

	case ExecutionModelRayGenerationKHR:
	case ExecutionModelIntersectionKHR:
	case ExecutionModelAnyHitKHR:
	case ExecutionModelClosestHitKHR:
	case ExecutionModelMissKHR:
	case ExecutionModelCallableKHR:
		// NV enums are aliases.
		if (options.es || options.version < 460)
			SPIRV_CROSS_THROW("Ray tracing shaders require non-es profile with version 460 or above.");
		if (!options.vulkan_semantics)
			SPIRV_CROSS_THROW("Ray tracing requires Vulkan semantics.");

		// Need to figure out if we should target KHR or NV extension based on capabilities.
		for (auto &cap : ir.declared_capabilities)
		{
			if (cap == CapabilityRayTracingKHR || cap == CapabilityRayQueryKHR)
			{
				ray_tracing_is_khr = true;
				break;
			}
		}

		if (ray_tracing_is_khr)
		{
			// In KHR ray tracing we pass payloads by pointer instead of location,
			// so make sure we assign locations properly.
			ray_tracing_khr_fixup_locations();
			require_extension_internal("GL_EXT_ray_tracing");
		}
		else
			require_extension_internal("GL_NV_ray_tracing");
		break;

	default:
		break;
	}

	if (!pls_inputs.empty() || !pls_outputs.empty())
	{
		if (execution.model != ExecutionModelFragment)
			SPIRV_CROSS_THROW("Can only use GL_EXT_shader_pixel_local_storage in fragment shaders.");
		require_extension_internal("GL_EXT_shader_pixel_local_storage");
	}

	if (!inout_color_attachments.empty())
	{
		if (execution.model != ExecutionModelFragment)
			SPIRV_CROSS_THROW("Can only use GL_EXT_shader_framebuffer_fetch in fragment shaders.");
		if (options.vulkan_semantics)
			SPIRV_CROSS_THROW("Cannot use EXT_shader_framebuffer_fetch in Vulkan GLSL.");
		require_extension_internal("GL_EXT_shader_framebuffer_fetch");
	}

	if (options.separate_shader_objects && !options.es && options.version < 410)
		require_extension_internal("GL_ARB_separate_shader_objects");

	if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
	{
		if (!options.vulkan_semantics)
			SPIRV_CROSS_THROW("GL_EXT_buffer_reference is only supported in Vulkan GLSL.");
		if (options.es && options.version < 320)
			SPIRV_CROSS_THROW("GL_EXT_buffer_reference requires ESSL 320.");
		else if (!options.es && options.version < 450)
			SPIRV_CROSS_THROW("GL_EXT_buffer_reference requires GLSL 450.");
		require_extension_internal("GL_EXT_buffer_reference");
	}
	else if (ir.addressing_model != AddressingModelLogical)
	{
		SPIRV_CROSS_THROW("Only Logical and PhysicalStorageBuffer64EXT addressing models are supported.");
	}

	// Check for nonuniform qualifier and passthrough.
	// Instead of looping over all decorations to find this, just look at capabilities.
	for (auto &cap : ir.declared_capabilities)
	{
		switch (cap)
		{
		case CapabilityShaderNonUniformEXT:
			if (!options.vulkan_semantics)
				require_extension_internal("GL_NV_gpu_shader5");
			else
				require_extension_internal("GL_EXT_nonuniform_qualifier");
			break;
		case CapabilityRuntimeDescriptorArrayEXT:
			if (!options.vulkan_semantics)
				SPIRV_CROSS_THROW("GL_EXT_nonuniform_qualifier is only supported in Vulkan GLSL.");
			require_extension_internal("GL_EXT_nonuniform_qualifier");
			break;

		case CapabilityGeometryShaderPassthroughNV:
			if (execution.model == ExecutionModelGeometry)
			{
				require_extension_internal("GL_NV_geometry_shader_passthrough");
				execution.geometry_passthrough = true;
			}
			break;

		case CapabilityVariablePointers:
		case CapabilityVariablePointersStorageBuffer:
			SPIRV_CROSS_THROW("VariablePointers capability is not supported in GLSL.");

		default:
			break;
		}
	}
}

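// KHR ray tracing identifies payloads by pointer rather than by an explicit location,
// so assign sequential locations to every payload/callable-data variable in the entry point interface.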
void CompilerGLSL::ray_tracing_khr_fixup_locations()
{
	uint32_t location = 0;
	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
		if (var.storage != StorageClassRayPayloadKHR && var.storage != StorageClassCallableDataKHR)
			return;
		if (!interface_variable_exists_in_entry_point(var.self))
			return;
		set_decoration(var.self, DecorationLocation, location++);
	});
}

string CompilerGLSL::compile()
{
	ir.fixup_reserved_names();

	if (options.vulkan_semantics)
		backend.allow_precision_qualifiers = true;
	else
	{
		// only NV_gpu_shader5 supports divergent indexing on OpenGL, and it does so without extra qualifiers
		backend.nonuniform_qualifier = "";
		backend.needs_row_major_load_workaround = true;
	}
	backend.force_gl_in_out_block = true;
	backend.supports_extensions = true;
	backend.use_array_constructor = true;

	if (is_legacy_es())
		backend.support_case_fallthrough = false;

	// Scan the SPIR-V to find trivial uses of extensions.
	fixup_type_alias();
	reorder_type_alias();
	build_function_control_flow_graphs_and_analyze();
	find_static_extensions();
	fixup_image_load_store_access();
	update_active_builtins();
	analyze_image_and_sampler_usage();
	analyze_interlocked_resource_usage();
	if (!inout_color_attachments.empty())
		emit_inout_fragment_outputs_copy_to_subpass_inputs();

	// Shaders might cast unrelated data to pointers of non-block types.
	// Find all such instances and make sure we can cast the pointers to a synthesized block type.
	if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
		analyze_non_block_pointer_types();

	uint32_t pass_count = 0;
	do
	{
		if (pass_count >= 3)
			SPIRV_CROSS_THROW("Over 3 compilation loops detected. Must be a bug!");

		reset();

		buffer.reset();

		emit_header();
		emit_resources();
		emit_extension_workarounds(get_execution_model());

		emit_function(get<SPIRFunction>(ir.default_entry_point), Bitset());

		pass_count++;
	} while (is_forcing_recompilation());

	// Implement the interlocked wrapper function at the end.
	// The body was implemented in lieu of main().
	if (interlocked_is_complex)
	{
		statement("void main()");
		begin_scope();
		statement("// Interlocks were used in a way not compatible with GLSL, this is very slow.");
		if (options.es)
			statement("beginInvocationInterlockNV();");
		else
			statement("beginInvocationInterlockARB();");
		statement("spvMainInterlockedBody();");
		if (options.es)
			statement("endInvocationInterlockNV();");
		else
			statement("endInvocationInterlockARB();");
		end_scope();
	}

	// Entry point in GLSL is always main().
	get_entry_point().name = "main";

	return buffer.str();
}

std::string CompilerGLSL::get_partial_source()
{
	return buffer.str();
}

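// Builds the layout(local_size_*) arguments for a compute shader; e.g. a fixed 8x8x1 workgroup
// yields "local_size_x = 8, local_size_y = 8, local_size_z = 1". Specialization constants are
// referenced via constant_id on Vulkan, or via their macro names on plain GLSL.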
void CompilerGLSL::build_workgroup_size(SmallVector<string> &arguments, const SpecializationConstant &wg_x,
                                        const SpecializationConstant &wg_y, const SpecializationConstant &wg_z)
{
	auto &execution = get_entry_point();

	if (wg_x.id)
	{
		if (options.vulkan_semantics)
			arguments.push_back(join("local_size_x_id = ", wg_x.constant_id));
		else
			arguments.push_back(join("local_size_x = ", get<SPIRConstant>(wg_x.id).specialization_constant_macro_name));
	}
	else
		arguments.push_back(join("local_size_x = ", execution.workgroup_size.x));

	if (wg_y.id)
	{
		if (options.vulkan_semantics)
			arguments.push_back(join("local_size_y_id = ", wg_y.constant_id));
		else
			arguments.push_back(join("local_size_y = ", get<SPIRConstant>(wg_y.id).specialization_constant_macro_name));
	}
	else
		arguments.push_back(join("local_size_y = ", execution.workgroup_size.y));

	if (wg_z.id)
	{
		if (options.vulkan_semantics)
			arguments.push_back(join("local_size_z_id = ", wg_z.constant_id));
		else
			arguments.push_back(join("local_size_z = ", get<SPIRConstant>(wg_z.id).specialization_constant_macro_name));
	}
	else
		arguments.push_back(join("local_size_z = ", execution.workgroup_size.z));
}

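// On Vulkan GLSL we can simply require the relevant KHR subgroup extension.
// On plain GLSL we record the request and force a recompile so that emit_header()
// can emit the vendor extension fallback chain on the next pass.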
void CompilerGLSL::request_subgroup_feature(ShaderSubgroupSupportHelper::Feature feature)
{
	if (options.vulkan_semantics)
	{
		auto khr_extension = ShaderSubgroupSupportHelper::get_KHR_extension_for_feature(feature);
		require_extension_internal(ShaderSubgroupSupportHelper::get_extension_name(khr_extension));
	}
	else
	{
		if (!shader_subgroup_supporter.is_feature_requested(feature))
			force_recompile();
		shader_subgroup_supporter.request_feature(feature);
	}
}

void CompilerGLSL::emit_header()
{
	auto &execution = get_entry_point();
	statement("#version ", options.version, options.es && options.version > 100 ? " es" : "");

	if (!options.es && options.version < 420)
	{
		// Needed for binding = # on UBOs, etc.
		if (options.enable_420pack_extension)
		{
			statement("#ifdef GL_ARB_shading_language_420pack");
			statement("#extension GL_ARB_shading_language_420pack : require");
			statement("#endif");
		}
		// Needed for: layout(early_fragment_tests) in;
		if (execution.flags.get(ExecutionModeEarlyFragmentTests))
			require_extension_internal("GL_ARB_shader_image_load_store");
	}

	// Needed for: layout(post_depth_coverage) in;
	if (execution.flags.get(ExecutionModePostDepthCoverage))
		require_extension_internal("GL_ARB_post_depth_coverage");

	// Needed for: layout({pixel,sample}_interlock_[un]ordered) in;
	if (execution.flags.get(ExecutionModePixelInterlockOrderedEXT) ||
	    execution.flags.get(ExecutionModePixelInterlockUnorderedEXT) ||
	    execution.flags.get(ExecutionModeSampleInterlockOrderedEXT) ||
	    execution.flags.get(ExecutionModeSampleInterlockUnorderedEXT))
	{
		if (options.es)
		{
			if (options.version < 310)
				SPIRV_CROSS_THROW("At least ESSL 3.10 required for fragment shader interlock.");
			require_extension_internal("GL_NV_fragment_shader_interlock");
		}
		else
		{
			if (options.version < 420)
				require_extension_internal("GL_ARB_shader_image_load_store");
			require_extension_internal("GL_ARB_fragment_shader_interlock");
		}
	}

	for (auto &ext : forced_extensions)
	{
		if (ext == "GL_EXT_shader_explicit_arithmetic_types_float16")
		{
			// Special case, this extension has a potential fallback to another vendor extension in normal GLSL.
			// GL_AMD_gpu_shader_half_float is a superset, so try that first.
			statement("#if defined(GL_AMD_gpu_shader_half_float)");
			statement("#extension GL_AMD_gpu_shader_half_float : require");
			if (!options.vulkan_semantics)
			{
				statement("#elif defined(GL_NV_gpu_shader5)");
				statement("#extension GL_NV_gpu_shader5 : require");
			}
			else
			{
				statement("#elif defined(GL_EXT_shader_explicit_arithmetic_types_float16)");
				statement("#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require");
			}
			statement("#else");
			statement("#error No extension available for FP16.");
			statement("#endif");
		}
		else if (ext == "GL_EXT_shader_explicit_arithmetic_types_int16")
		{
			if (options.vulkan_semantics)
				statement("#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require");
			else
			{
				statement("#if defined(GL_AMD_gpu_shader_int16)");
				statement("#extension GL_AMD_gpu_shader_int16 : require");
				statement("#else");
				statement("#error No extension available for Int16.");
				statement("#endif");
			}
		}
		else if (ext == "GL_ARB_post_depth_coverage")
		{
			if (options.es)
				statement("#extension GL_EXT_post_depth_coverage : require");
			else
			{
769 statement("#if defined(GL_ARB_post_depth_coverge)");
770 statement("#extension GL_ARB_post_depth_coverage : require");
771 statement("#else");
772 statement("#extension GL_EXT_post_depth_coverage : require");
773 statement("#endif");
774 }
775 }
776 else if (!options.vulkan_semantics && ext == "GL_ARB_shader_draw_parameters")
777 {
778 // Soft-enable this extension on plain GLSL.
779 statement("#ifdef ", ext);
780 statement("#extension ", ext, " : enable");
781 statement("#endif");
782 }
783 else
784 statement("#extension ", ext, " : require");
785 }
786
787 if (!options.vulkan_semantics)
788 {
789 using Supp = ShaderSubgroupSupportHelper;
790 auto result = shader_subgroup_supporter.resolve();
791
792 for (uint32_t feature_index = 0; feature_index < Supp::FeatureCount; feature_index++)
793 {
794 auto feature = static_cast<Supp::Feature>(feature_index);
795 if (!shader_subgroup_supporter.is_feature_requested(feature))
796 continue;
797
798 auto exts = Supp::get_candidates_for_feature(feature, result);
799 if (exts.empty())
800 continue;
801
802 statement("");
803
804 for (auto &ext : exts)
805 {
806 const char *name = Supp::get_extension_name(ext);
807 const char *extra_predicate = Supp::get_extra_required_extension_predicate(ext);
808 auto extra_names = Supp::get_extra_required_extension_names(ext);
809 statement(&ext != &exts.front() ? "#elif" : "#if", " defined(", name, ")",
810 (*extra_predicate != '\0' ? " && " : ""), extra_predicate);
811 for (const auto &e : extra_names)
812 statement("#extension ", e, " : enable");
813 statement("#extension ", name, " : require");
814 }
815
816 if (!Supp::can_feature_be_implemented_without_extensions(feature))
817 {
818 statement("#else");
819 statement("#error No extensions available to emulate requested subgroup feature.");
820 }
821
822 statement("#endif");
823 }
824 }
825
826 for (auto &header : header_lines)
827 statement(header);
828
829 SmallVector<string> inputs;
830 SmallVector<string> outputs;
831
832 switch (execution.model)
833 {
834 case ExecutionModelGeometry:
835 if ((execution.flags.get(ExecutionModeInvocations)) && execution.invocations != 1)
836 inputs.push_back(join("invocations = ", execution.invocations));
837 if (execution.flags.get(ExecutionModeInputPoints))
838 inputs.push_back("points");
839 if (execution.flags.get(ExecutionModeInputLines))
840 inputs.push_back("lines");
841 if (execution.flags.get(ExecutionModeInputLinesAdjacency))
842 inputs.push_back("lines_adjacency");
843 if (execution.flags.get(ExecutionModeTriangles))
844 inputs.push_back("triangles");
845 if (execution.flags.get(ExecutionModeInputTrianglesAdjacency))
846 inputs.push_back("triangles_adjacency");
847
848 if (!execution.geometry_passthrough)
849 {
			// For passthrough, these are implied and cannot be declared in the shader.
			outputs.push_back(join("max_vertices = ", execution.output_vertices));
			if (execution.flags.get(ExecutionModeOutputTriangleStrip))
				outputs.push_back("triangle_strip");
			if (execution.flags.get(ExecutionModeOutputPoints))
				outputs.push_back("points");
			if (execution.flags.get(ExecutionModeOutputLineStrip))
				outputs.push_back("line_strip");
		}
		break;

	case ExecutionModelTessellationControl:
		if (execution.flags.get(ExecutionModeOutputVertices))
			outputs.push_back(join("vertices = ", execution.output_vertices));
		break;

	case ExecutionModelTessellationEvaluation:
		if (execution.flags.get(ExecutionModeQuads))
			inputs.push_back("quads");
		if (execution.flags.get(ExecutionModeTriangles))
			inputs.push_back("triangles");
		if (execution.flags.get(ExecutionModeIsolines))
			inputs.push_back("isolines");
		if (execution.flags.get(ExecutionModePointMode))
			inputs.push_back("point_mode");

		if (!execution.flags.get(ExecutionModeIsolines))
		{
			if (execution.flags.get(ExecutionModeVertexOrderCw))
				inputs.push_back("cw");
			if (execution.flags.get(ExecutionModeVertexOrderCcw))
				inputs.push_back("ccw");
		}

		if (execution.flags.get(ExecutionModeSpacingFractionalEven))
			inputs.push_back("fractional_even_spacing");
		if (execution.flags.get(ExecutionModeSpacingFractionalOdd))
			inputs.push_back("fractional_odd_spacing");
		if (execution.flags.get(ExecutionModeSpacingEqual))
			inputs.push_back("equal_spacing");
		break;

	case ExecutionModelGLCompute:
	{
		if (execution.workgroup_size.constant != 0)
		{
			SpecializationConstant wg_x, wg_y, wg_z;
			get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);

			// If there are any spec constants on legacy GLSL, defer declaration, we need to set up macro
			// declarations before we can emit the work group size.
			if (options.vulkan_semantics ||
			    ((wg_x.id == ConstantID(0)) && (wg_y.id == ConstantID(0)) && (wg_z.id == ConstantID(0))))
				build_workgroup_size(inputs, wg_x, wg_y, wg_z);
		}
		else
		{
			inputs.push_back(join("local_size_x = ", execution.workgroup_size.x));
			inputs.push_back(join("local_size_y = ", execution.workgroup_size.y));
			inputs.push_back(join("local_size_z = ", execution.workgroup_size.z));
		}
		break;
	}

	case ExecutionModelFragment:
		if (options.es)
		{
			switch (options.fragment.default_float_precision)
			{
			case Options::Lowp:
				statement("precision lowp float;");
				break;

			case Options::Mediump:
				statement("precision mediump float;");
				break;

			case Options::Highp:
				statement("precision highp float;");
				break;

			default:
				break;
			}

			switch (options.fragment.default_int_precision)
			{
			case Options::Lowp:
				statement("precision lowp int;");
				break;

			case Options::Mediump:
				statement("precision mediump int;");
				break;

			case Options::Highp:
				statement("precision highp int;");
				break;

			default:
				break;
			}
		}

		if (execution.flags.get(ExecutionModeEarlyFragmentTests))
			inputs.push_back("early_fragment_tests");
		if (execution.flags.get(ExecutionModePostDepthCoverage))
			inputs.push_back("post_depth_coverage");

		if (execution.flags.get(ExecutionModePixelInterlockOrderedEXT))
			inputs.push_back("pixel_interlock_ordered");
		else if (execution.flags.get(ExecutionModePixelInterlockUnorderedEXT))
			inputs.push_back("pixel_interlock_unordered");
		else if (execution.flags.get(ExecutionModeSampleInterlockOrderedEXT))
			inputs.push_back("sample_interlock_ordered");
		else if (execution.flags.get(ExecutionModeSampleInterlockUnorderedEXT))
			inputs.push_back("sample_interlock_unordered");

		if (!options.es && execution.flags.get(ExecutionModeDepthGreater))
			statement("layout(depth_greater) out float gl_FragDepth;");
		else if (!options.es && execution.flags.get(ExecutionModeDepthLess))
			statement("layout(depth_less) out float gl_FragDepth;");

		break;

	default:
		break;
	}

	if (!inputs.empty())
		statement("layout(", merge(inputs), ") in;");
	if (!outputs.empty())
		statement("layout(", merge(outputs), ") out;");

	statement("");
}

bool CompilerGLSL::type_is_empty(const SPIRType &type)
{
	return type.basetype == SPIRType::Struct && type.member_types.empty();
}

void CompilerGLSL::emit_struct(SPIRType &type)
{
	// Struct types can be stamped out multiple times
	// with just different offsets, matrix layouts, etc ...
	// Type-punning with these types is legal, which complicates things
	// when we are storing struct and array types in an SSBO for example.
	// If the type master is packed however, we can no longer assume that the struct declaration will be redundant.
	if (type.type_alias != TypeID(0) &&
	    !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked))
		return;

	add_resource_name(type.self);
	auto name = type_to_glsl(type);

	statement(!backend.explicit_struct_type ? "struct " : "", name);
	begin_scope();

	type.member_name_cache.clear();

	uint32_t i = 0;
	bool emitted = false;
	for (auto &member : type.member_types)
	{
		add_member_name(type, i);
		emit_struct_member(type, member, i);
		i++;
		emitted = true;
	}

	// Don't declare empty structs in GLSL, this is not allowed.
	if (type_is_empty(type) && !backend.supports_empty_struct)
	{
		statement("int empty_struct_member;");
		emitted = true;
	}

	if (has_extended_decoration(type.self, SPIRVCrossDecorationPaddingTarget))
		emit_struct_padding_target(type);

	end_scope_decl();

	if (emitted)
		statement("");
}

string CompilerGLSL::to_interpolation_qualifiers(const Bitset &flags)
{
	string res;
	//if (flags & (1ull << DecorationSmooth))
	//    res += "smooth ";
	if (flags.get(DecorationFlat))
		res += "flat ";
	if (flags.get(DecorationNoPerspective))
		res += "noperspective ";
	if (flags.get(DecorationCentroid))
		res += "centroid ";
	if (flags.get(DecorationPatch))
		res += "patch ";
	if (flags.get(DecorationSample))
		res += "sample ";
	if (flags.get(DecorationInvariant))
		res += "invariant ";
	if (flags.get(DecorationExplicitInterpAMD))
		res += "__explicitInterpAMD ";

	return res;
}

string CompilerGLSL::layout_for_member(const SPIRType &type, uint32_t index)
{
	if (is_legacy())
		return "";

	bool is_block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) ||
	                ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
	if (!is_block)
		return "";

	auto &memb = ir.meta[type.self].members;
	if (index >= memb.size())
		return "";
	auto &dec = memb[index];

	SmallVector<string> attr;

	if (has_member_decoration(type.self, index, DecorationPassthroughNV))
		attr.push_back("passthrough");

	// We can only apply layouts on members in block interfaces.
	// This is a bit problematic because in SPIR-V decorations are applied on the struct types directly.
	// This is not supported in GLSL, so we have to make the assumption that if a struct within our buffer block struct
	// has a decoration, it was originally caused by a top-level layout() qualifier in GLSL.
	//
	// We would like to go from (SPIR-V style):
	//
	// struct Foo { layout(row_major) mat4 matrix; };
	// buffer UBO { Foo foo; };
	//
	// to
	//
	// struct Foo { mat4 matrix; }; // GLSL doesn't support any layout shenanigans in raw struct declarations.
	// buffer UBO { layout(row_major) Foo foo; }; // Apply the layout on top-level.
	auto flags = combined_decoration_for_member(type, index);

	if (flags.get(DecorationRowMajor))
		attr.push_back("row_major");
	// We don't emit any global layouts, so column_major is default.
	//if (flags & (1ull << DecorationColMajor))
	//    attr.push_back("column_major");

	if (dec.decoration_flags.get(DecorationLocation) && can_use_io_location(type.storage, true))
		attr.push_back(join("location = ", dec.location));

	// Can only declare component if we can declare location.
	if (dec.decoration_flags.get(DecorationComponent) && can_use_io_location(type.storage, true))
	{
		if (!options.es)
		{
			if (options.version < 440 && options.version >= 140)
				require_extension_internal("GL_ARB_enhanced_layouts");
			else if (options.version < 140)
				SPIRV_CROSS_THROW("Component decoration is not supported in targets below GLSL 1.40.");
			attr.push_back(join("component = ", dec.component));
		}
		else
			SPIRV_CROSS_THROW("Component decoration is not supported in ES targets.");
	}

	// SPIRVCrossDecorationPacked is set by layout_for_variable earlier to mark that we need to emit offset qualifiers.
	// This is only done selectively in GLSL as needed.
	if (has_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset) &&
	    dec.decoration_flags.get(DecorationOffset))
		attr.push_back(join("offset = ", dec.offset));
	else if (type.storage == StorageClassOutput && dec.decoration_flags.get(DecorationOffset))
		attr.push_back(join("xfb_offset = ", dec.offset));

	if (attr.empty())
		return "";

	string res = "layout(";
	res += merge(attr);
	res += ") ";
	return res;
}

const char *CompilerGLSL::format_to_glsl(spv::ImageFormat format)
{
	if (options.es && is_desktop_only_format(format))
		SPIRV_CROSS_THROW("Attempting to use image format not supported in ES profile.");

	switch (format)
	{
	case ImageFormatRgba32f:
		return "rgba32f";
	case ImageFormatRgba16f:
		return "rgba16f";
	case ImageFormatR32f:
		return "r32f";
	case ImageFormatRgba8:
		return "rgba8";
	case ImageFormatRgba8Snorm:
		return "rgba8_snorm";
	case ImageFormatRg32f:
		return "rg32f";
	case ImageFormatRg16f:
		return "rg16f";
	case ImageFormatRgba32i:
		return "rgba32i";
	case ImageFormatRgba16i:
		return "rgba16i";
	case ImageFormatR32i:
		return "r32i";
	case ImageFormatRgba8i:
		return "rgba8i";
	case ImageFormatRg32i:
		return "rg32i";
	case ImageFormatRg16i:
		return "rg16i";
	case ImageFormatRgba32ui:
		return "rgba32ui";
	case ImageFormatRgba16ui:
		return "rgba16ui";
	case ImageFormatR32ui:
		return "r32ui";
	case ImageFormatRgba8ui:
		return "rgba8ui";
	case ImageFormatRg32ui:
		return "rg32ui";
	case ImageFormatRg16ui:
		return "rg16ui";
	case ImageFormatR11fG11fB10f:
		return "r11f_g11f_b10f";
	case ImageFormatR16f:
		return "r16f";
	case ImageFormatRgb10A2:
		return "rgb10_a2";
	case ImageFormatR8:
		return "r8";
	case ImageFormatRg8:
		return "rg8";
	case ImageFormatR16:
		return "r16";
	case ImageFormatRg16:
		return "rg16";
	case ImageFormatRgba16:
		return "rgba16";
	case ImageFormatR16Snorm:
		return "r16_snorm";
	case ImageFormatRg16Snorm:
		return "rg16_snorm";
	case ImageFormatRgba16Snorm:
		return "rgba16_snorm";
	case ImageFormatR8Snorm:
		return "r8_snorm";
	case ImageFormatRg8Snorm:
		return "rg8_snorm";
	case ImageFormatR8ui:
		return "r8ui";
	case ImageFormatRg8ui:
		return "rg8ui";
	case ImageFormatR16ui:
		return "r16ui";
	case ImageFormatRgb10a2ui:
		return "rgb10_a2ui";
	case ImageFormatR8i:
		return "r8i";
	case ImageFormatRg8i:
		return "rg8i";
	case ImageFormatR16i:
		return "r16i";
	default:
	case ImageFormatUnknown:
		return nullptr;
	}
}

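// Size in bytes of one scalar component, e.g. 4 for float/int/uint, 8 for double and 64-bit integers.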
uint32_t CompilerGLSL::type_to_packed_base_size(const SPIRType &type, BufferPackingStandard)
{
	switch (type.basetype)
	{
	case SPIRType::Double:
	case SPIRType::Int64:
	case SPIRType::UInt64:
		return 8;
	case SPIRType::Float:
	case SPIRType::Int:
	case SPIRType::UInt:
		return 4;
	case SPIRType::Half:
	case SPIRType::Short:
	case SPIRType::UShort:
		return 2;
	case SPIRType::SByte:
	case SPIRType::UByte:
		return 1;

	default:
		SPIRV_CROSS_THROW("Unrecognized type in type_to_packed_base_size.");
	}
}

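// Computes the required alignment of a type under the given packing standard;
// e.g. in std140 a vec3 aligns to 16 bytes (rule 3), while scalar packing only
// needs the 4-byte alignment of its float component.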
uint32_t CompilerGLSL::type_to_packed_alignment(const SPIRType &type, const Bitset &flags,
                                                BufferPackingStandard packing)
{
	// If using PhysicalStorageBufferEXT storage class, this is a pointer,
	// and is 64-bit.
	if (type.storage == StorageClassPhysicalStorageBufferEXT)
	{
		if (!type.pointer)
			SPIRV_CROSS_THROW("Types in PhysicalStorageBufferEXT must be pointers.");

		if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
		{
			if (packing_is_vec4_padded(packing) && type_is_array_of_pointers(type))
				return 16;
			else
				return 8;
		}
		else
			SPIRV_CROSS_THROW("AddressingModelPhysicalStorageBuffer64EXT must be used for PhysicalStorageBufferEXT.");
	}

	if (!type.array.empty())
	{
		uint32_t minimum_alignment = 1;
		if (packing_is_vec4_padded(packing))
			minimum_alignment = 16;

		auto *tmp = &get<SPIRType>(type.parent_type);
		while (!tmp->array.empty())
			tmp = &get<SPIRType>(tmp->parent_type);

		// Get the alignment of the base type, then maybe round up.
		return max(minimum_alignment, type_to_packed_alignment(*tmp, flags, packing));
	}

	if (type.basetype == SPIRType::Struct)
	{
		// Rule 9. A struct's alignment is the maximum alignment of its members.
		uint32_t alignment = 1;
		for (uint32_t i = 0; i < type.member_types.size(); i++)
		{
			auto member_flags = ir.meta[type.self].members[i].decoration_flags;
			alignment =
			    max(alignment, type_to_packed_alignment(get<SPIRType>(type.member_types[i]), member_flags, packing));
		}

		// In std140, struct alignment is rounded up to 16.
		if (packing_is_vec4_padded(packing))
			alignment = max(alignment, 16u);

		return alignment;
	}
	else
	{
		const uint32_t base_alignment = type_to_packed_base_size(type, packing);

		// Alignment requirement for scalar block layout is always the alignment for the most basic component.
		if (packing_is_scalar(packing))
			return base_alignment;

		// Vectors are *not* aligned in HLSL, but there's an extra rule where vectors cannot straddle
		// a vec4, this is handled outside since that part knows our current offset.
		if (type.columns == 1 && packing_is_hlsl(packing))
			return base_alignment;

		// From 7.6.2.2 in GL 4.5 core spec.
		// Rule 1
		if (type.vecsize == 1 && type.columns == 1)
			return base_alignment;

		// Rule 2
		if ((type.vecsize == 2 || type.vecsize == 4) && type.columns == 1)
			return type.vecsize * base_alignment;

		// Rule 3
		if (type.vecsize == 3 && type.columns == 1)
			return 4 * base_alignment;

		// Rule 4 implied. Alignment does not change in std430.

		// Rule 5. Column-major matrices are stored as arrays of
		// vectors.
		if (flags.get(DecorationColMajor) && type.columns > 1)
		{
			if (packing_is_vec4_padded(packing))
				return 4 * base_alignment;
			else if (type.vecsize == 3)
				return 4 * base_alignment;
			else
				return type.vecsize * base_alignment;
		}

		// Rule 6 implied.

		// Rule 7.
		if (flags.get(DecorationRowMajor) && type.vecsize > 1)
		{
			if (packing_is_vec4_padded(packing))
				return 4 * base_alignment;
			else if (type.columns == 3)
				return 4 * base_alignment;
			else
				return type.columns * base_alignment;
		}

		// Rule 8 implied.
	}

	SPIRV_CROSS_THROW("Did not find suitable rule for type. Bogus decorations?");
}

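// e.g. an array of vec3 in std140 has a stride of 16 bytes: the element size (12)
// rounded up to the element's vec4 alignment.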
uint32_t CompilerGLSL::type_to_packed_array_stride(const SPIRType &type, const Bitset &flags,
                                                   BufferPackingStandard packing)
{
	// Array stride is equal to aligned size of the underlying type.
	uint32_t parent = type.parent_type;
	assert(parent);

	auto &tmp = get<SPIRType>(parent);

	uint32_t size = type_to_packed_size(tmp, flags, packing);
	uint32_t alignment = type_to_packed_alignment(type, flags, packing);
	return (size + alignment - 1) & ~(alignment - 1);
}

uint32_t CompilerGLSL::type_to_packed_size(const SPIRType &type, const Bitset &flags, BufferPackingStandard packing)
{
	if (!type.array.empty())
	{
		uint32_t packed_size = to_array_size_literal(type) * type_to_packed_array_stride(type, flags, packing);

		// For arrays of vectors and matrices in HLSL, the last element has a size which depends on its vector size,
		// so that it is possible to pack other vectors into the last element.
		if (packing_is_hlsl(packing) && type.basetype != SPIRType::Struct)
			packed_size -= (4 - type.vecsize) * (type.width / 8);

		return packed_size;
	}

	// If using PhysicalStorageBufferEXT storage class, this is a pointer,
	// and is 64-bit.
	if (type.storage == StorageClassPhysicalStorageBufferEXT)
	{
		if (!type.pointer)
			SPIRV_CROSS_THROW("Types in PhysicalStorageBufferEXT must be pointers.");

		if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
			return 8;
		else
			SPIRV_CROSS_THROW("AddressingModelPhysicalStorageBuffer64EXT must be used for PhysicalStorageBufferEXT.");
	}

	uint32_t size = 0;

	if (type.basetype == SPIRType::Struct)
	{
		uint32_t pad_alignment = 1;

		for (uint32_t i = 0; i < type.member_types.size(); i++)
		{
			auto member_flags = ir.meta[type.self].members[i].decoration_flags;
			auto &member_type = get<SPIRType>(type.member_types[i]);

			uint32_t packed_alignment = type_to_packed_alignment(member_type, member_flags, packing);
			uint32_t alignment = max(packed_alignment, pad_alignment);

			// The next member following a struct member is aligned to the base alignment of the struct that came before.
			// GL 4.5 spec, 7.6.2.2.
			if (member_type.basetype == SPIRType::Struct)
				pad_alignment = packed_alignment;
			else
				pad_alignment = 1;

			size = (size + alignment - 1) & ~(alignment - 1);
			size += type_to_packed_size(member_type, member_flags, packing);
		}
	}
	else
	{
		const uint32_t base_alignment = type_to_packed_base_size(type, packing);

		if (packing_is_scalar(packing))
		{
			size = type.vecsize * type.columns * base_alignment;
		}
		else
		{
			if (type.columns == 1)
				size = type.vecsize * base_alignment;

			if (flags.get(DecorationColMajor) && type.columns > 1)
			{
				if (packing_is_vec4_padded(packing))
					size = type.columns * 4 * base_alignment;
				else if (type.vecsize == 3)
					size = type.columns * 4 * base_alignment;
				else
					size = type.columns * type.vecsize * base_alignment;
			}

			if (flags.get(DecorationRowMajor) && type.vecsize > 1)
			{
				if (packing_is_vec4_padded(packing))
					size = type.vecsize * 4 * base_alignment;
				else if (type.columns == 3)
					size = type.vecsize * 4 * base_alignment;
				else
					size = type.vecsize * type.columns * base_alignment;
			}

			// For matrices in HLSL, the last element has a size which depends on its vector size,
			// so that it is possible to pack other vectors into the last element.
			if (packing_is_hlsl(packing) && type.columns > 1)
				size -= (4 - type.vecsize) * (type.width / 8);
		}
	}

	return size;
}

bool CompilerGLSL::buffer_is_packing_standard(const SPIRType &type, BufferPackingStandard packing,
                                              uint32_t *failed_validation_index, uint32_t start_offset,
                                              uint32_t end_offset)
{
	// This is very tricky and error prone, but try to be exhaustive and correct here.
	// SPIR-V doesn't directly say if we're using std430 or std140.
	// SPIR-V communicates this using Offset and ArrayStride decorations (which is what really matters),
	// so we have to try to infer whether or not the original GLSL source was std140 or std430 based on this information.
	// We do not have to consider shared or packed since these layouts are not allowed in Vulkan SPIR-V (they are useless anyway, and custom offsets would do the same thing).
	//
	// It is almost certain that we're using std430, but it gets tricky with arrays in particular.
	// We will assume std430, but infer std140 if we can prove the struct is not compliant with std430.
	//
	// The only two differences between std140 and std430 are related to padding alignment/array stride
	// in arrays and structs. In std140 they take minimum vec4 alignment.
	// std430 only removes the vec4 requirement.

	uint32_t offset = 0;
	uint32_t pad_alignment = 1;

	bool is_top_level_block =
	    has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock);

	for (uint32_t i = 0; i < type.member_types.size(); i++)
	{
		auto &memb_type = get<SPIRType>(type.member_types[i]);
		auto member_flags = ir.meta[type.self].members[i].decoration_flags;

		// Verify alignment rules.
		uint32_t packed_alignment = type_to_packed_alignment(memb_type, member_flags, packing);

		// This is a rather dirty workaround to deal with some cases of OpSpecConstantOp used as array size, e.g:
		// layout(constant_id = 0) const int s = 10;
		// const int S = s + 5; // SpecConstantOp
		// buffer Foo { int data[S]; }; // <-- Very hard for us to deduce a fixed value here,
		// we would need full implementation of compile-time constant folding. :(
		// If we are the last member of a struct, there might be cases where the actual size of that member is irrelevant
		// for our analysis (e.g. unsized arrays).
		// This lets us simply ignore that there are spec constant op sized arrays in our buffers.
		// Querying size of this member will fail, so just don't call it unless we have to.
		//
		// This is likely "best effort" we can support without going into unacceptably complicated workarounds.
		bool member_can_be_unsized =
		    is_top_level_block && size_t(i + 1) == type.member_types.size() && !memb_type.array.empty();

		uint32_t packed_size = 0;
		if (!member_can_be_unsized || packing_is_hlsl(packing))
			packed_size = type_to_packed_size(memb_type, member_flags, packing);

		// We only need to care about this if we have non-array types which can straddle the vec4 boundary.
		if (packing_is_hlsl(packing))
		{
			// If a member straddles across a vec4 boundary, alignment is actually vec4.
			uint32_t begin_word = offset / 16;
			uint32_t end_word = (offset + packed_size - 1) / 16;
			if (begin_word != end_word)
				packed_alignment = max(packed_alignment, 16u);
		}

		uint32_t actual_offset = type_struct_member_offset(type, i);
		// Field is not in the specified range anymore and we can ignore any further fields.
		if (actual_offset >= end_offset)
			break;

		uint32_t alignment = max(packed_alignment, pad_alignment);
		offset = (offset + alignment - 1) & ~(alignment - 1);

		// The next member following a struct member is aligned to the base alignment of the struct that came before.
		// GL 4.5 spec, 7.6.2.2.
		if (memb_type.basetype == SPIRType::Struct && !memb_type.pointer)
			pad_alignment = packed_alignment;
		else
			pad_alignment = 1;

		// Only care about packing if we are in the given range
		if (actual_offset >= start_offset)
		{
			// We only care about offsets in std140, std430, etc ...
			// For EnhancedLayout variants, we have the flexibility to choose our own offsets.
			if (!packing_has_flexible_offset(packing))
			{
				if (actual_offset != offset) // This cannot be the packing we're looking for.
				{
					if (failed_validation_index)
						*failed_validation_index = i;
					return false;
				}
			}
			else if ((actual_offset & (alignment - 1)) != 0)
			{
				// We still need to verify that alignment rules are observed, even if we have explicit offset.
				if (failed_validation_index)
					*failed_validation_index = i;
				return false;
			}

			// Verify array stride rules.
			if (!memb_type.array.empty() && type_to_packed_array_stride(memb_type, member_flags, packing) !=
			                                    type_struct_member_array_stride(type, i))
			{
				if (failed_validation_index)
					*failed_validation_index = i;
				return false;
			}

			// Verify that sub-structs also follow packing rules.
			// We cannot use enhanced layouts on substructs, so they better be up to spec.
			auto substruct_packing = packing_to_substruct_packing(packing);

			if (!memb_type.pointer && !memb_type.member_types.empty() &&
			    !buffer_is_packing_standard(memb_type, substruct_packing))
			{
				if (failed_validation_index)
					*failed_validation_index = i;
				return false;
			}
		}

		// Bump size.
		offset = actual_offset + packed_size;
	}

	return true;
}

bool CompilerGLSL::can_use_io_location(StorageClass storage, bool block)
{
	// Location specifiers are mandatory in SPIR-V, but they aren't really supported in earlier versions of GLSL.
1601 // Be very explicit here about how to solve the issue.
1602 if ((get_execution_model() != ExecutionModelVertex && storage == StorageClassInput) ||
1603 (get_execution_model() != ExecutionModelFragment && storage == StorageClassOutput))
1604 {
1605 uint32_t minimum_desktop_version = block ? 440 : 410;
1606 // ARB_enhanced_layouts vs ARB_separate_shader_objects ...
1607
1608 if (!options.es && options.version < minimum_desktop_version && !options.separate_shader_objects)
1609 return false;
1610 else if (options.es && options.version < 310)
1611 return false;
1612 }
1613
1614 if ((get_execution_model() == ExecutionModelVertex && storage == StorageClassInput) ||
1615 (get_execution_model() == ExecutionModelFragment && storage == StorageClassOutput))
1616 {
1617 if (options.es && options.version < 300)
1618 return false;
1619 else if (!options.es && options.version < 330)
1620 return false;
1621 }
1622
1623 if (storage == StorageClassUniform || storage == StorageClassUniformConstant || storage == StorageClassPushConstant)
1624 {
1625 if (options.es && options.version < 310)
1626 return false;
1627 else if (!options.es && options.version < 430)
1628 return false;
1629 }
1630
1631 return true;
1632 }

string CompilerGLSL::layout_for_variable(const SPIRVariable &var)
{
	// FIXME: Come up with a better solution for when to disable layouts.
	// Whether layouts can be used depends on the available extensions as well as
	// which kinds of layout qualifiers are needed. For now, the simple solution is
	// to just disable layouts for legacy versions.
	if (is_legacy())
		return "";

	if (subpass_input_is_framebuffer_fetch(var.self))
		return "";

	SmallVector<string> attr;

	auto &type = get<SPIRType>(var.basetype);
	auto &flags = get_decoration_bitset(var.self);
	auto &typeflags = get_decoration_bitset(type.self);

	if (flags.get(DecorationPassthroughNV))
		attr.push_back("passthrough");

	if (options.vulkan_semantics && var.storage == StorageClassPushConstant)
		attr.push_back("push_constant");
	else if (var.storage == StorageClassShaderRecordBufferKHR)
		attr.push_back(ray_tracing_is_khr ? "shaderRecordEXT" : "shaderRecordNV");

	if (flags.get(DecorationRowMajor))
		attr.push_back("row_major");
	if (flags.get(DecorationColMajor))
		attr.push_back("column_major");

	if (options.vulkan_semantics)
	{
		if (flags.get(DecorationInputAttachmentIndex))
			attr.push_back(join("input_attachment_index = ", get_decoration(var.self, DecorationInputAttachmentIndex)));
	}

	bool is_block = has_decoration(type.self, DecorationBlock);
	if (flags.get(DecorationLocation) && can_use_io_location(var.storage, is_block))
	{
		Bitset combined_decoration;
		for (uint32_t i = 0; i < ir.meta[type.self].members.size(); i++)
			combined_decoration.merge_or(combined_decoration_for_member(type, i));

		// If our members have location decorations, we don't need to
		// emit location decorations at the top as well (looks weird).
		if (!combined_decoration.get(DecorationLocation))
			attr.push_back(join("location = ", get_decoration(var.self, DecorationLocation)));
	}

	// Transform feedback
	bool uses_enhanced_layouts = false;
	if (is_block && var.storage == StorageClassOutput)
	{
		// For blocks, there is a restriction where xfb_stride/xfb_buffer must only be declared on the block itself,
		// since all members must match the same xfb_buffer. The only thing we will declare for members of the block
		// is the xfb_offset.
		uint32_t member_count = uint32_t(type.member_types.size());
		bool have_xfb_buffer_stride = false;
		bool have_any_xfb_offset = false;
		bool have_geom_stream = false;
		uint32_t xfb_stride = 0, xfb_buffer = 0, geom_stream = 0;

		if (flags.get(DecorationXfbBuffer) && flags.get(DecorationXfbStride))
		{
			have_xfb_buffer_stride = true;
			xfb_buffer = get_decoration(var.self, DecorationXfbBuffer);
			xfb_stride = get_decoration(var.self, DecorationXfbStride);
		}

		if (flags.get(DecorationStream))
		{
			have_geom_stream = true;
			geom_stream = get_decoration(var.self, DecorationStream);
		}

		// Verify that none of the members violate our assumption.
		for (uint32_t i = 0; i < member_count; i++)
		{
			if (has_member_decoration(type.self, i, DecorationStream))
			{
				uint32_t member_geom_stream = get_member_decoration(type.self, i, DecorationStream);
				if (have_geom_stream && member_geom_stream != geom_stream)
					SPIRV_CROSS_THROW("IO block member Stream mismatch.");
				have_geom_stream = true;
				geom_stream = member_geom_stream;
			}

			// Only members with an Offset decoration participate in XFB.
			if (!has_member_decoration(type.self, i, DecorationOffset))
				continue;
			have_any_xfb_offset = true;

			if (has_member_decoration(type.self, i, DecorationXfbBuffer))
			{
				uint32_t buffer_index = get_member_decoration(type.self, i, DecorationXfbBuffer);
				if (have_xfb_buffer_stride && buffer_index != xfb_buffer)
					SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch.");
				have_xfb_buffer_stride = true;
				xfb_buffer = buffer_index;
			}

			if (has_member_decoration(type.self, i, DecorationXfbStride))
			{
				uint32_t stride = get_member_decoration(type.self, i, DecorationXfbStride);
				if (have_xfb_buffer_stride && stride != xfb_stride)
					SPIRV_CROSS_THROW("IO block member XfbStride mismatch.");
				have_xfb_buffer_stride = true;
				xfb_stride = stride;
			}
		}

		if (have_xfb_buffer_stride && have_any_xfb_offset)
		{
			attr.push_back(join("xfb_buffer = ", xfb_buffer));
			attr.push_back(join("xfb_stride = ", xfb_stride));
			uses_enhanced_layouts = true;
		}

		if (have_geom_stream)
		{
			if (get_execution_model() != ExecutionModelGeometry)
				SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders.");
			if (options.es)
				SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL.");
			if (options.version < 400)
				require_extension_internal("GL_ARB_transform_feedback3");
			// Use the resolved stream here; the stream may have been declared on a member
			// rather than on the block variable itself.
			attr.push_back(join("stream = ", geom_stream));
		}
	}
	else if (var.storage == StorageClassOutput)
	{
		if (flags.get(DecorationXfbBuffer) && flags.get(DecorationXfbStride) && flags.get(DecorationOffset))
		{
			// XFB for standalone variables, we can emit all decorations.
			attr.push_back(join("xfb_buffer = ", get_decoration(var.self, DecorationXfbBuffer)));
			attr.push_back(join("xfb_stride = ", get_decoration(var.self, DecorationXfbStride)));
			attr.push_back(join("xfb_offset = ", get_decoration(var.self, DecorationOffset)));
			uses_enhanced_layouts = true;
		}

		if (flags.get(DecorationStream))
		{
			if (get_execution_model() != ExecutionModelGeometry)
				SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders.");
			if (options.es)
				SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL.");
			if (options.version < 400)
				require_extension_internal("GL_ARB_transform_feedback3");
			attr.push_back(join("stream = ", get_decoration(var.self, DecorationStream)));
		}
	}

	// Can only declare Component if we can declare location.
	if (flags.get(DecorationComponent) && can_use_io_location(var.storage, is_block))
	{
		uses_enhanced_layouts = true;
		attr.push_back(join("component = ", get_decoration(var.self, DecorationComponent)));
	}

	if (uses_enhanced_layouts)
	{
		if (!options.es)
		{
			if (options.version < 440 && options.version >= 140)
				require_extension_internal("GL_ARB_enhanced_layouts");
			else if (options.version < 140)
				SPIRV_CROSS_THROW("GL_ARB_enhanced_layouts is not supported in targets below GLSL 1.40.");
			if (!options.es && options.version < 440)
				require_extension_internal("GL_ARB_enhanced_layouts");
		}
		else if (options.es)
			SPIRV_CROSS_THROW("GL_ARB_enhanced_layouts is not supported in ESSL.");
	}

	if (flags.get(DecorationIndex))
		attr.push_back(join("index = ", get_decoration(var.self, DecorationIndex)));

	// Do not emit set = decoration in regular GLSL output, but
	// we need to preserve it in Vulkan GLSL mode.
	if (var.storage != StorageClassPushConstant && var.storage != StorageClassShaderRecordBufferKHR)
	{
		if (flags.get(DecorationDescriptorSet) && options.vulkan_semantics)
			attr.push_back(join("set = ", get_decoration(var.self, DecorationDescriptorSet)));
	}

	bool push_constant_block = options.vulkan_semantics && var.storage == StorageClassPushConstant;
	bool ssbo_block = var.storage == StorageClassStorageBuffer || var.storage == StorageClassShaderRecordBufferKHR ||
	                  (var.storage == StorageClassUniform && typeflags.get(DecorationBufferBlock));
	bool emulated_ubo = var.storage == StorageClassPushConstant && options.emit_push_constant_as_uniform_buffer;
	bool ubo_block = var.storage == StorageClassUniform && typeflags.get(DecorationBlock);

	// GL 3.0/GLSL 1.30 is not considered legacy, but it doesn't have UBOs ...
	bool can_use_buffer_blocks = (options.es && options.version >= 300) || (!options.es && options.version >= 140);

	// Pretend there are no UBOs when the options say so.
	if (ubo_block && options.emit_uniform_buffer_as_plain_uniforms)
		can_use_buffer_blocks = false;

	bool can_use_binding;
	if (options.es)
		can_use_binding = options.version >= 310;
	else
		can_use_binding = options.enable_420pack_extension || (options.version >= 420);

	// Make sure we don't emit binding layout for a classic uniform on GLSL 1.30.
	if (!can_use_buffer_blocks && var.storage == StorageClassUniform)
		can_use_binding = false;

	if (var.storage == StorageClassShaderRecordBufferKHR)
		can_use_binding = false;

	if (can_use_binding && flags.get(DecorationBinding))
		attr.push_back(join("binding = ", get_decoration(var.self, DecorationBinding)));

	if (var.storage != StorageClassOutput && flags.get(DecorationOffset))
		attr.push_back(join("offset = ", get_decoration(var.self, DecorationOffset)));

	// Instead of adding explicit offsets for every element here, just assume we're using std140 or std430.
	// If SPIR-V does not comply with either layout, we cannot really work around it.
	if (can_use_buffer_blocks && (ubo_block || emulated_ubo))
	{
		attr.push_back(buffer_to_packing_standard(type, false));
	}
	else if (can_use_buffer_blocks && (push_constant_block || ssbo_block))
	{
		attr.push_back(buffer_to_packing_standard(type, true));
	}

	// For images, the type itself adds a layout qualifier.
	// Only emit the format for storage images.
	if (type.basetype == SPIRType::Image && type.image.sampled == 2)
	{
		const char *fmt = format_to_glsl(type.image.format);
		if (fmt)
			attr.push_back(fmt);
	}

	if (attr.empty())
		return "";

	string res = "layout(";
	res += merge(attr);
	res += ") ";
	return res;
}
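
// Illustrative result of layout_for_variable (exact contents depend on the decorations
// present): a Vulkan-semantics UBO with DescriptorSet 0 and Binding 1 typically yields
// roughly "layout(set = 0, binding = 1, std140) ".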

string CompilerGLSL::buffer_to_packing_standard(const SPIRType &type, bool support_std430_without_scalar_layout)
{
	if (support_std430_without_scalar_layout && buffer_is_packing_standard(type, BufferPackingStd430))
		return "std430";
	else if (buffer_is_packing_standard(type, BufferPackingStd140))
		return "std140";
	else if (options.vulkan_semantics && buffer_is_packing_standard(type, BufferPackingScalar))
	{
		require_extension_internal("GL_EXT_scalar_block_layout");
		return "scalar";
	}
	else if (support_std430_without_scalar_layout &&
	         buffer_is_packing_standard(type, BufferPackingStd430EnhancedLayout))
	{
		if (options.es && !options.vulkan_semantics)
			SPIRV_CROSS_THROW("Push constant block cannot be expressed as either std430 or std140. ES-targets do "
			                  "not support GL_ARB_enhanced_layouts.");
		if (!options.es && !options.vulkan_semantics && options.version < 440)
			require_extension_internal("GL_ARB_enhanced_layouts");

		set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
		return "std430";
	}
	else if (buffer_is_packing_standard(type, BufferPackingStd140EnhancedLayout))
	{
		// Fallback time. We might be able to use ARB_enhanced_layouts to deal with this difference.
		// However, we can only use layout(offset) on the block itself, not any substructs, so the substructs better be the appropriate layout.
		// Enhanced layouts seem to always work in Vulkan GLSL, so no need for extensions there.
		if (options.es && !options.vulkan_semantics)
			SPIRV_CROSS_THROW("Push constant block cannot be expressed as either std430 or std140. ES-targets do "
			                  "not support GL_ARB_enhanced_layouts.");
		if (!options.es && !options.vulkan_semantics && options.version < 440)
			require_extension_internal("GL_ARB_enhanced_layouts");

		set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
		return "std140";
	}
	else if (options.vulkan_semantics && buffer_is_packing_standard(type, BufferPackingScalarEnhancedLayout))
	{
		set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
		require_extension_internal("GL_EXT_scalar_block_layout");
		return "scalar";
	}
	else if (!support_std430_without_scalar_layout && options.vulkan_semantics &&
	         buffer_is_packing_standard(type, BufferPackingStd430))
	{
		// UBOs can support std430 with GL_EXT_scalar_block_layout.
		require_extension_internal("GL_EXT_scalar_block_layout");
		return "std430";
	}
	else if (!support_std430_without_scalar_layout && options.vulkan_semantics &&
	         buffer_is_packing_standard(type, BufferPackingStd430EnhancedLayout))
	{
		// UBOs can support std430 with GL_EXT_scalar_block_layout.
		set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
		require_extension_internal("GL_EXT_scalar_block_layout");
		return "std430";
	}
	else
	{
		SPIRV_CROSS_THROW("Buffer block cannot be expressed as any of std430, std140, or scalar, even with enhanced "
		                  "layouts. You can try flattening this block to support a more flexible layout.");
	}
}
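
// Rough example (an assumed layout, for illustration): a block whose only member is a
// runtime array "float data[];" with ArrayStride = 4 satisfies std430 but not std140,
// since std140 rounds array strides up to 16 bytes. For SSBOs this returns "std430";
// a UBO with the same type must fall back to scalar or enhanced layouts.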

void CompilerGLSL::emit_push_constant_block(const SPIRVariable &var)
{
	if (flattened_buffer_blocks.count(var.self))
		emit_buffer_block_flattened(var);
	else if (options.vulkan_semantics)
		emit_push_constant_block_vulkan(var);
	else if (options.emit_push_constant_as_uniform_buffer)
		emit_buffer_block_native(var);
	else
		emit_push_constant_block_glsl(var);
}

void CompilerGLSL::emit_push_constant_block_vulkan(const SPIRVariable &var)
{
	emit_buffer_block(var);
}

void CompilerGLSL::emit_push_constant_block_glsl(const SPIRVariable &var)
{
	// OpenGL has no concept of push constant blocks; implement it as a uniform struct.
	auto &type = get<SPIRType>(var.basetype);

	auto &flags = ir.meta[var.self].decoration.decoration_flags;
	flags.clear(DecorationBinding);
	flags.clear(DecorationDescriptorSet);

#if 0
	if (flags & ((1ull << DecorationBinding) | (1ull << DecorationDescriptorSet)))
		SPIRV_CROSS_THROW("Push constant blocks cannot be compiled to GLSL with Binding or Set syntax. "
		                  "Remap to location with reflection API first or disable these decorations.");
#endif

	// We're emitting the push constant block as a regular struct, so disable the block qualifier temporarily.
	// Otherwise, we will end up emitting layout() qualifiers on naked structs which is not allowed.
	auto &block_flags = ir.meta[type.self].decoration.decoration_flags;
	bool block_flag = block_flags.get(DecorationBlock);
	block_flags.clear(DecorationBlock);

	emit_struct(type);

	if (block_flag)
		block_flags.set(DecorationBlock);

	emit_uniform(var);
	statement("");
}
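
// Illustrative output of emit_push_constant_block_glsl (block and instance names assumed):
//   struct PC
//   {
//       mat4 mvp;
//   };
//   uniform PC pc;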

void CompilerGLSL::emit_buffer_block(const SPIRVariable &var)
{
	auto &type = get<SPIRType>(var.basetype);
	bool ubo_block = var.storage == StorageClassUniform && has_decoration(type.self, DecorationBlock);

	if (flattened_buffer_blocks.count(var.self))
		emit_buffer_block_flattened(var);
	else if (is_legacy() || (!options.es && options.version == 130) ||
	         (ubo_block && options.emit_uniform_buffer_as_plain_uniforms))
		emit_buffer_block_legacy(var);
	else
		emit_buffer_block_native(var);
}

void CompilerGLSL::emit_buffer_block_legacy(const SPIRVariable &var)
{
	auto &type = get<SPIRType>(var.basetype);
	bool ssbo = var.storage == StorageClassStorageBuffer ||
	            ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
	if (ssbo)
		SPIRV_CROSS_THROW("SSBOs not supported in legacy targets.");

	// We're emitting the buffer block as a regular struct, so disable the block qualifier temporarily.
	// Otherwise, we will end up emitting layout() qualifiers on naked structs which is not allowed.
	auto &block_flags = ir.meta[type.self].decoration.decoration_flags;
	bool block_flag = block_flags.get(DecorationBlock);
	block_flags.clear(DecorationBlock);
	emit_struct(type);
	if (block_flag)
		block_flags.set(DecorationBlock);
	emit_uniform(var);
	statement("");
}

void CompilerGLSL::emit_buffer_reference_block(SPIRType &type, bool forward_declaration)
{
	string buffer_name;

	if (forward_declaration)
	{
		// Block names should never alias, but from HLSL input they kind of can because block types are reused for UAVs ...
		// Allow an aliased name here since we might be declaring the block twice: once forward-declared
		// as a buffer reference and once as the proper declaration. The names must match up.
		buffer_name = to_name(type.self, false);

		// Shaders never use the block by interface name, so we don't
		// have to track this other than updating name caches.
		// If we have a collision for any reason, just fall back immediately.
		if (ir.meta[type.self].decoration.alias.empty() ||
		    block_ssbo_names.find(buffer_name) != end(block_ssbo_names) ||
		    resource_names.find(buffer_name) != end(resource_names))
		{
			buffer_name = join("_", type.self);
		}

		// Make sure we get something unique for both global name scope and block name scope.
		// See GLSL 4.5 spec: section 4.3.9 for details.
		add_variable(block_ssbo_names, resource_names, buffer_name);

		// If for some reason buffer_name is an illegal name, make a final fallback to a workaround name.
		// This cannot conflict with anything else, so we're safe now.
		// We cannot reuse this fallback name in either the global scope (blocked by block_names) or the block name scope.
		if (buffer_name.empty())
			buffer_name = join("_", type.self);

		block_names.insert(buffer_name);
		block_ssbo_names.insert(buffer_name);

		// Ensure we emit the correct name when emitting non-forward pointer type.
		ir.meta[type.self].decoration.alias = buffer_name;
	}
	else if (type.basetype != SPIRType::Struct)
		buffer_name = type_to_glsl(type);
	else
		buffer_name = to_name(type.self, false);

	if (!forward_declaration)
	{
		if (type.basetype == SPIRType::Struct)
		{
			auto flags = ir.get_buffer_block_type_flags(type);
			string decorations;
			if (flags.get(DecorationRestrict))
				decorations += " restrict";
			if (flags.get(DecorationCoherent))
				decorations += " coherent";
			if (flags.get(DecorationNonReadable))
				decorations += " writeonly";
			if (flags.get(DecorationNonWritable))
				decorations += " readonly";
			statement("layout(buffer_reference, ", buffer_to_packing_standard(type, true),
			          ")", decorations, " buffer ", buffer_name);
		}
		else
			statement("layout(buffer_reference) buffer ", buffer_name);

		begin_scope();

		if (type.basetype == SPIRType::Struct)
		{
			type.member_name_cache.clear();

			uint32_t i = 0;
			for (auto &member : type.member_types)
			{
				add_member_name(type, i);
				emit_struct_member(type, member, i);
				i++;
			}
		}
		else
		{
			auto &pointee_type = get_pointee_type(type);
			statement(type_to_glsl(pointee_type), " value", type_to_array_glsl(pointee_type), ";");
		}

		end_scope_decl();
		statement("");
	}
	else
	{
		statement("layout(buffer_reference) buffer ", buffer_name, ";");
	}
}
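
// Illustrative output (block and member names assumed): the forward declaration first,
// then the full block, so self-referential pointer types work:
//   layout(buffer_reference) buffer Node;
//   layout(buffer_reference, std430) buffer Node
//   {
//       Node next;
//       int value;
//   };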

void CompilerGLSL::emit_buffer_block_native(const SPIRVariable &var)
{
	auto &type = get<SPIRType>(var.basetype);

	Bitset flags = ir.get_buffer_block_flags(var);
	bool ssbo = var.storage == StorageClassStorageBuffer || var.storage == StorageClassShaderRecordBufferKHR ||
	            ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
	bool is_restrict = ssbo && flags.get(DecorationRestrict);
	bool is_writeonly = ssbo && flags.get(DecorationNonReadable);
	bool is_readonly = ssbo && flags.get(DecorationNonWritable);
	bool is_coherent = ssbo && flags.get(DecorationCoherent);

	// Block names should never alias, but from HLSL input they kind of can because block types are reused for UAVs ...
	auto buffer_name = to_name(type.self, false);

	auto &block_namespace = ssbo ? block_ssbo_names : block_ubo_names;

	// Shaders never use the block by interface name, so we don't
	// have to track this other than updating name caches.
	// If we have a collision for any reason, just fall back immediately.
	if (ir.meta[type.self].decoration.alias.empty() || block_namespace.find(buffer_name) != end(block_namespace) ||
	    resource_names.find(buffer_name) != end(resource_names))
	{
		buffer_name = get_block_fallback_name(var.self);
	}

	// Make sure we get something unique for both global name scope and block name scope.
	// See GLSL 4.5 spec: section 4.3.9 for details.
	add_variable(block_namespace, resource_names, buffer_name);

	// If for some reason buffer_name is an illegal name, make a final fallback to a workaround name.
	// This cannot conflict with anything else, so we're safe now.
	// We cannot reuse this fallback name in either the global scope (blocked by block_names) or the block name scope.
	if (buffer_name.empty())
		buffer_name = join("_", get<SPIRType>(var.basetype).self, "_", var.self);

	block_names.insert(buffer_name);
	block_namespace.insert(buffer_name);

	// Save for post-reflection later.
	declared_block_names[var.self] = buffer_name;

	statement(layout_for_variable(var), is_coherent ? "coherent " : "", is_restrict ? "restrict " : "",
	          is_writeonly ? "writeonly " : "", is_readonly ? "readonly " : "", ssbo ? "buffer " : "uniform ",
	          buffer_name);

	begin_scope();

	type.member_name_cache.clear();

	uint32_t i = 0;
	for (auto &member : type.member_types)
	{
		add_member_name(type, i);
		emit_struct_member(type, member, i);
		i++;
	}

	// var.self can be used as a backup name for the block name,
	// so we need to make sure we don't disturb the name here on a recompile.
	// It will need to be reset if we have to recompile.
	preserve_alias_on_reset(var.self);
	add_resource_name(var.self);
	end_scope_decl(to_name(var.self) + type_to_array_glsl(type));
	statement("");
}
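
// Illustrative output of emit_buffer_block_native (names and binding assumed) for a
// read-only SSBO:
//   layout(binding = 0, std430) readonly buffer SSBO
//   {
//       vec4 data[];
//   } ssbo;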

void CompilerGLSL::emit_buffer_block_flattened(const SPIRVariable &var)
{
	auto &type = get<SPIRType>(var.basetype);

	// Block names should never alias.
	auto buffer_name = to_name(type.self, false);
	size_t buffer_size = (get_declared_struct_size(type) + 15) / 16;

	SPIRType::BaseType basic_type;
	if (get_common_basic_type(type, basic_type))
	{
		SPIRType tmp;
		tmp.basetype = basic_type;
		tmp.vecsize = 4;
		if (basic_type != SPIRType::Float && basic_type != SPIRType::Int && basic_type != SPIRType::UInt)
			SPIRV_CROSS_THROW("Basic types in a flattened UBO must be float, int or uint.");

		auto flags = ir.get_buffer_block_flags(var);
		statement("uniform ", flags_to_qualifiers_glsl(tmp, flags), type_to_glsl(tmp), " ", buffer_name, "[",
		          buffer_size, "];");
	}
	else
		SPIRV_CROSS_THROW("All basic types in a flattened block must be the same.");
}
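
// Illustrative output: a 64-byte float-only UBO named "UBO" flattens to
//   uniform vec4 UBO[4];
// (the size is rounded up to whole vec4s), and member accesses are rewritten
// into indexing and swizzling on that array.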

const char *CompilerGLSL::to_storage_qualifiers_glsl(const SPIRVariable &var)
{
	auto &execution = get_entry_point();

	if (subpass_input_is_framebuffer_fetch(var.self))
		return "";

	if (var.storage == StorageClassInput || var.storage == StorageClassOutput)
	{
		if (is_legacy() && execution.model == ExecutionModelVertex)
			return var.storage == StorageClassInput ? "attribute " : "varying ";
		else if (is_legacy() && execution.model == ExecutionModelFragment)
			return "varying "; // Fragment outputs are renamed so they never hit this case.
		else if (execution.model == ExecutionModelFragment && var.storage == StorageClassOutput)
		{
			if (inout_color_attachments.count(get_decoration(var.self, DecorationLocation)) != 0)
				return "inout ";
			else
				return "out ";
		}
		else
			return var.storage == StorageClassInput ? "in " : "out ";
	}
	else if (var.storage == StorageClassUniformConstant || var.storage == StorageClassUniform ||
	         var.storage == StorageClassPushConstant)
	{
		return "uniform ";
	}
	else if (var.storage == StorageClassRayPayloadKHR)
	{
		return ray_tracing_is_khr ? "rayPayloadEXT " : "rayPayloadNV ";
	}
	else if (var.storage == StorageClassIncomingRayPayloadKHR)
	{
		return ray_tracing_is_khr ? "rayPayloadInEXT " : "rayPayloadInNV ";
	}
	else if (var.storage == StorageClassHitAttributeKHR)
	{
		return ray_tracing_is_khr ? "hitAttributeEXT " : "hitAttributeNV ";
	}
	else if (var.storage == StorageClassCallableDataKHR)
	{
		return ray_tracing_is_khr ? "callableDataEXT " : "callableDataNV ";
	}
	else if (var.storage == StorageClassIncomingCallableDataKHR)
	{
		return ray_tracing_is_khr ? "callableDataInEXT " : "callableDataInNV ";
	}

	return "";
}

void CompilerGLSL::emit_flattened_io_block_member(const std::string &basename, const SPIRType &type, const char *qual,
                                                  const SmallVector<uint32_t> &indices)
{
	uint32_t member_type_id = type.self;
	const SPIRType *member_type = &type;
	const SPIRType *parent_type = nullptr;
	auto flattened_name = basename;
	for (auto &index : indices)
	{
		flattened_name += "_";
		flattened_name += to_member_name(*member_type, index);
		parent_type = member_type;
		member_type_id = member_type->member_types[index];
		member_type = &get<SPIRType>(member_type_id);
	}

	assert(member_type->basetype != SPIRType::Struct);

	// We're overriding struct member names, so ensure we do so on the primary type.
	if (parent_type->type_alias)
		parent_type = &get<SPIRType>(parent_type->type_alias);

	// Sanitize underscores because joining the two identifiers might create more than 1 underscore in a row,
	// which is not allowed.
	ParsedIR::sanitize_underscores(flattened_name);

	uint32_t last_index = indices.back();

	// Pass in the varying qualifier here so it will appear in the correct declaration order.
	// Replace member name while emitting it so it encodes both struct name and member name.
	auto backup_name = get_member_name(parent_type->self, last_index);
	auto member_name = to_member_name(*parent_type, last_index);
	set_member_name(parent_type->self, last_index, flattened_name);
	emit_struct_member(*parent_type, member_type_id, last_index, qual);
	// Restore member name.
	set_member_name(parent_type->self, last_index, member_name);
}
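
// Naming example (illustrative): flattening member "uv" inside struct member "tex"
// of a block variable named "vout" emits a standalone varying called "vout_tex_uv"
// (after underscore sanitation).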

void CompilerGLSL::emit_flattened_io_block_struct(const std::string &basename, const SPIRType &type, const char *qual,
                                                  const SmallVector<uint32_t> &indices)
{
	auto sub_indices = indices;
	sub_indices.push_back(0);

	const SPIRType *member_type = &type;
	for (auto &index : indices)
		member_type = &get<SPIRType>(member_type->member_types[index]);

	assert(member_type->basetype == SPIRType::Struct);

	if (!member_type->array.empty())
		SPIRV_CROSS_THROW("Cannot flatten array of structs in I/O blocks.");

	for (uint32_t i = 0; i < uint32_t(member_type->member_types.size()); i++)
	{
		sub_indices.back() = i;
		if (get<SPIRType>(member_type->member_types[i]).basetype == SPIRType::Struct)
			emit_flattened_io_block_struct(basename, type, qual, sub_indices);
		else
			emit_flattened_io_block_member(basename, type, qual, sub_indices);
	}
}

void CompilerGLSL::emit_flattened_io_block(const SPIRVariable &var, const char *qual)
{
	auto &var_type = get<SPIRType>(var.basetype);
	if (!var_type.array.empty())
		SPIRV_CROSS_THROW("Array of varying structs cannot be flattened to legacy-compatible varyings.");

	// Emit flattened types based on the type alias. Normally, we are never supposed to emit
	// struct declarations for aliased types.
	auto &type = var_type.type_alias ? get<SPIRType>(var_type.type_alias) : var_type;

	auto old_flags = ir.meta[type.self].decoration.decoration_flags;
	// Emit the members as if they are part of a block to get all qualifiers.
	ir.meta[type.self].decoration.decoration_flags.set(DecorationBlock);

	type.member_name_cache.clear();

	SmallVector<uint32_t> member_indices;
	member_indices.push_back(0);
	auto basename = to_name(var.self);

	uint32_t i = 0;
	for (auto &member : type.member_types)
	{
		add_member_name(type, i);
		auto &membertype = get<SPIRType>(member);

		member_indices.back() = i;
		if (membertype.basetype == SPIRType::Struct)
			emit_flattened_io_block_struct(basename, type, qual, member_indices);
		else
			emit_flattened_io_block_member(basename, type, qual, member_indices);
		i++;
	}

	ir.meta[type.self].decoration.decoration_flags = old_flags;

	// Treat this variable as fully flattened from now on.
	flattened_structs[var.self] = true;
}

void CompilerGLSL::emit_interface_block(const SPIRVariable &var)
{
	auto &type = get<SPIRType>(var.basetype);

	if (var.storage == StorageClassInput && type.basetype == SPIRType::Double &&
	    !options.es && options.version < 410)
	{
		require_extension_internal("GL_ARB_vertex_attrib_64bit");
	}

	// Either make it plain in/out or in/out blocks depending on what the shader is doing ...
	bool block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock);
	const char *qual = to_storage_qualifiers_glsl(var);

	if (block)
	{
		// ESSL earlier than 310 and GLSL earlier than 150 did not support
		// I/O variables which are struct types.
		// To support this, flatten the struct into separate varyings instead.
		if (options.force_flattened_io_blocks || (options.es && options.version < 310) ||
		    (!options.es && options.version < 150))
		{
			// I/O blocks on ES require version 310 with Android Extension Pack extensions, or core version 320.
			// On desktop, I/O blocks were introduced with geometry shaders in GL 3.2 (GLSL 150).
			emit_flattened_io_block(var, qual);
		}
		else
		{
			if (options.es && options.version < 320)
			{
				// Geometry and tessellation extensions imply this extension.
				if (!has_extension("GL_EXT_geometry_shader") && !has_extension("GL_EXT_tessellation_shader"))
					require_extension_internal("GL_EXT_shader_io_blocks");
			}

			// Workaround to make sure we can emit "patch in/out" correctly.
			fixup_io_block_patch_qualifiers(var);

			// Block names should never alias.
			auto block_name = to_name(type.self, false);

			// The namespace for I/O blocks is separate from other variables in GLSL.
			auto &block_namespace = type.storage == StorageClassInput ? block_input_names : block_output_names;

			// Shaders never use the block by interface name, so we don't
			// have to track this other than updating name caches.
			if (block_name.empty() || block_namespace.find(block_name) != end(block_namespace))
				block_name = get_fallback_name(type.self);
			else
				block_namespace.insert(block_name);

			// If for some reason block_name is an illegal name, make a final fallback to a workaround name.
			// This cannot conflict with anything else, so we're safe now.
			if (block_name.empty())
				block_name = join("_", get<SPIRType>(var.basetype).self, "_", var.self);

			// Instance names cannot alias block names.
			resource_names.insert(block_name);

			bool is_patch = has_decoration(var.self, DecorationPatch);
			statement(layout_for_variable(var), (is_patch ? "patch " : ""), qual, block_name);
			begin_scope();

			type.member_name_cache.clear();

			uint32_t i = 0;
			for (auto &member : type.member_types)
			{
				add_member_name(type, i);
				emit_struct_member(type, member, i);
				i++;
			}

			add_resource_name(var.self);
			end_scope_decl(join(to_name(var.self), type_to_array_glsl(type)));
			statement("");
		}
	}
	else
	{
		// ESSL earlier than 310 and GLSL earlier than 150 did not support
		// I/O variables which are struct types.
		// To support this, flatten the struct into separate varyings instead.
		if (type.basetype == SPIRType::Struct &&
		    (options.force_flattened_io_blocks || (options.es && options.version < 310) ||
		     (!options.es && options.version < 150)))
		{
			emit_flattened_io_block(var, qual);
		}
		else
		{
			add_resource_name(var.self);

			// Tessellation control and evaluation shaders must have either gl_MaxPatchVertices or unsized arrays for input arrays.
			// Opt for unsized as it's the more "correct" variant to use.
			bool control_point_input_array = type.storage == StorageClassInput && !type.array.empty() &&
			                                 !has_decoration(var.self, DecorationPatch) &&
			                                 (get_entry_point().model == ExecutionModelTessellationControl ||
			                                  get_entry_point().model == ExecutionModelTessellationEvaluation);

			uint32_t old_array_size = 0;
			bool old_array_size_literal = true;

			if (control_point_input_array)
			{
				swap(type.array.back(), old_array_size);
				swap(type.array_size_literal.back(), old_array_size_literal);
			}

			statement(layout_for_variable(var), to_qualifiers_glsl(var.self),
			          variable_decl(type, to_name(var.self), var.self), ";");

			if (control_point_input_array)
			{
				swap(type.array.back(), old_array_size);
				swap(type.array_size_literal.back(), old_array_size_literal);
			}
		}
	}
}
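
// Illustrative output (names assumed) for an output I/O block in a vertex shader:
//   out VertexOut
//   {
//       vec3 normal;
//       vec2 uv;
//   } vout;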

void CompilerGLSL::emit_uniform(const SPIRVariable &var)
{
	auto &type = get<SPIRType>(var.basetype);
	if (type.basetype == SPIRType::Image && type.image.sampled == 2 && type.image.dim != DimSubpassData)
	{
		if (!options.es && options.version < 420)
			require_extension_internal("GL_ARB_shader_image_load_store");
		else if (options.es && options.version < 310)
			SPIRV_CROSS_THROW("At least ESSL 3.10 required for shader image load store.");
	}

	add_resource_name(var.self);
	statement(layout_for_variable(var), variable_decl(var), ";");
}

string CompilerGLSL::constant_value_macro_name(uint32_t id)
{
	return join("SPIRV_CROSS_CONSTANT_ID_", id);
}

void CompilerGLSL::emit_specialization_constant_op(const SPIRConstantOp &constant)
{
	auto &type = get<SPIRType>(constant.basetype);
	auto name = to_name(constant.self);
	statement("const ", variable_decl(type, name), " = ", constant_op_expression(constant), ";");
}

void CompilerGLSL::emit_constant(const SPIRConstant &constant)
{
	auto &type = get<SPIRType>(constant.constant_type);
	auto name = to_name(constant.self);

	SpecializationConstant wg_x, wg_y, wg_z;
	ID workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);

	// This specialization constant is implicitly declared by emitting layout() in;
	if (constant.self == workgroup_size_id)
		return;

	// These specialization constants are implicitly declared by emitting layout() in;
	// In legacy GLSL, we will still need to emit macros for these, so a layout() in; declaration
	// later can use macro overrides for work group size.
	bool is_workgroup_size_constant = ConstantID(constant.self) == wg_x.id || ConstantID(constant.self) == wg_y.id ||
	                                  ConstantID(constant.self) == wg_z.id;

	if (options.vulkan_semantics && is_workgroup_size_constant)
	{
		// Vulkan GLSL does not need to declare workgroup spec constants explicitly, it is handled in layout().
		return;
	}
	else if (!options.vulkan_semantics && is_workgroup_size_constant &&
	         !has_decoration(constant.self, DecorationSpecId))
	{
		// Only bother declaring a workgroup size if it is actually a specialization constant, because we need macros.
		return;
	}

	// Only scalars have constant IDs.
	if (has_decoration(constant.self, DecorationSpecId))
	{
		if (options.vulkan_semantics)
		{
			statement("layout(constant_id = ", get_decoration(constant.self, DecorationSpecId), ") const ",
			          variable_decl(type, name), " = ", constant_expression(constant), ";");
		}
		else
		{
			const string &macro_name = constant.specialization_constant_macro_name;
			statement("#ifndef ", macro_name);
			statement("#define ", macro_name, " ", constant_expression(constant));
			statement("#endif");

			// For workgroup size constants, only emit the macros.
			if (!is_workgroup_size_constant)
				statement("const ", variable_decl(type, name), " = ", macro_name, ";");
		}
	}
	else
	{
		statement("const ", variable_decl(type, name), " = ", constant_expression(constant), ";");
	}
}
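
// Illustrative non-Vulkan output for a float spec constant (the macro suffix is
// assumed to be the SpecId, here 3, and "scale" is an assumed name):
//   #ifndef SPIRV_CROSS_CONSTANT_ID_3
//   #define SPIRV_CROSS_CONSTANT_ID_3 1.0
//   #endif
//   const float scale = SPIRV_CROSS_CONSTANT_ID_3;
// The macro lets callers override the constant at compile time without patching the shader.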

void CompilerGLSL::emit_entry_point_declarations()
{
}

void CompilerGLSL::replace_illegal_names(const unordered_set<string> &keywords)
{
	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
		if (is_hidden_variable(var))
			return;

		auto *meta = ir.find_meta(var.self);
		if (!meta)
			return;

		auto &m = meta->decoration;
		if (keywords.find(m.alias) != end(keywords))
			m.alias = join("_", m.alias);
	});

	ir.for_each_typed_id<SPIRFunction>([&](uint32_t, const SPIRFunction &func) {
		auto *meta = ir.find_meta(func.self);
		if (!meta)
			return;

		auto &m = meta->decoration;
		if (keywords.find(m.alias) != end(keywords))
			m.alias = join("_", m.alias);
	});

	ir.for_each_typed_id<SPIRType>([&](uint32_t, const SPIRType &type) {
		auto *meta = ir.find_meta(type.self);
		if (!meta)
			return;

		auto &m = meta->decoration;
		if (keywords.find(m.alias) != end(keywords))
			m.alias = join("_", m.alias);

		for (auto &memb : meta->members)
			if (keywords.find(memb.alias) != end(keywords))
				memb.alias = join("_", memb.alias);
	});
}
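
// For example, a SPIR-V variable named "texture" collides with the GLSL builtin of
// the same name, so the pass above renames it to "_texture".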

void CompilerGLSL::replace_illegal_names()
{
	// clang-format off
	static const unordered_set<string> keywords = {
		"abs", "acos", "acosh", "all", "any", "asin", "asinh", "atan", "atanh",
		"atomicAdd", "atomicCompSwap", "atomicCounter", "atomicCounterDecrement", "atomicCounterIncrement",
		"atomicExchange", "atomicMax", "atomicMin", "atomicOr", "atomicXor",
		"bitCount", "bitfieldExtract", "bitfieldInsert", "bitfieldReverse",
		"ceil", "cos", "cosh", "cross", "degrees",
		"dFdx", "dFdxCoarse", "dFdxFine",
		"dFdy", "dFdyCoarse", "dFdyFine",
		"distance", "dot", "EmitStreamVertex", "EmitVertex", "EndPrimitive", "EndStreamPrimitive", "equal", "exp", "exp2",
		"faceforward", "findLSB", "findMSB", "float16BitsToInt16", "float16BitsToUint16", "floatBitsToInt", "floatBitsToUint", "floor", "fma", "fract",
		"frexp", "fwidth", "fwidthCoarse", "fwidthFine",
		"greaterThan", "greaterThanEqual", "groupMemoryBarrier",
		"imageAtomicAdd", "imageAtomicAnd", "imageAtomicCompSwap", "imageAtomicExchange", "imageAtomicMax", "imageAtomicMin", "imageAtomicOr", "imageAtomicXor",
		"imageLoad", "imageSamples", "imageSize", "imageStore", "imulExtended", "int16BitsToFloat16", "intBitsToFloat", "interpolateAtOffset", "interpolateAtCentroid", "interpolateAtSample",
		"inverse", "inversesqrt", "isinf", "isnan", "ldexp", "length", "lessThan", "lessThanEqual", "log", "log2",
		"matrixCompMult", "max", "memoryBarrier", "memoryBarrierAtomicCounter", "memoryBarrierBuffer", "memoryBarrierImage", "memoryBarrierShared",
		"min", "mix", "mod", "modf", "noise", "noise1", "noise2", "noise3", "noise4", "normalize", "not", "notEqual",
		"outerProduct", "packDouble2x32", "packHalf2x16", "packInt2x16", "packInt4x16", "packSnorm2x16", "packSnorm4x8",
		"packUint2x16", "packUint4x16", "packUnorm2x16", "packUnorm4x8", "pow",
		"radians", "reflect", "refract", "round", "roundEven", "sign", "sin", "sinh", "smoothstep", "sqrt", "step",
		"tan", "tanh", "texelFetch", "texelFetchOffset", "texture", "textureGather", "textureGatherOffset", "textureGatherOffsets",
		"textureGrad", "textureGradOffset", "textureLod", "textureLodOffset", "textureOffset", "textureProj", "textureProjGrad",
		"textureProjGradOffset", "textureProjLod", "textureProjLodOffset", "textureProjOffset", "textureQueryLevels", "textureQueryLod", "textureSamples", "textureSize",
		"transpose", "trunc", "uaddCarry", "uint16BitsToFloat16", "uintBitsToFloat", "umulExtended", "unpackDouble2x32", "unpackHalf2x16", "unpackInt2x16", "unpackInt4x16",
		"unpackSnorm2x16", "unpackSnorm4x8", "unpackUint2x16", "unpackUint4x16", "unpackUnorm2x16", "unpackUnorm4x8", "usubBorrow",

		"active", "asm", "atomic_uint", "attribute", "bool", "break", "buffer",
		"bvec2", "bvec3", "bvec4", "case", "cast", "centroid", "class", "coherent", "common", "const", "continue", "default", "discard",
		"dmat2", "dmat2x2", "dmat2x3", "dmat2x4", "dmat3", "dmat3x2", "dmat3x3", "dmat3x4", "dmat4", "dmat4x2", "dmat4x3", "dmat4x4",
		"do", "double", "dvec2", "dvec3", "dvec4", "else", "enum", "extern", "external", "false", "filter", "fixed", "flat", "float",
		"for", "fvec2", "fvec3", "fvec4", "goto", "half", "highp", "hvec2", "hvec3", "hvec4", "if", "iimage1D", "iimage1DArray",
		"iimage2D", "iimage2DArray", "iimage2DMS", "iimage2DMSArray", "iimage2DRect", "iimage3D", "iimageBuffer", "iimageCube",
		"iimageCubeArray", "image1D", "image1DArray", "image2D", "image2DArray", "image2DMS", "image2DMSArray", "image2DRect",
		"image3D", "imageBuffer", "imageCube", "imageCubeArray", "in", "inline", "inout", "input", "int", "interface", "invariant",
		"isampler1D", "isampler1DArray", "isampler2D", "isampler2DArray", "isampler2DMS", "isampler2DMSArray", "isampler2DRect",
		"isampler3D", "isamplerBuffer", "isamplerCube", "isamplerCubeArray", "ivec2", "ivec3", "ivec4", "layout", "long", "lowp",
		"mat2", "mat2x2", "mat2x3", "mat2x4", "mat3", "mat3x2", "mat3x3", "mat3x4", "mat4", "mat4x2", "mat4x3", "mat4x4", "mediump",
		"namespace", "noinline", "noperspective", "out", "output", "packed", "partition", "patch", "precise", "precision", "public", "readonly",
		"resource", "restrict", "return", "sample", "sampler1D", "sampler1DArray", "sampler1DArrayShadow",
		"sampler1DShadow", "sampler2D", "sampler2DArray", "sampler2DArrayShadow", "sampler2DMS", "sampler2DMSArray",
		"sampler2DRect", "sampler2DRectShadow", "sampler2DShadow", "sampler3D", "sampler3DRect", "samplerBuffer",
		"samplerCube", "samplerCubeArray", "samplerCubeArrayShadow", "samplerCubeShadow", "shared", "short", "sizeof", "smooth", "static",
		"struct", "subroutine", "superp", "switch", "template", "this", "true", "typedef", "uimage1D", "uimage1DArray", "uimage2D",
		"uimage2DArray", "uimage2DMS", "uimage2DMSArray", "uimage2DRect", "uimage3D", "uimageBuffer", "uimageCube",
		"uimageCubeArray", "uint", "uniform", "union", "unsigned", "usampler1D", "usampler1DArray", "usampler2D", "usampler2DArray",
		"usampler2DMS", "usampler2DMSArray", "usampler2DRect", "usampler3D", "usamplerBuffer", "usamplerCube",
		"usamplerCubeArray", "using", "uvec2", "uvec3", "uvec4", "varying", "vec2", "vec3", "vec4", "void", "volatile",
		"while", "writeonly",
	};
	// clang-format on

	replace_illegal_names(keywords);
}

void CompilerGLSL::replace_fragment_output(SPIRVariable &var)
{
	auto &m = ir.meta[var.self].decoration;
	uint32_t location = 0;
	if (m.decoration_flags.get(DecorationLocation))
		location = m.location;

	// If our variable is arrayed, we must not emit the array part of this as the SPIR-V will
	// do the access chain part of this for us.
	auto &type = get<SPIRType>(var.basetype);

	if (type.array.empty())
	{
		// Redirect the write to a specific render target in legacy GLSL.
		m.alias = join("gl_FragData[", location, "]");

		if (is_legacy_es() && location != 0)
			require_extension_internal("GL_EXT_draw_buffers");
	}
	else if (type.array.size() == 1)
	{
		// If location is non-zero, we probably have to add an offset.
		// This gets really tricky since we'd have to inject an offset in the access chain.
		// FIXME: This seems like an extremely odd-ball case, so it's probably fine to leave it like this for now.
		m.alias = "gl_FragData";
		if (location != 0)
			SPIRV_CROSS_THROW("Arrayed output variable used, but location is not 0. "
			                  "This is unimplemented in SPIRV-Cross.");

		if (is_legacy_es())
			require_extension_internal("GL_EXT_draw_buffers");
	}
	else
		SPIRV_CROSS_THROW("Array-of-array output variable used. This cannot be implemented in legacy GLSL.");

	var.compat_builtin = true; // We don't want to declare this variable, but use the name as-is.
}
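
// Illustrative effect: in a legacy fragment shader, an output at location 1 is not
// declared at all; writes to it are renamed into
//   gl_FragData[1] = ...;
// which requires GL_EXT_draw_buffers on legacy ES targets.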

void CompilerGLSL::replace_fragment_outputs()
{
	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
		auto &type = this->get<SPIRType>(var.basetype);

		if (!is_builtin_variable(var) && !var.remapped_variable && type.pointer && var.storage == StorageClassOutput)
			replace_fragment_output(var);
	});
}

string CompilerGLSL::remap_swizzle(const SPIRType &out_type, uint32_t input_components, const string &expr)
{
	if (out_type.vecsize == input_components)
		return expr;
	else if (input_components == 1 && !backend.can_swizzle_scalar)
		return join(type_to_glsl(out_type), "(", expr, ")");
	else
	{
		// FIXME: This will not work with packed expressions.
		auto e = enclose_expression(expr) + ".";
		// Just clamp the swizzle index if we have more outputs than inputs.
		for (uint32_t c = 0; c < out_type.vecsize; c++)
			e += index_to_swizzle(min(c, input_components - 1));
		if (backend.swizzle_is_function && out_type.vecsize > 1)
			e += "()";

		remove_duplicate_swizzle(e);
		return e;
	}
}
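
// Example (illustrative): remapping a two-component expression "e" to a vec4 yields
// "e.xyyy"; the last input component is repeated to pad out the vector.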

void CompilerGLSL::emit_pls()
{
	auto &execution = get_entry_point();
	if (execution.model != ExecutionModelFragment)
		SPIRV_CROSS_THROW("Pixel local storage only supported in fragment shaders.");

	if (!options.es)
		SPIRV_CROSS_THROW("Pixel local storage only supported in OpenGL ES.");

	if (options.version < 300)
		SPIRV_CROSS_THROW("Pixel local storage only supported in ESSL 3.0 and above.");

	if (!pls_inputs.empty())
	{
		statement("__pixel_local_inEXT _PLSIn");
		begin_scope();
		for (auto &input : pls_inputs)
			statement(pls_decl(input), ";");
		end_scope_decl();
		statement("");
	}

	if (!pls_outputs.empty())
	{
		statement("__pixel_local_outEXT _PLSOut");
		begin_scope();
		for (auto &output : pls_outputs)
			statement(pls_decl(output), ";");
		end_scope_decl();
		statement("");
	}
}
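
// Illustrative output (member declaration format assumed, per GL_EXT_shader_pixel_local_storage):
//   __pixel_local_inEXT _PLSIn
//   {
//       layout(rgba8) mediump vec4 color;
//   };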

void CompilerGLSL::fixup_image_load_store_access()
{
	if (!options.enable_storage_image_qualifier_deduction)
		return;

	ir.for_each_typed_id<SPIRVariable>([&](uint32_t var, const SPIRVariable &) {
		auto &vartype = expression_type(var);
		if (vartype.basetype == SPIRType::Image && vartype.image.sampled == 2)
		{
			// Very old glslangValidator and HLSL compilers do not emit required qualifiers here.
			// Solve this by making the image access as restricted as possible and loosen up if we need to.
			// If any no-read/no-write flags are actually set, assume that the compiler knows what it's doing.

			auto &flags = ir.meta[var].decoration.decoration_flags;
			if (!flags.get(DecorationNonWritable) && !flags.get(DecorationNonReadable))
			{
				flags.set(DecorationNonWritable);
				flags.set(DecorationNonReadable);
			}
		}
	});
}

static bool is_block_builtin(BuiltIn builtin)
{
	return builtin == BuiltInPosition || builtin == BuiltInPointSize || builtin == BuiltInClipDistance ||
	       builtin == BuiltInCullDistance;
}

bool CompilerGLSL::should_force_emit_builtin_block(StorageClass storage)
{
	// If the builtin block uses XFB, we need to force explicit redeclaration of the builtin block.

	if (storage != StorageClassOutput)
		return false;

	bool should_force = false;

	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
		if (should_force)
			return;

		auto &type = this->get<SPIRType>(var.basetype);
		bool block = has_decoration(type.self, DecorationBlock);
		if (var.storage == storage && block && is_builtin_variable(var))
		{
			uint32_t member_count = uint32_t(type.member_types.size());
			for (uint32_t i = 0; i < member_count; i++)
			{
				if (has_member_decoration(type.self, i, DecorationBuiltIn) &&
				    is_block_builtin(BuiltIn(get_member_decoration(type.self, i, DecorationBuiltIn))) &&
				    has_member_decoration(type.self, i, DecorationOffset))
				{
					should_force = true;
				}
			}
		}
		else if (var.storage == storage && !block && is_builtin_variable(var))
		{
			if (is_block_builtin(BuiltIn(get_decoration(type.self, DecorationBuiltIn))) &&
			    has_decoration(var.self, DecorationOffset))
			{
				should_force = true;
			}
		}
	});

	// If we're declaring clip/cull planes with control points we need to force block declaration.
	if (get_execution_model() == ExecutionModelTessellationControl &&
	    (clip_distance_count || cull_distance_count))
	{
		should_force = true;
	}

	return should_force;
}

void CompilerGLSL::fixup_implicit_builtin_block_names()
{
	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
		auto &type = this->get<SPIRType>(var.basetype);
		bool block = has_decoration(type.self, DecorationBlock);
		if ((var.storage == StorageClassOutput || var.storage == StorageClassInput) && block &&
		    is_builtin_variable(var))
		{
			// Make sure the array has a supported name in the code.
			if (var.storage == StorageClassOutput)
				set_name(var.self, "gl_out");
			else if (var.storage == StorageClassInput)
				set_name(var.self, "gl_in");
		}
	});
}
2864
emit_declared_builtin_block(StorageClass storage,ExecutionModel model)2865 void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionModel model)
2866 {
2867 Bitset emitted_builtins;
2868 Bitset global_builtins;
2869 const SPIRVariable *block_var = nullptr;
2870 bool emitted_block = false;
2871 bool builtin_array = false;
2872
2873 // Need to use declared size in the type.
2874 // These variables might have been declared, but not statically used, so we haven't deduced their size yet.
2875 uint32_t cull_distance_size = 0;
2876 uint32_t clip_distance_size = 0;
2877
2878 bool have_xfb_buffer_stride = false;
2879 bool have_geom_stream = false;
2880 bool have_any_xfb_offset = false;
2881 uint32_t xfb_stride = 0, xfb_buffer = 0, geom_stream = 0;
2882 std::unordered_map<uint32_t, uint32_t> builtin_xfb_offsets;
2883
2884 ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
2885 auto &type = this->get<SPIRType>(var.basetype);
2886 bool block = has_decoration(type.self, DecorationBlock);
2887 Bitset builtins;
2888
2889 if (var.storage == storage && block && is_builtin_variable(var))
2890 {
2891 uint32_t index = 0;
2892 for (auto &m : ir.meta[type.self].members)
2893 {
2894 if (m.builtin)
2895 {
2896 builtins.set(m.builtin_type);
2897 if (m.builtin_type == BuiltInCullDistance)
2898 cull_distance_size = to_array_size_literal(this->get<SPIRType>(type.member_types[index]));
2899 else if (m.builtin_type == BuiltInClipDistance)
2900 clip_distance_size = to_array_size_literal(this->get<SPIRType>(type.member_types[index]));
2901
2902 if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationOffset))
2903 {
2904 have_any_xfb_offset = true;
2905 builtin_xfb_offsets[m.builtin_type] = m.offset;
2906 }
2907
2908 if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationStream))
2909 {
2910 uint32_t stream = m.stream;
2911 if (have_geom_stream && geom_stream != stream)
2912 SPIRV_CROSS_THROW("IO block member Stream mismatch.");
2913 have_geom_stream = true;
2914 geom_stream = stream;
2915 }
2916 }
2917 index++;
2918 }
2919
2920 if (storage == StorageClassOutput && has_decoration(var.self, DecorationXfbBuffer) &&
2921 has_decoration(var.self, DecorationXfbStride))
2922 {
2923 uint32_t buffer_index = get_decoration(var.self, DecorationXfbBuffer);
2924 uint32_t stride = get_decoration(var.self, DecorationXfbStride);
2925 if (have_xfb_buffer_stride && buffer_index != xfb_buffer)
2926 SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch.");
2927 if (have_xfb_buffer_stride && stride != xfb_stride)
2928 SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch.");
2929 have_xfb_buffer_stride = true;
2930 xfb_buffer = buffer_index;
2931 xfb_stride = stride;
2932 }
2933
2934 if (storage == StorageClassOutput && has_decoration(var.self, DecorationStream))
2935 {
2936 uint32_t stream = get_decoration(var.self, DecorationStream);
2937 if (have_geom_stream && geom_stream != stream)
2938 SPIRV_CROSS_THROW("IO block member Stream mismatch.");
2939 have_geom_stream = true;
2940 geom_stream = stream;
2941 }
2942 }
2943 else if (var.storage == storage && !block && is_builtin_variable(var))
2944 {
2945 // While we're at it, collect all declared global builtins (HLSL mostly ...).
2946 auto &m = ir.meta[var.self].decoration;
2947 if (m.builtin)
2948 {
2949 global_builtins.set(m.builtin_type);
2950 if (m.builtin_type == BuiltInCullDistance)
2951 cull_distance_size = to_array_size_literal(type);
2952 else if (m.builtin_type == BuiltInClipDistance)
2953 clip_distance_size = to_array_size_literal(type);
2954
2955 if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationXfbStride) &&
2956 m.decoration_flags.get(DecorationXfbBuffer) && m.decoration_flags.get(DecorationOffset))
2957 {
2958 have_any_xfb_offset = true;
2959 builtin_xfb_offsets[m.builtin_type] = m.offset;
2960 uint32_t buffer_index = m.xfb_buffer;
2961 uint32_t stride = m.xfb_stride;
2962 if (have_xfb_buffer_stride && buffer_index != xfb_buffer)
2963 SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch.");
2964 if (have_xfb_buffer_stride && stride != xfb_stride)
2965 SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch.");
2966 have_xfb_buffer_stride = true;
2967 xfb_buffer = buffer_index;
2968 xfb_stride = stride;
2969 }
2970
2971 if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationStream))
2972 {
2973 uint32_t stream = get_decoration(var.self, DecorationStream);
2974 if (have_geom_stream && geom_stream != stream)
2975 SPIRV_CROSS_THROW("IO block member Stream mismatch.");
2976 have_geom_stream = true;
2977 geom_stream = stream;
2978 }
2979 }
2980 }
2981
2982 if (builtins.empty())
2983 return;
2984
2985 if (emitted_block)
2986 SPIRV_CROSS_THROW("Cannot use more than one builtin I/O block.");
2987
2988 emitted_builtins = builtins;
2989 emitted_block = true;
2990 builtin_array = !type.array.empty();
2991 block_var = &var;
2992 });
2993
2994 global_builtins =
2995 Bitset(global_builtins.get_lower() & ((1ull << BuiltInPosition) | (1ull << BuiltInPointSize) |
2996 (1ull << BuiltInClipDistance) | (1ull << BuiltInCullDistance)));
2997
2998 // Try to collect all other declared builtins.
2999 if (!emitted_block)
3000 emitted_builtins = global_builtins;
3001
3002 // Can't declare an empty interface block.
3003 if (emitted_builtins.empty())
3004 return;
3005
3006 if (storage == StorageClassOutput)
3007 {
3008 SmallVector<string> attr;
3009 if (have_xfb_buffer_stride && have_any_xfb_offset)
3010 {
3011 if (!options.es)
3012 {
3013 			if (options.version < 140)
3014 				SPIRV_CROSS_THROW("Transform feedback layout qualifiers are not supported in targets below GLSL 1.40.");
3015 			else if (options.version < 440)
3016 				require_extension_internal("GL_ARB_enhanced_layouts");
3019 }
3020 		else
3021 SPIRV_CROSS_THROW("Need GL_ARB_enhanced_layouts for xfb_stride or xfb_buffer.");
3022 attr.push_back(join("xfb_buffer = ", xfb_buffer, ", xfb_stride = ", xfb_stride));
3023 }
3024
3025 if (have_geom_stream)
3026 {
3027 if (get_execution_model() != ExecutionModelGeometry)
3028 SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders.");
3029 if (options.es)
3030 SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL.");
3031 if (options.version < 400)
3032 require_extension_internal("GL_ARB_transform_feedback3");
3033 attr.push_back(join("stream = ", geom_stream));
3034 }
3035
3036 if (!attr.empty())
3037 statement("layout(", merge(attr), ") out gl_PerVertex");
3038 else
3039 statement("out gl_PerVertex");
3040 }
3041 else
3042 {
3043 		// With geometry passthrough, gl_PerVertex must itself be declared passthrough.
3044 if (get_entry_point().geometry_passthrough)
3045 statement("layout(passthrough) in gl_PerVertex");
3046 else
3047 statement("in gl_PerVertex");
3048 }
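	// As a sketch, when transform feedback decorations are present, the redeclared
	// block emitted below can look something like:
	//
	//   layout(xfb_buffer = 0, xfb_stride = 16) out gl_PerVertex
	//   {
	//       layout(xfb_offset = 0) vec4 gl_Position;
	//   };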
3049
3050 begin_scope();
3051 if (emitted_builtins.get(BuiltInPosition))
3052 {
3053 auto itr = builtin_xfb_offsets.find(BuiltInPosition);
3054 if (itr != end(builtin_xfb_offsets))
3055 statement("layout(xfb_offset = ", itr->second, ") vec4 gl_Position;");
3056 else
3057 statement("vec4 gl_Position;");
3058 }
3059
3060 if (emitted_builtins.get(BuiltInPointSize))
3061 {
3062 auto itr = builtin_xfb_offsets.find(BuiltInPointSize);
3063 if (itr != end(builtin_xfb_offsets))
3064 statement("layout(xfb_offset = ", itr->second, ") float gl_PointSize;");
3065 else
3066 statement("float gl_PointSize;");
3067 }
3068
3069 if (emitted_builtins.get(BuiltInClipDistance))
3070 {
3071 auto itr = builtin_xfb_offsets.find(BuiltInClipDistance);
3072 if (itr != end(builtin_xfb_offsets))
3073 statement("layout(xfb_offset = ", itr->second, ") float gl_ClipDistance[", clip_distance_size, "];");
3074 else
3075 statement("float gl_ClipDistance[", clip_distance_size, "];");
3076 }
3077
3078 if (emitted_builtins.get(BuiltInCullDistance))
3079 {
3080 auto itr = builtin_xfb_offsets.find(BuiltInCullDistance);
3081 if (itr != end(builtin_xfb_offsets))
3082 statement("layout(xfb_offset = ", itr->second, ") float gl_CullDistance[", cull_distance_size, "];");
3083 else
3084 statement("float gl_CullDistance[", cull_distance_size, "];");
3085 }
3086
3087 if (builtin_array)
3088 {
3089 if (model == ExecutionModelTessellationControl && storage == StorageClassOutput)
3090 end_scope_decl(join(to_name(block_var->self), "[", get_entry_point().output_vertices, "]"));
3091 else
3092 end_scope_decl(join(to_name(block_var->self), "[]"));
3093 }
3094 else
3095 end_scope_decl();
3096 statement("");
3097 }
3098
3099 void CompilerGLSL::declare_undefined_values()
3100 {
3101 bool emitted = false;
3102 ir.for_each_typed_id<SPIRUndef>([&](uint32_t, const SPIRUndef &undef) {
3103 auto &type = this->get<SPIRType>(undef.basetype);
3104 // OpUndef can be void for some reason ...
3105 if (type.basetype == SPIRType::Void)
3106 return;
3107
3108 string initializer;
3109 if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
3110 initializer = join(" = ", to_zero_initialized_expression(undef.basetype));
3111
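		// Each OpUndef becomes a plain global declaration, e.g. "vec4 _23;",
		// or, with force_zero_initialized_variables, something like "vec4 _23 = vec4(0.0);".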
3112 statement(variable_decl(type, to_name(undef.self), undef.self), initializer, ";");
3113 emitted = true;
3114 });
3115
3116 if (emitted)
3117 statement("");
3118 }
3119
3120 bool CompilerGLSL::variable_is_lut(const SPIRVariable &var) const
3121 {
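	// A LUT here is a remapped, statically assigned constant that is only ever read,
	// e.g. something that can be emitted as "const float _lut[4] = float[](...);" and indexed directly.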
3122 bool statically_assigned = var.statically_assigned && var.static_expression != ID(0) && var.remapped_variable;
3123
3124 if (statically_assigned)
3125 {
3126 auto *constant = maybe_get<SPIRConstant>(var.static_expression);
3127 if (constant && constant->is_used_as_lut)
3128 return true;
3129 }
3130
3131 return false;
3132 }
3133
3134 void CompilerGLSL::emit_resources()
3135 {
3136 auto &execution = get_entry_point();
3137
3138 replace_illegal_names();
3139
3140 // Legacy GL uses gl_FragData[], redeclare all fragment outputs
3141 // with builtins.
3142 if (execution.model == ExecutionModelFragment && is_legacy())
3143 replace_fragment_outputs();
3144
3145 // Emit PLS blocks if we have such variables.
3146 if (!pls_inputs.empty() || !pls_outputs.empty())
3147 emit_pls();
3148
3149 switch (execution.model)
3150 {
3151 case ExecutionModelGeometry:
3152 case ExecutionModelTessellationControl:
3153 case ExecutionModelTessellationEvaluation:
3154 fixup_implicit_builtin_block_names();
3155 break;
3156
3157 default:
3158 break;
3159 }
3160
3161 // Emit custom gl_PerVertex for SSO compatibility.
3162 if (options.separate_shader_objects && !options.es && execution.model != ExecutionModelFragment)
3163 {
3164 switch (execution.model)
3165 {
3166 case ExecutionModelGeometry:
3167 case ExecutionModelTessellationControl:
3168 case ExecutionModelTessellationEvaluation:
3169 emit_declared_builtin_block(StorageClassInput, execution.model);
3170 emit_declared_builtin_block(StorageClassOutput, execution.model);
3171 break;
3172
3173 case ExecutionModelVertex:
3174 emit_declared_builtin_block(StorageClassOutput, execution.model);
3175 break;
3176
3177 default:
3178 break;
3179 }
3180 }
3181 else if (should_force_emit_builtin_block(StorageClassOutput))
3182 {
3183 emit_declared_builtin_block(StorageClassOutput, execution.model);
3184 }
3185 else if (execution.geometry_passthrough)
3186 {
3187 // Need to declare gl_in with Passthrough.
3188 // If we're doing passthrough, we cannot emit an output block, so the output block test above will never pass.
3189 emit_declared_builtin_block(StorageClassInput, execution.model);
3190 }
3191 else
3192 {
3193 // Need to redeclare clip/cull distance with explicit size to use them.
3194 // SPIR-V mandates these builtins have a size declared.
3195 const char *storage = execution.model == ExecutionModelFragment ? "in" : "out";
3196 if (clip_distance_count != 0)
3197 statement(storage, " float gl_ClipDistance[", clip_distance_count, "];");
3198 if (cull_distance_count != 0)
3199 statement(storage, " float gl_CullDistance[", cull_distance_count, "];");
3200 if (clip_distance_count != 0 || cull_distance_count != 0)
3201 statement("");
3202 }
3203
3204 if (position_invariant)
3205 {
3206 statement("invariant gl_Position;");
3207 statement("");
3208 }
3209
3210 bool emitted = false;
3211
3212 	// Emit specialization constants as actual constants
3213 	// (true spec constants in Vulkan GLSL, macro-backed constants otherwise);
3214 	// spec op expressions will redirect to the constant name.
3215 	//
3216 {
3217 auto loop_lock = ir.create_loop_hard_lock();
3218 for (auto &id_ : ir.ids_for_constant_or_type)
3219 {
3220 auto &id = ir.ids[id_];
3221
3222 if (id.get_type() == TypeConstant)
3223 {
3224 auto &c = id.get<SPIRConstant>();
3225
3226 bool needs_declaration = c.specialization || c.is_used_as_lut;
3227
3228 if (needs_declaration)
3229 {
3230 if (!options.vulkan_semantics && c.specialization)
3231 {
3232 c.specialization_constant_macro_name =
3233 constant_value_macro_name(get_decoration(c.self, DecorationSpecId));
3234 }
3235 emit_constant(c);
3236 emitted = true;
3237 }
3238 }
3239 else if (id.get_type() == TypeConstantOp)
3240 {
3241 emit_specialization_constant_op(id.get<SPIRConstantOp>());
3242 emitted = true;
3243 }
3244 else if (id.get_type() == TypeType)
3245 {
3246 auto *type = &id.get<SPIRType>();
3247
3248 bool is_natural_struct = type->basetype == SPIRType::Struct && type->array.empty() && !type->pointer &&
3249 (!has_decoration(type->self, DecorationBlock) &&
3250 !has_decoration(type->self, DecorationBufferBlock));
3251
3252 // Special case, ray payload and hit attribute blocks are not really blocks, just regular structs.
3253 if (type->basetype == SPIRType::Struct && type->pointer &&
3254 has_decoration(type->self, DecorationBlock) &&
3255 (type->storage == StorageClassRayPayloadKHR || type->storage == StorageClassIncomingRayPayloadKHR ||
3256 type->storage == StorageClassHitAttributeKHR))
3257 {
3258 type = &get<SPIRType>(type->parent_type);
3259 is_natural_struct = true;
3260 }
3261
3262 if (is_natural_struct)
3263 {
3264 if (emitted)
3265 statement("");
3266 emitted = false;
3267
3268 emit_struct(*type);
3269 }
3270 }
3271 }
3272 }
3273
3274 if (emitted)
3275 statement("");
3276
3277 // If we needed to declare work group size late, check here.
3278 // If the work group size depends on a specialization constant, we need to declare the layout() block
3279 // after constants (and their macros) have been declared.
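	// For example (assuming a spec constant controls X), the emitted declaration might be:
	// layout(local_size_x = SPIRV_CROSS_CONSTANT_ID_0, local_size_y = 1, local_size_z = 1) in;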
3280 if (execution.model == ExecutionModelGLCompute && !options.vulkan_semantics &&
3281 execution.workgroup_size.constant != 0)
3282 {
3283 SpecializationConstant wg_x, wg_y, wg_z;
3284 get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);
3285
3286 if ((wg_x.id != ConstantID(0)) || (wg_y.id != ConstantID(0)) || (wg_z.id != ConstantID(0)))
3287 {
3288 SmallVector<string> inputs;
3289 build_workgroup_size(inputs, wg_x, wg_y, wg_z);
3290 statement("layout(", merge(inputs), ") in;");
3291 statement("");
3292 }
3293 }
3294
3295 emitted = false;
3296
3297 if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
3298 {
3299 for (auto type : physical_storage_non_block_pointer_types)
3300 {
3301 emit_buffer_reference_block(get<SPIRType>(type), false);
3302 }
3303
3304 // Output buffer reference blocks.
3305 // Do this in two stages, one with forward declaration,
3306 // and one without. Buffer reference blocks can reference themselves
3307 // to support things like linked lists.
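		// E.g. a self-referential node type is first emitted as a forward declaration,
		// "layout(buffer_reference) buffer Node;", and then again as the full block definition.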
3308 ir.for_each_typed_id<SPIRType>([&](uint32_t, SPIRType &type) {
3309 bool has_block_flags = has_decoration(type.self, DecorationBlock);
3310 if (has_block_flags && type.pointer && type.pointer_depth == 1 && !type_is_array_of_pointers(type) &&
3311 type.storage == StorageClassPhysicalStorageBufferEXT)
3312 {
3313 emit_buffer_reference_block(type, true);
3314 }
3315 });
3316
3317 ir.for_each_typed_id<SPIRType>([&](uint32_t, SPIRType &type) {
3318 bool has_block_flags = has_decoration(type.self, DecorationBlock);
3319 if (has_block_flags && type.pointer && type.pointer_depth == 1 && !type_is_array_of_pointers(type) &&
3320 type.storage == StorageClassPhysicalStorageBufferEXT)
3321 {
3322 emit_buffer_reference_block(type, false);
3323 }
3324 });
3325 }
3326
3327 // Output UBOs and SSBOs
3328 ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
3329 auto &type = this->get<SPIRType>(var.basetype);
3330
3331 bool is_block_storage = type.storage == StorageClassStorageBuffer || type.storage == StorageClassUniform ||
3332 type.storage == StorageClassShaderRecordBufferKHR;
3333 bool has_block_flags = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) ||
3334 ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
3335
3336 if (var.storage != StorageClassFunction && type.pointer && is_block_storage && !is_hidden_variable(var) &&
3337 has_block_flags)
3338 {
3339 emit_buffer_block(var);
3340 }
3341 });
3342
3343 // Output push constant blocks
3344 ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
3345 auto &type = this->get<SPIRType>(var.basetype);
3346 if (var.storage != StorageClassFunction && type.pointer && type.storage == StorageClassPushConstant &&
3347 !is_hidden_variable(var))
3348 {
3349 emit_push_constant_block(var);
3350 }
3351 });
3352
3353 bool skip_separate_image_sampler = !combined_image_samplers.empty() || !options.vulkan_semantics;
3354
3355 // Output Uniform Constants (values, samplers, images, etc).
3356 ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
3357 auto &type = this->get<SPIRType>(var.basetype);
3358
3359 // If we're remapping separate samplers and images, only emit the combined samplers.
3360 if (skip_separate_image_sampler)
3361 {
3362 // Sampler buffers are always used without a sampler, and they will also work in regular GL.
3363 bool sampler_buffer = type.basetype == SPIRType::Image && type.image.dim == DimBuffer;
3364 bool separate_image = type.basetype == SPIRType::Image && type.image.sampled == 1;
3365 bool separate_sampler = type.basetype == SPIRType::Sampler;
3366 if (!sampler_buffer && (separate_image || separate_sampler))
3367 return;
3368 }
3369
3370 if (var.storage != StorageClassFunction && type.pointer &&
3371 (type.storage == StorageClassUniformConstant || type.storage == StorageClassAtomicCounter ||
3372 type.storage == StorageClassRayPayloadKHR || type.storage == StorageClassIncomingRayPayloadKHR ||
3373 type.storage == StorageClassCallableDataKHR || type.storage == StorageClassIncomingCallableDataKHR ||
3374 type.storage == StorageClassHitAttributeKHR) &&
3375 !is_hidden_variable(var))
3376 {
3377 emit_uniform(var);
3378 emitted = true;
3379 }
3380 });
3381
3382 if (emitted)
3383 statement("");
3384 emitted = false;
3385
3386 bool emitted_base_instance = false;
3387
3388 // Output in/out interfaces.
3389 ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
3390 auto &type = this->get<SPIRType>(var.basetype);
3391
3392 bool is_hidden = is_hidden_variable(var);
3393
3394 // Unused output I/O variables might still be required to implement framebuffer fetch.
3395 if (var.storage == StorageClassOutput && !is_legacy() &&
3396 inout_color_attachments.count(get_decoration(var.self, DecorationLocation)) != 0)
3397 {
3398 is_hidden = false;
3399 }
3400
3401 if (var.storage != StorageClassFunction && type.pointer &&
3402 (var.storage == StorageClassInput || var.storage == StorageClassOutput) &&
3403 interface_variable_exists_in_entry_point(var.self) && !is_hidden)
3404 {
3405 emit_interface_block(var);
3406 emitted = true;
3407 }
3408 else if (is_builtin_variable(var))
3409 {
3410 auto builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn));
3411 // For gl_InstanceIndex emulation on GLES, the API user needs to
3412 // supply this uniform.
3413
3414 // The draw parameter extension is soft-enabled on GL with some fallbacks.
3415 if (!options.vulkan_semantics)
3416 {
3417 if (!emitted_base_instance &&
3418 ((options.vertex.support_nonzero_base_instance && builtin == BuiltInInstanceIndex) ||
3419 (builtin == BuiltInBaseInstance)))
3420 {
3421 statement("#ifdef GL_ARB_shader_draw_parameters");
3422 statement("#define SPIRV_Cross_BaseInstance gl_BaseInstanceARB");
3423 statement("#else");
3424 // A crude, but simple workaround which should be good enough for non-indirect draws.
3425 statement("uniform int SPIRV_Cross_BaseInstance;");
3426 statement("#endif");
3427 emitted = true;
3428 emitted_base_instance = true;
3429 }
3430 else if (builtin == BuiltInBaseVertex)
3431 {
3432 statement("#ifdef GL_ARB_shader_draw_parameters");
3433 statement("#define SPIRV_Cross_BaseVertex gl_BaseVertexARB");
3434 statement("#else");
3435 // A crude, but simple workaround which should be good enough for non-indirect draws.
3436 statement("uniform int SPIRV_Cross_BaseVertex;");
3437 statement("#endif");
3438 }
3439 else if (builtin == BuiltInDrawIndex)
3440 {
3441 statement("#ifndef GL_ARB_shader_draw_parameters");
3442 // Cannot really be worked around.
3443 statement("#error GL_ARB_shader_draw_parameters is not supported.");
3444 statement("#endif");
3445 }
3446 }
3447 }
3448 });
3449
3450 // Global variables.
3451 for (auto global : global_variables)
3452 {
3453 auto &var = get<SPIRVariable>(global);
3454 if (var.storage != StorageClassOutput)
3455 {
3456 if (!variable_is_lut(var))
3457 {
3458 add_resource_name(var.self);
3459
3460 string initializer;
3461 if (options.force_zero_initialized_variables && var.storage == StorageClassPrivate &&
3462 !var.initializer && !var.static_expression && type_can_zero_initialize(get_variable_data_type(var)))
3463 {
3464 initializer = join(" = ", to_zero_initialized_expression(get_variable_data_type_id(var)));
3465 }
3466
3467 statement(variable_decl(var), initializer, ";");
3468 emitted = true;
3469 }
3470 }
3471 else if (var.initializer && maybe_get<SPIRConstant>(var.initializer) != nullptr)
3472 {
3473 emit_output_variable_initializer(var);
3474 }
3475 }
3476
3477 if (emitted)
3478 statement("");
3479
3480 declare_undefined_values();
3481 }
3482
3483 void CompilerGLSL::emit_output_variable_initializer(const SPIRVariable &var)
3484 {
3485 // If a StorageClassOutput variable has an initializer, we need to initialize it in main().
3486 auto &entry_func = this->get<SPIRFunction>(ir.default_entry_point);
3487 auto &type = get<SPIRType>(var.basetype);
3488 bool is_patch = has_decoration(var.self, DecorationPatch);
3489 bool is_block = has_decoration(type.self, DecorationBlock);
3490 bool is_control_point = get_execution_model() == ExecutionModelTessellationControl && !is_patch;
3491
3492 if (is_block)
3493 {
3494 uint32_t member_count = uint32_t(type.member_types.size());
3495 bool type_is_array = type.array.size() == 1;
3496 uint32_t array_size = 1;
3497 if (type_is_array)
3498 array_size = to_array_size_literal(type);
3499 uint32_t iteration_count = is_control_point ? 1 : array_size;
3500
3501 // If the initializer is a block, we must initialize each block member one at a time.
3502 for (uint32_t i = 0; i < member_count; i++)
3503 {
3504 // These outputs might not have been properly declared, so don't initialize them in that case.
3505 if (has_member_decoration(type.self, i, DecorationBuiltIn))
3506 {
3507 if (get_member_decoration(type.self, i, DecorationBuiltIn) == BuiltInCullDistance &&
3508 !cull_distance_count)
3509 continue;
3510
3511 if (get_member_decoration(type.self, i, DecorationBuiltIn) == BuiltInClipDistance &&
3512 !clip_distance_count)
3513 continue;
3514 }
3515
3516 // We need to build a per-member array first, essentially transposing from AoS to SoA.
3517 // This code path hits when we have an array of blocks.
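			// As a sketch: for "Block { vec4 a; } blocks[2]" with initializer { { A0 }, { A1 } },
			// we first emit "const vec4 _<var>_0_init[2] = vec4[](A0, A1);" and then assign
			// blocks[j].a = _<var>_0_init[j] in the entry point fixup below.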
3518 string lut_name;
3519 if (type_is_array)
3520 {
3521 lut_name = join("_", var.self, "_", i, "_init");
3522 uint32_t member_type_id = get<SPIRType>(var.basetype).member_types[i];
3523 auto &member_type = get<SPIRType>(member_type_id);
3524 auto array_type = member_type;
3525 array_type.parent_type = member_type_id;
3526 array_type.array.push_back(array_size);
3527 array_type.array_size_literal.push_back(true);
3528
3529 SmallVector<string> exprs;
3530 exprs.reserve(array_size);
3531 auto &c = get<SPIRConstant>(var.initializer);
3532 for (uint32_t j = 0; j < array_size; j++)
3533 exprs.push_back(to_expression(get<SPIRConstant>(c.subconstants[j]).subconstants[i]));
3534 statement("const ", type_to_glsl(array_type), " ", lut_name, type_to_array_glsl(array_type), " = ",
3535 type_to_glsl_constructor(array_type), "(", merge(exprs, ", "), ");");
3536 }
3537
3538 for (uint32_t j = 0; j < iteration_count; j++)
3539 {
3540 entry_func.fixup_hooks_in.push_back([=, &var]() {
3541 AccessChainMeta meta;
3542 auto &c = this->get<SPIRConstant>(var.initializer);
3543
3544 uint32_t invocation_id = 0;
3545 uint32_t member_index_id = 0;
3546 if (is_control_point)
3547 {
3548 uint32_t ids = ir.increase_bound_by(3);
3549 SPIRType uint_type;
3550 uint_type.basetype = SPIRType::UInt;
3551 uint_type.width = 32;
3552 set<SPIRType>(ids, uint_type);
3553 set<SPIRExpression>(ids + 1, builtin_to_glsl(BuiltInInvocationId, StorageClassInput), ids, true);
3554 set<SPIRConstant>(ids + 2, ids, i, false);
3555 invocation_id = ids + 1;
3556 member_index_id = ids + 2;
3557 }
3558
3559 if (is_patch)
3560 {
3561 statement("if (gl_InvocationID == 0)");
3562 begin_scope();
3563 }
3564
3565 if (type_is_array && !is_control_point)
3566 {
3567 uint32_t indices[2] = { j, i };
3568 auto chain = access_chain_internal(var.self, indices, 2, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &meta);
3569 statement(chain, " = ", lut_name, "[", j, "];");
3570 }
3571 else if (is_control_point)
3572 {
3573 uint32_t indices[2] = { invocation_id, member_index_id };
3574 auto chain = access_chain_internal(var.self, indices, 2, 0, &meta);
3575 statement(chain, " = ", lut_name, "[", builtin_to_glsl(BuiltInInvocationId, StorageClassInput), "];");
3576 }
3577 else
3578 {
3579 auto chain =
3580 access_chain_internal(var.self, &i, 1, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &meta);
3581 statement(chain, " = ", to_expression(c.subconstants[i]), ";");
3582 }
3583
3584 if (is_patch)
3585 end_scope();
3586 });
3587 }
3588 }
3589 }
3590 else if (is_control_point)
3591 {
3592 auto lut_name = join("_", var.self, "_init");
3593 statement("const ", type_to_glsl(type), " ", lut_name, type_to_array_glsl(type),
3594 " = ", to_expression(var.initializer), ";");
3595 entry_func.fixup_hooks_in.push_back([&, lut_name]() {
3596 statement(to_expression(var.self), "[gl_InvocationID] = ", lut_name, "[gl_InvocationID];");
3597 });
3598 }
3599 else
3600 {
3601 auto lut_name = join("_", var.self, "_init");
3602 statement("const ", type_to_glsl(type), " ", lut_name,
3603 type_to_array_glsl(type), " = ", to_expression(var.initializer), ";");
3604 entry_func.fixup_hooks_in.push_back([&, lut_name, is_patch]() {
3605 if (is_patch)
3606 {
3607 statement("if (gl_InvocationID == 0)");
3608 begin_scope();
3609 }
3610 statement(to_expression(var.self), " = ", lut_name, ";");
3611 if (is_patch)
3612 end_scope();
3613 });
3614 }
3615 }
3616
3617 void CompilerGLSL::emit_extension_workarounds(spv::ExecutionModel model)
3618 {
3619 static const char *workaround_types[] = { "int", "ivec2", "ivec3", "ivec4", "uint", "uvec2", "uvec3", "uvec4",
3620 "float", "vec2", "vec3", "vec4", "double", "dvec2", "dvec3", "dvec4" };
3621
3622 if (!options.vulkan_semantics)
3623 {
3624 using Supp = ShaderSubgroupSupportHelper;
3625 auto result = shader_subgroup_supporter.resolve();
3626
3627 if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupMask))
3628 {
3629 auto exts = Supp::get_candidates_for_feature(Supp::SubgroupMask, result);
3630
3631 for (auto &e : exts)
3632 {
3633 const char *name = Supp::get_extension_name(e);
3634 statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
3635
3636 switch (e)
3637 {
3638 case Supp::NV_shader_thread_group:
3639 statement("#define gl_SubgroupEqMask uvec4(gl_ThreadEqMaskNV, 0u, 0u, 0u)");
3640 statement("#define gl_SubgroupGeMask uvec4(gl_ThreadGeMaskNV, 0u, 0u, 0u)");
3641 statement("#define gl_SubgroupGtMask uvec4(gl_ThreadGtMaskNV, 0u, 0u, 0u)");
3642 statement("#define gl_SubgroupLeMask uvec4(gl_ThreadLeMaskNV, 0u, 0u, 0u)");
3643 statement("#define gl_SubgroupLtMask uvec4(gl_ThreadLtMaskNV, 0u, 0u, 0u)");
3644 break;
3645 case Supp::ARB_shader_ballot:
3646 statement("#define gl_SubgroupEqMask uvec4(unpackUint2x32(gl_SubGroupEqMaskARB), 0u, 0u)");
3647 statement("#define gl_SubgroupGeMask uvec4(unpackUint2x32(gl_SubGroupGeMaskARB), 0u, 0u)");
3648 statement("#define gl_SubgroupGtMask uvec4(unpackUint2x32(gl_SubGroupGtMaskARB), 0u, 0u)");
3649 statement("#define gl_SubgroupLeMask uvec4(unpackUint2x32(gl_SubGroupLeMaskARB), 0u, 0u)");
3650 statement("#define gl_SubgroupLtMask uvec4(unpackUint2x32(gl_SubGroupLtMaskARB), 0u, 0u)");
3651 break;
3652 default:
3653 break;
3654 }
3655 }
3656 statement("#endif");
3657 statement("");
3658 }
3659
3660 if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupSize))
3661 {
3662 auto exts = Supp::get_candidates_for_feature(Supp::SubgroupSize, result);
3663
3664 for (auto &e : exts)
3665 {
3666 const char *name = Supp::get_extension_name(e);
3667 statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
3668
3669 switch (e)
3670 {
3671 case Supp::NV_shader_thread_group:
3672 statement("#define gl_SubgroupSize gl_WarpSizeNV");
3673 break;
3674 case Supp::ARB_shader_ballot:
3675 statement("#define gl_SubgroupSize gl_SubGroupSizeARB");
3676 break;
3677 case Supp::AMD_gcn_shader:
3678 statement("#define gl_SubgroupSize uint(gl_SIMDGroupSizeAMD)");
3679 break;
3680 default:
3681 break;
3682 }
3683 }
3684 statement("#endif");
3685 statement("");
3686 }
3687
3688 if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupInvocationID))
3689 {
3690 auto exts = Supp::get_candidates_for_feature(Supp::SubgroupInvocationID, result);
3691
3692 for (auto &e : exts)
3693 {
3694 const char *name = Supp::get_extension_name(e);
3695 statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
3696
3697 switch (e)
3698 {
3699 case Supp::NV_shader_thread_group:
3700 statement("#define gl_SubgroupInvocationID gl_ThreadInWarpNV");
3701 break;
3702 case Supp::ARB_shader_ballot:
3703 statement("#define gl_SubgroupInvocationID gl_SubGroupInvocationARB");
3704 break;
3705 default:
3706 break;
3707 }
3708 }
3709 statement("#endif");
3710 statement("");
3711 }
3712
3713 if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupID))
3714 {
3715 auto exts = Supp::get_candidates_for_feature(Supp::SubgroupID, result);
3716
3717 for (auto &e : exts)
3718 {
3719 const char *name = Supp::get_extension_name(e);
3720 statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
3721
3722 switch (e)
3723 {
3724 case Supp::NV_shader_thread_group:
3725 statement("#define gl_SubgroupID gl_WarpIDNV");
3726 break;
3727 default:
3728 break;
3729 }
3730 }
3731 statement("#endif");
3732 statement("");
3733 }
3734
3735 if (shader_subgroup_supporter.is_feature_requested(Supp::NumSubgroups))
3736 {
3737 auto exts = Supp::get_candidates_for_feature(Supp::NumSubgroups, result);
3738
3739 for (auto &e : exts)
3740 {
3741 const char *name = Supp::get_extension_name(e);
3742 statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
3743
3744 switch (e)
3745 {
3746 case Supp::NV_shader_thread_group:
3747 statement("#define gl_NumSubgroups gl_WarpsPerSMNV");
3748 break;
3749 default:
3750 break;
3751 }
3752 }
3753 statement("#endif");
3754 statement("");
3755 }
3756
3757 if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBroadcast_First))
3758 {
3759 auto exts = Supp::get_candidates_for_feature(Supp::SubgroupBroadcast_First, result);
3760
3761 for (auto &e : exts)
3762 {
3763 const char *name = Supp::get_extension_name(e);
3764 statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
3765
3766 switch (e)
3767 {
3768 case Supp::NV_shader_thread_shuffle:
3769 for (const char *t : workaround_types)
3770 {
3771 statement(t, " subgroupBroadcastFirst(", t,
3772 " value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); }");
3773 }
3774 for (const char *t : workaround_types)
3775 {
3776 statement(t, " subgroupBroadcast(", t,
3777 " value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); }");
3778 }
3779 break;
3780 case Supp::ARB_shader_ballot:
3781 for (const char *t : workaround_types)
3782 {
3783 statement(t, " subgroupBroadcastFirst(", t,
3784 " value) { return readFirstInvocationARB(value); }");
3785 }
3786 for (const char *t : workaround_types)
3787 {
3788 statement(t, " subgroupBroadcast(", t,
3789 " value, uint id) { return readInvocationARB(value, id); }");
3790 }
3791 break;
3792 default:
3793 break;
3794 }
3795 }
3796 statement("#endif");
3797 statement("");
3798 }
3799
3800 if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallotFindLSB_MSB))
3801 {
3802 auto exts = Supp::get_candidates_for_feature(Supp::SubgroupBallotFindLSB_MSB, result);
3803
3804 for (auto &e : exts)
3805 {
3806 const char *name = Supp::get_extension_name(e);
3807 statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
3808
3809 switch (e)
3810 {
3811 case Supp::NV_shader_thread_group:
3812 statement("uint subgroupBallotFindLSB(uvec4 value) { return findLSB(value.x); }");
3813 statement("uint subgroupBallotFindMSB(uvec4 value) { return findMSB(value.x); }");
3814 break;
3815 default:
3816 break;
3817 }
3818 }
3819 statement("#else");
3820 statement("uint subgroupBallotFindLSB(uvec4 value)");
3821 begin_scope();
3822 statement("int firstLive = findLSB(value.x);");
3823 statement("return uint(firstLive != -1 ? firstLive : (findLSB(value.y) + 32));");
3824 end_scope();
3825 statement("uint subgroupBallotFindMSB(uvec4 value)");
3826 begin_scope();
3827 statement("int firstLive = findMSB(value.y);");
3828 statement("return uint(firstLive != -1 ? (firstLive + 32) : findMSB(value.x));");
3829 end_scope();
3830 statement("#endif");
3831 statement("");
3832 }
3833
3834 if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupAll_Any_AllEqualBool))
3835 {
3836 auto exts = Supp::get_candidates_for_feature(Supp::SubgroupAll_Any_AllEqualBool, result);
3837
3838 for (auto &e : exts)
3839 {
3840 const char *name = Supp::get_extension_name(e);
3841 statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
3842
3843 switch (e)
3844 {
3845 case Supp::NV_gpu_shader_5:
3846 statement("bool subgroupAll(bool value) { return allThreadsNV(value); }");
3847 statement("bool subgroupAny(bool value) { return anyThreadNV(value); }");
3848 statement("bool subgroupAllEqual(bool value) { return allThreadsEqualNV(value); }");
3849 break;
3850 case Supp::ARB_shader_group_vote:
3851 statement("bool subgroupAll(bool v) { return allInvocationsARB(v); }");
3852 statement("bool subgroupAny(bool v) { return anyInvocationARB(v); }");
3853 statement("bool subgroupAllEqual(bool v) { return allInvocationsEqualARB(v); }");
3854 break;
3855 case Supp::AMD_gcn_shader:
3856 statement("bool subgroupAll(bool value) { return ballotAMD(value) == ballotAMD(true); }");
3857 statement("bool subgroupAny(bool value) { return ballotAMD(value) != 0ull; }");
3858 statement("bool subgroupAllEqual(bool value) { uint64_t b = ballotAMD(value); return b == 0ull || "
3859 "b == ballotAMD(true); }");
3860 break;
3861 default:
3862 break;
3863 }
3864 }
3865 statement("#endif");
3866 statement("");
3867 }
3868
3869 if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupAllEqualT))
3870 {
3871 statement("#ifndef GL_KHR_shader_subgroup_vote");
3872 statement(
3873 "#define _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(type) bool subgroupAllEqual(type value) { return "
3874 "subgroupAllEqual(subgroupBroadcastFirst(value) == value); }");
3875 for (const char *t : workaround_types)
3876 statement("_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(", t, ")");
3877 statement("#undef _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND");
3878 statement("#endif");
3879 statement("");
3880 }
3881
3882 if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallot))
3883 {
3884 auto exts = Supp::get_candidates_for_feature(Supp::SubgroupBallot, result);
3885
3886 for (auto &e : exts)
3887 {
3888 const char *name = Supp::get_extension_name(e);
3889 statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
3890
3891 switch (e)
3892 {
3893 case Supp::NV_shader_thread_group:
3894 statement("uvec4 subgroupBallot(bool v) { return uvec4(ballotThreadNV(v), 0u, 0u, 0u); }");
3895 break;
3896 case Supp::ARB_shader_ballot:
3897 statement("uvec4 subgroupBallot(bool v) { return uvec4(unpackUint2x32(ballotARB(v)), 0u, 0u); }");
3898 break;
3899 default:
3900 break;
3901 }
3902 }
3903 statement("#endif");
3904 statement("");
3905 }
3906
3907 if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupElect))
3908 {
3909 statement("#ifndef GL_KHR_shader_subgroup_basic");
3910 statement("bool subgroupElect()");
3911 begin_scope();
3912 statement("uvec4 activeMask = subgroupBallot(true);");
3913 statement("uint firstLive = subgroupBallotFindLSB(activeMask);");
3914 statement("return gl_SubgroupInvocationID == firstLive;");
3915 end_scope();
3916 statement("#endif");
3917 statement("");
3918 }
3919
3920 if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBarrier))
3921 {
3922 			// The extensions we use in place of GL_KHR_shader_subgroup_basic state
3923 			// that subgroups execute in lockstep, so this barrier is implicit.
3924 			// However, the GL 4.6 spec also states that `barrier` implies a shared memory barrier,
3925 			// and a specific test of optimizing scans by leveraging lock-step invocation execution
3926 			// has shown that a `memoryBarrierShared` is needed in place of a `subgroupBarrier`.
3927 // https://github.com/buildaworldnet/IrrlichtBAW/commit/d8536857991b89a30a6b65d29441e51b64c2c7ad#diff-9f898d27be1ea6fc79b03d9b361e299334c1a347b6e4dc344ee66110c6aa596aR19
3928 statement("#ifndef GL_KHR_shader_subgroup_basic");
3929 statement("void subgroupBarrier() { memoryBarrierShared(); }");
3930 statement("#endif");
3931 statement("");
3932 }
3933
3934 if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupMemBarrier))
3935 {
3936 if (model == spv::ExecutionModelGLCompute)
3937 {
3938 statement("#ifndef GL_KHR_shader_subgroup_basic");
3939 statement("void subgroupMemoryBarrier() { groupMemoryBarrier(); }");
3940 statement("void subgroupMemoryBarrierBuffer() { groupMemoryBarrier(); }");
3941 statement("void subgroupMemoryBarrierShared() { memoryBarrierShared(); }");
3942 statement("void subgroupMemoryBarrierImage() { groupMemoryBarrier(); }");
3943 statement("#endif");
3944 }
3945 else
3946 {
3947 statement("#ifndef GL_KHR_shader_subgroup_basic");
3948 statement("void subgroupMemoryBarrier() { memoryBarrier(); }");
3949 statement("void subgroupMemoryBarrierBuffer() { memoryBarrierBuffer(); }");
3950 statement("void subgroupMemoryBarrierImage() { memoryBarrierImage(); }");
3951 statement("#endif");
3952 }
3953 statement("");
3954 }
3955
3956 if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupInverseBallot_InclBitCount_ExclBitCout))
3957 {
3958 statement("#ifndef GL_KHR_shader_subgroup_ballot");
3959 statement("bool subgroupInverseBallot(uvec4 value)");
3960 begin_scope();
3961 statement("return any(notEqual(value.xy & gl_SubgroupEqMask.xy, uvec2(0u)));");
3962 end_scope();
3963
3964 statement("uint subgroupBallotInclusiveBitCount(uvec4 value)");
3965 begin_scope();
3966 statement("uvec2 v = value.xy & gl_SubgroupLeMask.xy;");
3967 statement("ivec2 c = bitCount(v);");
3968 statement_no_indent("#ifdef GL_NV_shader_thread_group");
3969 statement("return uint(c.x);");
3970 statement_no_indent("#else");
3971 statement("return uint(c.x + c.y);");
3972 statement_no_indent("#endif");
3973 end_scope();
3974
3975 statement("uint subgroupBallotExclusiveBitCount(uvec4 value)");
3976 begin_scope();
3977 statement("uvec2 v = value.xy & gl_SubgroupLtMask.xy;");
3978 statement("ivec2 c = bitCount(v);");
3979 statement_no_indent("#ifdef GL_NV_shader_thread_group");
3980 statement("return uint(c.x);");
3981 statement_no_indent("#else");
3982 statement("return uint(c.x + c.y);");
3983 statement_no_indent("#endif");
3984 end_scope();
3985 statement("#endif");
3986 statement("");
3987 }
3988
3989 if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallotBitCount))
3990 {
3991 statement("#ifndef GL_KHR_shader_subgroup_ballot");
3992 statement("uint subgroupBallotBitCount(uvec4 value)");
3993 begin_scope();
3994 statement("ivec2 c = bitCount(value.xy);");
3995 statement_no_indent("#ifdef GL_NV_shader_thread_group");
3996 statement("return uint(c.x);");
3997 statement_no_indent("#else");
3998 statement("return uint(c.x + c.y);");
3999 statement_no_indent("#endif");
4000 end_scope();
4001 statement("#endif");
4002 statement("");
4003 }
4004
4005 if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallotBitExtract))
4006 {
4007 statement("#ifndef GL_KHR_shader_subgroup_ballot");
4008 statement("bool subgroupBallotBitExtract(uvec4 value, uint index)");
4009 begin_scope();
4010 statement_no_indent("#ifdef GL_NV_shader_thread_group");
4011 statement("uint shifted = value.x >> index;");
4012 statement_no_indent("#else");
4013 statement("uint shifted = value[index >> 5u] >> (index & 0x1fu);");
4014 statement_no_indent("#endif");
4015 statement("return (shifted & 1u) != 0u;");
4016 end_scope();
4017 statement("#endif");
4018 statement("");
4019 }
4020 }
4021
4022 if (!workaround_ubo_load_overload_types.empty())
4023 {
4024 for (auto &type_id : workaround_ubo_load_overload_types)
4025 {
4026 auto &type = get<SPIRType>(type_id);
4027 statement(type_to_glsl(type), " spvWorkaroundRowMajor(", type_to_glsl(type),
4028 " wrap) { return wrap; }");
4029 }
4030 statement("");
4031 }
4032
4033 if (requires_transpose_2x2)
4034 {
4035 statement("mat2 spvTranspose(mat2 m)");
4036 begin_scope();
4037 statement("return mat2(m[0][0], m[1][0], m[0][1], m[1][1]);");
4038 end_scope();
4039 statement("");
4040 }
4041
4042 if (requires_transpose_3x3)
4043 {
4044 statement("mat3 spvTranspose(mat3 m)");
4045 begin_scope();
4046 statement("return mat3(m[0][0], m[1][0], m[2][0], m[0][1], m[1][1], m[2][1], m[0][2], m[1][2], m[2][2]);");
4047 end_scope();
4048 statement("");
4049 }
4050
4051 if (requires_transpose_4x4)
4052 {
4053 statement("mat4 spvTranspose(mat4 m)");
4054 begin_scope();
4055 statement("return mat4(m[0][0], m[1][0], m[2][0], m[3][0], m[0][1], m[1][1], m[2][1], m[3][1], m[0][2], "
4056 "m[1][2], m[2][2], m[3][2], m[0][3], m[1][3], m[2][3], m[3][3]);");
4057 end_scope();
4058 statement("");
4059 }
4060 }
4061
4062 // Returns a string representation of the ID, usable as a function arg.
4063 // Default is to simply return the expression representation of the arg ID.
4064 // Subclasses may override to modify the return value.
4065 string CompilerGLSL::to_func_call_arg(const SPIRFunction::Parameter &, uint32_t id)
4066 {
4067 // Make sure that we use the name of the original variable, and not the parameter alias.
4068 uint32_t name_id = id;
4069 auto *var = maybe_get<SPIRVariable>(id);
4070 if (var && var->basevariable)
4071 name_id = var->basevariable;
4072 return to_expression(name_id);
4073 }
4074
4075 void CompilerGLSL::handle_invalid_expression(uint32_t id)
4076 {
4077 // We tried to read an invalidated expression.
4078 // This means we need another pass at compilation, but next time, force temporary variables so that they cannot be invalidated.
4079 forced_temporaries.insert(id);
4080 force_recompile();
4081 }
4082
4083 // Converts the format of the current expression from packed to unpacked,
4084 // by wrapping the expression in a constructor of the appropriate type.
4085 // GLSL does not support packed formats, so simply return the expression.
4086 // Subclasses that do support packed formats will override this.
4087 string CompilerGLSL::unpack_expression_type(string expr_str, const SPIRType &, uint32_t, bool, bool)
4088 {
4089 return expr_str;
4090 }
4091
4092 // Sometimes we proactively enclose an expression only to find we did not need it after all.
4093 void CompilerGLSL::strip_enclosed_expression(string &expr)
4094 {
4095 if (expr.size() < 2 || expr.front() != '(' || expr.back() != ')')
4096 return;
4097
4098 // Have to make sure that our first and last parens actually enclose everything inside it.
4099 uint32_t paren_count = 0;
4100 for (auto &c : expr)
4101 {
4102 if (c == '(')
4103 paren_count++;
4104 else if (c == ')')
4105 {
4106 paren_count--;
4107
4108 // If we hit 0 and this is not the final char, our first and final parens actually don't
4109 // enclose the expression, and we cannot strip, e.g.: (a + b) * (c + d).
4110 if (paren_count == 0 && &c != &expr.back())
4111 return;
4112 }
4113 }
4114 expr.erase(expr.size() - 1, 1);
4115 expr.erase(begin(expr));
4116 }
4117
4118 string CompilerGLSL::enclose_expression(const string &expr)
4119 {
4120 bool need_parens = false;
4121
4122 	// If the expression starts with a unary operator, we need to enclose it to deal with cases where we have
4123 	// back-to-back unary expressions.
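	// E.g. "-x" must become "(-x)"; otherwise negating it again would produce "--x",
	// which parses as a decrement rather than a double negation.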
4124 if (!expr.empty())
4125 {
4126 auto c = expr.front();
4127 if (c == '-' || c == '+' || c == '!' || c == '~' || c == '&' || c == '*')
4128 need_parens = true;
4129 }
4130
4131 if (!need_parens)
4132 {
4133 uint32_t paren_count = 0;
4134 for (auto c : expr)
4135 {
4136 if (c == '(' || c == '[')
4137 paren_count++;
4138 else if (c == ')' || c == ']')
4139 {
4140 assert(paren_count);
4141 paren_count--;
4142 }
4143 else if (c == ' ' && paren_count == 0)
4144 {
4145 need_parens = true;
4146 break;
4147 }
4148 }
4149 assert(paren_count == 0);
4150 }
4151
4152 // If this expression contains any spaces which are not enclosed by parentheses,
4153 // we need to enclose it so we can treat the whole string as an expression.
4154 // This happens when two expressions have been part of a binary op earlier.
4155 if (need_parens)
4156 return join('(', expr, ')');
4157 else
4158 return expr;
4159 }
4160
4161 string CompilerGLSL::dereference_expression(const SPIRType &expr_type, const std::string &expr)
4162 {
4163 // If this expression starts with an address-of operator ('&'), then
4164 // just return the part after the operator.
4165 // TODO: Strip parens if unnecessary?
4166 if (expr.front() == '&')
4167 return expr.substr(1);
4168 else if (backend.native_pointers)
4169 return join('*', expr);
4170 else if (expr_type.storage == StorageClassPhysicalStorageBufferEXT && expr_type.basetype != SPIRType::Struct &&
4171 expr_type.pointer_depth == 1)
4172 {
4173 return join(enclose_expression(expr), ".value");
4174 }
4175 else
4176 return expr;
4177 }
4178
4179 string CompilerGLSL::address_of_expression(const std::string &expr)
4180 {
4181 if (expr.size() > 3 && expr[0] == '(' && expr[1] == '*' && expr.back() == ')')
4182 {
4183 // If we have an expression which looks like (*foo), taking the address of it is the same as stripping
4184 // the first two and last characters. We might have to enclose the expression.
4185 // This doesn't work for cases like (*foo + 10),
4186 // but this is an r-value expression which we cannot take the address of anyways.
4187 return enclose_expression(expr.substr(2, expr.size() - 3));
4188 }
4189 else if (expr.front() == '*')
4190 {
4191 // If this expression starts with a dereference operator ('*'), then
4192 // just return the part after the operator.
4193 return expr.substr(1);
4194 }
4195 else
4196 return join('&', enclose_expression(expr));
4197 }
4198
4199 // Just like to_expression except that we enclose the expression inside parentheses if needed.
4200 string CompilerGLSL::to_enclosed_expression(uint32_t id, bool register_expression_read)
4201 {
4202 return enclose_expression(to_expression(id, register_expression_read));
4203 }
4204
4205 // Used explicitly when we want to read a row-major expression, but without any transpose shenanigans.
4206 // need_transpose must be forced to false.
4207 string CompilerGLSL::to_unpacked_row_major_matrix_expression(uint32_t id)
4208 {
4209 return unpack_expression_type(to_expression(id), expression_type(id),
4210 get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID),
4211 has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked), true);
4212 }
4213
4214 string CompilerGLSL::to_unpacked_expression(uint32_t id, bool register_expression_read)
4215 {
4216 // If we need to transpose, it will also take care of unpacking rules.
4217 auto *e = maybe_get<SPIRExpression>(id);
4218 bool need_transpose = e && e->need_transpose;
4219 bool is_remapped = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID);
4220 bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
4221
4222 if (!need_transpose && (is_remapped || is_packed))
4223 {
4224 return unpack_expression_type(to_expression(id, register_expression_read),
4225 get_pointee_type(expression_type_id(id)),
4226 get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID),
4227 has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked), false);
4228 }
4229 else
4230 return to_expression(id, register_expression_read);
4231 }
4232
4233 string CompilerGLSL::to_enclosed_unpacked_expression(uint32_t id, bool register_expression_read)
4234 {
4235 // If we need to transpose, it will also take care of unpacking rules.
4236 auto *e = maybe_get<SPIRExpression>(id);
4237 bool need_transpose = e && e->need_transpose;
4238 bool is_remapped = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID);
4239 bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
4240 if (!need_transpose && (is_remapped || is_packed))
4241 {
4242 return unpack_expression_type(to_expression(id, register_expression_read), expression_type(id),
4243 get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID),
4244 has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked), false);
4245 }
4246 else
4247 return to_enclosed_expression(id, register_expression_read);
4248 }
4249
4250 string CompilerGLSL::to_dereferenced_expression(uint32_t id, bool register_expression_read)
4251 {
4252 auto &type = expression_type(id);
4253 if (type.pointer && should_dereference(id))
4254 return dereference_expression(type, to_enclosed_expression(id, register_expression_read));
4255 else
4256 return to_expression(id, register_expression_read);
4257 }
4258
4259 string CompilerGLSL::to_pointer_expression(uint32_t id, bool register_expression_read)
4260 {
4261 auto &type = expression_type(id);
4262 if (type.pointer && expression_is_lvalue(id) && !should_dereference(id))
4263 return address_of_expression(to_enclosed_expression(id, register_expression_read));
4264 else
4265 return to_unpacked_expression(id, register_expression_read);
4266 }
4267
4268 string CompilerGLSL::to_enclosed_pointer_expression(uint32_t id, bool register_expression_read)
4269 {
4270 auto &type = expression_type(id);
4271 if (type.pointer && expression_is_lvalue(id) && !should_dereference(id))
4272 return address_of_expression(to_enclosed_expression(id, register_expression_read));
4273 else
4274 return to_enclosed_unpacked_expression(id, register_expression_read);
4275 }
4276
4277 string CompilerGLSL::to_extract_component_expression(uint32_t id, uint32_t index)
4278 {
4279 auto expr = to_enclosed_expression(id);
4280 if (has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked))
4281 return join(expr, "[", index, "]");
4282 else
4283 return join(expr, ".", index_to_swizzle(index));
4284 }
4285
4286 string CompilerGLSL::to_rerolled_array_expression(const string &base_expr, const SPIRType &type)
4287 {
4288 uint32_t size = to_array_size_literal(type);
4289 auto &parent = get<SPIRType>(type.parent_type);
4290 string expr = "{ ";
4291
4292 for (uint32_t i = 0; i < size; i++)
4293 {
4294 auto subexpr = join(base_expr, "[", convert_to_string(i), "]");
4295 if (parent.array.empty())
4296 expr += subexpr;
4297 else
4298 expr += to_rerolled_array_expression(subexpr, parent);
4299
4300 if (i + 1 < size)
4301 expr += ", ";
4302 }
4303
4304 expr += " }";
4305 return expr;
4306 }
4307
4308 string CompilerGLSL::to_composite_constructor_expression(uint32_t id, bool uses_buffer_offset)
4309 {
4310 auto &type = expression_type(id);
4311
4312 bool reroll_array = !type.array.empty() && (!backend.array_is_value_type ||
4313 (uses_buffer_offset && !backend.buffer_offset_array_is_value_type));
4314
4315 if (reroll_array)
4316 {
4317 // For this case, we need to "re-roll" an array initializer from a temporary.
4318 // We cannot simply pass the array directly, since it decays to a pointer and it cannot
4319 // participate in a struct initializer. E.g.
4320 // float arr[2] = { 1.0, 2.0 };
4321 // Foo foo = { arr }; must be transformed to
4322 // Foo foo = { { arr[0], arr[1] } };
4323 // The array sizes cannot be deduced from specialization constants since we cannot use any loops.
4324
4325 // We're only triggering one read of the array expression, but this is fine since arrays have to be declared
4326 // as temporaries anyways.
4327 return to_rerolled_array_expression(to_enclosed_expression(id), type);
4328 }
4329 else
4330 return to_unpacked_expression(id);
4331 }
4332
4333 string CompilerGLSL::to_expression(uint32_t id, bool register_expression_read)
4334 {
4335 auto itr = invalid_expressions.find(id);
4336 if (itr != end(invalid_expressions))
4337 handle_invalid_expression(id);
4338
4339 if (ir.ids[id].get_type() == TypeExpression)
4340 {
4341 // We might have a more complex chain of dependencies.
4342 		// A possible scenario is:
4343 //
4344 // %1 = OpLoad
4345 		// %2 = OpDoSomething %1 %1. Here %2 will have a dependency on %1.
4346 		// %3 = OpDoSomethingAgain %2 %2. Here %3 will lose the link to %1, since we don't propagate dependencies like that.
4347 		// OpStore %1 %foo // Here we can invalidate %1, and hence all expressions which depend on %1. Only %2 will know, since it's part of invalid_expressions.
4348 		// %4 = OpDoSomethingAnotherTime %3 %3 // If we forward all expressions, we will see the %1 expression after the store, not before.
4349 //
4350 // However, we can propagate up a list of depended expressions when we used %2, so we can check if %2 is invalid when reading %3 after the store,
4351 // and see that we should not forward reads of the original variable.
4352 auto &expr = get<SPIRExpression>(id);
4353 for (uint32_t dep : expr.expression_dependencies)
4354 if (invalid_expressions.find(dep) != end(invalid_expressions))
4355 handle_invalid_expression(dep);
4356 }
4357
4358 if (register_expression_read)
4359 track_expression_read(id);
4360
4361 switch (ir.ids[id].get_type())
4362 {
4363 case TypeExpression:
4364 {
4365 auto &e = get<SPIRExpression>(id);
4366 if (e.base_expression)
4367 return to_enclosed_expression(e.base_expression) + e.expression;
4368 else if (e.need_transpose)
4369 {
4370 // This should not be reached for access chains, since we always deal explicitly with transpose state
4371 // when consuming an access chain expression.
4372 uint32_t physical_type_id = get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID);
4373 bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
4374 return convert_row_major_matrix(e.expression, get<SPIRType>(e.expression_type), physical_type_id,
4375 is_packed);
4376 }
4377 else if (flattened_structs.count(id))
4378 {
4379 return load_flattened_struct(e.expression, get<SPIRType>(e.expression_type));
4380 }
4381 else
4382 {
4383 if (is_forcing_recompilation())
4384 {
4385 // During first compilation phase, certain expression patterns can trigger exponential growth of memory.
4386 // Avoid this by returning dummy expressions during this phase.
4387 // Do not use empty expressions here, because those are sentinels for other cases.
4388 return "_";
4389 }
4390 else
4391 return e.expression;
4392 }
4393 }
4394
4395 case TypeConstant:
4396 {
4397 auto &c = get<SPIRConstant>(id);
4398 auto &type = get<SPIRType>(c.constant_type);
4399
4400 // WorkGroupSize may be a constant.
4401 auto &dec = ir.meta[c.self].decoration;
4402 if (dec.builtin)
4403 return builtin_to_glsl(dec.builtin_type, StorageClassGeneric);
4404 else if (c.specialization)
4405 return to_name(id);
4406 else if (c.is_used_as_lut)
4407 return to_name(id);
4408 else if (type.basetype == SPIRType::Struct && !backend.can_declare_struct_inline)
4409 return to_name(id);
4410 else if (!type.array.empty() && !backend.can_declare_arrays_inline)
4411 return to_name(id);
4412 else
4413 return constant_expression(c);
4414 }
4415
4416 case TypeConstantOp:
4417 return to_name(id);
4418
4419 case TypeVariable:
4420 {
4421 auto &var = get<SPIRVariable>(id);
4422 // If we try to use a loop variable before the loop header, we have to redirect it to the static expression,
4423 // the variable has not been declared yet.
4424 if (var.statically_assigned || (var.loop_variable && !var.loop_variable_enable))
4425 return to_expression(var.static_expression);
4426 else if (var.deferred_declaration)
4427 {
4428 var.deferred_declaration = false;
4429 return variable_decl(var);
4430 }
4431 else if (flattened_structs.count(id))
4432 {
4433 return load_flattened_struct(to_name(id), get<SPIRType>(var.basetype));
4434 }
4435 else
4436 {
4437 auto &dec = ir.meta[var.self].decoration;
4438 if (dec.builtin)
4439 return builtin_to_glsl(dec.builtin_type, var.storage);
4440 else
4441 return to_name(id);
4442 }
4443 }
4444
4445 case TypeCombinedImageSampler:
4446 		// The expression of this type should never be taken directly.
4447 // The intention is that texture sampling functions will extract the image and samplers
4448 // separately and take their expressions as needed.
4449 // GLSL does not use this type because OpSampledImage immediately creates a combined image sampler
4450 		// expression, a la sampler2D(texture, sampler).
4451 SPIRV_CROSS_THROW("Combined image samplers have no default expression representation.");
4452
4453 case TypeAccessChain:
4454 // We cannot express this type. They only have meaning in other OpAccessChains, OpStore or OpLoad.
4455 SPIRV_CROSS_THROW("Access chains have no default expression representation.");
4456
4457 default:
4458 return to_name(id);
4459 }
4460 }
4461
4462 string CompilerGLSL::constant_op_expression(const SPIRConstantOp &cop)
4463 {
4464 auto &type = get<SPIRType>(cop.basetype);
4465 bool binary = false;
4466 bool unary = false;
4467 string op;
4468
4469 if (is_legacy() && is_unsigned_opcode(cop.opcode))
4470 SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy targets.");
4471
4472 // TODO: Find a clean way to reuse emit_instruction.
4473 switch (cop.opcode)
4474 {
4475 case OpSConvert:
4476 case OpUConvert:
4477 case OpFConvert:
4478 op = type_to_glsl_constructor(type);
4479 break;
4480
4481 #define GLSL_BOP(opname, x) \
4482 case Op##opname: \
4483 binary = true; \
4484 op = x; \
4485 break
4486
4487 #define GLSL_UOP(opname, x) \
4488 case Op##opname: \
4489 unary = true; \
4490 op = x; \
4491 break
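	// For instance, GLSL_BOP(IAdd, "+") expands to:
	// case OpIAdd: binary = true; op = "+"; break;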
4492
4493 GLSL_UOP(SNegate, "-");
4494 GLSL_UOP(Not, "~");
4495 GLSL_BOP(IAdd, "+");
4496 GLSL_BOP(ISub, "-");
4497 GLSL_BOP(IMul, "*");
4498 GLSL_BOP(SDiv, "/");
4499 GLSL_BOP(UDiv, "/");
4500 GLSL_BOP(UMod, "%");
4501 GLSL_BOP(SMod, "%");
4502 GLSL_BOP(ShiftRightLogical, ">>");
4503 GLSL_BOP(ShiftRightArithmetic, ">>");
4504 GLSL_BOP(ShiftLeftLogical, "<<");
4505 GLSL_BOP(BitwiseOr, "|");
4506 GLSL_BOP(BitwiseXor, "^");
4507 GLSL_BOP(BitwiseAnd, "&");
4508 GLSL_BOP(LogicalOr, "||");
4509 GLSL_BOP(LogicalAnd, "&&");
4510 GLSL_UOP(LogicalNot, "!");
4511 GLSL_BOP(LogicalEqual, "==");
4512 GLSL_BOP(LogicalNotEqual, "!=");
4513 GLSL_BOP(IEqual, "==");
4514 GLSL_BOP(INotEqual, "!=");
4515 GLSL_BOP(ULessThan, "<");
4516 GLSL_BOP(SLessThan, "<");
4517 GLSL_BOP(ULessThanEqual, "<=");
4518 GLSL_BOP(SLessThanEqual, "<=");
4519 GLSL_BOP(UGreaterThan, ">");
4520 GLSL_BOP(SGreaterThan, ">");
4521 GLSL_BOP(UGreaterThanEqual, ">=");
4522 GLSL_BOP(SGreaterThanEqual, ">=");
4523
4524 case OpSelect:
4525 {
4526 if (cop.arguments.size() < 3)
4527 SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
4528
4529 // This one is pretty annoying. It's triggered from
4530 // uint(bool), int(bool) from spec constants.
4531 // In order to preserve its compile-time constness in Vulkan GLSL,
4532 // we need to reduce the OpSelect expression back to this simplified model.
4533 // If we cannot, fail.
4534 if (to_trivial_mix_op(type, op, cop.arguments[2], cop.arguments[1], cop.arguments[0]))
4535 {
4536 // Implement as a simple cast down below.
4537 }
4538 else
4539 {
4540 // Implement a ternary and pray the compiler understands it :)
4541 return to_ternary_expression(type, cop.arguments[0], cop.arguments[1], cop.arguments[2]);
4542 }
4543 break;
4544 }
4545
4546 case OpVectorShuffle:
4547 {
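		// A shuffle is emitted as a plain constructor, e.g. selecting components { 0, 4, 5 }
		// from vec4 a and vec2 b yields "vec3(a.x, b.x, b.y)".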
4548 string expr = type_to_glsl_constructor(type);
4549 expr += "(";
4550
4551 uint32_t left_components = expression_type(cop.arguments[0]).vecsize;
4552 string left_arg = to_enclosed_expression(cop.arguments[0]);
4553 string right_arg = to_enclosed_expression(cop.arguments[1]);
4554
4555 for (uint32_t i = 2; i < uint32_t(cop.arguments.size()); i++)
4556 {
4557 uint32_t index = cop.arguments[i];
4558 if (index >= left_components)
4559 expr += right_arg + "." + "xyzw"[index - left_components];
4560 else
4561 expr += left_arg + "." + "xyzw"[index];
4562
4563 if (i + 1 < uint32_t(cop.arguments.size()))
4564 expr += ", ";
4565 }
4566
4567 expr += ")";
4568 return expr;
4569 }
4570
4571 case OpCompositeExtract:
4572 {
4573 auto expr = access_chain_internal(cop.arguments[0], &cop.arguments[1], uint32_t(cop.arguments.size() - 1),
4574 ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr);
4575 return expr;
4576 }
4577
4578 case OpCompositeInsert:
4579 SPIRV_CROSS_THROW("OpCompositeInsert spec constant op is not supported.");
4580
4581 default:
4582 // Some opcodes are unimplemented here; they are currently not possible to generate from glslang for testing.
4583 SPIRV_CROSS_THROW("Unimplemented spec constant op.");
4584 }
4585
4586 uint32_t bit_width = 0;
4587 if (unary || binary || cop.opcode == OpSConvert || cop.opcode == OpUConvert)
4588 bit_width = expression_type(cop.arguments[0]).width;
4589
4590 SPIRType::BaseType input_type;
4591 bool skip_cast_if_equal_type = opcode_is_sign_invariant(cop.opcode);
4592
4593 switch (cop.opcode)
4594 {
4595 case OpIEqual:
4596 case OpINotEqual:
4597 input_type = to_signed_basetype(bit_width);
4598 break;
4599
4600 case OpSLessThan:
4601 case OpSLessThanEqual:
4602 case OpSGreaterThan:
4603 case OpSGreaterThanEqual:
4604 case OpSMod:
4605 case OpSDiv:
4606 case OpShiftRightArithmetic:
4607 case OpSConvert:
4608 case OpSNegate:
4609 input_type = to_signed_basetype(bit_width);
4610 break;
4611
4612 case OpULessThan:
4613 case OpULessThanEqual:
4614 case OpUGreaterThan:
4615 case OpUGreaterThanEqual:
4616 case OpUMod:
4617 case OpUDiv:
4618 case OpShiftRightLogical:
4619 case OpUConvert:
4620 input_type = to_unsigned_basetype(bit_width);
4621 break;
4622
4623 default:
4624 input_type = type.basetype;
4625 break;
4626 }
4627
4628 #undef GLSL_BOP
4629 #undef GLSL_UOP
4630 if (binary)
4631 {
4632 if (cop.arguments.size() < 2)
4633 SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
4634
4635 string cast_op0;
4636 string cast_op1;
4637 auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, cop.arguments[0],
4638 cop.arguments[1], skip_cast_if_equal_type);
4639
4640 if (type.basetype != input_type && type.basetype != SPIRType::Boolean)
4641 {
4642 expected_type.basetype = input_type;
4643 auto expr = bitcast_glsl_op(type, expected_type);
4644 expr += '(';
4645 expr += join(cast_op0, " ", op, " ", cast_op1);
4646 expr += ')';
4647 return expr;
4648 }
4649 else
4650 return join("(", cast_op0, " ", op, " ", cast_op1, ")");
4651 }
4652 else if (unary)
4653 {
4654 if (cop.arguments.size() < 1)
4655 SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
4656
4657 // Auto-bitcast to result type as needed.
4658 // Works around various casting scenarios in glslang as there is no OpBitcast for specialization constants.
4659 return join("(", op, bitcast_glsl(type, cop.arguments[0]), ")");
4660 }
4661 else if (cop.opcode == OpSConvert || cop.opcode == OpUConvert)
4662 {
4663 if (cop.arguments.size() < 1)
4664 SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
4665
4666 auto &arg_type = expression_type(cop.arguments[0]);
4667 if (arg_type.width < type.width && input_type != arg_type.basetype)
4668 {
4669 auto expected = arg_type;
4670 expected.basetype = input_type;
4671 return join(op, "(", bitcast_glsl(expected, cop.arguments[0]), ")");
4672 }
4673 else
4674 return join(op, "(", to_expression(cop.arguments[0]), ")");
4675 }
4676 else
4677 {
4678 if (cop.arguments.size() < 1)
4679 SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
4680 return join(op, "(", to_expression(cop.arguments[0]), ")");
4681 }
4682 }
4683
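// Converts a SPIRConstant into a literal or constructor expression.
// For a hypothetical float[2] constant { 1.0, 2.0 } this emits
// float[2](1.0, 2.0) on constructor backends and { 1.0, 2.0 } on
// initializer-list backends; the exact form depends on the backend flags below.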
4684 string CompilerGLSL::constant_expression(const SPIRConstant &c)
4685 {
4686 auto &type = get<SPIRType>(c.constant_type);
4687
4688 if (type.pointer)
4689 {
4690 return backend.null_pointer_literal;
4691 }
4692 else if (!c.subconstants.empty())
4693 {
4694 // Handles Arrays and structures.
4695 string res;
4696
4697 // Allow Metal to use the array<T> template to make arrays a value type.
4698 bool needs_trailing_bracket = false;
4699 if (backend.use_initializer_list && backend.use_typed_initializer_list && type.basetype == SPIRType::Struct &&
4700 type.array.empty())
4701 {
4702 res = type_to_glsl_constructor(type) + "{ ";
4703 }
4704 else if (backend.use_initializer_list && backend.use_typed_initializer_list && backend.array_is_value_type &&
4705 !type.array.empty())
4706 {
4707 res = type_to_glsl_constructor(type) + "({ ";
4708 needs_trailing_bracket = true;
4709 }
4710 else if (backend.use_initializer_list)
4711 {
4712 res = "{ ";
4713 }
4714 else
4715 {
4716 res = type_to_glsl_constructor(type) + "(";
4717 }
4718
4719 for (auto &elem : c.subconstants)
4720 {
4721 auto &subc = get<SPIRConstant>(elem);
4722 if (subc.specialization)
4723 res += to_name(elem);
4724 else
4725 res += constant_expression(subc);
4726
4727 if (&elem != &c.subconstants.back())
4728 res += ", ";
4729 }
4730
4731 res += backend.use_initializer_list ? " }" : ")";
4732 if (needs_trailing_bracket)
4733 res += ")";
4734
4735 return res;
4736 }
4737 else if (type.basetype == SPIRType::Struct && type.member_types.size() == 0)
4738 {
4739 // Metal tessellation likes empty structs which are then constant expressions.
4740 if (backend.supports_empty_struct)
4741 return "{ }";
4742 else if (backend.use_typed_initializer_list)
4743 return join(type_to_glsl(get<SPIRType>(c.constant_type)), "{ 0 }");
4744 else if (backend.use_initializer_list)
4745 return "{ 0 }";
4746 else
4747 return join(type_to_glsl(get<SPIRType>(c.constant_type)), "(0)");
4748 }
4749 else if (c.columns() == 1)
4750 {
4751 return constant_expression_vector(c, 0);
4752 }
4753 else
4754 {
4755 string res = type_to_glsl(get<SPIRType>(c.constant_type)) + "(";
4756 for (uint32_t col = 0; col < c.columns(); col++)
4757 {
4758 if (c.specialization_constant_id(col) != 0)
4759 res += to_name(c.specialization_constant_id(col));
4760 else
4761 res += constant_expression_vector(c, col);
4762
4763 if (col + 1 < c.columns())
4764 res += ", ";
4765 }
4766 res += ")";
4767 return res;
4768 }
4769 }
4770
4771 #ifdef _MSC_VER
4772 // sprintf warning.
4773 // We cannot rely on snprintf existing because, ..., MSVC.
4774 #pragma warning(push)
4775 #pragma warning(disable : 4996)
4776 #endif
4777
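// Half constants have no portable literal suffix, so they are emitted as a
// value cast, e.g. float16_t(1.5), with inf and NaN spelled as
// float16_t(1.0 / 0.0) and float16_t(0.0 / 0.0) respectively
// (illustrative for a float16_t target).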
4778 string CompilerGLSL::convert_half_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
4779 {
4780 string res;
4781 float float_value = c.scalar_f16(col, row);
4782
4783 // There is no literal "hf" suffix in GL_NV_gpu_shader5, so to avoid lots
4784 // of complicated workarounds, always value-cast to the half type.
4785 if (std::isnan(float_value) || std::isinf(float_value))
4786 {
4787 SPIRType type;
4788 type.basetype = SPIRType::Half;
4789 type.vecsize = 1;
4790 type.columns = 1;
4791
4792 if (float_value == numeric_limits<float>::infinity())
4793 res = join(type_to_glsl(type), "(1.0 / 0.0)");
4794 else if (float_value == -numeric_limits<float>::infinity())
4795 res = join(type_to_glsl(type), "(-1.0 / 0.0)");
4796 else if (std::isnan(float_value))
4797 res = join(type_to_glsl(type), "(0.0 / 0.0)");
4798 else
4799 SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant.");
4800 }
4801 else
4802 {
4803 SPIRType type;
4804 type.basetype = SPIRType::Half;
4805 type.vecsize = 1;
4806 type.columns = 1;
4807 res = join(type_to_glsl(type), "(", convert_to_string(float_value, current_locale_radix_character), ")");
4808 }
4809
4810 return res;
4811 }
4812
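// Non-finite float constants are emitted as bitcasts such as
// uintBitsToFloat(0x7f800000u) on modern targets, and as (1.0 / 0.0)-style
// expressions on legacy targets, as the code below shows.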
4813 string CompilerGLSL::convert_float_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
4814 {
4815 string res;
4816 float float_value = c.scalar_f32(col, row);
4817
4818 if (std::isnan(float_value) || std::isinf(float_value))
4819 {
4820 // Use special representation.
4821 if (!is_legacy())
4822 {
4823 SPIRType out_type;
4824 SPIRType in_type;
4825 out_type.basetype = SPIRType::Float;
4826 in_type.basetype = SPIRType::UInt;
4827 out_type.vecsize = 1;
4828 in_type.vecsize = 1;
4829 out_type.width = 32;
4830 in_type.width = 32;
4831
4832 char print_buffer[32];
4833 sprintf(print_buffer, "0x%xu", c.scalar(col, row));
4834 res = join(bitcast_glsl_op(out_type, in_type), "(", print_buffer, ")");
4835 }
4836 else
4837 {
4838 if (float_value == numeric_limits<float>::infinity())
4839 {
4840 if (backend.float_literal_suffix)
4841 res = "(1.0f / 0.0f)";
4842 else
4843 res = "(1.0 / 0.0)";
4844 }
4845 else if (float_value == -numeric_limits<float>::infinity())
4846 {
4847 if (backend.float_literal_suffix)
4848 res = "(-1.0f / 0.0f)";
4849 else
4850 res = "(-1.0 / 0.0)";
4851 }
4852 else if (std::isnan(float_value))
4853 {
4854 if (backend.float_literal_suffix)
4855 res = "(0.0f / 0.0f)";
4856 else
4857 res = "(0.0 / 0.0)";
4858 }
4859 else
4860 SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant.");
4861 }
4862 }
4863 else
4864 {
4865 res = convert_to_string(float_value, current_locale_radix_character);
4866 if (backend.float_literal_suffix)
4867 res += "f";
4868 }
4869
4870 return res;
4871 }
4872
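// Same scheme for doubles: where 64-bit integer support exists, non-finite
// values become 64-bit bitcasts along the lines of
// uint64BitsToDouble(0x7ff0000000000000ul) (illustrative), otherwise
// (1.0lf / 0.0lf)-style division expressions.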
4873 std::string CompilerGLSL::convert_double_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
4874 {
4875 string res;
4876 double double_value = c.scalar_f64(col, row);
4877
4878 if (std::isnan(double_value) || std::isinf(double_value))
4879 {
4880 // Use special representation.
4881 if (!is_legacy())
4882 {
4883 SPIRType out_type;
4884 SPIRType in_type;
4885 out_type.basetype = SPIRType::Double;
4886 in_type.basetype = SPIRType::UInt64;
4887 out_type.vecsize = 1;
4888 in_type.vecsize = 1;
4889 out_type.width = 64;
4890 in_type.width = 64;
4891
4892 uint64_t u64_value = c.scalar_u64(col, row);
4893
4894 if (options.es)
4895 SPIRV_CROSS_THROW("64-bit integers/float not supported in ES profile.");
4896 require_extension_internal("GL_ARB_gpu_shader_int64");
4897
4898 char print_buffer[64];
4899 sprintf(print_buffer, "0x%llx%s", static_cast<unsigned long long>(u64_value),
4900 backend.long_long_literal_suffix ? "ull" : "ul");
4901 res = join(bitcast_glsl_op(out_type, in_type), "(", print_buffer, ")");
4902 }
4903 else
4904 {
4905 if (options.es)
4906 SPIRV_CROSS_THROW("FP64 not supported in ES profile.");
4907 if (options.version < 400)
4908 require_extension_internal("GL_ARB_gpu_shader_fp64");
4909
4910 if (double_value == numeric_limits<double>::infinity())
4911 {
4912 if (backend.double_literal_suffix)
4913 res = "(1.0lf / 0.0lf)";
4914 else
4915 res = "(1.0 / 0.0)";
4916 }
4917 else if (double_value == -numeric_limits<double>::infinity())
4918 {
4919 if (backend.double_literal_suffix)
4920 res = "(-1.0lf / 0.0lf)";
4921 else
4922 res = "(-1.0 / 0.0)";
4923 }
4924 else if (std::isnan(double_value))
4925 {
4926 if (backend.double_literal_suffix)
4927 res = "(0.0lf / 0.0lf)";
4928 else
4929 res = "(0.0 / 0.0)";
4930 }
4931 else
4932 SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant.");
4933 }
4934 }
4935 else
4936 {
4937 res = convert_to_string(double_value, current_locale_radix_character);
4938 if (backend.double_literal_suffix)
4939 res += "lf";
4940 }
4941
4942 return res;
4943 }
4944
4945 #ifdef _MSC_VER
4946 #pragma warning(pop)
4947 #endif
4948
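// Emits one column of a constant, e.g. vec4(1.0, 2.0, 3.0, 4.0), collapsing
// identical components into a splat such as vec4(0.0), or a scalar swizzle
// like (0.0).xxxx on backends that can swizzle scalars (illustrative forms).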
4949 string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t vector)
4950 {
4951 auto type = get<SPIRType>(c.constant_type);
4952 type.columns = 1;
4953
4954 auto scalar_type = type;
4955 scalar_type.vecsize = 1;
4956
4957 string res;
4958 bool splat = backend.use_constructor_splatting && c.vector_size() > 1;
4959 bool swizzle_splat = backend.can_swizzle_scalar && c.vector_size() > 1;
4960
4961 if (!type_is_floating_point(type))
4962 {
4963 // Cannot swizzle literal integers as a special case.
4964 swizzle_splat = false;
4965 }
4966
4967 if (splat || swizzle_splat)
4968 {
4969 // Cannot use constant splatting if we have specialization constants somewhere in the vector.
4970 for (uint32_t i = 0; i < c.vector_size(); i++)
4971 {
4972 if (c.specialization_constant_id(vector, i) != 0)
4973 {
4974 splat = false;
4975 swizzle_splat = false;
4976 break;
4977 }
4978 }
4979 }
4980
4981 if (splat || swizzle_splat)
4982 {
4983 if (type.width == 64)
4984 {
4985 uint64_t ident = c.scalar_u64(vector, 0);
4986 for (uint32_t i = 1; i < c.vector_size(); i++)
4987 {
4988 if (ident != c.scalar_u64(vector, i))
4989 {
4990 splat = false;
4991 swizzle_splat = false;
4992 break;
4993 }
4994 }
4995 }
4996 else
4997 {
4998 uint32_t ident = c.scalar(vector, 0);
4999 for (uint32_t i = 1; i < c.vector_size(); i++)
5000 {
5001 if (ident != c.scalar(vector, i))
5002 {
5003 splat = false;
5004 swizzle_splat = false;
5005 }
5006 }
5007 }
5008 }
5009
5010 if (c.vector_size() > 1 && !swizzle_splat)
5011 res += type_to_glsl(type) + "(";
5012
5013 switch (type.basetype)
5014 {
5015 case SPIRType::Half:
5016 if (splat || swizzle_splat)
5017 {
5018 res += convert_half_to_string(c, vector, 0);
5019 if (swizzle_splat)
5020 res = remap_swizzle(get<SPIRType>(c.constant_type), 1, res);
5021 }
5022 else
5023 {
5024 for (uint32_t i = 0; i < c.vector_size(); i++)
5025 {
5026 if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5027 res += to_name(c.specialization_constant_id(vector, i));
5028 else
5029 res += convert_half_to_string(c, vector, i);
5030
5031 if (i + 1 < c.vector_size())
5032 res += ", ";
5033 }
5034 }
5035 break;
5036
5037 case SPIRType::Float:
5038 if (splat || swizzle_splat)
5039 {
5040 res += convert_float_to_string(c, vector, 0);
5041 if (swizzle_splat)
5042 res = remap_swizzle(get<SPIRType>(c.constant_type), 1, res);
5043 }
5044 else
5045 {
5046 for (uint32_t i = 0; i < c.vector_size(); i++)
5047 {
5048 if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5049 res += to_name(c.specialization_constant_id(vector, i));
5050 else
5051 res += convert_float_to_string(c, vector, i);
5052
5053 if (i + 1 < c.vector_size())
5054 res += ", ";
5055 }
5056 }
5057 break;
5058
5059 case SPIRType::Double:
5060 if (splat || swizzle_splat)
5061 {
5062 res += convert_double_to_string(c, vector, 0);
5063 if (swizzle_splat)
5064 res = remap_swizzle(get<SPIRType>(c.constant_type), 1, res);
5065 }
5066 else
5067 {
5068 for (uint32_t i = 0; i < c.vector_size(); i++)
5069 {
5070 if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5071 res += to_name(c.specialization_constant_id(vector, i));
5072 else
5073 res += convert_double_to_string(c, vector, i);
5074
5075 if (i + 1 < c.vector_size())
5076 res += ", ";
5077 }
5078 }
5079 break;
5080
5081 case SPIRType::Int64:
5082 if (splat)
5083 {
5084 res += convert_to_string(c.scalar_i64(vector, 0));
5085 if (backend.long_long_literal_suffix)
5086 res += "ll";
5087 else
5088 res += "l";
5089 }
5090 else
5091 {
5092 for (uint32_t i = 0; i < c.vector_size(); i++)
5093 {
5094 if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5095 res += to_name(c.specialization_constant_id(vector, i));
5096 else
5097 {
5098 res += convert_to_string(c.scalar_i64(vector, i));
5099 if (backend.long_long_literal_suffix)
5100 res += "ll";
5101 else
5102 res += "l";
5103 }
5104
5105 if (i + 1 < c.vector_size())
5106 res += ", ";
5107 }
5108 }
5109 break;
5110
5111 case SPIRType::UInt64:
5112 if (splat)
5113 {
5114 res += convert_to_string(c.scalar_u64(vector, 0));
5115 if (backend.long_long_literal_suffix)
5116 res += "ull";
5117 else
5118 res += "ul";
5119 }
5120 else
5121 {
5122 for (uint32_t i = 0; i < c.vector_size(); i++)
5123 {
5124 if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5125 res += to_name(c.specialization_constant_id(vector, i));
5126 else
5127 {
5128 res += convert_to_string(c.scalar_u64(vector, i));
5129 if (backend.long_long_literal_suffix)
5130 res += "ull";
5131 else
5132 res += "ul";
5133 }
5134
5135 if (i + 1 < c.vector_size())
5136 res += ", ";
5137 }
5138 }
5139 break;
5140
5141 case SPIRType::UInt:
5142 if (splat)
5143 {
5144 res += convert_to_string(c.scalar(vector, 0));
5145 if (is_legacy())
5146 {
5147 // Fake unsigned constant literals with signed ones if possible.
5148 // Things like array sizes, etc., tend to be unsigned even though they could just as easily be signed.
5149 if (c.scalar_i32(vector, 0) < 0)
5150 SPIRV_CROSS_THROW("Tried to convert uint literal into int, but this made the literal negative.");
5151 }
5152 else if (backend.uint32_t_literal_suffix)
5153 res += "u";
5154 }
5155 else
5156 {
5157 for (uint32_t i = 0; i < c.vector_size(); i++)
5158 {
5159 if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5160 res += to_name(c.specialization_constant_id(vector, i));
5161 else
5162 {
5163 res += convert_to_string(c.scalar(vector, i));
5164 if (is_legacy())
5165 {
5166 // Fake unsigned constant literals with signed ones if possible.
5167 // Things like array sizes, etc., tend to be unsigned even though they could just as easily be signed.
5168 if (c.scalar_i32(vector, i) < 0)
5169 SPIRV_CROSS_THROW("Tried to convert uint literal into int, but this made "
5170 "the literal negative.");
5171 }
5172 else if (backend.uint32_t_literal_suffix)
5173 res += "u";
5174 }
5175
5176 if (i + 1 < c.vector_size())
5177 res += ", ";
5178 }
5179 }
5180 break;
5181
5182 case SPIRType::Int:
5183 if (splat)
5184 res += convert_to_string(c.scalar_i32(vector, 0));
5185 else
5186 {
5187 for (uint32_t i = 0; i < c.vector_size(); i++)
5188 {
5189 if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5190 res += to_name(c.specialization_constant_id(vector, i));
5191 else
5192 res += convert_to_string(c.scalar_i32(vector, i));
5193 if (i + 1 < c.vector_size())
5194 res += ", ";
5195 }
5196 }
5197 break;
5198
5199 case SPIRType::UShort:
5200 if (splat)
5201 {
5202 res += convert_to_string(c.scalar(vector, 0));
5203 }
5204 else
5205 {
5206 for (uint32_t i = 0; i < c.vector_size(); i++)
5207 {
5208 if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5209 res += to_name(c.specialization_constant_id(vector, i));
5210 else
5211 {
5212 if (*backend.uint16_t_literal_suffix)
5213 {
5214 res += convert_to_string(c.scalar_u16(vector, i));
5215 res += backend.uint16_t_literal_suffix;
5216 }
5217 else
5218 {
5219 // If backend doesn't have a literal suffix, we need to value cast.
5220 res += type_to_glsl(scalar_type);
5221 res += "(";
5222 res += convert_to_string(c.scalar_u16(vector, i));
5223 res += ")";
5224 }
5225 }
5226
5227 if (i + 1 < c.vector_size())
5228 res += ", ";
5229 }
5230 }
5231 break;
5232
5233 case SPIRType::Short:
5234 if (splat)
5235 {
5236 res += convert_to_string(c.scalar_i16(vector, 0));
5237 }
5238 else
5239 {
5240 for (uint32_t i = 0; i < c.vector_size(); i++)
5241 {
5242 if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5243 res += to_name(c.specialization_constant_id(vector, i));
5244 else
5245 {
5246 if (*backend.int16_t_literal_suffix)
5247 {
5248 res += convert_to_string(c.scalar_i16(vector, i));
5249 res += backend.int16_t_literal_suffix;
5250 }
5251 else
5252 {
5253 // If backend doesn't have a literal suffix, we need to value cast.
5254 res += type_to_glsl(scalar_type);
5255 res += "(";
5256 res += convert_to_string(c.scalar_i16(vector, i));
5257 res += ")";
5258 }
5259 }
5260
5261 if (i + 1 < c.vector_size())
5262 res += ", ";
5263 }
5264 }
5265 break;
5266
5267 case SPIRType::UByte:
5268 if (splat)
5269 {
5270 res += convert_to_string(c.scalar_u8(vector, 0));
5271 }
5272 else
5273 {
5274 for (uint32_t i = 0; i < c.vector_size(); i++)
5275 {
5276 if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5277 res += to_name(c.specialization_constant_id(vector, i));
5278 else
5279 {
5280 res += type_to_glsl(scalar_type);
5281 res += "(";
5282 res += convert_to_string(c.scalar_u8(vector, i));
5283 res += ")";
5284 }
5285
5286 if (i + 1 < c.vector_size())
5287 res += ", ";
5288 }
5289 }
5290 break;
5291
5292 case SPIRType::SByte:
5293 if (splat)
5294 {
5295 res += convert_to_string(c.scalar_i8(vector, 0));
5296 }
5297 else
5298 {
5299 for (uint32_t i = 0; i < c.vector_size(); i++)
5300 {
5301 if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5302 res += to_name(c.specialization_constant_id(vector, i));
5303 else
5304 {
5305 res += type_to_glsl(scalar_type);
5306 res += "(";
5307 res += convert_to_string(c.scalar_i8(vector, i));
5308 res += ")";
5309 }
5310
5311 if (i + 1 < c.vector_size())
5312 res += ", ";
5313 }
5314 }
5315 break;
5316
5317 case SPIRType::Boolean:
5318 if (splat)
5319 res += c.scalar(vector, 0) ? "true" : "false";
5320 else
5321 {
5322 for (uint32_t i = 0; i < c.vector_size(); i++)
5323 {
5324 if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5325 res += to_name(c.specialization_constant_id(vector, i));
5326 else
5327 res += c.scalar(vector, i) ? "true" : "false";
5328
5329 if (i + 1 < c.vector_size())
5330 res += ", ";
5331 }
5332 }
5333 break;
5334
5335 default:
5336 SPIRV_CROSS_THROW("Invalid constant expression basetype.");
5337 }
5338
5339 if (c.vector_size() > 1 && !swizzle_splat)
5340 res += ")";
5341
5342 return res;
5343 }
5344
5345 SPIRExpression &CompilerGLSL::emit_uninitialized_temporary_expression(uint32_t type, uint32_t id)
5346 {
5347 forced_temporaries.insert(id);
5348 emit_uninitialized_temporary(type, id);
5349 return set<SPIRExpression>(id, to_name(id), type, true);
5350 }
5351
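// Declares a temporary without an initializer (or zero-initialized when
// requested), hoisting the declaration into the dominating loop header if we
// are currently inside a continue block.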
5352 void CompilerGLSL::emit_uninitialized_temporary(uint32_t result_type, uint32_t result_id)
5353 {
5354 // If we're declaring temporaries inside continue blocks,
5355 // we must declare the temporary in the loop header so that the continue block can avoid declaring new variables.
5356 if (current_continue_block && !hoisted_temporaries.count(result_id))
5357 {
5358 auto &header = get<SPIRBlock>(current_continue_block->loop_dominator);
5359 if (find_if(begin(header.declare_temporary), end(header.declare_temporary),
5360 [result_type, result_id](const pair<uint32_t, uint32_t> &tmp) {
5361 return tmp.first == result_type && tmp.second == result_id;
5362 }) == end(header.declare_temporary))
5363 {
5364 header.declare_temporary.emplace_back(result_type, result_id);
5365 hoisted_temporaries.insert(result_id);
5366 force_recompile();
5367 }
5368 }
5369 else if (hoisted_temporaries.count(result_id) == 0)
5370 {
5371 auto &type = get<SPIRType>(result_type);
5372 auto &flags = ir.meta[result_id].decoration.decoration_flags;
5373
5374 // The result_id has not been made into an expression yet, so use flags interface.
5375 add_local_variable_name(result_id);
5376
5377 string initializer;
5378 if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
5379 initializer = join(" = ", to_zero_initialized_expression(result_type));
5380
5381 statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(result_id)), initializer, ";");
5382 }
5383 }
5384
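// Returns the "lhs = " prefix for a new temporary, e.g. "int _42 = " for a
// fresh declaration, or just "_42 = " when the declaration was hoisted
// earlier (illustrative ID-based naming).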
5385 string CompilerGLSL::declare_temporary(uint32_t result_type, uint32_t result_id)
5386 {
5387 auto &type = get<SPIRType>(result_type);
5388 auto &flags = ir.meta[result_id].decoration.decoration_flags;
5389
5390 // If we're declaring temporaries inside continue blocks,
5391 // we must declare the temporary in the loop header so that the continue block can avoid declaring new variables.
5392 if (current_continue_block && !hoisted_temporaries.count(result_id))
5393 {
5394 auto &header = get<SPIRBlock>(current_continue_block->loop_dominator);
5395 if (find_if(begin(header.declare_temporary), end(header.declare_temporary),
5396 [result_type, result_id](const pair<uint32_t, uint32_t> &tmp) {
5397 return tmp.first == result_type && tmp.second == result_id;
5398 }) == end(header.declare_temporary))
5399 {
5400 header.declare_temporary.emplace_back(result_type, result_id);
5401 hoisted_temporaries.insert(result_id);
5402 force_recompile();
5403 }
5404
5405 return join(to_name(result_id), " = ");
5406 }
5407 else if (hoisted_temporaries.count(result_id))
5408 {
5409 // The temporary has already been declared earlier, so just "declare" the temporary by writing to it.
5410 return join(to_name(result_id), " = ");
5411 }
5412 else
5413 {
5414 // The result_id has not been made into an expression yet, so use flags interface.
5415 add_local_variable_name(result_id);
5416 return join(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(result_id)), " = ");
5417 }
5418 }
5419
5420 bool CompilerGLSL::expression_is_forwarded(uint32_t id) const
5421 {
5422 return forwarded_temporaries.count(id) != 0;
5423 }
5424
5425 bool CompilerGLSL::expression_suppresses_usage_tracking(uint32_t id) const
5426 {
5427 return suppressed_usage_tracking.count(id) != 0;
5428 }
5429
5430 bool CompilerGLSL::expression_read_implies_multiple_reads(uint32_t id) const
5431 {
5432 auto *expr = maybe_get<SPIRExpression>(id);
5433 if (!expr)
5434 return false;
5435
5436 // If we're emitting code at a deeper loop level than when we emitted the expression,
5437 // we're probably reading the same expression over and over.
5438 return current_loop_level > expr->emitted_loop_level;
5439 }
5440
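// Central result emitter: either forwards rhs as an inline expression to be
// substituted into later uses, or flushes it to a temporary, producing a
// statement like "vec4 _25 = rhs;" (illustrative).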
5441 SPIRExpression &CompilerGLSL::emit_op(uint32_t result_type, uint32_t result_id, const string &rhs, bool forwarding,
5442 bool suppress_usage_tracking)
5443 {
5444 if (forwarding && (forced_temporaries.find(result_id) == end(forced_temporaries)))
5445 {
5446 // Just forward it without temporary.
5447 // If the forward is trivial, we do not force flushing to temporary for this expression.
5448 forwarded_temporaries.insert(result_id);
5449 if (suppress_usage_tracking)
5450 suppressed_usage_tracking.insert(result_id);
5451
5452 return set<SPIRExpression>(result_id, rhs, result_type, true);
5453 }
5454 else
5455 {
5456 // If expression isn't immutable, bind it to a temporary and make the new temporary immutable (they always are).
5457 statement(declare_temporary(result_type, result_id), rhs, ";");
5458 return set<SPIRExpression>(result_id, to_name(result_id), result_type, true);
5459 }
5460 }
5461
5462 void CompilerGLSL::emit_unary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op)
5463 {
5464 bool forward = should_forward(op0);
5465 emit_op(result_type, result_id, join(op, to_enclosed_unpacked_expression(op0)), forward);
5466 inherit_expression_dependencies(result_id, op0);
5467 }
5468
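// E.g. OpIAdd with forwardable operands yields the inline expression a + b;
// operands are enclosed in parentheses when precedence demands it
// (illustrative).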
5469 void CompilerGLSL::emit_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op)
5470 {
5471 bool forward = should_forward(op0) && should_forward(op1);
5472 emit_op(result_type, result_id,
5473 join(to_enclosed_unpacked_expression(op0), " ", op, " ", to_enclosed_unpacked_expression(op1)), forward);
5474
5475 inherit_expression_dependencies(result_id, op0);
5476 inherit_expression_dependencies(result_id, op1);
5477 }
5478
5479 void CompilerGLSL::emit_unrolled_unary_op(uint32_t result_type, uint32_t result_id, uint32_t operand, const char *op)
5480 {
5481 auto &type = get<SPIRType>(result_type);
5482 auto expr = type_to_glsl_constructor(type);
5483 expr += '(';
5484 for (uint32_t i = 0; i < type.vecsize; i++)
5485 {
5486 // Make sure to call to_expression multiple times to ensure
5487 // that these expressions are properly flushed to temporaries if needed.
5488 expr += op;
5489 expr += to_extract_component_expression(operand, i);
5490
5491 if (i + 1 < type.vecsize)
5492 expr += ", ";
5493 }
5494 expr += ')';
5495 emit_op(result_type, result_id, expr, should_forward(operand));
5496
5497 inherit_expression_dependencies(result_id, operand);
5498 }
5499
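// Used when the target lacks a vector form of an operator (e.g. && on bvec2);
// the operation is expanded component-wise, producing something like
// bvec2(a.x && b.x, a.y && b.y) (illustrative).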
5500 void CompilerGLSL::emit_unrolled_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
5501 const char *op, bool negate, SPIRType::BaseType expected_type)
5502 {
5503 auto &type0 = expression_type(op0);
5504 auto &type1 = expression_type(op1);
5505
5506 SPIRType target_type0 = type0;
5507 SPIRType target_type1 = type1;
5508 target_type0.basetype = expected_type;
5509 target_type1.basetype = expected_type;
5510 target_type0.vecsize = 1;
5511 target_type1.vecsize = 1;
5512
5513 auto &type = get<SPIRType>(result_type);
5514 auto expr = type_to_glsl_constructor(type);
5515 expr += '(';
5516 for (uint32_t i = 0; i < type.vecsize; i++)
5517 {
5518 // Make sure to call to_expression multiple times to ensure
5519 // that these expressions are properly flushed to temporaries if needed.
5520 if (negate)
5521 expr += "!(";
5522
5523 if (expected_type != SPIRType::Unknown && type0.basetype != expected_type)
5524 expr += bitcast_expression(target_type0, type0.basetype, to_extract_component_expression(op0, i));
5525 else
5526 expr += to_extract_component_expression(op0, i);
5527
5528 expr += ' ';
5529 expr += op;
5530 expr += ' ';
5531
5532 if (expected_type != SPIRType::Unknown && type1.basetype != expected_type)
5533 expr += bitcast_expression(target_type1, type1.basetype, to_extract_component_expression(op1, i));
5534 else
5535 expr += to_extract_component_expression(op1, i);
5536
5537 if (negate)
5538 expr += ")";
5539
5540 if (i + 1 < type.vecsize)
5541 expr += ", ";
5542 }
5543 expr += ')';
5544 emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1));
5545
5546 inherit_expression_dependencies(result_id, op0);
5547 inherit_expression_dependencies(result_id, op1);
5548 }
5549
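// Prepares operands for sign-sensitive binary ops. E.g. a signed comparison
// on uint operands produces cast operands of the form int(a) and int(b)
// (illustrative), which the caller then combines with the operator.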
5550 SPIRType CompilerGLSL::binary_op_bitcast_helper(string &cast_op0, string &cast_op1, SPIRType::BaseType &input_type,
5551 uint32_t op0, uint32_t op1, bool skip_cast_if_equal_type)
5552 {
5553 auto &type0 = expression_type(op0);
5554 auto &type1 = expression_type(op1);
5555
5556 // We have to bitcast if our inputs are of different types, or if our types do not match the expected inputs.
5557 // For some functions like OpIEqual and OpINotEqual, we don't care if inputs are of different types than expected
5558 // since the equality test is exactly the same.
5559 bool cast = (type0.basetype != type1.basetype) || (!skip_cast_if_equal_type && type0.basetype != input_type);
5560
5561 // Create a fake type so we can bitcast to it.
5562 // We only deal with regular arithmetic types here, like ints, uints and so on.
5563 SPIRType expected_type;
5564 expected_type.basetype = input_type;
5565 expected_type.vecsize = type0.vecsize;
5566 expected_type.columns = type0.columns;
5567 expected_type.width = type0.width;
5568
5569 if (cast)
5570 {
5571 cast_op0 = bitcast_glsl(expected_type, op0);
5572 cast_op1 = bitcast_glsl(expected_type, op1);
5573 }
5574 else
5575 {
5576 // If we don't cast, our actual input type is that of the first (or second) argument.
5577 cast_op0 = to_enclosed_unpacked_expression(op0);
5578 cast_op1 = to_enclosed_unpacked_expression(op1);
5579 input_type = type0.basetype;
5580 }
5581
5582 return expected_type;
5583 }
5584
5585 bool CompilerGLSL::emit_complex_bitcast(uint32_t result_type, uint32_t id, uint32_t op0)
5586 {
5587 // Some bitcasts may require complex casting sequences, and are implemented here.
5588 // Otherwise a simple unary function via bitcast_glsl_op will do.
5589
5590 auto &output_type = get<SPIRType>(result_type);
5591 auto &input_type = expression_type(op0);
5592 string expr;
5593
5594 if (output_type.basetype == SPIRType::Half && input_type.basetype == SPIRType::Float && input_type.vecsize == 1)
5595 expr = join("unpackFloat2x16(floatBitsToUint(", to_unpacked_expression(op0), "))");
5596 else if (output_type.basetype == SPIRType::Float && input_type.basetype == SPIRType::Half &&
5597 input_type.vecsize == 2)
5598 expr = join("uintBitsToFloat(packFloat2x16(", to_unpacked_expression(op0), "))");
5599 else
5600 return false;
5601
5602 emit_op(result_type, id, expr, should_forward(op0));
5603 return true;
5604 }
5605
5606 void CompilerGLSL::emit_binary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
5607 const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type)
5608 {
5609 string cast_op0, cast_op1;
5610 auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type);
5611 auto &out_type = get<SPIRType>(result_type);
5612
5613 // We might have casted away from the result type, so bitcast again.
5614 // For example, arithmetic right shift with uint inputs.
5615 // Special case boolean outputs since relational opcodes output booleans instead of int/uint.
5616 string expr;
5617 if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean)
5618 {
5619 expected_type.basetype = input_type;
5620 expr = bitcast_glsl_op(out_type, expected_type);
5621 expr += '(';
5622 expr += join(cast_op0, " ", op, " ", cast_op1);
5623 expr += ')';
5624 }
5625 else
5626 expr += join(cast_op0, " ", op, " ", cast_op1);
5627
5628 emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1));
5629 inherit_expression_dependencies(result_id, op0);
5630 inherit_expression_dependencies(result_id, op1);
5631 }
5632
5633 void CompilerGLSL::emit_unary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op)
5634 {
5635 bool forward = should_forward(op0);
5636 emit_op(result_type, result_id, join(op, "(", to_unpacked_expression(op0), ")"), forward);
5637 inherit_expression_dependencies(result_id, op0);
5638 }
5639
5640 void CompilerGLSL::emit_binary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
5641 const char *op)
5642 {
5643 bool forward = should_forward(op0) && should_forward(op1);
5644 emit_op(result_type, result_id, join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ")"),
5645 forward);
5646 inherit_expression_dependencies(result_id, op0);
5647 inherit_expression_dependencies(result_id, op1);
5648 }
5649
5650 void CompilerGLSL::emit_unary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op,
5651 SPIRType::BaseType input_type, SPIRType::BaseType expected_result_type)
5652 {
5653 auto &out_type = get<SPIRType>(result_type);
5654 auto &expr_type = expression_type(op0);
5655 auto expected_type = out_type;
5656
5657 // Bit-widths might be different in unary cases because we use it for SConvert/UConvert and friends.
5658 expected_type.basetype = input_type;
5659 expected_type.width = expr_type.width;
5660 string cast_op = expr_type.basetype != input_type ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0);
5661
5662 string expr;
5663 if (out_type.basetype != expected_result_type)
5664 {
5665 expected_type.basetype = expected_result_type;
5666 expected_type.width = out_type.width;
5667 expr = bitcast_glsl_op(out_type, expected_type);
5668 expr += '(';
5669 expr += join(op, "(", cast_op, ")");
5670 expr += ')';
5671 }
5672 else
5673 {
5674 expr += join(op, "(", cast_op, ")");
5675 }
5676
5677 emit_op(result_type, result_id, expr, should_forward(op0));
5678 inherit_expression_dependencies(result_id, op0);
5679 }
5680
5681 // Very special case. Handling bitfieldExtract requires us to deal with different bitcasts of different signs
5682 // and different vector sizes all at once. Need a special purpose method here.
5683 void CompilerGLSL::emit_trinary_func_op_bitextract(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
5684 uint32_t op2, const char *op,
5685 SPIRType::BaseType expected_result_type,
5686 SPIRType::BaseType input_type0, SPIRType::BaseType input_type1,
5687 SPIRType::BaseType input_type2)
5688 {
5689 auto &out_type = get<SPIRType>(result_type);
5690 auto expected_type = out_type;
5691 expected_type.basetype = input_type0;
5692
5693 string cast_op0 =
5694 expression_type(op0).basetype != input_type0 ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0);
5695
5696 auto op1_expr = to_unpacked_expression(op1);
5697 auto op2_expr = to_unpacked_expression(op2);
5698
5699 // Use value casts here instead. Input must be exactly int or uint, but SPIR-V might be 16-bit.
5700 expected_type.basetype = input_type1;
5701 expected_type.vecsize = 1;
5702 string cast_op1 = expression_type(op1).basetype != input_type1 ?
5703 join(type_to_glsl_constructor(expected_type), "(", op1_expr, ")") :
5704 op1_expr;
5705
5706 expected_type.basetype = input_type2;
5707 expected_type.vecsize = 1;
5708 string cast_op2 = expression_type(op2).basetype != input_type2 ?
5709 join(type_to_glsl_constructor(expected_type), "(", op2_expr, ")") :
5710 op2_expr;
5711
5712 string expr;
5713 if (out_type.basetype != expected_result_type)
5714 {
5715 expected_type.vecsize = out_type.vecsize;
5716 expected_type.basetype = expected_result_type;
5717 expr = bitcast_glsl_op(out_type, expected_type);
5718 expr += '(';
5719 expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")");
5720 expr += ')';
5721 }
5722 else
5723 {
5724 expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")");
5725 }
5726
5727 emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1) && should_forward(op2));
5728 inherit_expression_dependencies(result_id, op0);
5729 inherit_expression_dependencies(result_id, op1);
5730 inherit_expression_dependencies(result_id, op2);
5731 }
5732
5733 void CompilerGLSL::emit_trinary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
5734 uint32_t op2, const char *op, SPIRType::BaseType input_type)
5735 {
5736 auto &out_type = get<SPIRType>(result_type);
5737 auto expected_type = out_type;
5738 expected_type.basetype = input_type;
5739 string cast_op0 =
5740 expression_type(op0).basetype != input_type ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0);
5741 string cast_op1 =
5742 expression_type(op1).basetype != input_type ? bitcast_glsl(expected_type, op1) : to_unpacked_expression(op1);
5743 string cast_op2 =
5744 expression_type(op2).basetype != input_type ? bitcast_glsl(expected_type, op2) : to_unpacked_expression(op2);
5745
5746 string expr;
5747 if (out_type.basetype != input_type)
5748 {
5749 expr = bitcast_glsl_op(out_type, expected_type);
5750 expr += '(';
5751 expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")");
5752 expr += ')';
5753 }
5754 else
5755 {
5756 expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")");
5757 }
5758
5759 emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1) && should_forward(op2));
5760 inherit_expression_dependencies(result_id, op0);
5761 inherit_expression_dependencies(result_id, op1);
5762 inherit_expression_dependencies(result_id, op2);
5763 }
5764
5765 void CompilerGLSL::emit_binary_func_op_cast_clustered(uint32_t result_type, uint32_t result_id, uint32_t op0,
5766 uint32_t op1, const char *op, SPIRType::BaseType input_type)
5767 {
5768 // Special purpose method for implementing clustered subgroup opcodes.
5769 // The main difference is that op1 does not participate in any casting; it needs to be a literal.
5770 auto &out_type = get<SPIRType>(result_type);
5771 auto expected_type = out_type;
5772 expected_type.basetype = input_type;
5773 string cast_op0 =
5774 expression_type(op0).basetype != input_type ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0);
5775
5776 string expr;
5777 if (out_type.basetype != input_type)
5778 {
5779 expr = bitcast_glsl_op(out_type, expected_type);
5780 expr += '(';
5781 expr += join(op, "(", cast_op0, ", ", to_expression(op1), ")");
5782 expr += ')';
5783 }
5784 else
5785 {
5786 expr += join(op, "(", cast_op0, ", ", to_expression(op1), ")");
5787 }
5788
5789 emit_op(result_type, result_id, expr, should_forward(op0));
5790 inherit_expression_dependencies(result_id, op0);
5791 }
5792
5793 void CompilerGLSL::emit_binary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
5794 const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type)
5795 {
5796 string cast_op0, cast_op1;
5797 auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type);
5798 auto &out_type = get<SPIRType>(result_type);
5799
5800 // Special case boolean outputs since relational opcodes output booleans instead of int/uint.
5801 string expr;
5802 if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean)
5803 {
5804 expected_type.basetype = input_type;
5805 expr = bitcast_glsl_op(out_type, expected_type);
5806 expr += '(';
5807 expr += join(op, "(", cast_op0, ", ", cast_op1, ")");
5808 expr += ')';
5809 }
5810 else
5811 {
5812 expr += join(op, "(", cast_op0, ", ", cast_op1, ")");
5813 }
5814
5815 emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1));
5816 inherit_expression_dependencies(result_id, op0);
5817 inherit_expression_dependencies(result_id, op1);
5818 }
5819
5820 void CompilerGLSL::emit_trinary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
5821 uint32_t op2, const char *op)
5822 {
5823 bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2);
5824 emit_op(result_type, result_id,
5825 join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ", ",
5826 to_unpacked_expression(op2), ")"),
5827 forward);
5828
5829 inherit_expression_dependencies(result_id, op0);
5830 inherit_expression_dependencies(result_id, op1);
5831 inherit_expression_dependencies(result_id, op2);
5832 }
5833
5834 void CompilerGLSL::emit_quaternary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
5835 uint32_t op2, uint32_t op3, const char *op)
5836 {
5837 bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2) && should_forward(op3);
5838 emit_op(result_type, result_id,
5839 join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ", ",
5840 to_unpacked_expression(op2), ", ", to_unpacked_expression(op3), ")"),
5841 forward);
5842
5843 inherit_expression_dependencies(result_id, op0);
5844 inherit_expression_dependencies(result_id, op1);
5845 inherit_expression_dependencies(result_id, op2);
5846 inherit_expression_dependencies(result_id, op3);
5847 }
5848
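// E.g. with 16-bit offset/count operands the emitted call becomes
// bitfieldInsert(base, insert, int(offset), int(count)), since GLSL requires
// int for those arguments (illustrative).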
5849 void CompilerGLSL::emit_bitfield_insert_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
5850 uint32_t op2, uint32_t op3, const char *op,
5851 SPIRType::BaseType offset_count_type)
5852 {
5853 // Only need to cast the offset/count arguments. Types of base/insert must be the same as the result type,
5854 // and bitfieldInsert is sign invariant.
5855 bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2) && should_forward(op3);
5856
5857 auto op0_expr = to_unpacked_expression(op0);
5858 auto op1_expr = to_unpacked_expression(op1);
5859 auto op2_expr = to_unpacked_expression(op2);
5860 auto op3_expr = to_unpacked_expression(op3);
5861
5862 SPIRType target_type;
5863 target_type.vecsize = 1;
5864 target_type.basetype = offset_count_type;
5865
5866 if (expression_type(op2).basetype != offset_count_type)
5867 {
5868 // Value-cast here. Input might be 16-bit. GLSL requires int.
5869 op2_expr = join(type_to_glsl_constructor(target_type), "(", op2_expr, ")");
5870 }
5871
5872 if (expression_type(op3).basetype != offset_count_type)
5873 {
5874 // Value-cast here. Input might be 16-bit. GLSL requires int.
5875 op3_expr = join(type_to_glsl_constructor(target_type), "(", op3_expr, ")");
5876 }
5877
5878 emit_op(result_type, result_id, join(op, "(", op0_expr, ", ", op1_expr, ", ", op2_expr, ", ", op3_expr, ")"),
5879 forward);
5880
5881 inherit_expression_dependencies(result_id, op0);
5882 inherit_expression_dependencies(result_id, op1);
5883 inherit_expression_dependencies(result_id, op2);
5884 inherit_expression_dependencies(result_id, op3);
5885 }
5886
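// Maps modern texture function names onto legacy ones. Illustrative
// examples: "texture" on a sampler2D becomes texture2D, "textureLod" in a
// legacy ES fragment shader becomes texture2DLodEXT, and depth lookups use
// the shadow2D* variants.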
5887 string CompilerGLSL::legacy_tex_op(const std::string &op, const SPIRType &imgtype, uint32_t tex)
5888 {
5889 const char *type;
5890 switch (imgtype.image.dim)
5891 {
5892 case spv::Dim1D:
5893 type = (imgtype.image.arrayed && !options.es) ? "1DArray" : "1D";
5894 break;
5895 case spv::Dim2D:
5896 type = (imgtype.image.arrayed && !options.es) ? "2DArray" : "2D";
5897 break;
5898 case spv::Dim3D:
5899 type = "3D";
5900 break;
5901 case spv::DimCube:
5902 type = "Cube";
5903 break;
5904 case spv::DimRect:
5905 type = "2DRect";
5906 break;
5907 case spv::DimBuffer:
5908 type = "Buffer";
5909 break;
5910 case spv::DimSubpassData:
5911 type = "2D";
5912 break;
5913 default:
5914 type = "";
5915 break;
5916 }
5917
5918 // In legacy GLSL, an extension is required for textureLod in the fragment
5919 // shader or textureGrad anywhere.
5920 bool legacy_lod_ext = false;
5921 auto &execution = get_entry_point();
5922 if (op == "textureGrad" || op == "textureProjGrad" ||
5923 ((op == "textureLod" || op == "textureProjLod") && execution.model != ExecutionModelVertex))
5924 {
5925 if (is_legacy_es())
5926 {
5927 legacy_lod_ext = true;
5928 require_extension_internal("GL_EXT_shader_texture_lod");
5929 }
5930 else if (is_legacy_desktop())
5931 require_extension_internal("GL_ARB_shader_texture_lod");
5932 }
5933
5934 if (op == "textureLodOffset" || op == "textureProjLodOffset")
5935 {
5936 if (is_legacy_es())
5937 SPIRV_CROSS_THROW(join(op, " not allowed in legacy ES"));
5938
5939 require_extension_internal("GL_EXT_gpu_shader4");
5940 }
5941
5942 // Legacy GLES has very limited support for shadow samplers.
5943 // Basically, only shadow2D and shadow2DProj work through GL_EXT_shadow_samplers;
5944 // everything else has to throw.
5945 bool is_comparison = image_is_comparison(imgtype, tex);
5946 if (is_comparison && is_legacy_es())
5947 {
5948 if (op == "texture" || op == "textureProj")
5949 require_extension_internal("GL_EXT_shadow_samplers");
5950 else
5951 SPIRV_CROSS_THROW(join(op, " not allowed on depth samplers in legacy ES"));
5952 }
5953
5954 if (op == "textureSize")
5955 {
5956 if (is_legacy_es())
5957 SPIRV_CROSS_THROW("textureSize not supported in legacy ES");
5958 if (is_comparison)
5959 SPIRV_CROSS_THROW("textureSize not supported on shadow sampler in legacy GLSL");
5960 require_extension_internal("GL_EXT_gpu_shader4");
5961 }
5962
5963 if (op == "texelFetch" && is_legacy_es())
5964 SPIRV_CROSS_THROW("texelFetch not supported in legacy ES");
5965
5966 bool is_es_and_depth = is_legacy_es() && is_comparison;
5967 std::string type_prefix = is_comparison ? "shadow" : "texture";
5968
5969 if (op == "texture")
5970 return is_es_and_depth ? join(type_prefix, type, "EXT") : join(type_prefix, type);
5971 else if (op == "textureLod")
5972 return join(type_prefix, type, legacy_lod_ext ? "LodEXT" : "Lod");
5973 else if (op == "textureProj")
5974 return join(type_prefix, type, is_es_and_depth ? "ProjEXT" : "Proj");
5975 else if (op == "textureGrad")
5976 return join(type_prefix, type, is_legacy_es() ? "GradEXT" : is_legacy_desktop() ? "GradARB" : "Grad");
5977 else if (op == "textureProjLod")
5978 return join(type_prefix, type, legacy_lod_ext ? "ProjLodEXT" : "ProjLod");
5979 else if (op == "textureLodOffset")
5980 return join(type_prefix, type, "LodOffset");
5981 else if (op == "textureProjGrad")
5982 return join(type_prefix, type,
5983 is_legacy_es() ? "ProjGradEXT" : is_legacy_desktop() ? "ProjGradARB" : "ProjGrad");
5984 else if (op == "textureProjLodOffset")
5985 return join(type_prefix, type, "ProjLodOffset");
5986 else if (op == "textureSize")
5987 return join("textureSize", type);
5988 else if (op == "texelFetch")
5989 return join("texelFetch", type);
5990 else
5991 {
5992 SPIRV_CROSS_THROW(join("Unsupported legacy texture op: ", op));
5993 }
5994 }
5995
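// Detects selects of the form cond ? 1 : 0 so they can be emitted as a plain
// constructor cast, e.g. int(cond), instead of a mix() call or ternary
// (illustrative).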
5996 bool CompilerGLSL::to_trivial_mix_op(const SPIRType &type, string &op, uint32_t left, uint32_t right, uint32_t lerp)
5997 {
5998 auto *cleft = maybe_get<SPIRConstant>(left);
5999 auto *cright = maybe_get<SPIRConstant>(right);
6000 auto &lerptype = expression_type(lerp);
6001
6002 // If our targets aren't constants, we cannot use construction.
6003 if (!cleft || !cright)
6004 return false;
6005
6006 // If our targets are spec constants, we cannot use construction.
6007 if (cleft->specialization || cright->specialization)
6008 return false;
6009
6010 // We can only use trivial construction if we have a scalar
6011 // (should be possible to do it for vectors as well, but that is overkill for now).
6012 if (lerptype.basetype != SPIRType::Boolean || lerptype.vecsize > 1)
6013 return false;
6014
6015 // If our bool selects between 0 and 1, we can emit a cast from bool instead, which makes a trivial constructor possible.
6016 bool ret = false;
6017 switch (type.basetype)
6018 {
6019 case SPIRType::Short:
6020 case SPIRType::UShort:
6021 ret = cleft->scalar_u16() == 0 && cright->scalar_u16() == 1;
6022 break;
6023
6024 case SPIRType::Int:
6025 case SPIRType::UInt:
6026 ret = cleft->scalar() == 0 && cright->scalar() == 1;
6027 break;
6028
6029 case SPIRType::Half:
6030 ret = cleft->scalar_f16() == 0.0f && cright->scalar_f16() == 1.0f;
6031 break;
6032
6033 case SPIRType::Float:
6034 ret = cleft->scalar_f32() == 0.0f && cright->scalar_f32() == 1.0f;
6035 break;
6036
6037 case SPIRType::Double:
6038 ret = cleft->scalar_f64() == 0.0 && cright->scalar_f64() == 1.0;
6039 break;
6040
6041 case SPIRType::Int64:
6042 case SPIRType::UInt64:
6043 ret = cleft->scalar_u64() == 0 && cright->scalar_u64() == 1;
6044 break;
6045
6046 default:
6047 break;
6048 }
6049
6050 if (ret)
6051 op = type_to_glsl_constructor(type);
6052 return ret;
6053 }
6054
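// Scalar selects emit cond ? a : b directly; vector selects are expanded per
// component, e.g. vec2(s.x ? a.x : b.x, s.y ? a.y : b.y) (illustrative).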
6055 string CompilerGLSL::to_ternary_expression(const SPIRType &restype, uint32_t select, uint32_t true_value,
6056 uint32_t false_value)
6057 {
6058 string expr;
6059 auto &lerptype = expression_type(select);
6060
6061 if (lerptype.vecsize == 1)
6062 expr = join(to_enclosed_expression(select), " ? ", to_enclosed_pointer_expression(true_value), " : ",
6063 to_enclosed_pointer_expression(false_value));
6064 else
6065 {
6066 auto swiz = [this](uint32_t expression, uint32_t i) { return to_extract_component_expression(expression, i); };
6067
6068 expr = type_to_glsl_constructor(restype);
6069 expr += "(";
6070 for (uint32_t i = 0; i < restype.vecsize; i++)
6071 {
6072 expr += swiz(select, i);
6073 expr += " ? ";
6074 expr += swiz(true_value, i);
6075 expr += " : ";
6076 expr += swiz(false_value, i);
6077 if (i + 1 < restype.vecsize)
6078 expr += ", ";
6079 }
6080 expr += ")";
6081 }
6082
6083 return expr;
6084 }
6085
6086 void CompilerGLSL::emit_mix_op(uint32_t result_type, uint32_t id, uint32_t left, uint32_t right, uint32_t lerp)
6087 {
6088 auto &lerptype = expression_type(lerp);
6089 auto &restype = get<SPIRType>(result_type);
6090
6091 // If this results in a variable pointer, assume it may be written through.
6092 if (restype.pointer)
6093 {
6094 register_write(left);
6095 register_write(right);
6096 }
6097
6098 string mix_op;
6099 bool has_boolean_mix = *backend.boolean_mix_function &&
6100 ((options.es && options.version >= 310) || (!options.es && options.version >= 450));
6101 bool trivial_mix = to_trivial_mix_op(restype, mix_op, left, right, lerp);
6102
6103 // Cannot use boolean mix when the lerp argument is just a single boolean;
6104 // fall back to a regular ternary expression.
6105 if (lerptype.vecsize == 1)
6106 has_boolean_mix = false;
6107
6108 // If we can reduce the mix to a simple cast, do so.
6109 // This helps for cases like int(bool), uint(bool) which is implemented with
6110 // OpSelect bool 1 0.
6111 if (trivial_mix)
6112 {
6113 emit_unary_func_op(result_type, id, lerp, mix_op.c_str());
6114 }
6115 else if (!has_boolean_mix && lerptype.basetype == SPIRType::Boolean)
6116 {
6117 // Boolean mix not supported on desktop without extension.
6118 // Was added in OpenGL 4.5 with ES 3.1 compat.
6119 //
6120 // Could use GL_EXT_shader_integer_mix on desktop at least,
6121 // but Apple doesn't support it. :(
6122 // Just implement it as ternary expressions.
6123 auto expr = to_ternary_expression(get<SPIRType>(result_type), lerp, right, left);
6124 emit_op(result_type, id, expr, should_forward(left) && should_forward(right) && should_forward(lerp));
6125 inherit_expression_dependencies(id, left);
6126 inherit_expression_dependencies(id, right);
6127 inherit_expression_dependencies(id, lerp);
6128 }
6129 else if (lerptype.basetype == SPIRType::Boolean)
6130 emit_trinary_func_op(result_type, id, left, right, lerp, backend.boolean_mix_function);
6131 else
6132 emit_trinary_func_op(result_type, id, left, right, lerp, "mix");
6133 }
6134
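// Resolves a (texture, sampler) pair to its remapped combined sampler2D.
// E.g. a pair (uTex, uSamp) typically resolves to a combined uniform named
// along the lines of SPIRV_Cross_CombineduTexuSamp (hypothetical naming
// chosen by build_combined_image_samplers callers).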
6135 string CompilerGLSL::to_combined_image_sampler(VariableID image_id, VariableID samp_id)
6136 {
6137 // Keep track of the array indices we have used to load the image.
6138 // We'll need to use the same array index into the combined image sampler array.
6139 auto image_expr = to_expression(image_id);
6140 string array_expr;
6141 auto array_index = image_expr.find_first_of('[');
6142 if (array_index != string::npos)
6143 array_expr = image_expr.substr(array_index, string::npos);
6144
6145 auto &args = current_function->arguments;
6146
6147 // For GLSL and ESSL targets, we must enumerate all possible combinations of sampler2D(texture2D, sampler)
6148 // and redirect each combination to a new sampler2D uniform.
6149 auto *image = maybe_get_backing_variable(image_id);
6150 auto *samp = maybe_get_backing_variable(samp_id);
6151 if (image)
6152 image_id = image->self;
6153 if (samp)
6154 samp_id = samp->self;
6155
6156 auto image_itr = find_if(begin(args), end(args),
6157 [image_id](const SPIRFunction::Parameter &param) { return image_id == param.id; });
6158
6159 auto sampler_itr = find_if(begin(args), end(args),
6160 [samp_id](const SPIRFunction::Parameter &param) { return samp_id == param.id; });
6161
6162 if (image_itr != end(args) || sampler_itr != end(args))
6163 {
6164 // If the image or sampler originates from a function parameter, we will find it in our argument list.
6165 bool global_image = image_itr == end(args);
6166 bool global_sampler = sampler_itr == end(args);
6167 VariableID iid = global_image ? image_id : VariableID(uint32_t(image_itr - begin(args)));
6168 VariableID sid = global_sampler ? samp_id : VariableID(uint32_t(sampler_itr - begin(args)));
6169
6170 auto &combined = current_function->combined_parameters;
6171 auto itr = find_if(begin(combined), end(combined), [=](const SPIRFunction::CombinedImageSamplerParameter &p) {
6172 return p.global_image == global_image && p.global_sampler == global_sampler && p.image_id == iid &&
6173 p.sampler_id == sid;
6174 });
6175
6176 if (itr != end(combined))
6177 return to_expression(itr->id) + array_expr;
6178 else
6179 {
6180 SPIRV_CROSS_THROW("Cannot find mapping for combined sampler parameter, was "
6181 "build_combined_image_samplers() used "
6182 "before compile() was called?");
6183 }
6184 }
6185 else
6186 {
6187 // For global sampler2D, look directly at the global remapping table.
6188 auto &mapping = combined_image_samplers;
6189 auto itr = find_if(begin(mapping), end(mapping), [image_id, samp_id](const CombinedImageSampler &combined) {
6190 return combined.image_id == image_id && combined.sampler_id == samp_id;
6191 });
6192
6193 if (itr != end(combined_image_samplers))
6194 return to_expression(itr->combined_id) + array_expr;
6195 else
6196 {
6197 SPIRV_CROSS_THROW("Cannot find mapping for combined sampler, was build_combined_image_samplers() used "
6198 "before compile() was called?");
6199 }
6200 }
6201 }
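// Example (illustrative): given a global texture2D uTex and sampler uSamp that
// were remapped via build_combined_image_samplers(), this returns the name the
// API user assigned to the combined uniform (e.g. "SPIRV_Cross_CombineduTexuSamp"
// if they followed the suggested naming scheme), with any array subscript taken
// from the image expression appended.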
6202
6203 bool CompilerGLSL::is_supported_subgroup_op_in_opengl(spv::Op op)
6204 {
6205 switch (op)
6206 {
6207 case OpGroupNonUniformElect:
6208 case OpGroupNonUniformBallot:
6209 case OpGroupNonUniformBallotFindLSB:
6210 case OpGroupNonUniformBallotFindMSB:
6211 case OpGroupNonUniformBroadcast:
6212 case OpGroupNonUniformBroadcastFirst:
6213 case OpGroupNonUniformAll:
6214 case OpGroupNonUniformAny:
6215 case OpGroupNonUniformAllEqual:
6216 case OpControlBarrier:
6217 case OpMemoryBarrier:
6218 case OpGroupNonUniformBallotBitCount:
6219 case OpGroupNonUniformBallotBitExtract:
6220 case OpGroupNonUniformInverseBallot:
6221 return true;
6222 default:
6223 return false;
6224 }
6225 }
6226
6227 void CompilerGLSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id)
6228 {
6229 if (options.vulkan_semantics && combined_image_samplers.empty())
6230 {
6231 emit_binary_func_op(result_type, result_id, image_id, samp_id,
6232 type_to_glsl(get<SPIRType>(result_type), result_id).c_str());
6233 }
6234 else
6235 {
6236 // Make sure to suppress usage tracking. It is illegal to create temporaries of opaque types.
6237 emit_op(result_type, result_id, to_combined_image_sampler(image_id, samp_id), true, true);
6238 }
6239
6240 // Make sure to suppress usage tracking and any expression invalidation.
6241 // It is illegal to create temporaries of opaque types.
6242 forwarded_temporaries.erase(result_id);
6243 }
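// Example: with Vulkan semantics and no remapping, OpSampledImage becomes a
// constructor expression such as "sampler2D(uTex, uSamp)"; otherwise the
// expression for the pre-built combined image sampler is used directly.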
6244
6245 static inline bool image_opcode_is_sample_no_dref(Op op)
6246 {
6247 switch (op)
6248 {
6249 case OpImageSampleExplicitLod:
6250 case OpImageSampleImplicitLod:
6251 case OpImageSampleProjExplicitLod:
6252 case OpImageSampleProjImplicitLod:
6253 case OpImageFetch:
6254 case OpImageRead:
6255 case OpImageSparseSampleExplicitLod:
6256 case OpImageSparseSampleImplicitLod:
6257 case OpImageSparseSampleProjExplicitLod:
6258 case OpImageSparseSampleProjImplicitLod:
6259 case OpImageSparseFetch:
6260 case OpImageSparseRead:
6261 return true;
6262
6263 default:
6264 return false;
6265 }
6266 }
6267
6268 void CompilerGLSL::emit_sparse_feedback_temporaries(uint32_t result_type_id, uint32_t id, uint32_t &feedback_id,
6269 uint32_t &texel_id)
6270 {
6271 // Need to allocate two temporaries.
6272 if (options.es)
6273 SPIRV_CROSS_THROW("Sparse texture feedback is not supported on ESSL.");
6274 require_extension_internal("GL_ARB_sparse_texture2");
6275
6276 auto &temps = extra_sub_expressions[id];
6277 if (temps == 0)
6278 temps = ir.increase_bound_by(2);
6279
6280 feedback_id = temps + 0;
6281 texel_id = temps + 1;
6282
6283 auto &return_type = get<SPIRType>(result_type_id);
6284 if (return_type.basetype != SPIRType::Struct || return_type.member_types.size() != 2)
6285 SPIRV_CROSS_THROW("Invalid return type for sparse feedback.");
6286 emit_uninitialized_temporary(return_type.member_types[0], feedback_id);
6287 emit_uninitialized_temporary(return_type.member_types[1], texel_id);
6288 }
6289
6290 uint32_t CompilerGLSL::get_sparse_feedback_texel_id(uint32_t id) const
6291 {
6292 auto itr = extra_sub_expressions.find(id);
6293 if (itr == extra_sub_expressions.end())
6294 return 0;
6295 else
6296 return itr->second + 1;
6297 }
6298
6299 void CompilerGLSL::emit_texture_op(const Instruction &i, bool sparse)
6300 {
6301 auto *ops = stream(i);
6302 auto op = static_cast<Op>(i.op);
6303
6304 SmallVector<uint32_t> inherited_expressions;
6305
6306 uint32_t result_type_id = ops[0];
6307 uint32_t id = ops[1];
6308 auto &return_type = get<SPIRType>(result_type_id);
6309
6310 uint32_t sparse_code_id = 0;
6311 uint32_t sparse_texel_id = 0;
6312 if (sparse)
6313 emit_sparse_feedback_temporaries(result_type_id, id, sparse_code_id, sparse_texel_id);
6314
6315 bool forward = false;
6316 string expr = to_texture_op(i, sparse, &forward, inherited_expressions);
6317
6318 if (sparse)
6319 {
6320 statement(to_expression(sparse_code_id), " = ", expr, ";");
6321 expr = join(type_to_glsl(return_type), "(", to_expression(sparse_code_id), ", ", to_expression(sparse_texel_id),
6322 ")");
6323 forward = true;
6324 inherited_expressions.clear();
6325 }
6326
6327 emit_op(result_type_id, id, expr, forward);
6328 for (auto &inherit : inherited_expressions)
6329 inherit_expression_dependencies(id, inherit);
6330
6331 // Do not register sparse ops as control dependent as they are always lowered to a temporary.
6332 switch (op)
6333 {
6334 case OpImageSampleDrefImplicitLod:
6335 case OpImageSampleImplicitLod:
6336 case OpImageSampleProjImplicitLod:
6337 case OpImageSampleProjDrefImplicitLod:
6338 register_control_dependent_expression(id);
6339 break;
6340
6341 default:
6342 break;
6343 }
6344 }
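// Illustrative lowering of a sparse sample (identifiers invented): the residency
// code and texel are emitted as separate temporaries, then packed back into the
// SPIR-V result struct:
//   int _31;                                // residency code (int or uint)
//   vec4 _32;                               // texel
//   _31 = sparseTextureARB(uTex, vUV, _32);
//   ResType _33 = ResType(_31, _32);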
6345
6346 std::string CompilerGLSL::to_texture_op(const Instruction &i, bool sparse, bool *forward,
6347 SmallVector<uint32_t> &inherited_expressions)
6348 {
6349 auto *ops = stream(i);
6350 auto op = static_cast<Op>(i.op);
6351 uint32_t length = i.length;
6352
6353 uint32_t result_type_id = ops[0];
6354 VariableID img = ops[2];
6355 uint32_t coord = ops[3];
6356 uint32_t dref = 0;
6357 uint32_t comp = 0;
6358 bool gather = false;
6359 bool proj = false;
6360 bool fetch = false;
6361 bool nonuniform_expression = false;
6362 const uint32_t *opt = nullptr;
6363
6364 auto &result_type = get<SPIRType>(result_type_id);
6365
6366 inherited_expressions.push_back(coord);
6367
6368 // Make sure non-uniform decoration is back-propagated to where it needs to be.
6369 if (has_decoration(img, DecorationNonUniformEXT))
6370 {
6371 // In Vulkan GLSL, we cannot back-propagate nonuniform qualifiers if we
6372 // use a combined image sampler constructor.
6373 // We're only interested in back-propagating if we can trace back through access chains.
6374 // If not, we will apply nonuniform to the sampled image expression itself.
6375 auto *backing = maybe_get_backing_variable(img);
6376 if (backing)
6377 propagate_nonuniform_qualifier(img);
6378 else
6379 nonuniform_expression = true;
6380 }
6381
6382 switch (op)
6383 {
6384 case OpImageSampleDrefImplicitLod:
6385 case OpImageSampleDrefExplicitLod:
6386 case OpImageSparseSampleDrefImplicitLod:
6387 case OpImageSparseSampleDrefExplicitLod:
6388 dref = ops[4];
6389 opt = &ops[5];
6390 length -= 5;
6391 break;
6392
6393 case OpImageSampleProjDrefImplicitLod:
6394 case OpImageSampleProjDrefExplicitLod:
6395 case OpImageSparseSampleProjDrefImplicitLod:
6396 case OpImageSparseSampleProjDrefExplicitLod:
6397 dref = ops[4];
6398 opt = &ops[5];
6399 length -= 5;
6400 proj = true;
6401 break;
6402
6403 case OpImageDrefGather:
6404 case OpImageSparseDrefGather:
6405 dref = ops[4];
6406 opt = &ops[5];
6407 length -= 5;
6408 gather = true;
6409 if (options.es && options.version < 310)
6410 SPIRV_CROSS_THROW("textureGather requires ESSL 310.");
6411 else if (!options.es && options.version < 400)
6412 SPIRV_CROSS_THROW("textureGather with depth compare requires GLSL 400.");
6413 break;
6414
6415 case OpImageGather:
6416 case OpImageSparseGather:
6417 comp = ops[4];
6418 opt = &ops[5];
6419 length -= 5;
6420 gather = true;
6421 if (options.es && options.version < 310)
6422 SPIRV_CROSS_THROW("textureGather requires ESSL 310.");
6423 else if (!options.es && options.version < 400)
6424 {
6425 if (!expression_is_constant_null(comp))
6426 SPIRV_CROSS_THROW("textureGather with component requires GLSL 400.");
6427 require_extension_internal("GL_ARB_texture_gather");
6428 }
6429 break;
6430
6431 case OpImageFetch:
6432 case OpImageSparseFetch:
6433 case OpImageRead: // Reads == fetches in Metal (other langs will not get here)
6434 opt = &ops[4];
6435 length -= 4;
6436 fetch = true;
6437 break;
6438
6439 case OpImageSampleProjImplicitLod:
6440 case OpImageSampleProjExplicitLod:
6441 case OpImageSparseSampleProjImplicitLod:
6442 case OpImageSparseSampleProjExplicitLod:
6443 opt = &ops[4];
6444 length -= 4;
6445 proj = true;
6446 break;
6447
6448 default:
6449 opt = &ops[4];
6450 length -= 4;
6451 break;
6452 }
6453
6454 // Bypass pointers because we need the real image struct
6455 auto &type = expression_type(img);
6456 auto &imgtype = get<SPIRType>(type.self);
6457
6458 uint32_t coord_components = 0;
6459 switch (imgtype.image.dim)
6460 {
6461 case spv::Dim1D:
6462 coord_components = 1;
6463 break;
6464 case spv::Dim2D:
6465 coord_components = 2;
6466 break;
6467 case spv::Dim3D:
6468 coord_components = 3;
6469 break;
6470 case spv::DimCube:
6471 coord_components = 3;
6472 break;
6473 case spv::DimBuffer:
6474 coord_components = 1;
6475 break;
6476 default:
6477 coord_components = 2;
6478 break;
6479 }
6480
6481 if (dref)
6482 inherited_expressions.push_back(dref);
6483
6484 if (proj)
6485 coord_components++;
6486 if (imgtype.image.arrayed)
6487 coord_components++;
6488
6489 uint32_t bias = 0;
6490 uint32_t lod = 0;
6491 uint32_t grad_x = 0;
6492 uint32_t grad_y = 0;
6493 uint32_t coffset = 0;
6494 uint32_t offset = 0;
6495 uint32_t coffsets = 0;
6496 uint32_t sample = 0;
6497 uint32_t minlod = 0;
6498 uint32_t flags = 0;
6499
6500 if (length)
6501 {
6502 flags = *opt++;
6503 length--;
6504 }
6505
6506 auto test = [&](uint32_t &v, uint32_t flag) {
6507 if (length && (flags & flag))
6508 {
6509 v = *opt++;
6510 inherited_expressions.push_back(v);
6511 length--;
6512 }
6513 };
6514
6515 test(bias, ImageOperandsBiasMask);
6516 test(lod, ImageOperandsLodMask);
6517 test(grad_x, ImageOperandsGradMask);
6518 test(grad_y, ImageOperandsGradMask);
6519 test(coffset, ImageOperandsConstOffsetMask);
6520 test(offset, ImageOperandsOffsetMask);
6521 test(coffsets, ImageOperandsConstOffsetsMask);
6522 test(sample, ImageOperandsSampleMask);
6523 test(minlod, ImageOperandsMinLodMask);
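	// Example: an image-operand mask of Bias | ConstOffset (0x1 | 0x8) consumes
	// the trailing operands in increasing mask-bit order, i.e. bias first and
	// then the constant offset, which is exactly the order of the test() calls above.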
6524
6525 TextureFunctionBaseArguments base_args = {};
6526 base_args.img = img;
6527 base_args.imgtype = &imgtype;
6528 base_args.is_fetch = fetch != 0;
6529 base_args.is_gather = gather != 0;
6530 base_args.is_proj = proj != 0;
6531
6532 string expr;
6533 TextureFunctionNameArguments name_args = {};
6534
6535 name_args.base = base_args;
6536 name_args.has_array_offsets = coffsets != 0;
6537 name_args.has_offset = coffset != 0 || offset != 0;
6538 name_args.has_grad = grad_x != 0 || grad_y != 0;
6539 name_args.has_dref = dref != 0;
6540 name_args.is_sparse_feedback = sparse;
6541 name_args.has_min_lod = minlod != 0;
6542 name_args.lod = lod;
6543 expr += to_function_name(name_args);
6544 expr += "(";
6545
6546 uint32_t sparse_texel_id = 0;
6547 if (sparse)
6548 sparse_texel_id = get_sparse_feedback_texel_id(ops[1]);
6549
6550 TextureFunctionArguments args = {};
6551 args.base = base_args;
6552 args.coord = coord;
6553 args.coord_components = coord_components;
6554 args.dref = dref;
6555 args.grad_x = grad_x;
6556 args.grad_y = grad_y;
6557 args.lod = lod;
6558 args.coffset = coffset;
6559 args.offset = offset;
6560 args.bias = bias;
6561 args.component = comp;
6562 args.sample = sample;
6563 args.sparse_texel = sparse_texel_id;
6564 args.min_lod = minlod;
6565 args.nonuniform_expression = nonuniform_expression;
6566 expr += to_function_args(args, forward);
6567 expr += ")";
6568
6569 // texture(samplerXShadow) returns float. shadowX() returns vec4. Swizzle here.
6570 if (is_legacy() && image_is_comparison(imgtype, img))
6571 expr += ".r";
6572
6573 // Sampling from a texture which was deduced to be a depth image might actually return just 1 component here.
6574 // Remap back to 4 components as sampling opcodes expect.
6575 if (backend.comparison_image_samples_scalar && image_opcode_is_sample_no_dref(op))
6576 {
6577 bool image_is_depth = false;
6578 const auto *combined = maybe_get<SPIRCombinedImageSampler>(img);
6579 VariableID image_id = combined ? combined->image : img;
6580
6581 if (combined && image_is_comparison(imgtype, combined->image))
6582 image_is_depth = true;
6583 else if (image_is_comparison(imgtype, img))
6584 image_is_depth = true;
6585
6586 // We must also check the backing variable for the image.
6587 // We might have loaded an OpImage, and used that handle for two different purposes.
6588 // Once with comparison, once without.
6589 auto *image_variable = maybe_get_backing_variable(image_id);
6590 if (image_variable && image_is_comparison(get<SPIRType>(image_variable->basetype), image_variable->self))
6591 image_is_depth = true;
6592
6593 if (image_is_depth)
6594 expr = remap_swizzle(result_type, 1, expr);
6595 }
6596
6597 if (!sparse && !backend.support_small_type_sampling_result && result_type.width < 32)
6598 {
6599 // Just value cast (narrowing) to expected type since we cannot rely on narrowing to work automatically.
6600 // Hopefully compiler picks this up and converts the texturing instruction to the appropriate precision.
6601 expr = join(type_to_glsl_constructor(result_type), "(", expr, ")");
6602 }
6603
6604 // Deals with reads from MSL. We might need to downconvert to fewer components.
6605 if (op == OpImageRead)
6606 expr = remap_swizzle(result_type, 4, expr);
6607
6608 return expr;
6609 }
6610
6611 bool CompilerGLSL::expression_is_constant_null(uint32_t id) const
6612 {
6613 auto *c = maybe_get<SPIRConstant>(id);
6614 if (!c)
6615 return false;
6616 return c->constant_is_null();
6617 }
6618
6619 bool CompilerGLSL::expression_is_non_value_type_array(uint32_t ptr)
6620 {
6621 auto &type = expression_type(ptr);
6622 if (type.array.empty())
6623 return false;
6624
6625 if (!backend.array_is_value_type)
6626 return true;
6627
6628 auto *var = maybe_get_backing_variable(ptr);
6629 if (!var)
6630 return false;
6631
6632 auto &backed_type = get<SPIRType>(var->basetype);
6633 return !backend.buffer_offset_array_is_value_type && backed_type.basetype == SPIRType::Struct &&
6634 has_member_decoration(backed_type.self, 0, DecorationOffset);
6635 }
6636
6637 // Returns the function name for a texture sampling function for the specified image and sampling characteristics.
6638 // For some subclasses, the function is a method on the specified image.
6639 string CompilerGLSL::to_function_name(const TextureFunctionNameArguments &args)
6640 {
6641 if (args.has_min_lod)
6642 {
6643 if (options.es)
6644 SPIRV_CROSS_THROW("Sparse residency is not supported in ESSL.");
6645 require_extension_internal("GL_ARB_sparse_texture_clamp");
6646 }
6647
6648 string fname;
6649 auto &imgtype = *args.base.imgtype;
6650 VariableID tex = args.base.img;
6651
6652 // textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason.
6653 // To emulate this, we will have to use textureGrad with a constant gradient of 0.
6654 // The workaround will assert that the LOD is in fact constant 0, or we cannot emit correct code.
6655 // This happens for HLSL SampleCmpLevelZero on Texture2DArray and TextureCube.
6656 bool workaround_lod_array_shadow_as_grad = false;
6657 if (((imgtype.image.arrayed && imgtype.image.dim == Dim2D) || imgtype.image.dim == DimCube) &&
6658 image_is_comparison(imgtype, tex) && args.lod)
6659 {
6660 if (!expression_is_constant_null(args.lod))
6661 {
6662 SPIRV_CROSS_THROW("textureLod on sampler2DArrayShadow is not constant 0.0. This cannot be "
6663 "expressed in GLSL.");
6664 }
6665 workaround_lod_array_shadow_as_grad = true;
6666 }
6667
6668 if (args.is_sparse_feedback)
6669 fname += "sparse";
6670
6671 if (args.base.is_fetch)
6672 fname += args.is_sparse_feedback ? "TexelFetch" : "texelFetch";
6673 else
6674 {
6675 fname += args.is_sparse_feedback ? "Texture" : "texture";
6676
6677 if (args.base.is_gather)
6678 fname += "Gather";
6679 if (args.has_array_offsets)
6680 fname += "Offsets";
6681 if (args.base.is_proj)
6682 fname += "Proj";
6683 if (args.has_grad || workaround_lod_array_shadow_as_grad)
6684 fname += "Grad";
6685 if (args.lod != 0 && !workaround_lod_array_shadow_as_grad)
6686 fname += "Lod";
6687 }
6688
6689 if (args.has_offset)
6690 fname += "Offset";
6691
6692 if (args.has_min_lod)
6693 fname += "Clamp";
6694
6695 if (args.is_sparse_feedback || args.has_min_lod)
6696 fname += "ARB";
6697
6698 return (is_legacy() && !args.base.is_gather) ? legacy_tex_op(fname, imgtype, tex) : fname;
6699 }
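// Examples of the name composition above: a sparse gather with a constant
// offset yields "sparseTextureGatherOffsetARB", while a non-sparse gradient
// sample with min-LOD clamping yields "textureGradClampARB".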
6700
6701 std::string CompilerGLSL::convert_separate_image_to_expression(uint32_t id)
6702 {
6703 auto *var = maybe_get_backing_variable(id);
6704
6705 // If we are fetching from a plain OpTypeImage, we must combine with a dummy sampler in GLSL.
6706 // In Vulkan GLSL, we can make use of the newer GL_EXT_samplerless_texture_functions.
6707 if (var)
6708 {
6709 auto &type = get<SPIRType>(var->basetype);
6710 if (type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer)
6711 {
6712 if (options.vulkan_semantics)
6713 {
6714 if (dummy_sampler_id)
6715 {
6716 // Don't need to consider Shadow state since the dummy sampler is always non-shadow.
6717 auto sampled_type = type;
6718 sampled_type.basetype = SPIRType::SampledImage;
6719 return join(type_to_glsl(sampled_type), "(", to_expression(id), ", ",
6720 to_expression(dummy_sampler_id), ")");
6721 }
6722 else
6723 {
6724 // Newer glslang supports this extension to deal with texture2D as argument to texture functions.
6725 require_extension_internal("GL_EXT_samplerless_texture_functions");
6726 }
6727 }
6728 else
6729 {
6730 if (!dummy_sampler_id)
6731 SPIRV_CROSS_THROW("Cannot find dummy sampler ID. Was "
6732 "build_dummy_sampler_for_combined_images() called?");
6733
6734 return to_combined_image_sampler(id, dummy_sampler_id);
6735 }
6736 }
6737 }
6738
6739 return to_expression(id);
6740 }
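// Example (illustrative): if the API user built a dummy sampler via
// build_dummy_sampler_for_combined_images() and it is named
// "SPIRV_Cross_DummySampler", a fetch from a separate texture2D uTex is emitted
// as "sampler2D(uTex, SPIRV_Cross_DummySampler)" in Vulkan GLSL.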
6741
6742 // Returns the function args for a texture sampling function for the specified image and sampling characteristics.
6743 string CompilerGLSL::to_function_args(const TextureFunctionArguments &args, bool *p_forward)
6744 {
6745 VariableID img = args.base.img;
6746 auto &imgtype = *args.base.imgtype;
6747
6748 string farg_str;
6749 if (args.base.is_fetch)
6750 farg_str = convert_separate_image_to_expression(img);
6751 else
6752 farg_str = to_expression(img);
6753
6754 if (args.nonuniform_expression && farg_str.find_first_of('[') != string::npos)
6755 {
6756 // Only emit nonuniformEXT() wrapper if the underlying expression is arrayed in some way.
6757 farg_str = join(backend.nonuniform_qualifier, "(", farg_str, ")");
6758 }
6759
6760 bool swizz_func = backend.swizzle_is_function;
6761 auto swizzle = [swizz_func](uint32_t comps, uint32_t in_comps) -> const char * {
6762 if (comps == in_comps)
6763 return "";
6764
6765 switch (comps)
6766 {
6767 case 1:
6768 return ".x";
6769 case 2:
6770 return swizz_func ? ".xy()" : ".xy";
6771 case 3:
6772 return swizz_func ? ".xyz()" : ".xyz";
6773 default:
6774 return "";
6775 }
6776 };
6777
6778 bool forward = should_forward(args.coord);
6779
6780 // The IR can give us more components than we need, so chop them off as needed.
6781 auto swizzle_expr = swizzle(args.coord_components, expression_type(args.coord).vecsize);
6782 // Only enclose the UV expression if needed.
6783 auto coord_expr =
6784 (*swizzle_expr == '\0') ? to_expression(args.coord) : (to_enclosed_expression(args.coord) + swizzle_expr);
6785
6786 // texelFetch only takes int, not uint.
6787 auto &coord_type = expression_type(args.coord);
6788 if (coord_type.basetype == SPIRType::UInt)
6789 {
6790 auto expected_type = coord_type;
6791 expected_type.vecsize = args.coord_components;
6792 expected_type.basetype = SPIRType::Int;
6793 coord_expr = bitcast_expression(expected_type, coord_type.basetype, coord_expr);
6794 }
6795
6796 // textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason.
6797 // To emulate this, we will have to use textureGrad with a constant gradient of 0.
6798 // The workaround will assert that the LOD is in fact constant 0, or we cannot emit correct code.
6799 // This happens for HLSL SampleCmpLevelZero on Texture2DArray and TextureCube.
6800 bool workaround_lod_array_shadow_as_grad =
6801 ((imgtype.image.arrayed && imgtype.image.dim == Dim2D) || imgtype.image.dim == DimCube) &&
6802 image_is_comparison(imgtype, img) && args.lod != 0;
6803
6804 if (args.dref)
6805 {
6806 forward = forward && should_forward(args.dref);
6807
6808 // SPIR-V splits dref and coordinate.
6809 if (args.base.is_gather ||
6810 args.coord_components == 4) // GLSL also splits the arguments in two. Same for textureGather.
6811 {
6812 farg_str += ", ";
6813 farg_str += to_expression(args.coord);
6814 farg_str += ", ";
6815 farg_str += to_expression(args.dref);
6816 }
6817 else if (args.base.is_proj)
6818 {
6819 // Have to reshuffle so we get vec4(coord, dref, proj), special case.
6820 // Other shading languages split up the coord and compare value arguments, like SPIR-V does.
6821 // The coordinate type for textureProj shadow is always vec4 even for sampler1DShadow.
6822 farg_str += ", vec4(";
6823
6824 if (imgtype.image.dim == Dim1D)
6825 {
6826 // Could reuse coord_expr here, but that would mess up the temporary usage checking.
6827 farg_str += to_enclosed_expression(args.coord) + ".x";
6828 farg_str += ", ";
6829 farg_str += "0.0, ";
6830 farg_str += to_expression(args.dref);
6831 farg_str += ", ";
6832 farg_str += to_enclosed_expression(args.coord) + ".y)";
6833 }
6834 else if (imgtype.image.dim == Dim2D)
6835 {
6836 // Could reuse coord_expr here, but that would mess up the temporary usage checking.
6837 farg_str += to_enclosed_expression(args.coord) + (swizz_func ? ".xy()" : ".xy");
6838 farg_str += ", ";
6839 farg_str += to_expression(args.dref);
6840 farg_str += ", ";
6841 farg_str += to_enclosed_expression(args.coord) + ".z)";
6842 }
6843 else
6844 SPIRV_CROSS_THROW("Invalid type for textureProj with shadow.");
6845 }
6846 else
6847 {
6848 // Create a composite which merges coord/dref into a single vector.
6849 auto type = expression_type(args.coord);
6850 type.vecsize = args.coord_components + 1;
6851 farg_str += ", ";
6852 farg_str += type_to_glsl_constructor(type);
6853 farg_str += "(";
6854 farg_str += coord_expr;
6855 farg_str += ", ";
6856 farg_str += to_expression(args.dref);
6857 farg_str += ")";
6858 }
6859 }
6860 else
6861 {
6862 farg_str += ", ";
6863 farg_str += coord_expr;
6864 }
6865
6866 if (args.grad_x || args.grad_y)
6867 {
6868 forward = forward && should_forward(args.grad_x);
6869 forward = forward && should_forward(args.grad_y);
6870 farg_str += ", ";
6871 farg_str += to_expression(args.grad_x);
6872 farg_str += ", ";
6873 farg_str += to_expression(args.grad_y);
6874 }
6875
6876 if (args.lod)
6877 {
6878 if (workaround_lod_array_shadow_as_grad)
6879 {
6880 // Implement textureGrad() instead. LOD == 0.0 is implemented as gradient of 0.0.
6881 // Implementing this as plain texture() is not safe on some implementations.
6882 if (imgtype.image.dim == Dim2D)
6883 farg_str += ", vec2(0.0), vec2(0.0)";
6884 else if (imgtype.image.dim == DimCube)
6885 farg_str += ", vec3(0.0), vec3(0.0)";
6886 }
6887 else
6888 {
6889 forward = forward && should_forward(args.lod);
6890 farg_str += ", ";
6891
6892 auto &lod_expr_type = expression_type(args.lod);
6893
6894 // Lod expression for TexelFetch in GLSL must be int, and only int.
6895 if (args.base.is_fetch && imgtype.image.dim != DimBuffer && !imgtype.image.ms &&
6896 lod_expr_type.basetype != SPIRType::Int)
6897 {
6898 farg_str += join("int(", to_expression(args.lod), ")");
6899 }
6900 else
6901 {
6902 farg_str += to_expression(args.lod);
6903 }
6904 }
6905 }
6906 else if (args.base.is_fetch && imgtype.image.dim != DimBuffer && !imgtype.image.ms)
6907 {
6908 // Lod argument is optional in OpImageFetch, but we require a LOD value, pick 0 as the default.
6909 farg_str += ", 0";
6910 }
6911
6912 if (args.coffset)
6913 {
6914 forward = forward && should_forward(args.coffset);
6915 farg_str += ", ";
6916 farg_str += to_expression(args.coffset);
6917 }
6918 else if (args.offset)
6919 {
6920 forward = forward && should_forward(args.offset);
6921 farg_str += ", ";
6922 farg_str += to_expression(args.offset);
6923 }
6924
6925 if (args.sample)
6926 {
6927 farg_str += ", ";
6928 farg_str += to_expression(args.sample);
6929 }
6930
6931 if (args.min_lod)
6932 {
6933 farg_str += ", ";
6934 farg_str += to_expression(args.min_lod);
6935 }
6936
6937 if (args.sparse_texel)
6938 {
6939 // The sparse texel output parameter comes after everything else, except that it precedes the optional component/bias arguments.
6940 farg_str += ", ";
6941 farg_str += to_expression(args.sparse_texel);
6942 }
6943
6944 if (args.bias)
6945 {
6946 forward = forward && should_forward(args.bias);
6947 farg_str += ", ";
6948 farg_str += to_expression(args.bias);
6949 }
6950
6951 if (args.component && !expression_is_constant_null(args.component))
6952 {
6953 forward = forward && should_forward(args.component);
6954 farg_str += ", ";
6955 farg_str += to_expression(args.component);
6956 }
6957
6958 *p_forward = forward;
6959
6960 return farg_str;
6961 }
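// Example (illustrative): for textureLod on a sampler2DArrayShadow with the
// constant-zero LOD workaround, the assembled call reads roughly:
//   textureGrad(uShadow, vec4(vCoord.xyz, dref), vec2(0.0), vec2(0.0))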
6962
6963 void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t length)
6964 {
6965 auto op = static_cast<GLSLstd450>(eop);
6966
6967 if (is_legacy() && is_unsigned_glsl_opcode(op))
6968 SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy GLSL targets.");
6969
6970 // If we need to do implicit bitcasts, make sure we do it with the correct type.
6971 uint32_t integer_width = get_integer_width_for_glsl_instruction(op, args, length);
6972 auto int_type = to_signed_basetype(integer_width);
6973 auto uint_type = to_unsigned_basetype(integer_width);
6974
6975 switch (op)
6976 {
6977 // FP fiddling
6978 case GLSLstd450Round:
6979 if (!is_legacy())
6980 emit_unary_func_op(result_type, id, args[0], "round");
6981 else
6982 {
6983 auto op0 = to_enclosed_expression(args[0]);
6984 auto &op0_type = expression_type(args[0]);
6985 auto expr = join("floor(", op0, " + ", type_to_glsl_constructor(op0_type), "(0.5))");
6986 bool forward = should_forward(args[0]);
6987 emit_op(result_type, id, expr, forward);
6988 inherit_expression_dependencies(id, args[0]);
6989 }
6990 break;
6991
6992 case GLSLstd450RoundEven:
6993 if (!is_legacy())
6994 emit_unary_func_op(result_type, id, args[0], "roundEven");
6995 else if (!options.es)
6996 {
6997 // This extension provides round() with round-to-even semantics.
6998 require_extension_internal("GL_EXT_gpu_shader4");
6999 emit_unary_func_op(result_type, id, args[0], "round");
7000 }
7001 else
7002 SPIRV_CROSS_THROW("roundEven supported only in ESSL 300.");
7003 break;
7004
7005 case GLSLstd450Trunc:
7006 emit_unary_func_op(result_type, id, args[0], "trunc");
7007 break;
7008 case GLSLstd450SAbs:
7009 emit_unary_func_op_cast(result_type, id, args[0], "abs", int_type, int_type);
7010 break;
7011 case GLSLstd450FAbs:
7012 emit_unary_func_op(result_type, id, args[0], "abs");
7013 break;
7014 case GLSLstd450SSign:
7015 emit_unary_func_op_cast(result_type, id, args[0], "sign", int_type, int_type);
7016 break;
7017 case GLSLstd450FSign:
7018 emit_unary_func_op(result_type, id, args[0], "sign");
7019 break;
7020 case GLSLstd450Floor:
7021 emit_unary_func_op(result_type, id, args[0], "floor");
7022 break;
7023 case GLSLstd450Ceil:
7024 emit_unary_func_op(result_type, id, args[0], "ceil");
7025 break;
7026 case GLSLstd450Fract:
7027 emit_unary_func_op(result_type, id, args[0], "fract");
7028 break;
7029 case GLSLstd450Radians:
7030 emit_unary_func_op(result_type, id, args[0], "radians");
7031 break;
7032 case GLSLstd450Degrees:
7033 emit_unary_func_op(result_type, id, args[0], "degrees");
7034 break;
7035 case GLSLstd450Fma:
7036 if ((!options.es && options.version < 400) || (options.es && options.version < 320))
7037 {
7038 auto expr = join(to_enclosed_expression(args[0]), " * ", to_enclosed_expression(args[1]), " + ",
7039 to_enclosed_expression(args[2]));
7040
7041 emit_op(result_type, id, expr,
7042 should_forward(args[0]) && should_forward(args[1]) && should_forward(args[2]));
7043 for (uint32_t i = 0; i < 3; i++)
7044 inherit_expression_dependencies(id, args[i]);
7045 }
7046 else
7047 emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "fma");
7048 break;
7049 case GLSLstd450Modf:
7050 register_call_out_argument(args[1]);
7051 forced_temporaries.insert(id);
7052 emit_binary_func_op(result_type, id, args[0], args[1], "modf");
7053 break;
7054
7055 case GLSLstd450ModfStruct:
7056 {
7057 auto &type = get<SPIRType>(result_type);
7058 emit_uninitialized_temporary_expression(result_type, id);
7059 statement(to_expression(id), ".", to_member_name(type, 0), " = ", "modf(", to_expression(args[0]), ", ",
7060 to_expression(id), ".", to_member_name(type, 1), ");");
7061 break;
7062 }
7063
7064 // Minmax
7065 case GLSLstd450UMin:
7066 emit_binary_func_op_cast(result_type, id, args[0], args[1], "min", uint_type, false);
7067 break;
7068
7069 case GLSLstd450SMin:
7070 emit_binary_func_op_cast(result_type, id, args[0], args[1], "min", int_type, false);
7071 break;
7072
7073 case GLSLstd450FMin:
7074 emit_binary_func_op(result_type, id, args[0], args[1], "min");
7075 break;
7076
7077 case GLSLstd450FMax:
7078 emit_binary_func_op(result_type, id, args[0], args[1], "max");
7079 break;
7080
7081 case GLSLstd450UMax:
7082 emit_binary_func_op_cast(result_type, id, args[0], args[1], "max", uint_type, false);
7083 break;
7084
7085 case GLSLstd450SMax:
7086 emit_binary_func_op_cast(result_type, id, args[0], args[1], "max", int_type, false);
7087 break;
7088
7089 case GLSLstd450FClamp:
7090 emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "clamp");
7091 break;
7092
7093 case GLSLstd450UClamp:
7094 emit_trinary_func_op_cast(result_type, id, args[0], args[1], args[2], "clamp", uint_type);
7095 break;
7096
7097 case GLSLstd450SClamp:
7098 emit_trinary_func_op_cast(result_type, id, args[0], args[1], args[2], "clamp", int_type);
7099 break;
7100
7101 // Trig
7102 case GLSLstd450Sin:
7103 emit_unary_func_op(result_type, id, args[0], "sin");
7104 break;
7105 case GLSLstd450Cos:
7106 emit_unary_func_op(result_type, id, args[0], "cos");
7107 break;
7108 case GLSLstd450Tan:
7109 emit_unary_func_op(result_type, id, args[0], "tan");
7110 break;
7111 case GLSLstd450Asin:
7112 emit_unary_func_op(result_type, id, args[0], "asin");
7113 break;
7114 case GLSLstd450Acos:
7115 emit_unary_func_op(result_type, id, args[0], "acos");
7116 break;
7117 case GLSLstd450Atan:
7118 emit_unary_func_op(result_type, id, args[0], "atan");
7119 break;
7120 case GLSLstd450Sinh:
7121 emit_unary_func_op(result_type, id, args[0], "sinh");
7122 break;
7123 case GLSLstd450Cosh:
7124 emit_unary_func_op(result_type, id, args[0], "cosh");
7125 break;
7126 case GLSLstd450Tanh:
7127 emit_unary_func_op(result_type, id, args[0], "tanh");
7128 break;
7129 case GLSLstd450Asinh:
7130 emit_unary_func_op(result_type, id, args[0], "asinh");
7131 break;
7132 case GLSLstd450Acosh:
7133 emit_unary_func_op(result_type, id, args[0], "acosh");
7134 break;
7135 case GLSLstd450Atanh:
7136 emit_unary_func_op(result_type, id, args[0], "atanh");
7137 break;
7138 case GLSLstd450Atan2:
7139 emit_binary_func_op(result_type, id, args[0], args[1], "atan");
7140 break;
7141
7142 // Exponentials
7143 case GLSLstd450Pow:
7144 emit_binary_func_op(result_type, id, args[0], args[1], "pow");
7145 break;
7146 case GLSLstd450Exp:
7147 emit_unary_func_op(result_type, id, args[0], "exp");
7148 break;
7149 case GLSLstd450Log:
7150 emit_unary_func_op(result_type, id, args[0], "log");
7151 break;
7152 case GLSLstd450Exp2:
7153 emit_unary_func_op(result_type, id, args[0], "exp2");
7154 break;
7155 case GLSLstd450Log2:
7156 emit_unary_func_op(result_type, id, args[0], "log2");
7157 break;
7158 case GLSLstd450Sqrt:
7159 emit_unary_func_op(result_type, id, args[0], "sqrt");
7160 break;
7161 case GLSLstd450InverseSqrt:
7162 emit_unary_func_op(result_type, id, args[0], "inversesqrt");
7163 break;
7164
7165 // Matrix math
7166 case GLSLstd450Determinant:
7167 emit_unary_func_op(result_type, id, args[0], "determinant");
7168 break;
7169 case GLSLstd450MatrixInverse:
7170 emit_unary_func_op(result_type, id, args[0], "inverse");
7171 break;
7172
7173 // Lerping
7174 case GLSLstd450FMix:
7175 case GLSLstd450IMix:
7176 {
7177 emit_mix_op(result_type, id, args[0], args[1], args[2]);
7178 break;
7179 }
7180 case GLSLstd450Step:
7181 emit_binary_func_op(result_type, id, args[0], args[1], "step");
7182 break;
7183 case GLSLstd450SmoothStep:
7184 emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "smoothstep");
7185 break;
7186
7187 // Packing
7188 case GLSLstd450Frexp:
7189 register_call_out_argument(args[1]);
7190 forced_temporaries.insert(id);
7191 emit_binary_func_op(result_type, id, args[0], args[1], "frexp");
7192 break;
7193
7194 case GLSLstd450FrexpStruct:
7195 {
7196 auto &type = get<SPIRType>(result_type);
7197 emit_uninitialized_temporary_expression(result_type, id);
7198 statement(to_expression(id), ".", to_member_name(type, 0), " = ", "frexp(", to_expression(args[0]), ", ",
7199 to_expression(id), ".", to_member_name(type, 1), ");");
7200 break;
7201 }
7202
7203 case GLSLstd450Ldexp:
7204 {
7205 bool forward = should_forward(args[0]) && should_forward(args[1]);
7206
7207 auto op0 = to_unpacked_expression(args[0]);
7208 auto op1 = to_unpacked_expression(args[1]);
7209 auto &op1_type = expression_type(args[1]);
7210 if (op1_type.basetype != SPIRType::Int)
7211 {
7212 // Need a value cast here.
7213 auto target_type = op1_type;
7214 target_type.basetype = SPIRType::Int;
7215 op1 = join(type_to_glsl_constructor(target_type), "(", op1, ")");
7216 }
7217
7218 auto expr = join("ldexp(", op0, ", ", op1, ")");
7219
7220 emit_op(result_type, id, expr, forward);
7221 inherit_expression_dependencies(id, args[0]);
7222 inherit_expression_dependencies(id, args[1]);
7223 break;
7224 }
7225
7226 case GLSLstd450PackSnorm4x8:
7227 emit_unary_func_op(result_type, id, args[0], "packSnorm4x8");
7228 break;
7229 case GLSLstd450PackUnorm4x8:
7230 emit_unary_func_op(result_type, id, args[0], "packUnorm4x8");
7231 break;
7232 case GLSLstd450PackSnorm2x16:
7233 emit_unary_func_op(result_type, id, args[0], "packSnorm2x16");
7234 break;
7235 case GLSLstd450PackUnorm2x16:
7236 emit_unary_func_op(result_type, id, args[0], "packUnorm2x16");
7237 break;
7238 case GLSLstd450PackHalf2x16:
7239 emit_unary_func_op(result_type, id, args[0], "packHalf2x16");
7240 break;
7241 case GLSLstd450UnpackSnorm4x8:
7242 emit_unary_func_op(result_type, id, args[0], "unpackSnorm4x8");
7243 break;
7244 case GLSLstd450UnpackUnorm4x8:
7245 emit_unary_func_op(result_type, id, args[0], "unpackUnorm4x8");
7246 break;
7247 case GLSLstd450UnpackSnorm2x16:
7248 emit_unary_func_op(result_type, id, args[0], "unpackSnorm2x16");
7249 break;
7250 case GLSLstd450UnpackUnorm2x16:
7251 emit_unary_func_op(result_type, id, args[0], "unpackUnorm2x16");
7252 break;
7253 case GLSLstd450UnpackHalf2x16:
7254 emit_unary_func_op(result_type, id, args[0], "unpackHalf2x16");
7255 break;
7256
7257 case GLSLstd450PackDouble2x32:
7258 emit_unary_func_op(result_type, id, args[0], "packDouble2x32");
7259 break;
7260 case GLSLstd450UnpackDouble2x32:
7261 emit_unary_func_op(result_type, id, args[0], "unpackDouble2x32");
7262 break;
7263
7264 // Vector math
7265 case GLSLstd450Length:
7266 emit_unary_func_op(result_type, id, args[0], "length");
7267 break;
7268 case GLSLstd450Distance:
7269 emit_binary_func_op(result_type, id, args[0], args[1], "distance");
7270 break;
7271 case GLSLstd450Cross:
7272 emit_binary_func_op(result_type, id, args[0], args[1], "cross");
7273 break;
7274 case GLSLstd450Normalize:
7275 emit_unary_func_op(result_type, id, args[0], "normalize");
7276 break;
7277 case GLSLstd450FaceForward:
7278 emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "faceforward");
7279 break;
7280 case GLSLstd450Reflect:
7281 emit_binary_func_op(result_type, id, args[0], args[1], "reflect");
7282 break;
7283 case GLSLstd450Refract:
7284 emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "refract");
7285 break;
7286
7287 // Bit-fiddling
7288 case GLSLstd450FindILsb:
7289 // findLSB always returns int.
7290 emit_unary_func_op_cast(result_type, id, args[0], "findLSB", expression_type(args[0]).basetype, int_type);
7291 break;
7292
7293 case GLSLstd450FindSMsb:
7294 emit_unary_func_op_cast(result_type, id, args[0], "findMSB", int_type, int_type);
7295 break;
7296
7297 case GLSLstd450FindUMsb:
7298 emit_unary_func_op_cast(result_type, id, args[0], "findMSB", uint_type,
7299 int_type); // findMSB always returns int.
7300 break;
7301
7302 // Multisampled varying
7303 case GLSLstd450InterpolateAtCentroid:
7304 emit_unary_func_op(result_type, id, args[0], "interpolateAtCentroid");
7305 break;
7306 case GLSLstd450InterpolateAtSample:
7307 emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtSample");
7308 break;
7309 case GLSLstd450InterpolateAtOffset:
7310 emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtOffset");
7311 break;
7312
7313 case GLSLstd450NMin:
7314 case GLSLstd450NMax:
7315 {
7316 emit_nminmax_op(result_type, id, args[0], args[1], op);
7317 break;
7318 }
7319
7320 case GLSLstd450NClamp:
7321 {
7322 // Make sure we have a unique ID here to avoid aliasing the extra sub-expressions between clamp and NMin sub-op.
7323 // IDs cannot exceed 24 bits, so we can make use of the higher bits for some unique flags.
7324 uint32_t &max_id = extra_sub_expressions[id | 0x80000000u];
7325 if (!max_id)
7326 max_id = ir.increase_bound_by(1);
7327
7328 // Inherit precision qualifiers.
7329 ir.meta[max_id] = ir.meta[id];
7330
7331 emit_nminmax_op(result_type, max_id, args[0], args[1], GLSLstd450NMax);
7332 emit_nminmax_op(result_type, id, max_id, args[2], GLSLstd450NMin);
7333 break;
7334 }
7335
7336 default:
7337 statement("// unimplemented GLSL op ", eop);
7338 break;
7339 }
7340 }
7341
7342 void CompilerGLSL::emit_nminmax_op(uint32_t result_type, uint32_t id, uint32_t op0, uint32_t op1, GLSLstd450 op)
7343 {
7344 // Need to emulate this call.
7345 uint32_t &ids = extra_sub_expressions[id];
7346 if (!ids)
7347 {
7348 ids = ir.increase_bound_by(5);
7349 auto btype = get<SPIRType>(result_type);
7350 btype.basetype = SPIRType::Boolean;
7351 set<SPIRType>(ids, btype);
7352 }
7353
7354 uint32_t btype_id = ids + 0;
7355 uint32_t left_nan_id = ids + 1;
7356 uint32_t right_nan_id = ids + 2;
7357 uint32_t tmp_id = ids + 3;
7358 uint32_t mixed_first_id = ids + 4;
7359
7360 // Inherit precision qualifiers.
7361 ir.meta[tmp_id] = ir.meta[id];
7362 ir.meta[mixed_first_id] = ir.meta[id];
7363
7364 emit_unary_func_op(btype_id, left_nan_id, op0, "isnan");
7365 emit_unary_func_op(btype_id, right_nan_id, op1, "isnan");
7366 emit_binary_func_op(result_type, tmp_id, op0, op1, op == GLSLstd450NMin ? "min" : "max");
7367 emit_mix_op(result_type, mixed_first_id, tmp_id, op1, left_nan_id);
7368 emit_mix_op(result_type, id, mixed_first_id, op0, right_nan_id);
7369 }
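// Illustrative expansion of NMin(a, b) (identifiers invented; NMax is the same
// with max()):
//   bool _ln = isnan(a);
//   bool _rn = isnan(b);
//   T _tmp = min(a, b);
//   T _m   = _ln ? b : _tmp;   // emitted via emit_mix_op, so mix() may be used
//   T _res = _rn ? a : _m;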
7370
7371 void CompilerGLSL::emit_spv_amd_shader_ballot_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args,
7372 uint32_t)
7373 {
7374 require_extension_internal("GL_AMD_shader_ballot");
7375
7376 enum AMDShaderBallot
7377 {
7378 SwizzleInvocationsAMD = 1,
7379 SwizzleInvocationsMaskedAMD = 2,
7380 WriteInvocationAMD = 3,
7381 MbcntAMD = 4
7382 };
7383
7384 auto op = static_cast<AMDShaderBallot>(eop);
7385
7386 switch (op)
7387 {
7388 case SwizzleInvocationsAMD:
7389 emit_binary_func_op(result_type, id, args[0], args[1], "swizzleInvocationsAMD");
7390 register_control_dependent_expression(id);
7391 break;
7392
7393 case SwizzleInvocationsMaskedAMD:
7394 emit_binary_func_op(result_type, id, args[0], args[1], "swizzleInvocationsMaskedAMD");
7395 register_control_dependent_expression(id);
7396 break;
7397
7398 case WriteInvocationAMD:
7399 emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "writeInvocationAMD");
7400 register_control_dependent_expression(id);
7401 break;
7402
7403 case MbcntAMD:
7404 emit_unary_func_op(result_type, id, args[0], "mbcntAMD");
7405 register_control_dependent_expression(id);
7406 break;
7407
7408 default:
7409 statement("// unimplemented SPV AMD shader ballot op ", eop);
7410 break;
7411 }
7412 }
7413
7414 void CompilerGLSL::emit_spv_amd_shader_explicit_vertex_parameter_op(uint32_t result_type, uint32_t id, uint32_t eop,
7415 const uint32_t *args, uint32_t)
7416 {
7417 require_extension_internal("GL_AMD_shader_explicit_vertex_parameter");
7418
7419 enum AMDShaderExplicitVertexParameter
7420 {
7421 InterpolateAtVertexAMD = 1
7422 };
7423
7424 auto op = static_cast<AMDShaderExplicitVertexParameter>(eop);
7425
7426 switch (op)
7427 {
7428 case InterpolateAtVertexAMD:
7429 emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtVertexAMD");
7430 break;
7431
7432 default:
7433 statement("// unimplemented SPV AMD shader explicit vertex parameter op ", eop);
7434 break;
7435 }
7436 }
7437
7438 void CompilerGLSL::emit_spv_amd_shader_trinary_minmax_op(uint32_t result_type, uint32_t id, uint32_t eop,
7439 const uint32_t *args, uint32_t)
7440 {
7441 require_extension_internal("GL_AMD_shader_trinary_minmax");
7442
7443 enum AMDShaderTrinaryMinMax
7444 {
7445 FMin3AMD = 1,
7446 UMin3AMD = 2,
7447 SMin3AMD = 3,
7448 FMax3AMD = 4,
7449 UMax3AMD = 5,
7450 SMax3AMD = 6,
7451 FMid3AMD = 7,
7452 UMid3AMD = 8,
7453 SMid3AMD = 9
7454 };
7455
7456 auto op = static_cast<AMDShaderTrinaryMinMax>(eop);
7457
7458 switch (op)
7459 {
7460 case FMin3AMD:
7461 case UMin3AMD:
7462 case SMin3AMD:
7463 emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "min3");
7464 break;
7465
7466 case FMax3AMD:
7467 case UMax3AMD:
7468 case SMax3AMD:
7469 emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "max3");
7470 break;
7471
7472 case FMid3AMD:
7473 case UMid3AMD:
7474 case SMid3AMD:
7475 emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "mid3");
7476 break;
7477
7478 default:
7479 statement("// unimplemented SPV AMD shader trinary minmax op ", eop);
7480 break;
7481 }
7482 }
7483
7484 void CompilerGLSL::emit_spv_amd_gcn_shader_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args,
7485 uint32_t)
7486 {
7487 require_extension_internal("GL_AMD_gcn_shader");
7488
7489 enum AMDGCNShader
7490 {
7491 CubeFaceIndexAMD = 1,
7492 CubeFaceCoordAMD = 2,
7493 TimeAMD = 3
7494 };
7495
7496 auto op = static_cast<AMDGCNShader>(eop);
7497
7498 switch (op)
7499 {
7500 case CubeFaceIndexAMD:
7501 emit_unary_func_op(result_type, id, args[0], "cubeFaceIndexAMD");
7502 break;
7503 case CubeFaceCoordAMD:
7504 emit_unary_func_op(result_type, id, args[0], "cubeFaceCoordAMD");
7505 break;
7506 case TimeAMD:
7507 {
7508 string expr = "timeAMD()";
7509 emit_op(result_type, id, expr, true);
7510 register_control_dependent_expression(id);
7511 break;
7512 }
7513
7514 default:
7515 statement("// unimplemented SPV AMD gcn shader op ", eop);
7516 break;
7517 }
7518 }
7519
7520 void CompilerGLSL::emit_subgroup_op(const Instruction &i)
7521 {
7522 const uint32_t *ops = stream(i);
7523 auto op = static_cast<Op>(i.op);
7524
7525 if (!options.vulkan_semantics && !is_supported_subgroup_op_in_opengl(op))
7526 SPIRV_CROSS_THROW("This subgroup operation is only supported in Vulkan semantics.");
7527
7528 // If we need to do implicit bitcasts, make sure we do it with the correct type.
7529 uint32_t integer_width = get_integer_width_for_instruction(i);
7530 auto int_type = to_signed_basetype(integer_width);
7531 auto uint_type = to_unsigned_basetype(integer_width);
7532
7533 switch (op)
7534 {
7535 case OpGroupNonUniformElect:
7536 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupElect);
7537 break;
7538
7539 case OpGroupNonUniformBallotBitCount:
7540 {
7541 const GroupOperation operation = static_cast<GroupOperation>(ops[3]);
7542 if (operation == GroupOperationReduce)
7543 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallotBitCount);
7544 else if (operation == GroupOperationInclusiveScan || operation == GroupOperationExclusiveScan)
7545 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupInverseBallot_InclBitCount_ExclBitCout);
7546 }
7547 break;
7548
7549 case OpGroupNonUniformBallotBitExtract:
7550 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallotBitExtract);
7551 break;
7552
7553 case OpGroupNonUniformInverseBallot:
7554 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupInverseBallot_InclBitCount_ExclBitCout);
7555 break;
7556
7557 case OpGroupNonUniformBallot:
7558 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallot);
7559 break;
7560
7561 case OpGroupNonUniformBallotFindLSB:
7562 case OpGroupNonUniformBallotFindMSB:
7563 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallotFindLSB_MSB);
7564 break;
7565
7566 case OpGroupNonUniformBroadcast:
7567 case OpGroupNonUniformBroadcastFirst:
7568 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBroadcast_First);
7569 break;
7570
7571 case OpGroupNonUniformShuffle:
7572 case OpGroupNonUniformShuffleXor:
7573 require_extension_internal("GL_KHR_shader_subgroup_shuffle");
7574 break;
7575
7576 case OpGroupNonUniformShuffleUp:
7577 case OpGroupNonUniformShuffleDown:
7578 require_extension_internal("GL_KHR_shader_subgroup_shuffle_relative");
7579 break;
7580
7581 case OpGroupNonUniformAll:
7582 case OpGroupNonUniformAny:
7583 case OpGroupNonUniformAllEqual:
7584 {
7585 const SPIRType &type = expression_type(ops[3]);
7586 if (type.basetype == SPIRType::BaseType::Boolean && type.vecsize == 1u)
7587 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupAll_Any_AllEqualBool);
7588 else
7589 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupAllEqualT);
7590 }
7591 break;
7592
7593 case OpGroupNonUniformFAdd:
7594 case OpGroupNonUniformFMul:
7595 case OpGroupNonUniformFMin:
7596 case OpGroupNonUniformFMax:
7597 case OpGroupNonUniformIAdd:
7598 case OpGroupNonUniformIMul:
7599 case OpGroupNonUniformSMin:
7600 case OpGroupNonUniformSMax:
7601 case OpGroupNonUniformUMin:
7602 case OpGroupNonUniformUMax:
7603 case OpGroupNonUniformBitwiseAnd:
7604 case OpGroupNonUniformBitwiseOr:
7605 case OpGroupNonUniformBitwiseXor:
7606 {
7607 auto operation = static_cast<GroupOperation>(ops[3]);
7608 if (operation == GroupOperationClusteredReduce)
7609 {
7610 require_extension_internal("GL_KHR_shader_subgroup_clustered");
7611 }
7612 else if (operation == GroupOperationExclusiveScan || operation == GroupOperationInclusiveScan ||
7613 operation == GroupOperationReduce)
7614 {
7615 require_extension_internal("GL_KHR_shader_subgroup_arithmetic");
7616 }
7617 else
7618 SPIRV_CROSS_THROW("Invalid group operation.");
7619 break;
7620 }
7621
7622 case OpGroupNonUniformQuadSwap:
7623 case OpGroupNonUniformQuadBroadcast:
7624 require_extension_internal("GL_KHR_shader_subgroup_quad");
7625 break;
7626
7627 default:
7628 SPIRV_CROSS_THROW("Invalid opcode for subgroup.");
7629 }
7630
7631 uint32_t result_type = ops[0];
7632 uint32_t id = ops[1];
7633
7634 auto scope = static_cast<Scope>(evaluate_constant_u32(ops[2]));
7635 if (scope != ScopeSubgroup)
7636 SPIRV_CROSS_THROW("Only subgroup scope is supported.");
7637
7638 switch (op)
7639 {
7640 case OpGroupNonUniformElect:
7641 emit_op(result_type, id, "subgroupElect()", true);
7642 break;
7643
7644 case OpGroupNonUniformBroadcast:
7645 emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupBroadcast");
7646 break;
7647
7648 case OpGroupNonUniformBroadcastFirst:
7649 emit_unary_func_op(result_type, id, ops[3], "subgroupBroadcastFirst");
7650 break;
7651
7652 case OpGroupNonUniformBallot:
7653 emit_unary_func_op(result_type, id, ops[3], "subgroupBallot");
7654 break;
7655
7656 case OpGroupNonUniformInverseBallot:
7657 emit_unary_func_op(result_type, id, ops[3], "subgroupInverseBallot");
7658 break;
7659
7660 case OpGroupNonUniformBallotBitExtract:
7661 emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupBallotBitExtract");
7662 break;
7663
7664 case OpGroupNonUniformBallotFindLSB:
7665 emit_unary_func_op(result_type, id, ops[3], "subgroupBallotFindLSB");
7666 break;
7667
7668 case OpGroupNonUniformBallotFindMSB:
7669 emit_unary_func_op(result_type, id, ops[3], "subgroupBallotFindMSB");
7670 break;
7671
7672 case OpGroupNonUniformBallotBitCount:
7673 {
7674 auto operation = static_cast<GroupOperation>(ops[3]);
7675 if (operation == GroupOperationReduce)
7676 emit_unary_func_op(result_type, id, ops[4], "subgroupBallotBitCount");
7677 else if (operation == GroupOperationInclusiveScan)
7678 emit_unary_func_op(result_type, id, ops[4], "subgroupBallotInclusiveBitCount");
7679 else if (operation == GroupOperationExclusiveScan)
7680 emit_unary_func_op(result_type, id, ops[4], "subgroupBallotExclusiveBitCount");
7681 else
7682 SPIRV_CROSS_THROW("Invalid BitCount operation.");
7683 break;
7684 }
7685
7686 case OpGroupNonUniformShuffle:
7687 emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffle");
7688 break;
7689
7690 case OpGroupNonUniformShuffleXor:
7691 emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleXor");
7692 break;
7693
7694 case OpGroupNonUniformShuffleUp:
7695 emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleUp");
7696 break;
7697
7698 case OpGroupNonUniformShuffleDown:
7699 emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleDown");
7700 break;
7701
7702 case OpGroupNonUniformAll:
7703 emit_unary_func_op(result_type, id, ops[3], "subgroupAll");
7704 break;
7705
7706 case OpGroupNonUniformAny:
7707 emit_unary_func_op(result_type, id, ops[3], "subgroupAny");
7708 break;
7709
7710 case OpGroupNonUniformAllEqual:
7711 emit_unary_func_op(result_type, id, ops[3], "subgroupAllEqual");
7712 break;
7713
7714 // clang-format off
7715 #define GLSL_GROUP_OP(op, glsl_op) \
7716 case OpGroupNonUniform##op: \
7717 { \
7718 auto operation = static_cast<GroupOperation>(ops[3]); \
7719 if (operation == GroupOperationReduce) \
7720 emit_unary_func_op(result_type, id, ops[4], "subgroup" #glsl_op); \
7721 else if (operation == GroupOperationInclusiveScan) \
7722 emit_unary_func_op(result_type, id, ops[4], "subgroupInclusive" #glsl_op); \
7723 else if (operation == GroupOperationExclusiveScan) \
7724 emit_unary_func_op(result_type, id, ops[4], "subgroupExclusive" #glsl_op); \
7725 else if (operation == GroupOperationClusteredReduce) \
7726 emit_binary_func_op(result_type, id, ops[4], ops[5], "subgroupClustered" #glsl_op); \
7727 else \
7728 SPIRV_CROSS_THROW("Invalid group operation."); \
7729 break; \
7730 }
7731
7732 #define GLSL_GROUP_OP_CAST(op, glsl_op, type) \
7733 case OpGroupNonUniform##op: \
7734 { \
7735 auto operation = static_cast<GroupOperation>(ops[3]); \
7736 if (operation == GroupOperationReduce) \
7737 emit_unary_func_op_cast(result_type, id, ops[4], "subgroup" #glsl_op, type, type); \
7738 else if (operation == GroupOperationInclusiveScan) \
7739 emit_unary_func_op_cast(result_type, id, ops[4], "subgroupInclusive" #glsl_op, type, type); \
7740 else if (operation == GroupOperationExclusiveScan) \
7741 emit_unary_func_op_cast(result_type, id, ops[4], "subgroupExclusive" #glsl_op, type, type); \
7742 else if (operation == GroupOperationClusteredReduce) \
7743 emit_binary_func_op_cast_clustered(result_type, id, ops[4], ops[5], "subgroupClustered" #glsl_op, type); \
7744 else \
7745 SPIRV_CROSS_THROW("Invalid group operation."); \
7746 break; \
7747 }
7748
7749 GLSL_GROUP_OP(FAdd, Add)
7750 GLSL_GROUP_OP(FMul, Mul)
7751 GLSL_GROUP_OP(FMin, Min)
7752 GLSL_GROUP_OP(FMax, Max)
7753 GLSL_GROUP_OP(IAdd, Add)
7754 GLSL_GROUP_OP(IMul, Mul)
7755 GLSL_GROUP_OP_CAST(SMin, Min, int_type)
7756 GLSL_GROUP_OP_CAST(SMax, Max, int_type)
7757 GLSL_GROUP_OP_CAST(UMin, Min, uint_type)
7758 GLSL_GROUP_OP_CAST(UMax, Max, uint_type)
7759 GLSL_GROUP_OP(BitwiseAnd, And)
7760 GLSL_GROUP_OP(BitwiseOr, Or)
7761 GLSL_GROUP_OP(BitwiseXor, Xor)
7762 #undef GLSL_GROUP_OP
7763 #undef GLSL_GROUP_OP_CAST
7764 // clang-format on
7765
7766 case OpGroupNonUniformQuadSwap:
7767 {
7768 uint32_t direction = evaluate_constant_u32(ops[4]);
7769 if (direction == 0)
7770 emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapHorizontal");
7771 else if (direction == 1)
7772 emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapVertical");
7773 else if (direction == 2)
7774 emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapDiagonal");
7775 else
7776 SPIRV_CROSS_THROW("Invalid quad swap direction.");
7777 break;
7778 }
7779
7780 case OpGroupNonUniformQuadBroadcast:
7781 {
7782 emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupQuadBroadcast");
7783 break;
7784 }
7785
7786 default:
7787 SPIRV_CROSS_THROW("Invalid opcode for subgroup.");
7788 }
7789
7790 register_control_dependent_expression(id);
7791 }
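// Example mappings for the group operations above (Vulkan GLSL):
//   OpGroupNonUniformFAdd + Reduce          -> subgroupAdd(x)
//   OpGroupNonUniformFAdd + InclusiveScan   -> subgroupInclusiveAdd(x)
//   OpGroupNonUniformFAdd + ClusteredReduce -> subgroupClusteredAdd(x, ClusterSize)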
7792
7793 string CompilerGLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type)
7794 {
7795 // OpBitcast can deal with pointers.
7796 if (out_type.pointer || in_type.pointer)
7797 return type_to_glsl(out_type);
7798
7799 if (out_type.basetype == in_type.basetype)
7800 return "";
7801
7802 assert(out_type.basetype != SPIRType::Boolean);
7803 assert(in_type.basetype != SPIRType::Boolean);
7804
7805 bool integral_cast = type_is_integral(out_type) && type_is_integral(in_type);
7806 bool same_size_cast = out_type.width == in_type.width;
7807
7808 // Trivial bitcast case, casts between integers.
7809 if (integral_cast && same_size_cast)
7810 return type_to_glsl(out_type);
7811
7812 // Catch-all 8-bit arithmetic casts (GL_EXT_shader_explicit_arithmetic_types).
7813 if (out_type.width == 8 && in_type.width >= 16 && integral_cast && in_type.vecsize == 1)
7814 return "unpack8";
7815 else if (in_type.width == 8 && out_type.width == 16 && integral_cast && out_type.vecsize == 1)
7816 return "pack16";
7817 else if (in_type.width == 8 && out_type.width == 32 && integral_cast && out_type.vecsize == 1)
7818 return "pack32";
7819
7820 // Floating <-> Integer special casts. Just have to enumerate all cases. :(
7821 // 16-bit, 32-bit and 64-bit floats.
7822 if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Float)
7823 {
7824 if (is_legacy_es())
7825 SPIRV_CROSS_THROW("Float -> Uint bitcast not supported on legacy ESSL.");
7826 else if (!options.es && options.version < 330)
7827 require_extension_internal("GL_ARB_shader_bit_encoding");
7828 return "floatBitsToUint";
7829 }
7830 else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Float)
7831 {
7832 if (is_legacy_es())
7833 SPIRV_CROSS_THROW("Float -> Int bitcast not supported on legacy ESSL.");
7834 else if (!options.es && options.version < 330)
7835 require_extension_internal("GL_ARB_shader_bit_encoding");
7836 return "floatBitsToInt";
7837 }
7838 else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::UInt)
7839 {
7840 if (is_legacy_es())
7841 SPIRV_CROSS_THROW("Uint -> Float bitcast not supported on legacy ESSL.");
7842 else if (!options.es && options.version < 330)
7843 require_extension_internal("GL_ARB_shader_bit_encoding");
7844 return "uintBitsToFloat";
7845 }
7846 else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::Int)
7847 {
7848 if (is_legacy_es())
7849 SPIRV_CROSS_THROW("Int -> Float bitcast not supported on legacy ESSL.");
7850 else if (!options.es && options.version < 330)
7851 require_extension_internal("GL_ARB_shader_bit_encoding");
7852 return "intBitsToFloat";
7853 }
7854
7855 else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Double)
7856 return "doubleBitsToInt64";
7857 else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::Double)
7858 return "doubleBitsToUint64";
7859 else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::Int64)
7860 return "int64BitsToDouble";
7861 else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::UInt64)
7862 return "uint64BitsToDouble";
7863 else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Half)
7864 return "float16BitsToInt16";
7865 else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::Half)
7866 return "float16BitsToUint16";
7867 else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::Short)
7868 return "int16BitsToFloat16";
7869 else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UShort)
7870 return "uint16BitsToFloat16";
7871
7872 // And finally, some even more special purpose casts.
7873 if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::UInt && in_type.vecsize == 2)
7874 return "packUint2x32";
7875 else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::UInt64 && out_type.vecsize == 2)
7876 return "unpackUint2x32";
7877 else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1)
7878 return "unpackFloat2x16";
7879 else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Half && in_type.vecsize == 2)
7880 return "packFloat2x16";
7881 else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Short && in_type.vecsize == 2)
7882 return "packInt2x16";
7883 else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Int && in_type.vecsize == 1)
7884 return "unpackInt2x16";
7885 else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::UShort && in_type.vecsize == 2)
7886 return "packUint2x16";
7887 else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1)
7888 return "unpackUint2x16";
7889 else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Short && in_type.vecsize == 4)
7890 return "packInt4x16";
7891 else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Int64 && in_type.vecsize == 1)
7892 return "unpackInt4x16";
7893 else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::UShort && in_type.vecsize == 4)
7894 return "packUint4x16";
7895 else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::UInt64 && in_type.vecsize == 1)
7896 return "unpackUint4x16";
7897
7898 return "";
7899 }
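
// Example return values (illustrative): a float -> uint bitcast yields
// "floatBitsToUint", so the caller emits "floatBitsToUint(x)"; a uvec2 -> uint64_t
// bitcast yields "packUint2x32"; and a same-width int <-> uint bitcast falls into
// the trivial integral path and returns the target type name, which ends up as a
// plain constructor cast such as "uint(x)".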
7900
7901 string CompilerGLSL::bitcast_glsl(const SPIRType &result_type, uint32_t argument)
7902 {
7903 auto op = bitcast_glsl_op(result_type, expression_type(argument));
7904 if (op.empty())
7905 return to_enclosed_unpacked_expression(argument);
7906 else
7907 return join(op, "(", to_unpacked_expression(argument), ")");
7908 }
7909
7910 std::string CompilerGLSL::bitcast_expression(SPIRType::BaseType target_type, uint32_t arg)
7911 {
7912 auto expr = to_expression(arg);
7913 auto &src_type = expression_type(arg);
7914 if (src_type.basetype != target_type)
7915 {
7916 auto target = src_type;
7917 target.basetype = target_type;
7918 expr = join(bitcast_glsl_op(target, src_type), "(", expr, ")");
7919 }
7920
7921 return expr;
7922 }
7923
7924 std::string CompilerGLSL::bitcast_expression(const SPIRType &target_type, SPIRType::BaseType expr_type,
7925 const std::string &expr)
7926 {
7927 if (target_type.basetype == expr_type)
7928 return expr;
7929
7930 auto src_type = target_type;
7931 src_type.basetype = expr_type;
7932 return join(bitcast_glsl_op(target_type, src_type), "(", expr, ")");
7933 }
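
// String-level sketch (hypothetical input expression "x"): with target_type Int and
// expr_type Float, this returns "floatBitsToInt(x)"; with expr_type UInt, the
// same-width integral rule applies and the result is simply "int(x)".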
7934
7935 string CompilerGLSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage)
7936 {
7937 switch (builtin)
7938 {
7939 case BuiltInPosition:
7940 return "gl_Position";
7941 case BuiltInPointSize:
7942 return "gl_PointSize";
7943 case BuiltInClipDistance:
7944 return "gl_ClipDistance";
7945 case BuiltInCullDistance:
7946 return "gl_CullDistance";
7947 case BuiltInVertexId:
7948 if (options.vulkan_semantics)
7949 SPIRV_CROSS_THROW("Cannot implement gl_VertexID in Vulkan GLSL. This shader was created "
7950 "with GL semantics.");
7951 return "gl_VertexID";
7952 case BuiltInInstanceId:
7953 if (options.vulkan_semantics)
7954 {
7955 auto model = get_entry_point().model;
7956 switch (model)
7957 {
7958 case spv::ExecutionModelIntersectionKHR:
7959 case spv::ExecutionModelAnyHitKHR:
7960 case spv::ExecutionModelClosestHitKHR:
7961 // gl_InstanceID is allowed in these shaders.
7962 break;
7963
7964 default:
7965 SPIRV_CROSS_THROW("Cannot implement gl_InstanceID in Vulkan GLSL. This shader was "
7966 "created with GL semantics.");
7967 }
7968 }
7969 if (!options.es && options.version < 140)
7970 {
7971 require_extension_internal("GL_ARB_draw_instanced");
7972 }
7973 return "gl_InstanceID";
7974 case BuiltInVertexIndex:
7975 if (options.vulkan_semantics)
7976 return "gl_VertexIndex";
7977 else
7978 return "gl_VertexID"; // gl_VertexID already has the base offset applied.
7979 case BuiltInInstanceIndex:
7980 if (options.vulkan_semantics)
7981 return "gl_InstanceIndex";
7982
7983 if (!options.es && options.version < 140)
7984 {
7985 require_extension_internal("GL_ARB_draw_instanced");
7986 }
7987
7988 if (options.vertex.support_nonzero_base_instance)
7989 {
7990 if (!options.vulkan_semantics)
7991 {
7992 // This is a soft-enable. We will opt-in to using gl_BaseInstanceARB if supported.
7993 require_extension_internal("GL_ARB_shader_draw_parameters");
7994 }
7995 return "(gl_InstanceID + SPIRV_Cross_BaseInstance)"; // ... but not gl_InstanceID.
7996 }
7997 else
7998 return "gl_InstanceID";
7999 case BuiltInPrimitiveId:
8000 if (storage == StorageClassInput && get_entry_point().model == ExecutionModelGeometry)
8001 return "gl_PrimitiveIDIn";
8002 else
8003 return "gl_PrimitiveID";
8004 case BuiltInInvocationId:
8005 return "gl_InvocationID";
8006 case BuiltInLayer:
8007 return "gl_Layer";
8008 case BuiltInViewportIndex:
8009 return "gl_ViewportIndex";
8010 case BuiltInTessLevelOuter:
8011 return "gl_TessLevelOuter";
8012 case BuiltInTessLevelInner:
8013 return "gl_TessLevelInner";
8014 case BuiltInTessCoord:
8015 return "gl_TessCoord";
8016 case BuiltInFragCoord:
8017 return "gl_FragCoord";
8018 case BuiltInPointCoord:
8019 return "gl_PointCoord";
8020 case BuiltInFrontFacing:
8021 return "gl_FrontFacing";
8022 case BuiltInFragDepth:
8023 return "gl_FragDepth";
8024 case BuiltInNumWorkgroups:
8025 return "gl_NumWorkGroups";
8026 case BuiltInWorkgroupSize:
8027 return "gl_WorkGroupSize";
8028 case BuiltInWorkgroupId:
8029 return "gl_WorkGroupID";
8030 case BuiltInLocalInvocationId:
8031 return "gl_LocalInvocationID";
8032 case BuiltInGlobalInvocationId:
8033 return "gl_GlobalInvocationID";
8034 case BuiltInLocalInvocationIndex:
8035 return "gl_LocalInvocationIndex";
8036 case BuiltInHelperInvocation:
8037 return "gl_HelperInvocation";
8038
8039 case BuiltInBaseVertex:
8040 if (options.es)
8041 SPIRV_CROSS_THROW("BaseVertex not supported in ES profile.");
8042
8043 if (options.vulkan_semantics)
8044 {
8045 if (options.version < 460)
8046 {
8047 require_extension_internal("GL_ARB_shader_draw_parameters");
8048 return "gl_BaseVertexARB";
8049 }
8050 return "gl_BaseVertex";
8051 }
8052 else
8053 {
8054 // On regular GL, this is soft-enabled and we emit ifdefs in code.
8055 require_extension_internal("GL_ARB_shader_draw_parameters");
8056 return "SPIRV_Cross_BaseVertex";
8057 }
8058 break;
8059
8060 case BuiltInBaseInstance:
8061 if (options.es)
8062 SPIRV_CROSS_THROW("BaseInstance not supported in ES profile.");
8063
8064 if (options.vulkan_semantics)
8065 {
8066 if (options.version < 460)
8067 {
8068 require_extension_internal("GL_ARB_shader_draw_parameters");
8069 return "gl_BaseInstanceARB";
8070 }
8071 return "gl_BaseInstance";
8072 }
8073 else
8074 {
8075 // On regular GL, this is soft-enabled and we emit ifdefs in code.
8076 require_extension_internal("GL_ARB_shader_draw_parameters");
8077 return "SPIRV_Cross_BaseInstance";
8078 }
8079 break;
8080
8081 case BuiltInDrawIndex:
8082 if (options.es)
8083 SPIRV_CROSS_THROW("DrawIndex not supported in ES profile.");
8084
8085 if (options.vulkan_semantics)
8086 {
8087 if (options.version < 460)
8088 {
8089 require_extension_internal("GL_ARB_shader_draw_parameters");
8090 return "gl_DrawIDARB";
8091 }
8092 return "gl_DrawID";
8093 }
8094 else
8095 {
8096 // On regular GL, this is soft-enabled and we emit ifdefs in code.
8097 require_extension_internal("GL_ARB_shader_draw_parameters");
8098 return "gl_DrawIDARB";
8099 }
8100 break;
8101
8102 case BuiltInSampleId:
8103 if (options.es && options.version < 320)
8104 require_extension_internal("GL_OES_sample_variables");
8105 if (!options.es && options.version < 400)
8106 SPIRV_CROSS_THROW("gl_SampleID not supported before GLSL 400.");
8107 return "gl_SampleID";
8108
8109 case BuiltInSampleMask:
8110 if (options.es && options.version < 320)
8111 require_extension_internal("GL_OES_sample_variables");
8112 if (!options.es && options.version < 400)
8113 SPIRV_CROSS_THROW("gl_SampleMask/gl_SampleMaskIn not supported before GLSL 400.");
8114
8115 if (storage == StorageClassInput)
8116 return "gl_SampleMaskIn";
8117 else
8118 return "gl_SampleMask";
8119
8120 case BuiltInSamplePosition:
8121 if (options.es && options.version < 320)
8122 require_extension_internal("GL_OES_sample_variables");
8123 if (!options.es && options.version < 400)
8124 SPIRV_CROSS_THROW("gl_SamplePosition not supported before GLSL 400.");
8125 return "gl_SamplePosition";
8126
8127 case BuiltInViewIndex:
8128 if (options.vulkan_semantics)
8129 {
8130 require_extension_internal("GL_EXT_multiview");
8131 return "gl_ViewIndex";
8132 }
8133 else
8134 {
8135 require_extension_internal("GL_OVR_multiview2");
8136 return "gl_ViewID_OVR";
8137 }
8138
8139 case BuiltInNumSubgroups:
8140 request_subgroup_feature(ShaderSubgroupSupportHelper::NumSubgroups);
8141 return "gl_NumSubgroups";
8142
8143 case BuiltInSubgroupId:
8144 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupID);
8145 return "gl_SubgroupID";
8146
8147 case BuiltInSubgroupSize:
8148 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupSize);
8149 return "gl_SubgroupSize";
8150
8151 case BuiltInSubgroupLocalInvocationId:
8152 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupInvocationID);
8153 return "gl_SubgroupInvocationID";
8154
8155 case BuiltInSubgroupEqMask:
8156 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask);
8157 return "gl_SubgroupEqMask";
8158
8159 case BuiltInSubgroupGeMask:
8160 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask);
8161 return "gl_SubgroupGeMask";
8162
8163 case BuiltInSubgroupGtMask:
8164 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask);
8165 return "gl_SubgroupGtMask";
8166
8167 case BuiltInSubgroupLeMask:
8168 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask);
8169 return "gl_SubgroupLeMask";
8170
8171 case BuiltInSubgroupLtMask:
8172 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask);
8173 return "gl_SubgroupLtMask";
8174
8175 case BuiltInLaunchIdKHR:
8176 return ray_tracing_is_khr ? "gl_LaunchIDEXT" : "gl_LaunchIDNV";
8177 case BuiltInLaunchSizeKHR:
8178 return ray_tracing_is_khr ? "gl_LaunchSizeEXT" : "gl_LaunchSizeNV";
8179 case BuiltInWorldRayOriginKHR:
8180 return ray_tracing_is_khr ? "gl_WorldRayOriginEXT" : "gl_WorldRayOriginNV";
8181 case BuiltInWorldRayDirectionKHR:
8182 return ray_tracing_is_khr ? "gl_WorldRayDirectionEXT" : "gl_WorldRayDirectionNV";
8183 case BuiltInObjectRayOriginKHR:
8184 return ray_tracing_is_khr ? "gl_ObjectRayOriginEXT" : "gl_ObjectRayOriginNV";
8185 case BuiltInObjectRayDirectionKHR:
8186 return ray_tracing_is_khr ? "gl_ObjectRayDirectionEXT" : "gl_ObjectRayDirectionNV";
8187 case BuiltInRayTminKHR:
8188 return ray_tracing_is_khr ? "gl_RayTminEXT" : "gl_RayTminNV";
8189 case BuiltInRayTmaxKHR:
8190 return ray_tracing_is_khr ? "gl_RayTmaxEXT" : "gl_RayTmaxNV";
8191 case BuiltInInstanceCustomIndexKHR:
8192 return ray_tracing_is_khr ? "gl_InstanceCustomIndexEXT" : "gl_InstanceCustomIndexNV";
8193 case BuiltInObjectToWorldKHR:
8194 return ray_tracing_is_khr ? "gl_ObjectToWorldEXT" : "gl_ObjectToWorldNV";
8195 case BuiltInWorldToObjectKHR:
8196 return ray_tracing_is_khr ? "gl_WorldToObjectEXT" : "gl_WorldToObjectNV";
8197 case BuiltInHitTNV:
8198 // gl_HitTEXT is an alias of RayTMax in KHR.
8199 return "gl_HitTNV";
8200 case BuiltInHitKindKHR:
8201 return ray_tracing_is_khr ? "gl_HitKindEXT" : "gl_HitKindNV";
8202 case BuiltInIncomingRayFlagsKHR:
8203 return ray_tracing_is_khr ? "gl_IncomingRayFlagsEXT" : "gl_IncomingRayFlagsNV";
8204
8205 case BuiltInBaryCoordNV:
8206 {
8207 if (options.es && options.version < 320)
8208 SPIRV_CROSS_THROW("gl_BaryCoordNV requires ESSL 320.");
8209 else if (!options.es && options.version < 450)
8210 SPIRV_CROSS_THROW("gl_BaryCoordNV requires GLSL 450.");
8211 require_extension_internal("GL_NV_fragment_shader_barycentric");
8212 return "gl_BaryCoordNV";
8213 }
8214
8215 case BuiltInBaryCoordNoPerspNV:
8216 {
8217 if (options.es && options.version < 320)
8218 SPIRV_CROSS_THROW("gl_BaryCoordNoPerspNV requires ESSL 320.");
8219 else if (!options.es && options.version < 450)
8220 SPIRV_CROSS_THROW("gl_BaryCoordNoPerspNV requires GLSL 450.");
8221 require_extension_internal("GL_NV_fragment_shader_barycentric");
8222 return "gl_BaryCoordNoPerspNV";
8223 }
8224
8225 case BuiltInFragStencilRefEXT:
8226 {
8227 if (!options.es)
8228 {
8229 require_extension_internal("GL_ARB_shader_stencil_export");
8230 return "gl_FragStencilRefARB";
8231 }
8232 else
8233 SPIRV_CROSS_THROW("Stencil export not supported in GLES.");
8234 }
8235
8236 case BuiltInDeviceIndex:
8237 if (!options.vulkan_semantics)
8238 SPIRV_CROSS_THROW("Need Vulkan semantics for device group support.");
8239 require_extension_internal("GL_EXT_device_group");
8240 return "gl_DeviceIndex";
8241
8242 default:
8243 return join("gl_BuiltIn_", convert_to_string(builtin));
8244 }
8245 }
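
// Example outputs (illustrative): BuiltInFragCoord always maps to "gl_FragCoord",
// while BuiltInInstanceIndex depends on the target profile: "gl_InstanceIndex"
// under Vulkan semantics, but "(gl_InstanceID + SPIRV_Cross_BaseInstance)" on
// plain GL when options.vertex.support_nonzero_base_instance is enabled.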
8246
8247 const char *CompilerGLSL::index_to_swizzle(uint32_t index)
8248 {
8249 switch (index)
8250 {
8251 case 0:
8252 return "x";
8253 case 1:
8254 return "y";
8255 case 2:
8256 return "z";
8257 case 3:
8258 return "w";
8259 default:
8260 SPIRV_CROSS_THROW("Swizzle index out of range");
8261 }
8262 }
8263
8264 void CompilerGLSL::access_chain_internal_append_index(std::string &expr, uint32_t /*base*/, const SPIRType *type,
8265 AccessChainFlags flags, bool & /*access_chain_is_arrayed*/,
8266 uint32_t index)
8267 {
8268 bool index_is_literal = (flags & ACCESS_CHAIN_INDEX_IS_LITERAL_BIT) != 0;
8269 bool register_expression_read = (flags & ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT) == 0;
8270
8271 expr += "[";
8272
8273 // If we are indexing into an array of SSBOs or UBOs, we need to index it with a non-uniform qualifier.
8274 bool nonuniform_index =
8275 has_decoration(index, DecorationNonUniformEXT) &&
8276 (has_decoration(type->self, DecorationBlock) || has_decoration(type->self, DecorationBufferBlock));
8277 if (nonuniform_index)
8278 {
8279 expr += backend.nonuniform_qualifier;
8280 expr += "(";
8281 }
8282
8283 if (index_is_literal)
8284 expr += convert_to_string(index);
8285 else
8286 expr += to_expression(index, register_expression_read);
8287
8288 if (nonuniform_index)
8289 expr += ")";
8290
8291 expr += "]";
8292 }
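
// Illustrative emission (assumed names): indexing a descriptor array of SSBOs with
// a NonUniform index produces "ssbos[nonuniformEXT(idx)]" with the default
// backend.nonuniform_qualifier, whereas a literal index simply yields "ssbos[3]".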
8293
8294 string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count,
8295 AccessChainFlags flags, AccessChainMeta *meta)
8296 {
8297 string expr;
8298
8299 bool index_is_literal = (flags & ACCESS_CHAIN_INDEX_IS_LITERAL_BIT) != 0;
8300 bool msb_is_id = (flags & ACCESS_CHAIN_LITERAL_MSB_FORCE_ID) != 0;
8301 bool chain_only = (flags & ACCESS_CHAIN_CHAIN_ONLY_BIT) != 0;
8302 bool ptr_chain = (flags & ACCESS_CHAIN_PTR_CHAIN_BIT) != 0;
8303 bool register_expression_read = (flags & ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT) == 0;
8304 bool flatten_member_reference = (flags & ACCESS_CHAIN_FLATTEN_ALL_MEMBERS_BIT) != 0;
8305
8306 if (!chain_only)
8307 {
8308 // We handle transpose explicitly, so don't resolve that here.
8309 auto *e = maybe_get<SPIRExpression>(base);
8310 bool old_transpose = e && e->need_transpose;
8311 if (e)
8312 e->need_transpose = false;
8313 expr = to_enclosed_expression(base, register_expression_read);
8314 if (e)
8315 e->need_transpose = old_transpose;
8316 }
8317
8318 // Start traversing type hierarchy at the proper non-pointer types,
8319 // but keep type_id referencing the original pointer for use below.
8320 uint32_t type_id = expression_type_id(base);
8321
8322 if (!backend.native_pointers)
8323 {
8324 if (ptr_chain)
8325 SPIRV_CROSS_THROW("Backend does not support native pointers and does not support OpPtrAccessChain.");
8326
8327 // Wrapped buffer reference pointer types will need to poke into the internal "value" member before
8328 // continuing the access chain.
8329 if (should_dereference(base))
8330 {
8331 auto &type = get<SPIRType>(type_id);
8332 expr = dereference_expression(type, expr);
8333 }
8334 }
8335
8336 const auto *type = &get_pointee_type(type_id);
8337
8338 bool access_chain_is_arrayed = expr.find_first_of('[') != string::npos;
8339 bool row_major_matrix_needs_conversion = is_non_native_row_major_matrix(base);
8340 bool is_packed = has_extended_decoration(base, SPIRVCrossDecorationPhysicalTypePacked);
8341 uint32_t physical_type = get_extended_decoration(base, SPIRVCrossDecorationPhysicalTypeID);
8342 bool is_invariant = has_decoration(base, DecorationInvariant);
8343 bool pending_array_enclose = false;
8344 bool dimension_flatten = false;
8345
8346 const auto append_index = [&](uint32_t index, bool is_literal) {
8347 AccessChainFlags mod_flags = flags;
8348 if (!is_literal)
8349 mod_flags &= ~ACCESS_CHAIN_INDEX_IS_LITERAL_BIT;
8350 access_chain_internal_append_index(expr, base, type, mod_flags, access_chain_is_arrayed, index);
8351 };
8352
8353 for (uint32_t i = 0; i < count; i++)
8354 {
8355 uint32_t index = indices[i];
8356
8357 bool is_literal = index_is_literal;
8358 if (is_literal && msb_is_id && (index >> 31u) != 0u)
8359 {
8360 is_literal = false;
8361 index &= 0x7fffffffu;
8362 }
8363
8364 // Pointer chains
8365 if (ptr_chain && i == 0)
8366 {
8367 // If we are flattening multidimensional arrays, only create opening bracket on first
8368 // array index.
8369 if (options.flatten_multidimensional_arrays)
8370 {
8371 dimension_flatten = type->array.size() >= 1;
8372 pending_array_enclose = dimension_flatten;
8373 if (pending_array_enclose)
8374 expr += "[";
8375 }
8376
8377 if (options.flatten_multidimensional_arrays && dimension_flatten)
8378 {
8379 // If we are flattening multidimensional arrays, do manual stride computation.
8380 if (is_literal)
8381 expr += convert_to_string(index);
8382 else
8383 expr += to_enclosed_expression(index, register_expression_read);
8384
8385 for (auto j = uint32_t(type->array.size()); j; j--)
8386 {
8387 expr += " * ";
8388 expr += enclose_expression(to_array_size(*type, j - 1));
8389 }
8390
8391 if (type->array.empty())
8392 pending_array_enclose = false;
8393 else
8394 expr += " + ";
8395
8396 if (!pending_array_enclose)
8397 expr += "]";
8398 }
8399 else
8400 {
8401 append_index(index, is_literal);
8402 }
8403
8404 if (type->basetype == SPIRType::ControlPointArray)
8405 {
8406 type_id = type->parent_type;
8407 type = &get<SPIRType>(type_id);
8408 }
8409
8410 access_chain_is_arrayed = true;
8411 }
8412 // Arrays
8413 else if (!type->array.empty())
8414 {
8415 // If we are flattening multidimensional arrays, only create opening bracket on first
8416 // array index.
8417 if (options.flatten_multidimensional_arrays && !pending_array_enclose)
8418 {
8419 dimension_flatten = type->array.size() > 1;
8420 pending_array_enclose = dimension_flatten;
8421 if (pending_array_enclose)
8422 expr += "[";
8423 }
8424
8425 assert(type->parent_type);
8426
8427 auto *var = maybe_get<SPIRVariable>(base);
8428 if (backend.force_gl_in_out_block && i == 0 && var && is_builtin_variable(*var) &&
8429 !has_decoration(type->self, DecorationBlock))
8430 {
8431 // This deals with scenarios for tesc/geom where arrays of gl_Position[] are declared.
8432 // Normally, these variables live in blocks when compiled from GLSL,
8433 // but HLSL seems to just emit straight arrays here.
8434 // We must pretend this access goes through gl_in/gl_out arrays
8435 // to be able to access certain builtins as arrays.
8436 auto builtin = ir.meta[base].decoration.builtin_type;
8437 switch (builtin)
8438 {
8439 // case BuiltInCullDistance: // These are already arrays, need to figure out rules for these in tess/geom.
8440 // case BuiltInClipDistance:
8441 case BuiltInPosition:
8442 case BuiltInPointSize:
8443 if (var->storage == StorageClassInput)
8444 expr = join("gl_in[", to_expression(index, register_expression_read), "].", expr);
8445 else if (var->storage == StorageClassOutput)
8446 expr = join("gl_out[", to_expression(index, register_expression_read), "].", expr);
8447 else
8448 append_index(index, is_literal);
8449 break;
8450
8451 default:
8452 append_index(index, is_literal);
8453 break;
8454 }
8455 }
8456 else if (options.flatten_multidimensional_arrays && dimension_flatten)
8457 {
8458 // If we are flattening multidimensional arrays, do manual stride computation.
8459 auto &parent_type = get<SPIRType>(type->parent_type);
8460
8461 if (is_literal)
8462 expr += convert_to_string(index);
8463 else
8464 expr += to_enclosed_expression(index, register_expression_read);
8465
8466 for (auto j = uint32_t(parent_type.array.size()); j; j--)
8467 {
8468 expr += " * ";
8469 expr += enclose_expression(to_array_size(parent_type, j - 1));
8470 }
8471
8472 if (parent_type.array.empty())
8473 pending_array_enclose = false;
8474 else
8475 expr += " + ";
8476
8477 if (!pending_array_enclose)
8478 expr += "]";
8479 }
8480 // Some builtins are arrays in SPIR-V but not in other languages, e.g. gl_SampleMask[] is an array in SPIR-V but not in Metal.
8481 // By throwing away the index, we imply the index was 0, which it must be for gl_SampleMask.
8482 else if (!builtin_translates_to_nonarray(BuiltIn(get_decoration(base, DecorationBuiltIn))))
8483 {
8484 append_index(index, is_literal);
8485 }
8486
8487 type_id = type->parent_type;
8488 type = &get<SPIRType>(type_id);
8489
8490 access_chain_is_arrayed = true;
8491 }
8492 // For structs, the index refers to a constant, which indexes into the members.
8493 // We also check if this member is a builtin, since we then replace the entire expression with the builtin one.
8494 else if (type->basetype == SPIRType::Struct)
8495 {
8496 if (!is_literal)
8497 index = evaluate_constant_u32(index);
8498
8499 if (index >= type->member_types.size())
8500 SPIRV_CROSS_THROW("Member index is out of bounds!");
8501
8502 BuiltIn builtin;
8503 if (is_member_builtin(*type, index, &builtin))
8504 {
8505 if (access_chain_is_arrayed)
8506 {
8507 expr += ".";
8508 expr += builtin_to_glsl(builtin, type->storage);
8509 }
8510 else
8511 expr = builtin_to_glsl(builtin, type->storage);
8512 }
8513 else
8514 {
8515 // If the member has a qualified name, use it as the entire chain
8516 string qual_mbr_name = get_member_qualified_name(type_id, index);
8517 if (!qual_mbr_name.empty())
8518 expr = qual_mbr_name;
8519 else if (flatten_member_reference)
8520 expr += join("_", to_member_name(*type, index));
8521 else
8522 expr += to_member_reference(base, *type, index, ptr_chain);
8523 }
8524
8525 if (has_member_decoration(type->self, index, DecorationInvariant))
8526 is_invariant = true;
8527
8528 is_packed = member_is_packed_physical_type(*type, index);
8529 if (member_is_remapped_physical_type(*type, index))
8530 physical_type = get_extended_member_decoration(type->self, index, SPIRVCrossDecorationPhysicalTypeID);
8531 else
8532 physical_type = 0;
8533
8534 row_major_matrix_needs_conversion = member_is_non_native_row_major_matrix(*type, index);
8535 type = &get<SPIRType>(type->member_types[index]);
8536 }
8537 // Matrix -> Vector
8538 else if (type->columns > 1)
8539 {
8540 // If we have a row-major matrix here, we need to defer any transpose in case this access chain
8541 // is used to store a column. We can resolve it right here and now if we access a scalar directly,
8542 // by flipping indexing order of the matrix.
8543
8544 expr += "[";
8545 if (is_literal)
8546 expr += convert_to_string(index);
8547 else
8548 expr += to_expression(index, register_expression_read);
8549 expr += "]";
8550
8551 type_id = type->parent_type;
8552 type = &get<SPIRType>(type_id);
8553 }
8554 // Vector -> Scalar
8555 else if (type->vecsize > 1)
8556 {
8557 string deferred_index;
8558 if (row_major_matrix_needs_conversion)
8559 {
8560 // Flip indexing order.
8561 auto column_index = expr.find_last_of('[');
8562 if (column_index != string::npos)
8563 {
8564 deferred_index = expr.substr(column_index);
8565 expr.resize(column_index);
8566 }
8567 }
8568
8569 // Internally, access chain implementation can also be used on composites,
8570 // ignore scalar access workarounds in this case.
8571 StorageClass effective_storage;
8572 if (expression_type(base).pointer)
8573 effective_storage = get_expression_effective_storage_class(base);
8574 else
8575 effective_storage = StorageClassGeneric;
8576
8577 if (!row_major_matrix_needs_conversion)
8578 {
8579 // On some backends, we might not be able to safely access individual scalars in a vector.
8580 // To work around this, we might have to cast the access chain reference to something which can,
8581 // like a pointer to scalar, which we can then index into.
8582 prepare_access_chain_for_scalar_access(expr, get<SPIRType>(type->parent_type), effective_storage,
8583 is_packed);
8584 }
8585
8586 if (is_literal && !is_packed && !row_major_matrix_needs_conversion)
8587 {
8588 expr += ".";
8589 expr += index_to_swizzle(index);
8590 }
8591 else if (ir.ids[index].get_type() == TypeConstant && !is_packed && !row_major_matrix_needs_conversion)
8592 {
8593 auto &c = get<SPIRConstant>(index);
8594 if (c.specialization)
8595 {
8596 // If the index is a spec constant, we cannot turn extract into a swizzle.
8597 expr += join("[", to_expression(index), "]");
8598 }
8599 else
8600 {
8601 expr += ".";
8602 expr += index_to_swizzle(c.scalar());
8603 }
8604 }
8605 else if (is_literal)
8606 {
8607 // For packed vectors, we can only access them as an array, not by swizzle.
8608 expr += join("[", index, "]");
8609 }
8610 else
8611 {
8612 expr += "[";
8613 expr += to_expression(index, register_expression_read);
8614 expr += "]";
8615 }
8616
8617 if (row_major_matrix_needs_conversion)
8618 {
8619 prepare_access_chain_for_scalar_access(expr, get<SPIRType>(type->parent_type), effective_storage,
8620 is_packed);
8621 }
8622
8623 expr += deferred_index;
8624 row_major_matrix_needs_conversion = false;
8625
8626 is_packed = false;
8627 physical_type = 0;
8628 type_id = type->parent_type;
8629 type = &get<SPIRType>(type_id);
8630 }
8631 else if (!backend.allow_truncated_access_chain)
8632 SPIRV_CROSS_THROW("Cannot subdivide a scalar value!");
8633 }
8634
8635 if (pending_array_enclose)
8636 {
8637 SPIRV_CROSS_THROW("Flattening of multidimensional arrays were enabled, "
8638 "but the access chain was terminated in the middle of a multidimensional array. "
8639 "This is not supported.");
8640 }
8641
8642 if (meta)
8643 {
8644 meta->need_transpose = row_major_matrix_needs_conversion;
8645 meta->storage_is_packed = is_packed;
8646 meta->storage_is_invariant = is_invariant;
8647 meta->storage_physical_type = physical_type;
8648 }
8649
8650 return expr;
8651 }
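
// Worked example for the array-flattening path above (hypothetical declaration
// "float a[4][3]" with options.flatten_multidimensional_arrays enabled): the access
// chain a[i][j] is emitted against the flattened array as "a[i * 3 + j]". The
// opening bracket is held back via pending_array_enclose until the innermost
// dimension closes the index expression.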
8652
8653 void CompilerGLSL::prepare_access_chain_for_scalar_access(std::string &, const SPIRType &, spv::StorageClass, bool &)
8654 {
8655 }
8656
8657 string CompilerGLSL::to_flattened_struct_member(const string &basename, const SPIRType &type, uint32_t index)
8658 {
8659 auto ret = join(basename, "_", to_member_name(type, index));
8660 ParsedIR::sanitize_underscores(ret);
8661 return ret;
8662 }
8663
8664 string CompilerGLSL::access_chain(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type,
8665 AccessChainMeta *meta, bool ptr_chain)
8666 {
8667 if (flattened_buffer_blocks.count(base))
8668 {
8669 uint32_t matrix_stride = 0;
8670 uint32_t array_stride = 0;
8671 bool need_transpose = false;
8672 flattened_access_chain_offset(expression_type(base), indices, count, 0, 16, &need_transpose, &matrix_stride,
8673 &array_stride, ptr_chain);
8674
8675 if (meta)
8676 {
8677 meta->need_transpose = target_type.columns > 1 && need_transpose;
8678 meta->storage_is_packed = false;
8679 }
8680
8681 return flattened_access_chain(base, indices, count, target_type, 0, matrix_stride, array_stride,
8682 need_transpose);
8683 }
8684 else if (flattened_structs.count(base) && count > 0)
8685 {
8686 AccessChainFlags flags = ACCESS_CHAIN_CHAIN_ONLY_BIT | ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT;
8687 if (ptr_chain)
8688 flags |= ACCESS_CHAIN_PTR_CHAIN_BIT;
8689
8690 if (flattened_structs[base])
8691 {
8692 flags |= ACCESS_CHAIN_FLATTEN_ALL_MEMBERS_BIT;
8693 if (meta)
8694 meta->flattened_struct = target_type.basetype == SPIRType::Struct;
8695 }
8696
8697 auto chain = access_chain_internal(base, indices, count, flags, nullptr).substr(1);
8698 if (meta)
8699 {
8700 meta->need_transpose = false;
8701 meta->storage_is_packed = false;
8702 }
8703
8704 auto basename = to_flattened_access_chain_expression(base);
8705 auto ret = join(basename, "_", chain);
8706 ParsedIR::sanitize_underscores(ret);
8707 return ret;
8708 }
8709 else
8710 {
8711 AccessChainFlags flags = ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT;
8712 if (ptr_chain)
8713 flags |= ACCESS_CHAIN_PTR_CHAIN_BIT;
8714 return access_chain_internal(base, indices, count, flags, meta);
8715 }
8716 }
8717
8718 string CompilerGLSL::load_flattened_struct(const string &basename, const SPIRType &type)
8719 {
8720 auto expr = type_to_glsl_constructor(type);
8721 expr += '(';
8722
8723 for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
8724 {
8725 if (i)
8726 expr += ", ";
8727
8728 auto &member_type = get<SPIRType>(type.member_types[i]);
8729 if (member_type.basetype == SPIRType::Struct)
8730 expr += load_flattened_struct(to_flattened_struct_member(basename, type, i), member_type);
8731 else
8732 expr += to_flattened_struct_member(basename, type, i);
8733 }
8734 expr += ')';
8735 return expr;
8736 }
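
// Sketch of the reconstructed value (hypothetical flattened block member
// "Light light" with fields "vec3 pos" and "vec4 color"): loading the struct with
// basename "UBO_light" emits
//   Light(UBO_light_pos, UBO_light_color)
// since every leaf member was flattened to a standalone variable.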
8737
8738 std::string CompilerGLSL::to_flattened_access_chain_expression(uint32_t id)
8739 {
8740 // Do not use to_expression as that will unflatten access chains.
8741 string basename;
8742 if (const auto *var = maybe_get<SPIRVariable>(id))
8743 basename = to_name(var->self);
8744 else if (const auto *expr = maybe_get<SPIRExpression>(id))
8745 basename = expr->expression;
8746 else
8747 basename = to_expression(id);
8748
8749 return basename;
8750 }
8751
8752 void CompilerGLSL::store_flattened_struct(const string &basename, uint32_t rhs_id, const SPIRType &type,
8753 const SmallVector<uint32_t> &indices)
8754 {
8755 SmallVector<uint32_t> sub_indices = indices;
8756 sub_indices.push_back(0);
8757
8758 auto *member_type = &type;
8759 for (auto &index : indices)
8760 member_type = &get<SPIRType>(member_type->member_types[index]);
8761
8762 for (uint32_t i = 0; i < uint32_t(member_type->member_types.size()); i++)
8763 {
8764 sub_indices.back() = i;
8765 auto lhs = join(basename, "_", to_member_name(*member_type, i));
8766 ParsedIR::sanitize_underscores(lhs);
8767
8768 if (get<SPIRType>(member_type->member_types[i]).basetype == SPIRType::Struct)
8769 {
8770 store_flattened_struct(lhs, rhs_id, type, sub_indices);
8771 }
8772 else
8773 {
8774 auto rhs = to_expression(rhs_id) + to_multi_member_reference(type, sub_indices);
8775 statement(lhs, " = ", rhs, ";");
8776 }
8777 }
8778 }
8779
8780 void CompilerGLSL::store_flattened_struct(uint32_t lhs_id, uint32_t value)
8781 {
8782 auto &type = expression_type(lhs_id);
8783 auto basename = to_flattened_access_chain_expression(lhs_id);
8784 store_flattened_struct(basename, value, type, {});
8785 }
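
// The store direction emits one assignment per leaf member. Illustrative output for
// the hypothetical "Light" struct above, storing a temporary "_tmp":
//   UBO_light_pos = _tmp.pos;
//   UBO_light_color = _tmp.color;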
8786
8787 std::string CompilerGLSL::flattened_access_chain(uint32_t base, const uint32_t *indices, uint32_t count,
8788 const SPIRType &target_type, uint32_t offset, uint32_t matrix_stride,
8789 uint32_t /* array_stride */, bool need_transpose)
8790 {
8791 if (!target_type.array.empty())
8792 SPIRV_CROSS_THROW("Access chains that result in an array can not be flattened");
8793 else if (target_type.basetype == SPIRType::Struct)
8794 return flattened_access_chain_struct(base, indices, count, target_type, offset);
8795 else if (target_type.columns > 1)
8796 return flattened_access_chain_matrix(base, indices, count, target_type, offset, matrix_stride, need_transpose);
8797 else
8798 return flattened_access_chain_vector(base, indices, count, target_type, offset, matrix_stride, need_transpose);
8799 }
8800
8801 std::string CompilerGLSL::flattened_access_chain_struct(uint32_t base, const uint32_t *indices, uint32_t count,
8802 const SPIRType &target_type, uint32_t offset)
8803 {
8804 std::string expr;
8805
8806 expr += type_to_glsl_constructor(target_type);
8807 expr += "(";
8808
8809 for (uint32_t i = 0; i < uint32_t(target_type.member_types.size()); ++i)
8810 {
8811 if (i != 0)
8812 expr += ", ";
8813
8814 const SPIRType &member_type = get<SPIRType>(target_type.member_types[i]);
8815 uint32_t member_offset = type_struct_member_offset(target_type, i);
8816
8817 // The access chain terminates at the struct, so we need to find matrix strides and row-major information
8818 // ahead of time.
8819 bool need_transpose = false;
8820 uint32_t matrix_stride = 0;
8821 if (member_type.columns > 1)
8822 {
8823 need_transpose = combined_decoration_for_member(target_type, i).get(DecorationRowMajor);
8824 matrix_stride = type_struct_member_matrix_stride(target_type, i);
8825 }
8826
8827 auto tmp = flattened_access_chain(base, indices, count, member_type, offset + member_offset, matrix_stride,
8828 0 /* array_stride */, need_transpose);
8829
8830 // Cannot forward transpositions, so resolve them here.
8831 if (need_transpose)
8832 expr += convert_row_major_matrix(tmp, member_type, 0, false);
8833 else
8834 expr += tmp;
8835 }
8836
8837 expr += ")";
8838
8839 return expr;
8840 }
8841
8842 std::string CompilerGLSL::flattened_access_chain_matrix(uint32_t base, const uint32_t *indices, uint32_t count,
8843 const SPIRType &target_type, uint32_t offset,
8844 uint32_t matrix_stride, bool need_transpose)
8845 {
8846 assert(matrix_stride);
8847 SPIRType tmp_type = target_type;
8848 if (need_transpose)
8849 swap(tmp_type.vecsize, tmp_type.columns);
8850
8851 std::string expr;
8852
8853 expr += type_to_glsl_constructor(tmp_type);
8854 expr += "(";
8855
8856 for (uint32_t i = 0; i < tmp_type.columns; i++)
8857 {
8858 if (i != 0)
8859 expr += ", ";
8860
8861 expr += flattened_access_chain_vector(base, indices, count, tmp_type, offset + i * matrix_stride, matrix_stride,
8862 /* need_transpose= */ false);
8863 }
8864
8865 expr += ")";
8866
8867 return expr;
8868 }
8869
8870 std::string CompilerGLSL::flattened_access_chain_vector(uint32_t base, const uint32_t *indices, uint32_t count,
8871 const SPIRType &target_type, uint32_t offset,
8872 uint32_t matrix_stride, bool need_transpose)
8873 {
8874 auto result = flattened_access_chain_offset(expression_type(base), indices, count, offset, 16);
8875
8876 auto buffer_name = to_name(expression_type(base).self);
8877
8878 if (need_transpose)
8879 {
8880 std::string expr;
8881
8882 if (target_type.vecsize > 1)
8883 {
8884 expr += type_to_glsl_constructor(target_type);
8885 expr += "(";
8886 }
8887
8888 for (uint32_t i = 0; i < target_type.vecsize; ++i)
8889 {
8890 if (i != 0)
8891 expr += ", ";
8892
8893 uint32_t component_offset = result.second + i * matrix_stride;
8894
8895 assert(component_offset % (target_type.width / 8) == 0);
8896 uint32_t index = component_offset / (target_type.width / 8);
8897
8898 expr += buffer_name;
8899 expr += "[";
8900 expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... that is either empty or ends with a +
8901 expr += convert_to_string(index / 4);
8902 expr += "]";
8903
8904 expr += vector_swizzle(1, index % 4);
8905 }
8906
8907 if (target_type.vecsize > 1)
8908 {
8909 expr += ")";
8910 }
8911
8912 return expr;
8913 }
8914 else
8915 {
8916 assert(result.second % (target_type.width / 8) == 0);
8917 uint32_t index = result.second / (target_type.width / 8);
8918
8919 std::string expr;
8920
8921 expr += buffer_name;
8922 expr += "[";
8923 expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... that is either empty or ends with a +
8924 expr += convert_to_string(index / 4);
8925 expr += "]";
8926
8927 expr += vector_swizzle(target_type.vecsize, index % 4);
8928
8929 return expr;
8930 }
8931 }
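
// Worked example (assuming the buffer was flattened to "uniform vec4 UBO[N]"):
// reading a vec3 at flattened byte offset 20 gives component index 20 / 4 = 5,
// so the emitted expression is "UBO[1].yzw" (word 5 starts at element 1,
// component y).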
8932
8933 std::pair<std::string, uint32_t> CompilerGLSL::flattened_access_chain_offset(
8934 const SPIRType &basetype, const uint32_t *indices, uint32_t count, uint32_t offset, uint32_t word_stride,
8935 bool *need_transpose, uint32_t *out_matrix_stride, uint32_t *out_array_stride, bool ptr_chain)
8936 {
8937 // Start traversing type hierarchy at the proper non-pointer types.
8938 const auto *type = &get_pointee_type(basetype);
8939
8940 std::string expr;
8941
8942 // Inherit matrix information in case we are access chaining a vector which might have come from a row major layout.
8943 bool row_major_matrix_needs_conversion = need_transpose ? *need_transpose : false;
8944 uint32_t matrix_stride = out_matrix_stride ? *out_matrix_stride : 0;
8945 uint32_t array_stride = out_array_stride ? *out_array_stride : 0;
8946
8947 for (uint32_t i = 0; i < count; i++)
8948 {
8949 uint32_t index = indices[i];
8950
8951 // Pointers
8952 if (ptr_chain && i == 0)
8953 {
8954 // Here, the pointer type will be decorated with an array stride.
8955 array_stride = get_decoration(basetype.self, DecorationArrayStride);
8956 if (!array_stride)
8957 SPIRV_CROSS_THROW("SPIR-V does not define ArrayStride for buffer block.");
8958
8959 auto *constant = maybe_get<SPIRConstant>(index);
8960 if (constant)
8961 {
8962 // Constant array access.
8963 offset += constant->scalar() * array_stride;
8964 }
8965 else
8966 {
8967 // Dynamic array access.
8968 if (array_stride % word_stride)
8969 {
8970 SPIRV_CROSS_THROW("Array stride for dynamic indexing must be divisible by the size "
8971 "of a 4-component vector. "
8972 "Likely culprit here is a float or vec2 array inside a push "
8973 "constant block which is std430. "
8974 "This cannot be flattened. Try using std140 layout instead.");
8975 }
8976
8977 expr += to_enclosed_expression(index);
8978 expr += " * ";
8979 expr += convert_to_string(array_stride / word_stride);
8980 expr += " + ";
8981 }
8982 }
8983 // Arrays
8984 else if (!type->array.empty())
8985 {
8986 auto *constant = maybe_get<SPIRConstant>(index);
8987 if (constant)
8988 {
8989 // Constant array access.
8990 offset += constant->scalar() * array_stride;
8991 }
8992 else
8993 {
8994 // Dynamic array access.
8995 if (array_stride % word_stride)
8996 {
8997 SPIRV_CROSS_THROW("Array stride for dynamic indexing must be divisible by the size "
8998 "of a 4-component vector. "
8999 "Likely culprit here is a float or vec2 array inside a push "
9000 "constant block which is std430. "
9001 "This cannot be flattened. Try using std140 layout instead.");
9002 }
9003
9004 expr += to_enclosed_expression(index, false);
9005 expr += " * ";
9006 expr += convert_to_string(array_stride / word_stride);
9007 expr += " + ";
9008 }
9009
9010 uint32_t parent_type = type->parent_type;
9011 type = &get<SPIRType>(parent_type);
9012
9013 if (!type->array.empty())
9014 array_stride = get_decoration(parent_type, DecorationArrayStride);
9015 }
9016 // For structs, the index refers to a constant, which indexes into the members.
9017 // We also check if this member is a builtin, since we then replace the entire expression with the builtin one.
9018 else if (type->basetype == SPIRType::Struct)
9019 {
9020 index = evaluate_constant_u32(index);
9021
9022 if (index >= type->member_types.size())
9023 SPIRV_CROSS_THROW("Member index is out of bounds!");
9024
9025 offset += type_struct_member_offset(*type, index);
9026
9027 auto &struct_type = *type;
9028 type = &get<SPIRType>(type->member_types[index]);
9029
9030 if (type->columns > 1)
9031 {
9032 matrix_stride = type_struct_member_matrix_stride(struct_type, index);
9033 row_major_matrix_needs_conversion =
9034 combined_decoration_for_member(struct_type, index).get(DecorationRowMajor);
9035 }
9036 else
9037 row_major_matrix_needs_conversion = false;
9038
9039 if (!type->array.empty())
9040 array_stride = type_struct_member_array_stride(struct_type, index);
9041 }
9042 // Matrix -> Vector
9043 else if (type->columns > 1)
9044 {
9045 auto *constant = maybe_get<SPIRConstant>(index);
9046 if (constant)
9047 {
9048 index = evaluate_constant_u32(index);
9049 offset += index * (row_major_matrix_needs_conversion ? (type->width / 8) : matrix_stride);
9050 }
9051 else
9052 {
9053 uint32_t indexing_stride = row_major_matrix_needs_conversion ? (type->width / 8) : matrix_stride;
9054 // Dynamic array access.
9055 if (indexing_stride % word_stride)
9056 {
9057 SPIRV_CROSS_THROW("Matrix stride for dynamic indexing must be divisible by the size of a "
9058 "4-component vector. "
9059 "Likely culprit here is a row-major matrix being accessed dynamically. "
9060 "This cannot be flattened. Try using std140 layout instead.");
9061 }
9062
9063 expr += to_enclosed_expression(index, false);
9064 expr += " * ";
9065 expr += convert_to_string(indexing_stride / word_stride);
9066 expr += " + ";
9067 }
9068
9069 type = &get<SPIRType>(type->parent_type);
9070 }
9071 // Vector -> Scalar
9072 else if (type->vecsize > 1)
9073 {
9074 auto *constant = maybe_get<SPIRConstant>(index);
9075 if (constant)
9076 {
9077 index = evaluate_constant_u32(index);
9078 offset += index * (row_major_matrix_needs_conversion ? matrix_stride : (type->width / 8));
9079 }
9080 else
9081 {
9082 uint32_t indexing_stride = row_major_matrix_needs_conversion ? matrix_stride : (type->width / 8);
9083
9084 // Dynamic array access.
9085 if (indexing_stride % word_stride)
9086 {
9087 SPIRV_CROSS_THROW("Stride for dynamic vector indexing must be divisible by the "
9088 "size of a 4-component vector. "
9089 "This cannot be flattened in legacy targets.");
9090 }
9091
9092 expr += to_enclosed_expression(index, false);
9093 expr += " * ";
9094 expr += convert_to_string(indexing_stride / word_stride);
9095 expr += " + ";
9096 }
9097
9098 type = &get<SPIRType>(type->parent_type);
9099 }
9100 else
9101 SPIRV_CROSS_THROW("Cannot subdivide a scalar value!");
9102 }
9103
9104 if (need_transpose)
9105 *need_transpose = row_major_matrix_needs_conversion;
9106 if (out_matrix_stride)
9107 *out_matrix_stride = matrix_stride;
9108 if (out_array_stride)
9109 *out_array_stride = array_stride;
9110
9111 return std::make_pair(expr, offset);
9112 }
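
// Example of the (expression, offset) pair returned here (illustrative): for a
// struct member at byte offset 64 holding an array with ArrayStride 16, indexed
// dynamically by "i" against a word_stride of 16, the result is ("i * 1 + ", 64);
// the caller appends the constant word index it derives from the byte offset.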
9113
9114 bool CompilerGLSL::should_dereference(uint32_t id)
9115 {
9116 const auto &type = expression_type(id);
9117 // Non-pointer expressions don't need to be dereferenced.
9118 if (!type.pointer)
9119 return false;
9120
9121 // Handles shouldn't be dereferenced either.
9122 if (!expression_is_lvalue(id))
9123 return false;
9124
9125 // If id is a variable but not a phi variable, we should not dereference it.
9126 if (auto *var = maybe_get<SPIRVariable>(id))
9127 return var->phi_variable;
9128
9129 // If id is an access chain, we should not dereference it.
9130 if (auto *expr = maybe_get<SPIRExpression>(id))
9131 return !expr->access_chain;
9132
9133 // Otherwise, we should dereference this pointer expression.
9134 return true;
9135 }
9136
9137 bool CompilerGLSL::should_forward(uint32_t id) const
9138 {
9139 // If id is a variable we will try to forward it regardless of force_temporary check below
9140 // This is important because otherwise we'll get local sampler copies (highp sampler2D foo = bar) that are invalid in OpenGL GLSL
9141 auto *var = maybe_get<SPIRVariable>(id);
9142 if (var && var->forwardable)
9143 return true;
9144
9145 // For debugging emit temporary variables for all expressions
9146 if (options.force_temporary)
9147 return false;
9148
9149 // Immutable expression can always be forwarded.
9150 if (is_immutable(id))
9151 return true;
9152
9153 return false;
9154 }
9155
9156 bool CompilerGLSL::should_suppress_usage_tracking(uint32_t id) const
9157 {
9158 // Used only by opcodes which don't do any real "work", they just swizzle data in some fashion.
9159 return !expression_is_forwarded(id) || expression_suppresses_usage_tracking(id);
9160 }
9161
9162 void CompilerGLSL::track_expression_read(uint32_t id)
9163 {
9164 switch (ir.ids[id].get_type())
9165 {
9166 case TypeExpression:
9167 {
9168 auto &e = get<SPIRExpression>(id);
9169 for (auto implied_read : e.implied_read_expressions)
9170 track_expression_read(implied_read);
9171 break;
9172 }
9173
9174 case TypeAccessChain:
9175 {
9176 auto &e = get<SPIRAccessChain>(id);
9177 for (auto implied_read : e.implied_read_expressions)
9178 track_expression_read(implied_read);
9179 break;
9180 }
9181
9182 default:
9183 break;
9184 }
9185
9186 // If we try to read a forwarded temporary more than once we will stamp out possibly complex code twice.
9187 // In this case, it's better to just bind the complex expression to the temporary and read that temporary twice.
9188 if (expression_is_forwarded(id) && !expression_suppresses_usage_tracking(id))
9189 {
9190 auto &v = expression_usage_counts[id];
9191 v++;
9192
9193 // If we create an expression outside a loop,
9194 // but access it inside a loop, we're implicitly reading it multiple times.
9195 // If the expression in question is expensive, we should hoist it out to avoid relying on loop-invariant code motion
9196 // working inside the backend compiler.
9197 if (expression_read_implies_multiple_reads(id))
9198 v++;
9199
9200 if (v >= 2)
9201 {
9202 //if (v == 2)
9203 // fprintf(stderr, "ID %u was forced to temporary due to more than 1 expression use!\n", id);
9204
9205 forced_temporaries.insert(id);
9206 // Force a recompile after this pass to avoid forwarding this variable.
9207 force_recompile();
9208 }
9209 }
9210 }
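
// In practice (illustrative): if a forwarded expression such as "a * b + c" is
// read twice, the second call bumps its usage count to 2, the id is added to
// forced_temporaries, and force_recompile() triggers another pass in which the
// expression is emitted once as a hoisted temporary, e.g. "float _42 = a * b + c;",
// with both readers referencing the temporary.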
9211
9212 bool CompilerGLSL::args_will_forward(uint32_t id, const uint32_t *args, uint32_t num_args, bool pure)
9213 {
9214 if (forced_temporaries.find(id) != end(forced_temporaries))
9215 return false;
9216
9217 for (uint32_t i = 0; i < num_args; i++)
9218 if (!should_forward(args[i]))
9219 return false;
9220
9221 // We need to forward globals as well.
9222 if (!pure)
9223 {
9224 for (auto global : global_variables)
9225 if (!should_forward(global))
9226 return false;
9227 for (auto aliased : aliased_variables)
9228 if (!should_forward(aliased))
9229 return false;
9230 }
9231
9232 return true;
9233 }
9234
9235 void CompilerGLSL::register_impure_function_call()
9236 {
9237 // Impure functions can modify globals and aliased variables, so invalidate them as well.
9238 for (auto global : global_variables)
9239 flush_dependees(get<SPIRVariable>(global));
9240 for (auto aliased : aliased_variables)
9241 flush_dependees(get<SPIRVariable>(aliased));
9242 }
9243
9244 void CompilerGLSL::register_call_out_argument(uint32_t id)
9245 {
9246 register_write(id);
9247
9248 auto *var = maybe_get<SPIRVariable>(id);
9249 if (var)
9250 flush_variable_declaration(var->self);
9251 }
9252
9253 string CompilerGLSL::variable_decl_function_local(SPIRVariable &var)
9254 {
9255 // These variables are always function local,
9256 // so make sure we emit the variable without storage qualifiers.
9257 // Some backends will inject custom variables locally in a function
9258 // with a storage qualifier which is not function-local.
9259 auto old_storage = var.storage;
9260 var.storage = StorageClassFunction;
9261 auto expr = variable_decl(var);
9262 var.storage = old_storage;
9263 return expr;
9264 }
9265
9266 void CompilerGLSL::emit_variable_temporary_copies(const SPIRVariable &var)
9267 {
9268 // Ensure that we declare phi-variable copies even if the original declaration isn't deferred
9269 if (var.allocate_temporary_copy && !flushed_phi_variables.count(var.self))
9270 {
9271 auto &type = get<SPIRType>(var.basetype);
9272 auto &flags = get_decoration_bitset(var.self);
9273 statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, join("_", var.self, "_copy")), ";");
9274 flushed_phi_variables.insert(var.self);
9275 }
9276 }
9277
9278 void CompilerGLSL::flush_variable_declaration(uint32_t id)
9279 {
9280 // Ensure that we declare phi-variable copies even if the original declaration isn't deferred
9281 auto *var = maybe_get<SPIRVariable>(id);
9282 if (var && var->deferred_declaration)
9283 {
9284 string initializer;
9285 if (options.force_zero_initialized_variables &&
9286 (var->storage == StorageClassFunction || var->storage == StorageClassGeneric ||
9287 var->storage == StorageClassPrivate) &&
9288 !var->initializer && type_can_zero_initialize(get_variable_data_type(*var)))
9289 {
9290 initializer = join(" = ", to_zero_initialized_expression(get_variable_data_type_id(*var)));
9291 }
9292
9293 statement(variable_decl_function_local(*var), initializer, ";");
9294 var->deferred_declaration = false;
9295 }
9296 if (var)
9297 {
9298 emit_variable_temporary_copies(*var);
9299 }
9300 }
9301
9302 bool CompilerGLSL::remove_duplicate_swizzle(string &op)
9303 {
9304 auto pos = op.find_last_of('.');
9305 if (pos == string::npos || pos == 0)
9306 return false;
9307
9308 string final_swiz = op.substr(pos + 1, string::npos);
9309
9310 if (backend.swizzle_is_function)
9311 {
9312 if (final_swiz.size() < 2)
9313 return false;
9314
9315 if (final_swiz.substr(final_swiz.size() - 2, string::npos) == "()")
9316 final_swiz.erase(final_swiz.size() - 2, string::npos);
9317 else
9318 return false;
9319 }
9320
9321 // Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar.
9322 // If so, and previous swizzle is of same length,
9323 // we can drop the final swizzle altogether.
9324 for (uint32_t i = 0; i < final_swiz.size(); i++)
9325 {
9326 static const char expected[] = { 'x', 'y', 'z', 'w' };
9327 if (i >= 4 || final_swiz[i] != expected[i])
9328 return false;
9329 }
9330
9331 auto prevpos = op.find_last_of('.', pos - 1);
9332 if (prevpos == string::npos)
9333 return false;
9334
9335 prevpos++;
9336
9337 // Make sure there are only swizzles here ...
9338 for (auto i = prevpos; i < pos; i++)
9339 {
9340 if (op[i] < 'w' || op[i] > 'z')
9341 {
9342 // If swizzles are foo.xyz() like in C++ backend for example, check for that.
9343 if (backend.swizzle_is_function && i + 2 == pos && op[i] == '(' && op[i + 1] == ')')
9344 break;
9345 return false;
9346 }
9347 }
9348
9349 // If original swizzle is large enough, just carve out the components we need.
9350 // E.g. foobar.wyx.xy will turn into foobar.wy.
9351 if (pos - prevpos >= final_swiz.size())
9352 {
9353 op.erase(prevpos + final_swiz.size(), string::npos);
9354
9355 // Add back the function call ...
9356 if (backend.swizzle_is_function)
9357 op += "()";
9358 }
9359 return true;
9360 }
9361
9362 // Optimizes away vector swizzles where we have something like
9363 // vec3 foo;
9364 // foo.xyz <-- swizzle expression does nothing.
9365 // This is a very common pattern after OpCompositeConstruct.
9366 bool CompilerGLSL::remove_unity_swizzle(uint32_t base, string &op)
9367 {
9368 auto pos = op.find_last_of('.');
9369 if (pos == string::npos || pos == 0)
9370 return false;
9371
9372 string final_swiz = op.substr(pos + 1, string::npos);
9373
9374 if (backend.swizzle_is_function)
9375 {
9376 if (final_swiz.size() < 2)
9377 return false;
9378
9379 if (final_swiz.substr(final_swiz.size() - 2, string::npos) == "()")
9380 final_swiz.erase(final_swiz.size() - 2, string::npos);
9381 else
9382 return false;
9383 }
9384
9385 // Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar.
9386 // If so, and previous swizzle is of same length,
9387 // we can drop the final swizzle altogether.
9388 for (uint32_t i = 0; i < final_swiz.size(); i++)
9389 {
9390 static const char expected[] = { 'x', 'y', 'z', 'w' };
9391 if (i >= 4 || final_swiz[i] != expected[i])
9392 return false;
9393 }
9394
9395 auto &type = expression_type(base);
9396
9397 // Sanity checking ...
9398 assert(type.columns == 1 && type.array.empty());
9399
9400 if (type.vecsize == final_swiz.size())
9401 op.erase(pos, string::npos);
9402 return true;
9403 }
9404
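// Builds the argument list of a composite constructor, merging scalar extracts from the
// same base vector back into swizzles. A sketch of the intent: extracting x, y and z from
// a vec4 "foo" and reassembling them naively yields vec4(foo.x, foo.y, foo.z, 1.0); the
// combiner merges this into vec4(foo.xyz, 1.0), and the swizzle may even be dropped
// entirely when "foo" already has the matching vecsize.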
9405 string CompilerGLSL::build_composite_combiner(uint32_t return_type, const uint32_t *elems, uint32_t length)
9406 {
9407 ID base = 0;
9408 string op;
9409 string subop;
9410
9411 // Can only merge swizzles for vectors.
9412 auto &type = get<SPIRType>(return_type);
9413 bool can_apply_swizzle_opt = type.basetype != SPIRType::Struct && type.array.empty() && type.columns == 1;
9414 bool swizzle_optimization = false;
9415
9416 for (uint32_t i = 0; i < length; i++)
9417 {
9418 auto *e = maybe_get<SPIRExpression>(elems[i]);
9419
9420 // If we're merging another scalar which belongs to the same base
9421 // object, just merge the swizzles to avoid triggering more than one expression read wherever possible.
9422 if (can_apply_swizzle_opt && e && e->base_expression && e->base_expression == base)
9423 {
9424 // Only supposed to be used for vector swizzle -> scalar.
9425 assert(!e->expression.empty() && e->expression.front() == '.');
9426 subop += e->expression.substr(1, string::npos);
9427 swizzle_optimization = true;
9428 }
9429 else
9430 {
9431 // We'll likely end up with duplicated swizzles, e.g.
9432 // foobar.xyz.xyz from patterns like
9433 // OpVectorShuffle
9434 // OpCompositeExtract x 3
9435 // OpCompositeConstruct 3x + other scalar.
9436 // Just modify op in-place.
9437 if (swizzle_optimization)
9438 {
9439 if (backend.swizzle_is_function)
9440 subop += "()";
9441
9442 // Don't attempt to remove unity swizzling if we managed to remove duplicate swizzles.
9443 // The base "foo" might be vec4, while foo.xyz is vec3 (OpVectorShuffle) and looks like a vec3 due to the .xyz tacked on.
9444 // We only want to remove the swizzles if we're certain that the resulting base will be the same vecsize.
9445 // Essentially, we can only remove one set of swizzles, since that's what we have control over ...
9446 // Case 1:
9447 // foo.yxz.xyz: Duplicate swizzle kicks in, giving foo.yxz, we are done.
9448 // foo.yxz was the result of OpVectorShuffle and we don't know the type of foo.
9449 // Case 2:
9450 // foo.xyz: Duplicate swizzle won't kick in.
9451 // If foo is vec3, we can remove xyz, giving just foo.
9452 if (!remove_duplicate_swizzle(subop))
9453 remove_unity_swizzle(base, subop);
9454
9455 // Strips away redundant parens if we created them during component extraction.
9456 strip_enclosed_expression(subop);
9457 swizzle_optimization = false;
9458 op += subop;
9459 }
9460 else
9461 op += subop;
9462
9463 if (i)
9464 op += ", ";
9465
9466 bool uses_buffer_offset =
9467 type.basetype == SPIRType::Struct && has_member_decoration(type.self, i, DecorationOffset);
9468 subop = to_composite_constructor_expression(elems[i], uses_buffer_offset);
9469 }
9470
9471 base = e ? e->base_expression : ID(0);
9472 }
9473
9474 if (swizzle_optimization)
9475 {
9476 if (backend.swizzle_is_function)
9477 subop += "()";
9478
9479 if (!remove_duplicate_swizzle(subop))
9480 remove_unity_swizzle(base, subop);
9481 // Strips away redundant parens if we created them during component extraction.
9482 strip_enclosed_expression(subop);
9483 }
9484
9485 op += subop;
9486 return op;
9487 }
9488
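// Separate image and sampler arguments are skipped when they have been remapped into
// combined image samplers (always the case when targeting non-Vulkan GLSL); the combined
// parameters are appended to the argument list separately in OpFunctionCall handling.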
9489 bool CompilerGLSL::skip_argument(uint32_t id) const
9490 {
9491 if (!combined_image_samplers.empty() || !options.vulkan_semantics)
9492 {
9493 auto &type = expression_type(id);
9494 if (type.basetype == SPIRType::Sampler || (type.basetype == SPIRType::Image && type.image.sampled == 1))
9495 return true;
9496 }
9497 return false;
9498 }
9499
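// Rewrites "<lhs> = <lhs> op expr" into compound-assignment form. Illustrative rewrites:
//   a = a + b; -> a += b;
//   i = i + 1; -> i++;
// Purely cosmetic in modern GLSL, but legacy ESSL requires loop increments in this form.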
9500 bool CompilerGLSL::optimize_read_modify_write(const SPIRType &type, const string &lhs, const string &rhs)
9501 {
9502 // Do this with strings because we have a very clear pattern we can check for and it avoids
9503 // adding lots of special cases to the code emission.
9504 if (rhs.size() < lhs.size() + 3)
9505 return false;
9506
9507 // Do not optimize matrices. They are a bit awkward to reason about in general
9508 // (in which order does the operation happen?), and it does not work in MSL anyway.
9509 if (type.vecsize > 1 && type.columns > 1)
9510 return false;
9511
9512 auto index = rhs.find(lhs);
9513 if (index != 0)
9514 return false;
9515
9516 // TODO: Shift operators, but it's not important for now.
9517 auto op = rhs.find_first_of("+-/*%|&^", lhs.size() + 1);
9518 if (op != lhs.size() + 1)
9519 return false;
9520
9521 // Check that the op is followed by space. This excludes && and ||.
9522 if (rhs[op + 1] != ' ')
9523 return false;
9524
9525 char bop = rhs[op];
9526 auto expr = rhs.substr(lhs.size() + 3);
9527 // Try to find increments and decrements. Makes it look neater as += 1, -= 1 is fairly rare to see in real code.
9528 // Find some common patterns which are equivalent.
9529 if ((bop == '+' || bop == '-') && (expr == "1" || expr == "uint(1)" || expr == "1u" || expr == "int(1u)"))
9530 statement(lhs, bop, bop, ";");
9531 else
9532 statement(lhs, " ", bop, "= ", expr, ";");
9533 return true;
9534 }
9535
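// Control-dependent results, e.g. derivatives like dFdx(), must not be forwarded across
// control-flow boundaries; record them here so they are invalidated once the currently
// emitting block completes.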
9536 void CompilerGLSL::register_control_dependent_expression(uint32_t expr)
9537 {
9538 if (forwarded_temporaries.find(expr) == end(forwarded_temporaries))
9539 return;
9540
9541 assert(current_emitting_block);
9542 current_emitting_block->invalidate_expressions.push_back(expr);
9543 }
9544
9545 void CompilerGLSL::emit_block_instructions(SPIRBlock &block)
9546 {
9547 current_emitting_block = █
9548 for (auto &op : block.ops)
9549 emit_instruction(op);
9550 current_emitting_block = nullptr;
9551 }
9552
9553 void CompilerGLSL::disallow_forwarding_in_expression_chain(const SPIRExpression &expr)
9554 {
9555 // Allow trivially forwarded expressions like OpLoad or trivial shuffles,
9556 // these will be marked as having suppressed usage tracking.
9557 // Our only concern is to make sure arithmetic operations are done in similar ways.
9558 if (expression_is_forwarded(expr.self) && !expression_suppresses_usage_tracking(expr.self) &&
9559 forced_invariant_temporaries.count(expr.self) == 0)
9560 {
9561 forced_temporaries.insert(expr.self);
9562 forced_invariant_temporaries.insert(expr.self);
9563 force_recompile();
9564
9565 for (auto &dependent : expr.expression_dependencies)
9566 disallow_forwarding_in_expression_chain(get<SPIRExpression>(dependent));
9567 }
9568 }
9569
9570 void CompilerGLSL::handle_store_to_invariant_variable(uint32_t store_id, uint32_t value_id)
9571 {
9572 // Variables or access chains marked invariant are complicated. We will need to make sure the code-gen leading up to
9573 // this variable is consistent. The failure case for SPIRV-Cross is when an expression is forced to a temporary
9574 // in one translation unit, but not another, e.g. due to multiple use of an expression.
9575 // This causes variance despite the output variable being marked invariant, so the solution here is to force all dependent
9576 // expressions to be temporaries.
9577 // It is uncertain if this is enough to support invariant in all possible cases, but it should be good enough
9578 // for all reasonable uses of invariant.
9579 if (!has_decoration(store_id, DecorationInvariant))
9580 return;
9581
9582 auto *expr = maybe_get<SPIRExpression>(value_id);
9583 if (!expr)
9584 return;
9585
9586 disallow_forwarding_in_expression_chain(*expr);
9587 }
9588
9589 void CompilerGLSL::emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression)
9590 {
9591 auto rhs = to_pointer_expression(rhs_expression);
9592
9593 // Statements to OpStore may be empty if the stored type is a struct with zero members. Just forward the store to /dev/null.
9594 if (!rhs.empty())
9595 {
9596 handle_store_to_invariant_variable(lhs_expression, rhs_expression);
9597
9598 auto lhs = to_dereferenced_expression(lhs_expression);
9599
9600 // We might need to cast in order to store to a builtin.
9601 cast_to_builtin_store(lhs_expression, rhs, expression_type(rhs_expression));
9602
9603 // Tries to optimize assignments like "<lhs> = <lhs> op expr".
9604 // While this is purely cosmetic, this is important for legacy ESSL where loop
9605 // variable increments must be of the form i++ or i += const-expr.
9606 // Without this, we end up with i = i + 1, which is correct GLSL, but not correct GLES 2.0.
9607 if (!optimize_read_modify_write(expression_type(rhs_expression), lhs, rhs))
9608 statement(lhs, " = ", rhs, ";");
9609 register_write(lhs_expression);
9610 }
9611 }
9612
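// Determines which integer width implicit bitcasts should use for an instruction.
// For conversions and integer comparisons the width comes from the first actual operand;
// e.g. OpULessThan on 16-bit operands reports 16 so casts use 16-bit types. Otherwise the
// result type decides, with 32 as the fallback.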
9613 uint32_t CompilerGLSL::get_integer_width_for_instruction(const Instruction &instr) const
9614 {
9615 if (instr.length < 3)
9616 return 32;
9617
9618 auto *ops = stream(instr);
9619
9620 switch (instr.op)
9621 {
9622 case OpSConvert:
9623 case OpConvertSToF:
9624 case OpUConvert:
9625 case OpConvertUToF:
9626 case OpIEqual:
9627 case OpINotEqual:
9628 case OpSLessThan:
9629 case OpSLessThanEqual:
9630 case OpSGreaterThan:
9631 case OpSGreaterThanEqual:
9632 case OpULessThan:
9633 case OpULessThanEqual:
9634 case OpUGreaterThan:
9635 case OpUGreaterThanEqual:
9636 return expression_type(ops[2]).width;
9637
9638 default:
9639 {
9640 // We can look at result type which is more robust.
9641 auto *type = maybe_get<SPIRType>(ops[0]);
9642 if (type && type_is_integral(*type))
9643 return type->width;
9644 else
9645 return 32;
9646 }
9647 }
9648 }
9649
9650 uint32_t CompilerGLSL::get_integer_width_for_glsl_instruction(GLSLstd450 op, const uint32_t *ops, uint32_t length) const
9651 {
9652 if (length < 1)
9653 return 32;
9654
9655 switch (op)
9656 {
9657 case GLSLstd450SAbs:
9658 case GLSLstd450SSign:
9659 case GLSLstd450UMin:
9660 case GLSLstd450SMin:
9661 case GLSLstd450UMax:
9662 case GLSLstd450SMax:
9663 case GLSLstd450UClamp:
9664 case GLSLstd450SClamp:
9665 case GLSLstd450FindSMsb:
9666 case GLSLstd450FindUMsb:
9667 return expression_type(ops[0]).width;
9668
9669 default:
9670 {
9671 // We don't need to care about other opcodes, just return 32.
9672 return 32;
9673 }
9674 }
9675 }
9676
9677 void CompilerGLSL::emit_instruction(const Instruction &instruction)
9678 {
9679 auto ops = stream(instruction);
9680 auto opcode = static_cast<Op>(instruction.op);
9681 uint32_t length = instruction.length;
9682
9683 #define GLSL_BOP(op) emit_binary_op(ops[0], ops[1], ops[2], ops[3], #op)
9684 #define GLSL_BOP_CAST(op, type) \
9685 emit_binary_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode))
9686 #define GLSL_UOP(op) emit_unary_op(ops[0], ops[1], ops[2], #op)
9687 #define GLSL_QFOP(op) emit_quaternary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], #op)
9688 #define GLSL_TFOP(op) emit_trinary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], #op)
9689 #define GLSL_BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op)
9690 #define GLSL_BFOP_CAST(op, type) \
9691 emit_binary_func_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode))
9693 #define GLSL_UFOP(op) emit_unary_func_op(ops[0], ops[1], ops[2], #op)
9694
9695 // If we need to do implicit bitcasts, make sure we do it with the correct type.
9696 uint32_t integer_width = get_integer_width_for_instruction(instruction);
9697 auto int_type = to_signed_basetype(integer_width);
9698 auto uint_type = to_unsigned_basetype(integer_width);
9699
9700 switch (opcode)
9701 {
9702 // Dealing with memory
9703 case OpLoad:
9704 {
9705 uint32_t result_type = ops[0];
9706 uint32_t id = ops[1];
9707 uint32_t ptr = ops[2];
9708
9709 flush_variable_declaration(ptr);
9710
9711 // If we're loading from memory that cannot be changed by the shader,
9712 // just forward the expression directly to avoid needless temporaries.
9713 // If an expression is mutable and forwardable, we speculate that it is immutable.
9714 bool forward = should_forward(ptr) && forced_temporaries.find(id) == end(forced_temporaries);
9715
9716 // If loading a non-native row-major matrix, mark the expression as need_transpose.
9717 bool need_transpose = false;
9718 bool old_need_transpose = false;
9719
9720 auto *ptr_expression = maybe_get<SPIRExpression>(ptr);
9721
9722 if (forward)
9723 {
9724 // If we're forwarding the load, we're also going to forward transpose state, so don't transpose while
9725 // taking the expression.
9726 if (ptr_expression && ptr_expression->need_transpose)
9727 {
9728 old_need_transpose = true;
9729 ptr_expression->need_transpose = false;
9730 need_transpose = true;
9731 }
9732 else if (is_non_native_row_major_matrix(ptr))
9733 need_transpose = true;
9734 }
9735
9736 // If we are forwarding this load,
9737 // don't register the read to access chain here, defer that to when we actually use the expression,
9738 // using the add_implied_read_expression mechanism.
9739 string expr;
9740
9741 bool is_packed = has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypePacked);
9742 bool is_remapped = has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID);
9743 if (forward || (!is_packed && !is_remapped))
9744 {
9745 // For the simple case, we do not need to deal with repacking.
9746 expr = to_dereferenced_expression(ptr, false);
9747 }
9748 else
9749 {
9750 // If we are not forwarding the expression, we need to unpack and resolve any physical type remapping here before
9751 // storing the expression to a temporary.
9752 expr = to_unpacked_expression(ptr);
9753 }
9754
9755 auto &type = get<SPIRType>(result_type);
9756 auto &expr_type = expression_type(ptr);
9757
9758 // If the expression has more vector components than the result type, insert
9759 // a swizzle. This shouldn't happen normally on valid SPIR-V, but it might
9760 // happen with e.g. the MSL backend replacing the type of an input variable.
9761 if (expr_type.vecsize > type.vecsize)
9762 expr = enclose_expression(expr + vector_swizzle(type.vecsize, 0));
9763
9764 // We might need to cast in order to load from a builtin.
9765 cast_from_builtin_load(ptr, expr, type);
9766
9767 // We might be trying to load a gl_Position[N], where we should be
9768 // doing float4[](gl_in[i].gl_Position, ...) instead.
9769 // Similar workarounds are required for input arrays in tessellation.
9770 unroll_array_from_complex_load(id, ptr, expr);
9771
9772 // Shouldn't need to check for ID, but current glslang codegen requires it in some cases
9773 // when loading Image/Sampler descriptors. It does not hurt to check ID as well.
9774 if (has_decoration(id, DecorationNonUniformEXT) || has_decoration(ptr, DecorationNonUniformEXT))
9775 {
9776 propagate_nonuniform_qualifier(ptr);
9777 convert_non_uniform_expression(type, expr);
9778 }
9779
9780 if (forward && ptr_expression)
9781 ptr_expression->need_transpose = old_need_transpose;
9782
9783 bool flattened = ptr_expression && flattened_buffer_blocks.count(ptr_expression->loaded_from) != 0;
9784
9785 if (backend.needs_row_major_load_workaround && !is_non_native_row_major_matrix(ptr) && !flattened)
9786 rewrite_load_for_wrapped_row_major(expr, result_type, ptr);
9787
9788 // By default, suppress usage tracking since using same expression multiple times does not imply any extra work.
9789 // However, if we try to load a complex, composite object from a flattened buffer,
9790 // we should avoid emitting the same code over and over and lower the result to a temporary.
9791 bool usage_tracking = flattened && (type.basetype == SPIRType::Struct || (type.columns > 1));
9792
9793 SPIRExpression *e = nullptr;
9794 if (!forward && expression_is_non_value_type_array(ptr))
9795 {
9796 // Complicated load case where we need to make a copy of ptr, but we cannot, because
9797 // it is an array, and our backend does not support arrays as value types.
9798 // Emit the temporary, and copy it explicitly.
9799 e = &emit_uninitialized_temporary_expression(result_type, id);
9800 emit_array_copy(to_expression(id), ptr, StorageClassFunction, get_expression_effective_storage_class(ptr));
9801 }
9802 else
9803 e = &emit_op(result_type, id, expr, forward, !usage_tracking);
9804
9805 e->need_transpose = need_transpose;
9806 register_read(id, ptr, forward);
9807
9808 if (forward)
9809 {
9810 // Pass through whether the result is of a packed type and the physical type ID.
9811 if (has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypePacked))
9812 set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
9813 if (has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID))
9814 {
9815 set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID,
9816 get_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID));
9817 }
9818 }
9819 else
9820 {
9821 // This might have been set on an earlier compilation iteration, force it to be unset.
9822 unset_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
9823 unset_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID);
9824 }
9825
9826 inherit_expression_dependencies(id, ptr);
9827 if (forward)
9828 add_implied_read_expression(*e, ptr);
9829 break;
9830 }
9831
9832 case OpInBoundsAccessChain:
9833 case OpAccessChain:
9834 case OpPtrAccessChain:
9835 {
9836 auto *var = maybe_get<SPIRVariable>(ops[2]);
9837 if (var)
9838 flush_variable_declaration(var->self);
9839
9840 // If the base is immutable, the access chain pointer must also be.
9841 // If an expression is mutable and forwardable, we speculate that it is immutable.
9842 AccessChainMeta meta;
9843 bool ptr_chain = opcode == OpPtrAccessChain;
9844 auto e = access_chain(ops[2], &ops[3], length - 3, get<SPIRType>(ops[0]), &meta, ptr_chain);
9845
9846 auto &expr = set<SPIRExpression>(ops[1], move(e), ops[0], should_forward(ops[2]));
9847
9848 auto *backing_variable = maybe_get_backing_variable(ops[2]);
9849 expr.loaded_from = backing_variable ? backing_variable->self : ID(ops[2]);
9850 expr.need_transpose = meta.need_transpose;
9851 expr.access_chain = true;
9852
9853 // Mark the result as being packed. Some platforms handle packed vectors differently from non-packed ones.
9854 if (meta.storage_is_packed)
9855 set_extended_decoration(ops[1], SPIRVCrossDecorationPhysicalTypePacked);
9856 if (meta.storage_physical_type != 0)
9857 set_extended_decoration(ops[1], SPIRVCrossDecorationPhysicalTypeID, meta.storage_physical_type);
9858 if (meta.storage_is_invariant)
9859 set_decoration(ops[1], DecorationInvariant);
9860 if (meta.flattened_struct)
9861 flattened_structs[ops[1]] = true;
9862
9863 // If we have some expression dependencies in our access chain, this access chain is technically a forwarded
9864 // temporary which could be subject to invalidation.
9865 // Need to assume we're forwarded while calling inherit_expression_dependencies.
9866 forwarded_temporaries.insert(ops[1]);
9867 // The access chain itself is never forced to a temporary, but its dependencies might.
9868 suppressed_usage_tracking.insert(ops[1]);
9869
9870 for (uint32_t i = 2; i < length; i++)
9871 {
9872 inherit_expression_dependencies(ops[1], ops[i]);
9873 add_implied_read_expression(expr, ops[i]);
9874 }
9875
9876 // If we have no dependencies after all, i.e., all indices in the access chain are immutable temporaries,
9877 // we're not forwarded after all.
9878 if (expr.expression_dependencies.empty())
9879 forwarded_temporaries.erase(ops[1]);
9880
9881 if (has_decoration(ops[1], DecorationNonUniformEXT))
9882 propagate_nonuniform_qualifier(ops[1]);
9883
9884 break;
9885 }
9886
9887 case OpStore:
9888 {
9889 auto *var = maybe_get<SPIRVariable>(ops[0]);
9890
9891 if (var && var->statically_assigned)
9892 var->static_expression = ops[1];
9893 else if (var && var->loop_variable && !var->loop_variable_enable)
9894 var->static_expression = ops[1];
9895 else if (var && var->remapped_variable && var->static_expression)
9896 {
9897 // Skip the write.
9898 }
9899 else if (flattened_structs.count(ops[0]))
9900 {
9901 store_flattened_struct(ops[0], ops[1]);
9902 register_write(ops[0]);
9903 }
9904 else
9905 {
9906 emit_store_statement(ops[0], ops[1]);
9907 }
9908
9909 // Storing a pointer results in a variable pointer, so we must conservatively assume
9910 // we can write through it.
9911 if (expression_type(ops[1]).pointer)
9912 register_write(ops[1]);
9913 break;
9914 }
9915
9916 case OpArrayLength:
9917 {
9918 uint32_t result_type = ops[0];
9919 uint32_t id = ops[1];
9920 auto e = access_chain_internal(ops[2], &ops[3], length - 3, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr);
9921 set<SPIRExpression>(id, join(type_to_glsl(get<SPIRType>(result_type)), "(", e, ".length())"), result_type,
9922 true);
9923 break;
9924 }
9925
9926 // Function calls
9927 case OpFunctionCall:
9928 {
9929 uint32_t result_type = ops[0];
9930 uint32_t id = ops[1];
9931 uint32_t func = ops[2];
9932 const auto *arg = &ops[3];
9933 length -= 3;
9934
9935 auto &callee = get<SPIRFunction>(func);
9936 auto &return_type = get<SPIRType>(callee.return_type);
9937 bool pure = function_is_pure(callee);
9938
9939 bool callee_has_out_variables = false;
9940 bool emit_return_value_as_argument = false;
9941
9942 // Invalidate out variables passed to functions since they can be OpStore'd to.
9943 for (uint32_t i = 0; i < length; i++)
9944 {
9945 if (callee.arguments[i].write_count)
9946 {
9947 register_call_out_argument(arg[i]);
9948 callee_has_out_variables = true;
9949 }
9950
9951 flush_variable_declaration(arg[i]);
9952 }
9953
9954 if (!return_type.array.empty() && !backend.can_return_array)
9955 {
9956 callee_has_out_variables = true;
9957 emit_return_value_as_argument = true;
9958 }
9959
9960 if (!pure)
9961 register_impure_function_call();
9962
9963 string funexpr;
9964 SmallVector<string> arglist;
9965 funexpr += to_name(func) + "(";
9966
9967 if (emit_return_value_as_argument)
9968 {
9969 statement(type_to_glsl(return_type), " ", to_name(id), type_to_array_glsl(return_type), ";");
9970 arglist.push_back(to_name(id));
9971 }
9972
9973 for (uint32_t i = 0; i < length; i++)
9974 {
9975 // Do not pass in separate images or samplers if we're remapping
9976 // to combined image samplers.
9977 if (skip_argument(arg[i]))
9978 continue;
9979
9980 arglist.push_back(to_func_call_arg(callee.arguments[i], arg[i]));
9981 }
9982
9983 for (auto &combined : callee.combined_parameters)
9984 {
9985 auto image_id = combined.global_image ? combined.image_id : VariableID(arg[combined.image_id]);
9986 auto sampler_id = combined.global_sampler ? combined.sampler_id : VariableID(arg[combined.sampler_id]);
9987 arglist.push_back(to_combined_image_sampler(image_id, sampler_id));
9988 }
9989
9990 append_global_func_args(callee, length, arglist);
9991
9992 funexpr += merge(arglist);
9993 funexpr += ")";
9994
9995 // Check for function call constraints.
9996 check_function_call_constraints(arg, length);
9997
9998 if (return_type.basetype != SPIRType::Void)
9999 {
10000 // If the function actually writes to an out variable,
10001 // take the conservative route and do not forward.
10002 // The problem is that we might not read the function
10003 // result (and emit the function) before an out variable
10004 // is read (common case when the return value is ignored!).
10005 // To avoid having to start tracking invalid variables,
10006 // just avoid the forwarding problem altogether.
10007 bool forward = args_will_forward(id, arg, length, pure) && !callee_has_out_variables && pure &&
10008 (forced_temporaries.find(id) == end(forced_temporaries));
10009
10010 if (emit_return_value_as_argument)
10011 {
10012 statement(funexpr, ";");
10013 set<SPIRExpression>(id, to_name(id), result_type, true);
10014 }
10015 else
10016 emit_op(result_type, id, funexpr, forward);
10017
10018 // Function calls are implicit loads from all variables in question.
10019 // Set dependencies for them.
10020 for (uint32_t i = 0; i < length; i++)
10021 register_read(id, arg[i], forward);
10022
10023 // If we're going to forward the temporary result,
10024 // put dependencies on every variable that must not change.
10025 if (forward)
10026 register_global_read_dependencies(callee, id);
10027 }
10028 else
10029 statement(funexpr, ";");
10030
10031 break;
10032 }
10033
10034 // Composite munging
10035 case OpCompositeConstruct:
10036 {
10037 uint32_t result_type = ops[0];
10038 uint32_t id = ops[1];
10039 const auto *const elems = &ops[2];
10040 length -= 2;
10041
10042 bool forward = true;
10043 for (uint32_t i = 0; i < length; i++)
10044 forward = forward && should_forward(elems[i]);
10045
10046 auto &out_type = get<SPIRType>(result_type);
10047 auto *in_type = length > 0 ? &expression_type(elems[0]) : nullptr;
10048
10049 // Only splat if we have vector constructors.
10050 // Arrays and structs must be initialized properly in full.
10051 bool composite = !out_type.array.empty() || out_type.basetype == SPIRType::Struct;
10052
10053 bool splat = false;
10054 bool swizzle_splat = false;
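// Splat sketch: OpCompositeConstruct %vec4 %x %x %x %x may be emitted as vec4(x) when
// the backend supports constructor splatting, or as a swizzle like x.xxxx where
// scalars can be swizzled.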
10055
10056 if (in_type)
10057 {
10058 splat = in_type->vecsize == 1 && in_type->columns == 1 && !composite && backend.use_constructor_splatting;
10059 swizzle_splat = in_type->vecsize == 1 && in_type->columns == 1 && backend.can_swizzle_scalar;
10060
10061 if (ir.ids[elems[0]].get_type() == TypeConstant && !type_is_floating_point(*in_type))
10062 {
10063 // As a special case, literal integers cannot be swizzled.
10064 swizzle_splat = false;
10065 }
10066 }
10067
10068 if (splat || swizzle_splat)
10069 {
10070 uint32_t input = elems[0];
10071 for (uint32_t i = 0; i < length; i++)
10072 {
10073 if (input != elems[i])
10074 {
10075 splat = false;
10076 swizzle_splat = false;
10077 }
10078 }
10079 }
10080
10081 if (out_type.basetype == SPIRType::Struct && !backend.can_declare_struct_inline)
10082 forward = false;
10083 if (!out_type.array.empty() && !backend.can_declare_arrays_inline)
10084 forward = false;
10085 if (type_is_empty(out_type) && !backend.supports_empty_struct)
10086 forward = false;
10087
10088 string constructor_op;
10089 if (backend.use_initializer_list && composite)
10090 {
10091 bool needs_trailing_bracket = false;
10092 // Only use this path if we are building composites.
10093 // This path cannot be used for arithmetic.
10094 if (backend.use_typed_initializer_list && out_type.basetype == SPIRType::Struct && out_type.array.empty())
10095 constructor_op += type_to_glsl_constructor(get<SPIRType>(result_type));
10096 else if (backend.use_typed_initializer_list && backend.array_is_value_type && !out_type.array.empty())
10097 {
10098 // MSL path. Array constructor is baked into type here, do not use _constructor variant.
10099 constructor_op += type_to_glsl_constructor(get<SPIRType>(result_type)) + "(";
10100 needs_trailing_bracket = true;
10101 }
10102 constructor_op += "{ ";
10103
10104 if (type_is_empty(out_type) && !backend.supports_empty_struct)
10105 constructor_op += "0";
10106 else if (splat)
10107 constructor_op += to_unpacked_expression(elems[0]);
10108 else
10109 constructor_op += build_composite_combiner(result_type, elems, length);
10110 constructor_op += " }";
10111 if (needs_trailing_bracket)
10112 constructor_op += ")";
10113 }
10114 else if (swizzle_splat && !composite)
10115 {
10116 constructor_op = remap_swizzle(get<SPIRType>(result_type), 1, to_unpacked_expression(elems[0]));
10117 }
10118 else
10119 {
10120 constructor_op = type_to_glsl_constructor(get<SPIRType>(result_type)) + "(";
10121 if (type_is_empty(out_type) && !backend.supports_empty_struct)
10122 constructor_op += "0";
10123 else if (splat)
10124 constructor_op += to_unpacked_expression(elems[0]);
10125 else
10126 constructor_op += build_composite_combiner(result_type, elems, length);
10127 constructor_op += ")";
10128 }
10129
10130 if (!constructor_op.empty())
10131 {
10132 emit_op(result_type, id, constructor_op, forward);
10133 for (uint32_t i = 0; i < length; i++)
10134 inherit_expression_dependencies(id, elems[i]);
10135 }
10136 break;
10137 }
10138
10139 case OpVectorInsertDynamic:
10140 {
10141 uint32_t result_type = ops[0];
10142 uint32_t id = ops[1];
10143 uint32_t vec = ops[2];
10144 uint32_t comp = ops[3];
10145 uint32_t index = ops[4];
10146
10147 flush_variable_declaration(vec);
10148
10149 // Make a copy, then use an access chain to store into it.
10150 statement(declare_temporary(result_type, id), to_expression(vec), ";");
10151 set<SPIRExpression>(id, to_name(id), result_type, true);
10152 auto chain = access_chain_internal(id, &index, 1, 0, nullptr);
10153 statement(chain, " = ", to_unpacked_expression(comp), ";");
10154 break;
10155 }
10156
10157 case OpVectorExtractDynamic:
10158 {
10159 uint32_t result_type = ops[0];
10160 uint32_t id = ops[1];
10161
10162 auto expr = access_chain_internal(ops[2], &ops[3], 1, 0, nullptr);
10163 emit_op(result_type, id, expr, should_forward(ops[2]));
10164 inherit_expression_dependencies(id, ops[2]);
10165 inherit_expression_dependencies(id, ops[3]);
10166 break;
10167 }
10168
10169 case OpCompositeExtract:
10170 {
10171 uint32_t result_type = ops[0];
10172 uint32_t id = ops[1];
10173 length -= 3;
10174
10175 auto &type = get<SPIRType>(result_type);
10176
10177 // We can only split the expression here if our expression is forwarded as a temporary.
10178 bool allow_base_expression = forced_temporaries.find(id) == end(forced_temporaries);
10179
10180 // Do not allow base expression for struct members. We risk doing "swizzle" optimizations in this case.
10181 auto &composite_type = expression_type(ops[2]);
10182 if (composite_type.basetype == SPIRType::Struct || !composite_type.array.empty())
10183 allow_base_expression = false;
10184
10185 // Packed expressions or physical ID mapped expressions cannot be split up.
10186 if (has_extended_decoration(ops[2], SPIRVCrossDecorationPhysicalTypePacked) ||
10187 has_extended_decoration(ops[2], SPIRVCrossDecorationPhysicalTypeID))
10188 allow_base_expression = false;
10189
10190 // Cannot use base expression for row-major matrix row-extraction since we need to interleave access pattern
10191 // into the base expression.
10192 if (is_non_native_row_major_matrix(ops[2]))
10193 allow_base_expression = false;
10194
10195 AccessChainMeta meta;
10196 SPIRExpression *e = nullptr;
10197
10198 // Only apply this optimization if result is scalar.
10199 if (allow_base_expression && should_forward(ops[2]) && type.vecsize == 1 && type.columns == 1 && length == 1)
10200 {
10201 // We want to split the access chain from the base.
10202 // This is so we can later combine different CompositeExtract results
10203 // with CompositeConstruct without emitting code like
10204 //
10205 // vec3 temp = texture(...).xyz
10206 // vec4(temp.x, temp.y, temp.z, 1.0).
10207 //
10208 // when we actually wanted to emit this
10209 // vec4(texture(...).xyz, 1.0).
10210 //
10211 // Including the base will prevent this and would trigger multiple reads
10212 // from expression causing it to be forced to an actual temporary in GLSL.
10213 auto expr = access_chain_internal(ops[2], &ops[3], length,
10214 ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_CHAIN_ONLY_BIT, &meta);
10215 e = &emit_op(result_type, id, expr, true, should_suppress_usage_tracking(ops[2]));
10216 inherit_expression_dependencies(id, ops[2]);
10217 e->base_expression = ops[2];
10218 }
10219 else
10220 {
10221 auto expr = access_chain_internal(ops[2], &ops[3], length, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &meta);
10222 e = &emit_op(result_type, id, expr, should_forward(ops[2]), should_suppress_usage_tracking(ops[2]));
10223 inherit_expression_dependencies(id, ops[2]);
10224 }
10225
10226 // Pass through some meta information to the loaded expression.
10227 // We can still end up loading a buffer type to a variable, then CompositeExtract from it
10228 // instead of loading everything through an access chain.
10229 e->need_transpose = meta.need_transpose;
10230 if (meta.storage_is_packed)
10231 set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
10232 if (meta.storage_physical_type != 0)
10233 set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID, meta.storage_physical_type);
10234 if (meta.storage_is_invariant)
10235 set_decoration(id, DecorationInvariant);
10236
10237 break;
10238 }
10239
10240 case OpCompositeInsert:
10241 {
10242 uint32_t result_type = ops[0];
10243 uint32_t id = ops[1];
10244 uint32_t obj = ops[2];
10245 uint32_t composite = ops[3];
10246 const auto *elems = &ops[4];
10247 length -= 4;
10248
10249 flush_variable_declaration(composite);
10250
10251 // Make a copy, then use an access chain to store into it.
10252 statement(declare_temporary(result_type, id), to_expression(composite), ";");
10253 set<SPIRExpression>(id, to_name(id), result_type, true);
10254 auto chain = access_chain_internal(id, elems, length, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr);
10255 statement(chain, " = ", to_unpacked_expression(obj), ";");
10256
10257 break;
10258 }
10259
10260 case OpCopyMemory:
10261 {
10262 uint32_t lhs = ops[0];
10263 uint32_t rhs = ops[1];
10264 if (lhs != rhs)
10265 {
10266 flush_variable_declaration(lhs);
10267 flush_variable_declaration(rhs);
10268 statement(to_expression(lhs), " = ", to_unpacked_expression(rhs), ";");
10269 register_write(lhs);
10270 }
10271 break;
10272 }
10273
10274 case OpCopyLogical:
10275 {
10276 // This is used for copying objects of different, but logically matching, types such as arrays and structs.
10277 // We need to unroll the copy, element by element.
10278 uint32_t result_type = ops[0];
10279 uint32_t id = ops[1];
10280 uint32_t rhs = ops[2];
10281
10282 emit_uninitialized_temporary_expression(result_type, id);
10283 emit_copy_logical_type(id, result_type, rhs, expression_type_id(rhs), {});
10284 break;
10285 }
10286
10287 case OpCopyObject:
10288 {
10289 uint32_t result_type = ops[0];
10290 uint32_t id = ops[1];
10291 uint32_t rhs = ops[2];
10292 bool pointer = get<SPIRType>(result_type).pointer;
10293
10294 auto *chain = maybe_get<SPIRAccessChain>(rhs);
10295 auto *imgsamp = maybe_get<SPIRCombinedImageSampler>(rhs);
10296 if (chain)
10297 {
10298 // Cannot lower to a SPIRExpression, just copy the object.
10299 auto &e = set<SPIRAccessChain>(id, *chain);
10300 e.self = id;
10301 }
10302 else if (imgsamp)
10303 {
10304 // Cannot lower to a SPIRExpression, just copy the object.
10305 // GLSL does not currently use this type and will never get here, but MSL does.
10306 // Handled here instead of CompilerMSL for better integration and general handling,
10307 // and in case GLSL or other subclasses require it in the future.
10308 auto &e = set<SPIRCombinedImageSampler>(id, *imgsamp);
10309 e.self = id;
10310 }
10311 else if (expression_is_lvalue(rhs) && !pointer)
10312 {
10313 // Need a copy.
10314 // For pointer types, we copy the pointer itself.
10315 statement(declare_temporary(result_type, id), to_unpacked_expression(rhs), ";");
10316 set<SPIRExpression>(id, to_name(id), result_type, true);
10317 }
10318 else
10319 {
10320 // RHS expression is immutable, so just forward it.
10321 // Copying these things really makes no sense, but
10322 // it seems to be allowed anyway.
10323 auto &e = set<SPIRExpression>(id, to_expression(rhs), result_type, true);
10324 if (pointer)
10325 {
10326 auto *var = maybe_get_backing_variable(rhs);
10327 e.loaded_from = var ? var->self : ID(0);
10328 }
10329
10330 // If we're copying an access chain, need to inherit the read expressions.
10331 auto *rhs_expr = maybe_get<SPIRExpression>(rhs);
10332 if (rhs_expr)
10333 {
10334 e.implied_read_expressions = rhs_expr->implied_read_expressions;
10335 e.expression_dependencies = rhs_expr->expression_dependencies;
10336 }
10337 }
10338 break;
10339 }
10340
10341 case OpVectorShuffle:
10342 {
10343 uint32_t result_type = ops[0];
10344 uint32_t id = ops[1];
10345 uint32_t vec0 = ops[2];
10346 uint32_t vec1 = ops[3];
10347 const auto *elems = &ops[4];
10348 length -= 4;
10349
10350 auto &type0 = expression_type(vec0);
10351
10352 // If we have the undefined swizzle index -1, we need to swizzle in undefined data,
10353 // or in our case, T(0).
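// Illustrative: shuffling (a, b) with indices (0, 1, 0xFFFFFFFF) may be emitted as
// vec3(a.x, a.y, 0.0), synthesizing a zero constant for the undefined lane.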
10354 bool shuffle = false;
10355 for (uint32_t i = 0; i < length; i++)
10356 if (elems[i] >= type0.vecsize || elems[i] == 0xffffffffu)
10357 shuffle = true;
10358
10359 // Cannot use swizzles with packed expressions, force shuffle path.
10360 if (!shuffle && has_extended_decoration(vec0, SPIRVCrossDecorationPhysicalTypePacked))
10361 shuffle = true;
10362
10363 string expr;
10364 bool should_fwd, trivial_forward;
10365
10366 if (shuffle)
10367 {
10368 should_fwd = should_forward(vec0) && should_forward(vec1);
10369 trivial_forward = should_suppress_usage_tracking(vec0) && should_suppress_usage_tracking(vec1);
10370
10371 // Constructor style and shuffling from two different vectors.
10372 SmallVector<string> args;
10373 for (uint32_t i = 0; i < length; i++)
10374 {
10375 if (elems[i] == 0xffffffffu)
10376 {
10377 // Use a constant 0 here.
10378 // We could use the first component or similar, but then we risk propagating
10379 // a value we might not need, and bog down codegen.
10380 SPIRConstant c;
10381 c.constant_type = type0.parent_type;
10382 assert(type0.parent_type != ID(0));
10383 args.push_back(constant_expression(c));
10384 }
10385 else if (elems[i] >= type0.vecsize)
10386 args.push_back(to_extract_component_expression(vec1, elems[i] - type0.vecsize));
10387 else
10388 args.push_back(to_extract_component_expression(vec0, elems[i]));
10389 }
10390 expr += join(type_to_glsl_constructor(get<SPIRType>(result_type)), "(", merge(args), ")");
10391 }
10392 else
10393 {
10394 should_fwd = should_forward(vec0);
10395 trivial_forward = should_suppress_usage_tracking(vec0);
10396
10397 // We only source from first vector, so can use swizzle.
10398 // If the vector is packed, unpack it before applying a swizzle (needed for MSL)
10399 expr += to_enclosed_unpacked_expression(vec0);
10400 expr += ".";
10401 for (uint32_t i = 0; i < length; i++)
10402 {
10403 assert(elems[i] != 0xffffffffu);
10404 expr += index_to_swizzle(elems[i]);
10405 }
10406
10407 if (backend.swizzle_is_function && length > 1)
10408 expr += "()";
10409 }
10410
10411 // A shuffle is trivial in that it doesn't actually *do* anything.
10412 // We inherit the forwardedness from our arguments to avoid flushing out to temporaries when it's not really needed.
10413
10414 emit_op(result_type, id, expr, should_fwd, trivial_forward);
10415
10416 inherit_expression_dependencies(id, vec0);
10417 if (vec0 != vec1)
10418 inherit_expression_dependencies(id, vec1);
10419 break;
10420 }
10421
10422 // ALU
10423 case OpIsNan:
10424 GLSL_UFOP(isnan);
10425 break;
10426
10427 case OpIsInf:
10428 GLSL_UFOP(isinf);
10429 break;
10430
10431 case OpSNegate:
10432 case OpFNegate:
10433 GLSL_UOP(-);
10434 break;
10435
10436 case OpIAdd:
10437 {
10438 // For simple arith ops, prefer the output type if there's a mismatch to avoid extra bitcasts.
10439 auto type = get<SPIRType>(ops[0]).basetype;
10440 GLSL_BOP_CAST(+, type);
10441 break;
10442 }
10443
10444 case OpFAdd:
10445 GLSL_BOP(+);
10446 break;
10447
10448 case OpISub:
10449 {
10450 auto type = get<SPIRType>(ops[0]).basetype;
10451 GLSL_BOP_CAST(-, type);
10452 break;
10453 }
10454
10455 case OpFSub:
10456 GLSL_BOP(-);
10457 break;
10458
10459 case OpIMul:
10460 {
10461 auto type = get<SPIRType>(ops[0]).basetype;
10462 GLSL_BOP_CAST(*, type);
10463 break;
10464 }
10465
10466 case OpVectorTimesMatrix:
10467 case OpMatrixTimesVector:
10468 {
10469 // If the matrix needs transpose, just flip the multiply order.
10470 auto *e = maybe_get<SPIRExpression>(ops[opcode == OpMatrixTimesVector ? 2 : 3]);
10471 if (e && e->need_transpose)
10472 {
10473 e->need_transpose = false;
10474 string expr;
10475
10476 if (opcode == OpMatrixTimesVector)
10477 expr = join(to_enclosed_unpacked_expression(ops[3]), " * ",
10478 enclose_expression(to_unpacked_row_major_matrix_expression(ops[2])));
10479 else
10480 expr = join(enclose_expression(to_unpacked_row_major_matrix_expression(ops[3])), " * ",
10481 to_enclosed_unpacked_expression(ops[2]));
10482
10483 bool forward = should_forward(ops[2]) && should_forward(ops[3]);
10484 emit_op(ops[0], ops[1], expr, forward);
10485 e->need_transpose = true;
10486 inherit_expression_dependencies(ops[1], ops[2]);
10487 inherit_expression_dependencies(ops[1], ops[3]);
10488 }
10489 else
10490 GLSL_BOP(*);
10491 break;
10492 }
10493
10494 case OpMatrixTimesMatrix:
10495 {
10496 auto *a = maybe_get<SPIRExpression>(ops[2]);
10497 auto *b = maybe_get<SPIRExpression>(ops[3]);
10498
10499 // If both matrices need transpose, we can multiply in flipped order and tag the expression as transposed.
10500 // a^T * b^T = (b * a)^T.
10501 if (a && b && a->need_transpose && b->need_transpose)
10502 {
10503 a->need_transpose = false;
10504 b->need_transpose = false;
10505 auto expr = join(enclose_expression(to_unpacked_row_major_matrix_expression(ops[3])), " * ",
10506 enclose_expression(to_unpacked_row_major_matrix_expression(ops[2])));
10507 bool forward = should_forward(ops[2]) && should_forward(ops[3]);
10508 auto &e = emit_op(ops[0], ops[1], expr, forward);
10509 e.need_transpose = true;
10510 a->need_transpose = true;
10511 b->need_transpose = true;
10512 inherit_expression_dependencies(ops[1], ops[2]);
10513 inherit_expression_dependencies(ops[1], ops[3]);
10514 }
10515 else
10516 GLSL_BOP(*);
10517
10518 break;
10519 }
10520
10521 case OpFMul:
10522 case OpMatrixTimesScalar:
10523 case OpVectorTimesScalar:
10524 GLSL_BOP(*);
10525 break;
10526
10527 case OpOuterProduct:
10528 GLSL_BFOP(outerProduct);
10529 break;
10530
10531 case OpDot:
10532 GLSL_BFOP(dot);
10533 break;
10534
10535 case OpTranspose:
10536 if (options.version < 120) // Matches GLSL 1.10 / ESSL 1.00
10537 {
10538 // transpose() is not available, so instead, flip need_transpose,
10539 // which can later be turned into an emulated transpose op by
10540 // convert_row_major_matrix(), if necessary.
10541 uint32_t result_type = ops[0];
10542 uint32_t result_id = ops[1];
10543 uint32_t input = ops[2];
10544
10545 // Force need_transpose to false temporarily to prevent
10546 // to_expression() from doing the transpose.
10547 bool need_transpose = false;
10548 auto *input_e = maybe_get<SPIRExpression>(input);
10549 if (input_e)
10550 swap(need_transpose, input_e->need_transpose);
10551
10552 bool forward = should_forward(input);
10553 auto &e = emit_op(result_type, result_id, to_expression(input), forward);
10554 e.need_transpose = !need_transpose;
10555
10556 // Restore the old need_transpose flag.
10557 if (input_e)
10558 input_e->need_transpose = need_transpose;
10559 }
10560 else
10561 GLSL_UFOP(transpose);
10562 break;
10563
10564 case OpSRem:
10565 {
10566 uint32_t result_type = ops[0];
10567 uint32_t result_id = ops[1];
10568 uint32_t op0 = ops[2];
10569 uint32_t op1 = ops[3];
10570
10571 // Needs special handling.
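// SPIR-V SRem takes its sign from the first operand, which matches C-style truncating
// division, so emit op0 - op1 * (op0 / op1). Worked example: SRem(-5, 3) =
// -5 - 3 * trunc(-5 / 3) = -5 - 3 * (-1) = -2.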
10572 bool forward = should_forward(op0) && should_forward(op1);
10573 auto expr = join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ", "(",
10574 to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), ")");
10575
10576 emit_op(result_type, result_id, expr, forward);
10577 inherit_expression_dependencies(result_id, op0);
10578 inherit_expression_dependencies(result_id, op1);
10579 break;
10580 }
10581
10582 case OpSDiv:
10583 GLSL_BOP_CAST(/, int_type);
10584 break;
10585
10586 case OpUDiv:
10587 GLSL_BOP_CAST(/, uint_type);
10588 break;
10589
10590 case OpIAddCarry:
10591 case OpISubBorrow:
10592 {
10593 if (options.es && options.version < 310)
10594 SPIRV_CROSS_THROW("Extended arithmetic is only available from ESSL 310.");
10595 else if (!options.es && options.version < 400)
10596 SPIRV_CROSS_THROW("Extended arithmetic is only available from GLSL 400.");
10597
10598 uint32_t result_type = ops[0];
10599 uint32_t result_id = ops[1];
10600 uint32_t op0 = ops[2];
10601 uint32_t op1 = ops[3];
10602 auto &type = get<SPIRType>(result_type);
10603 emit_uninitialized_temporary_expression(result_type, result_id);
10604 const char *op = opcode == OpIAddCarry ? "uaddCarry" : "usubBorrow";
10605
10606 statement(to_expression(result_id), ".", to_member_name(type, 0), " = ", op, "(", to_expression(op0), ", ",
10607 to_expression(op1), ", ", to_expression(result_id), ".", to_member_name(type, 1), ");");
10608 break;
10609 }
10610
10611 case OpUMulExtended:
10612 case OpSMulExtended:
10613 {
10614 if (options.es && options.version < 310)
10615 SPIRV_CROSS_THROW("Extended arithmetic is only available from ESSL 310.");
10616 else if (!options.es && options.version < 400)
10617 SPIRV_CROSS_THROW("Extended arithmetic is only available from GLSL 400.");
10618
10619 uint32_t result_type = ops[0];
10620 uint32_t result_id = ops[1];
10621 uint32_t op0 = ops[2];
10622 uint32_t op1 = ops[3];
10623 auto &type = get<SPIRType>(result_type);
10624 emit_uninitialized_temporary_expression(result_type, result_id);
10625 const char *op = opcode == OpUMulExtended ? "umulExtended" : "imulExtended";
10626
10627 statement(op, "(", to_expression(op0), ", ", to_expression(op1), ", ", to_expression(result_id), ".",
10628 to_member_name(type, 1), ", ", to_expression(result_id), ".", to_member_name(type, 0), ");");
10629 break;
10630 }
10631
10632 case OpFDiv:
10633 GLSL_BOP(/);
10634 break;
10635
10636 case OpShiftRightLogical:
10637 GLSL_BOP_CAST(>>, uint_type);
10638 break;
10639
10640 case OpShiftRightArithmetic:
10641 GLSL_BOP_CAST(>>, int_type);
10642 break;
10643
10644 case OpShiftLeftLogical:
10645 {
10646 auto type = get<SPIRType>(ops[0]).basetype;
10647 GLSL_BOP_CAST(<<, type);
10648 break;
10649 }
10650
10651 case OpBitwiseOr:
10652 {
10653 auto type = get<SPIRType>(ops[0]).basetype;
10654 GLSL_BOP_CAST(|, type);
10655 break;
10656 }
10657
10658 case OpBitwiseXor:
10659 {
10660 auto type = get<SPIRType>(ops[0]).basetype;
10661 GLSL_BOP_CAST(^, type);
10662 break;
10663 }
10664
10665 case OpBitwiseAnd:
10666 {
10667 auto type = get<SPIRType>(ops[0]).basetype;
10668 GLSL_BOP_CAST(&, type);
10669 break;
10670 }
10671
10672 case OpNot:
10673 GLSL_UOP(~);
10674 break;
10675
10676 case OpUMod:
10677 GLSL_BOP_CAST(%, uint_type);
10678 break;
10679
10680 case OpSMod:
10681 GLSL_BOP_CAST(%, int_type);
10682 break;
10683
10684 case OpFMod:
10685 GLSL_BFOP(mod);
10686 break;
10687
10688 case OpFRem:
10689 {
10690 if (is_legacy())
10691 SPIRV_CROSS_THROW("OpFRem requires trunc() and is only supported on non-legacy targets. A workaround is "
10692 "needed for legacy.");
10693
10694 uint32_t result_type = ops[0];
10695 uint32_t result_id = ops[1];
10696 uint32_t op0 = ops[2];
10697 uint32_t op1 = ops[3];
10698
10699 // Needs special handling.
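// FRem also takes its sign from the first operand, hence trunc(). Worked example:
// FRem(-5.0, 3.0) = -5.0 - 3.0 * trunc(-5.0 / 3.0) = -5.0 - 3.0 * (-1.0) = -2.0.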
10700 bool forward = should_forward(op0) && should_forward(op1);
10701 auto expr = join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ", "trunc(",
10702 to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), ")");
10703
10704 emit_op(result_type, result_id, expr, forward);
10705 inherit_expression_dependencies(result_id, op0);
10706 inherit_expression_dependencies(result_id, op1);
10707 break;
10708 }
10709
10710 // Relational
10711 case OpAny:
10712 GLSL_UFOP(any);
10713 break;
10714
10715 case OpAll:
10716 GLSL_UFOP(all);
10717 break;
10718
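// Note the operand order below: SPIR-V OpSelect provides (condition, true-case, false-case),
// while emit_mix_op takes them mix()-style as (false-case, true-case, condition).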
10719 case OpSelect:
10720 emit_mix_op(ops[0], ops[1], ops[4], ops[3], ops[2]);
10721 break;
10722
10723 case OpLogicalOr:
10724 {
10725 // No vector variant in GLSL for logical OR.
10726 auto result_type = ops[0];
10727 auto id = ops[1];
10728 auto &type = get<SPIRType>(result_type);
10729
10730 if (type.vecsize > 1)
10731 emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "||", false, SPIRType::Unknown);
10732 else
10733 GLSL_BOP(||);
10734 break;
10735 }
10736
10737 case OpLogicalAnd:
10738 {
10739 // No vector variant in GLSL for logical AND.
10740 auto result_type = ops[0];
10741 auto id = ops[1];
10742 auto &type = get<SPIRType>(result_type);
10743
10744 if (type.vecsize > 1)
10745 emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "&&", false, SPIRType::Unknown);
10746 else
10747 GLSL_BOP(&&);
10748 break;
10749 }
10750
10751 case OpLogicalNot:
10752 {
10753 auto &type = get<SPIRType>(ops[0]);
10754 if (type.vecsize > 1)
10755 GLSL_UFOP(not );
10756 else
10757 GLSL_UOP(!);
10758 break;
10759 }
10760
10761 case OpIEqual:
10762 {
10763 if (expression_type(ops[2]).vecsize > 1)
10764 GLSL_BFOP_CAST(equal, int_type);
10765 else
10766 GLSL_BOP_CAST(==, int_type);
10767 break;
10768 }
10769
10770 case OpLogicalEqual:
10771 case OpFOrdEqual:
10772 {
10773 if (expression_type(ops[2]).vecsize > 1)
10774 GLSL_BFOP(equal);
10775 else
10776 GLSL_BOP(==);
10777 break;
10778 }
10779
10780 case OpINotEqual:
10781 {
10782 if (expression_type(ops[2]).vecsize > 1)
10783 GLSL_BFOP_CAST(notEqual, int_type);
10784 else
10785 GLSL_BOP_CAST(!=, int_type);
10786 break;
10787 }
10788
10789 case OpLogicalNotEqual:
10790 case OpFOrdNotEqual:
10791 {
10792 if (expression_type(ops[2]).vecsize > 1)
10793 GLSL_BFOP(notEqual);
10794 else
10795 GLSL_BOP(!=);
10796 break;
10797 }
10798
10799 case OpUGreaterThan:
10800 case OpSGreaterThan:
10801 {
10802 auto type = opcode == OpUGreaterThan ? uint_type : int_type;
10803 if (expression_type(ops[2]).vecsize > 1)
10804 GLSL_BFOP_CAST(greaterThan, type);
10805 else
10806 GLSL_BOP_CAST(>, type);
10807 break;
10808 }
10809
10810 case OpFOrdGreaterThan:
10811 {
10812 if (expression_type(ops[2]).vecsize > 1)
10813 GLSL_BFOP(greaterThan);
10814 else
10815 GLSL_BOP(>);
10816 break;
10817 }
10818
10819 case OpUGreaterThanEqual:
10820 case OpSGreaterThanEqual:
10821 {
10822 auto type = opcode == OpUGreaterThanEqual ? uint_type : int_type;
10823 if (expression_type(ops[2]).vecsize > 1)
10824 GLSL_BFOP_CAST(greaterThanEqual, type);
10825 else
10826 GLSL_BOP_CAST(>=, type);
10827 break;
10828 }
10829
10830 case OpFOrdGreaterThanEqual:
10831 {
10832 if (expression_type(ops[2]).vecsize > 1)
10833 GLSL_BFOP(greaterThanEqual);
10834 else
10835 GLSL_BOP(>=);
10836 break;
10837 }
10838
10839 case OpULessThan:
10840 case OpSLessThan:
10841 {
10842 auto type = opcode == OpULessThan ? uint_type : int_type;
10843 if (expression_type(ops[2]).vecsize > 1)
10844 GLSL_BFOP_CAST(lessThan, type);
10845 else
10846 GLSL_BOP_CAST(<, type);
10847 break;
10848 }
10849
10850 case OpFOrdLessThan:
10851 {
10852 if (expression_type(ops[2]).vecsize > 1)
10853 GLSL_BFOP(lessThan);
10854 else
10855 GLSL_BOP(<);
10856 break;
10857 }
10858
10859 case OpULessThanEqual:
10860 case OpSLessThanEqual:
10861 {
10862 auto type = opcode == OpULessThanEqual ? uint_type : int_type;
10863 if (expression_type(ops[2]).vecsize > 1)
10864 GLSL_BFOP_CAST(lessThanEqual, type);
10865 else
10866 GLSL_BOP_CAST(<=, type);
10867 break;
10868 }
10869
10870 case OpFOrdLessThanEqual:
10871 {
10872 if (expression_type(ops[2]).vecsize > 1)
10873 GLSL_BFOP(lessThanEqual);
10874 else
10875 GLSL_BOP(<=);
10876 break;
10877 }
10878
10879 // Conversion
10880 case OpSConvert:
10881 case OpConvertSToF:
10882 case OpUConvert:
10883 case OpConvertUToF:
10884 {
10885 auto input_type = opcode == OpSConvert || opcode == OpConvertSToF ? int_type : uint_type;
10886 uint32_t result_type = ops[0];
10887 uint32_t id = ops[1];
10888
10889 auto &type = get<SPIRType>(result_type);
10890 auto &arg_type = expression_type(ops[2]);
10891 auto func = type_to_glsl_constructor(type);
10892
10893 if (arg_type.width < type.width || type_is_floating_point(type))
10894 emit_unary_func_op_cast(result_type, id, ops[2], func.c_str(), input_type, type.basetype);
10895 else
10896 emit_unary_func_op(result_type, id, ops[2], func.c_str());
10897 break;
10898 }
10899
10900 case OpConvertFToU:
10901 case OpConvertFToS:
10902 {
10903 // Cast to expected arithmetic type, then potentially bitcast away to desired signedness.
10904 uint32_t result_type = ops[0];
10905 uint32_t id = ops[1];
10906 auto &type = get<SPIRType>(result_type);
10907 auto expected_type = type;
10908 auto &float_type = expression_type(ops[2]);
10909 expected_type.basetype =
10910 opcode == OpConvertFToS ? to_signed_basetype(type.width) : to_unsigned_basetype(type.width);
10911
10912 auto func = type_to_glsl_constructor(expected_type);
10913 emit_unary_func_op_cast(result_type, id, ops[2], func.c_str(), float_type.basetype, expected_type.basetype);
10914 break;
10915 }
10916
10917 case OpFConvert:
10918 {
10919 uint32_t result_type = ops[0];
10920 uint32_t id = ops[1];
10921
10922 auto func = type_to_glsl_constructor(get<SPIRType>(result_type));
10923 emit_unary_func_op(result_type, id, ops[2], func.c_str());
10924 break;
10925 }
10926
10927 case OpBitcast:
10928 {
10929 uint32_t result_type = ops[0];
10930 uint32_t id = ops[1];
10931 uint32_t arg = ops[2];
10932
10933 if (!emit_complex_bitcast(result_type, id, arg))
10934 {
10935 auto op = bitcast_glsl_op(get<SPIRType>(result_type), expression_type(arg));
10936 emit_unary_func_op(result_type, id, arg, op.c_str());
10937 }
10938 break;
10939 }
10940
10941 case OpQuantizeToF16:
10942 {
10943 uint32_t result_type = ops[0];
10944 uint32_t id = ops[1];
10945 uint32_t arg = ops[2];
10946
10947 string op;
10948 auto &type = get<SPIRType>(result_type);
10949
10950 switch (type.vecsize)
10951 {
10952 case 1:
10953 op = join("unpackHalf2x16(packHalf2x16(vec2(", to_expression(arg), "))).x");
10954 break;
10955 case 2:
10956 op = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), "))");
10957 break;
10958 case 3:
10959 {
10960 auto op0 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".xy))");
10961 auto op1 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".zz)).x");
10962 op = join("vec3(", op0, ", ", op1, ")");
10963 break;
10964 }
10965 case 4:
10966 {
10967 auto op0 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".xy))");
10968 auto op1 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".zw))");
10969 op = join("vec4(", op0, ", ", op1, ")");
10970 break;
10971 }
10972 default:
10973 SPIRV_CROSS_THROW("Illegal argument to OpQuantizeToF16.");
10974 }
10975
10976 emit_op(result_type, id, op, should_forward(arg));
10977 inherit_expression_dependencies(id, arg);
10978 break;
10979 }
10980
10981 // Derivatives
10982 case OpDPdx:
10983 GLSL_UFOP(dFdx);
10984 if (is_legacy_es())
10985 require_extension_internal("GL_OES_standard_derivatives");
10986 register_control_dependent_expression(ops[1]);
10987 break;
10988
10989 case OpDPdy:
10990 GLSL_UFOP(dFdy);
10991 if (is_legacy_es())
10992 require_extension_internal("GL_OES_standard_derivatives");
10993 register_control_dependent_expression(ops[1]);
10994 break;
10995
10996 case OpDPdxFine:
10997 GLSL_UFOP(dFdxFine);
10998 if (options.es)
10999 {
11000 SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
11001 }
11002 if (options.version < 450)
11003 require_extension_internal("GL_ARB_derivative_control");
11004 register_control_dependent_expression(ops[1]);
11005 break;
11006
11007 case OpDPdyFine:
11008 GLSL_UFOP(dFdyFine);
11009 if (options.es)
11010 {
11011 SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
11012 }
11013 if (options.version < 450)
11014 require_extension_internal("GL_ARB_derivative_control");
11015 register_control_dependent_expression(ops[1]);
11016 break;
11017
11018 case OpDPdxCoarse:
11019 if (options.es)
11020 {
11021 SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
11022 }
11023 GLSL_UFOP(dFdxCoarse);
11024 if (options.version < 450)
11025 require_extension_internal("GL_ARB_derivative_control");
11026 register_control_dependent_expression(ops[1]);
11027 break;
11028
11029 case OpDPdyCoarse:
11030 GLSL_UFOP(dFdyCoarse);
11031 if (options.es)
11032 {
11033 SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
11034 }
11035 if (options.version < 450)
11036 require_extension_internal("GL_ARB_derivative_control");
11037 register_control_dependent_expression(ops[1]);
11038 break;
11039
11040 case OpFwidth:
11041 GLSL_UFOP(fwidth);
11042 if (is_legacy_es())
11043 require_extension_internal("GL_OES_standard_derivatives");
11044 register_control_dependent_expression(ops[1]);
11045 break;
11046
11047 case OpFwidthCoarse:
11048 GLSL_UFOP(fwidthCoarse);
11049 if (options.es)
11050 {
11051 SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
11052 }
11053 if (options.version < 450)
11054 require_extension_internal("GL_ARB_derivative_control");
11055 register_control_dependent_expression(ops[1]);
11056 break;
11057
11058 case OpFwidthFine:
11059 GLSL_UFOP(fwidthFine);
11060 if (options.es)
11061 {
11062 SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
11063 }
11064 if (options.version < 450)
11065 require_extension_internal("GL_ARB_derivative_control");
11066 register_control_dependent_expression(ops[1]);
11067 break;
11068
11069 // Bitfield
11070 case OpBitFieldInsert:
11071 {
11072 emit_bitfield_insert_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], "bitfieldInsert", SPIRType::Int);
11073 break;
11074 }
11075
11076 case OpBitFieldSExtract:
11077 {
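// GLSL only declares bitfieldExtract() with int offset/count parameters, so the two trailing
// operands are always cast to SPIRType::Int regardless of the signedness of the value operand.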
11078 emit_trinary_func_op_bitextract(ops[0], ops[1], ops[2], ops[3], ops[4], "bitfieldExtract", int_type, int_type,
11079 SPIRType::Int, SPIRType::Int);
11080 break;
11081 }
11082
11083 case OpBitFieldUExtract:
11084 {
11085 emit_trinary_func_op_bitextract(ops[0], ops[1], ops[2], ops[3], ops[4], "bitfieldExtract", uint_type, uint_type,
11086 SPIRType::Int, SPIRType::Int);
11087 break;
11088 }
11089
11090 case OpBitReverse:
11091 // BitReverse does not have issues with sign since result type must match input type.
11092 GLSL_UFOP(bitfieldReverse);
11093 break;
11094
11095 case OpBitCount:
11096 {
11097 auto basetype = expression_type(ops[2]).basetype;
11098 emit_unary_func_op_cast(ops[0], ops[1], ops[2], "bitCount", basetype, int_type);
11099 break;
11100 }
11101
11102 // Atomics
11103 case OpAtomicExchange:
11104 {
11105 uint32_t result_type = ops[0];
11106 uint32_t id = ops[1];
11107 uint32_t ptr = ops[2];
11108 // Ignore semantics for now, probably only relevant to CL.
11109 uint32_t val = ops[5];
11110 const char *op = check_atomic_image(ptr) ? "imageAtomicExchange" : "atomicExchange";
11111 forced_temporaries.insert(id);
11112 emit_binary_func_op(result_type, id, ptr, val, op);
11113 flush_all_atomic_capable_variables();
11114 break;
11115 }
11116
11117 case OpAtomicCompareExchange:
11118 {
11119 uint32_t result_type = ops[0];
11120 uint32_t id = ops[1];
11121 uint32_t ptr = ops[2];
11122 uint32_t val = ops[6];
11123 uint32_t comp = ops[7];
11124 const char *op = check_atomic_image(ptr) ? "imageAtomicCompSwap" : "atomicCompSwap";
11125
11126 forced_temporaries.insert(id);
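// Note the operand order: GLSL's atomicCompSwap(mem, compare, data) takes the comparator before
// the new value, whereas SPIR-V passes the value (ops[6]) before the comparator (ops[7]).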
11127 emit_trinary_func_op(result_type, id, ptr, comp, val, op);
11128 flush_all_atomic_capable_variables();
11129 break;
11130 }
11131
11132 case OpAtomicLoad:
11133 {
11134 // In plain GLSL, we have no atomic loads, so emulate one by atomically adding 0 and hoping the compiler figures it out.
11135 // Alternatively, we could rely on KHR_memory_model, but that's not very helpful for GL.
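// E.g. an atomic load from a uint counter becomes "atomicAdd(counter, 0u)",
// which returns the current value without modifying it.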
11136 auto &type = expression_type(ops[2]);
11137 forced_temporaries.insert(ops[1]);
11138 bool atomic_image = check_atomic_image(ops[2]);
11139 bool unsigned_type = (type.basetype == SPIRType::UInt) ||
11140 (atomic_image && get<SPIRType>(type.image.type).basetype == SPIRType::UInt);
11141 const char *op = atomic_image ? "imageAtomicAdd" : "atomicAdd";
11142 const char *increment = unsigned_type ? "0u" : "0";
11143 emit_op(ops[0], ops[1], join(op, "(", to_expression(ops[2]), ", ", increment, ")"), false);
11144 flush_all_atomic_capable_variables();
11145 break;
11146 }
11147
11148 case OpAtomicStore:
11149 {
11150 // In plain GLSL, we have no atomic stores, so emulate this with an atomic exchange where we don't consume the result.
11151 // Alternatively, we could rely on KHR_memory_model, but that's not very helpful for GL.
11152 uint32_t ptr = ops[0];
11153 // Ignore semantics for now, probably only relevant to CL.
11154 uint32_t val = ops[3];
11155 const char *op = check_atomic_image(ptr) ? "imageAtomicExchange" : "atomicExchange";
11156 statement(op, "(", to_expression(ptr), ", ", to_expression(val), ");");
11157 flush_all_atomic_capable_variables();
11158 break;
11159 }
11160
11161 case OpAtomicIIncrement:
11162 case OpAtomicIDecrement:
11163 {
11164 forced_temporaries.insert(ops[1]);
11165 auto &type = expression_type(ops[2]);
11166 if (type.storage == StorageClassAtomicCounter)
11167 {
11168 // Legacy GLSL stuff, not sure if this is relevant to support.
11169 if (opcode == OpAtomicIIncrement)
11170 GLSL_UFOP(atomicCounterIncrement);
11171 else
11172 GLSL_UFOP(atomicCounterDecrement);
11173 }
11174 else
11175 {
11176 bool atomic_image = check_atomic_image(ops[2]);
11177 bool unsigned_type = (type.basetype == SPIRType::UInt) ||
11178 (atomic_image && get<SPIRType>(type.image.type).basetype == SPIRType::UInt);
11179 const char *op = atomic_image ? "imageAtomicAdd" : "atomicAdd";
11180
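// Decrement is emulated as an add of -1. For unsigned types, adding uint(-1) (0xFFFFFFFFu)
// subtracts 1 under the defined wrap-around arithmetic.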
11181 const char *increment = nullptr;
11182 if (opcode == OpAtomicIIncrement && unsigned_type)
11183 increment = "1u";
11184 else if (opcode == OpAtomicIIncrement)
11185 increment = "1";
11186 else if (unsigned_type)
11187 increment = "uint(-1)";
11188 else
11189 increment = "-1";
11190
11191 emit_op(ops[0], ops[1], join(op, "(", to_expression(ops[2]), ", ", increment, ")"), false);
11192 }
11193
11194 flush_all_atomic_capable_variables();
11195 break;
11196 }
11197
11198 case OpAtomicIAdd:
11199 {
11200 const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd";
11201 forced_temporaries.insert(ops[1]);
11202 emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
11203 flush_all_atomic_capable_variables();
11204 break;
11205 }
11206
11207 case OpAtomicISub:
11208 {
11209 const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd";
11210 forced_temporaries.insert(ops[1]);
11211 auto expr = join(op, "(", to_expression(ops[2]), ", -", to_enclosed_expression(ops[5]), ")");
11212 emit_op(ops[0], ops[1], expr, should_forward(ops[2]) && should_forward(ops[5]));
11213 flush_all_atomic_capable_variables();
11214 break;
11215 }
11216
11217 case OpAtomicSMin:
11218 case OpAtomicUMin:
11219 {
11220 const char *op = check_atomic_image(ops[2]) ? "imageAtomicMin" : "atomicMin";
11221 forced_temporaries.insert(ops[1]);
11222 emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
11223 flush_all_atomic_capable_variables();
11224 break;
11225 }
11226
11227 case OpAtomicSMax:
11228 case OpAtomicUMax:
11229 {
11230 const char *op = check_atomic_image(ops[2]) ? "imageAtomicMax" : "atomicMax";
11231 forced_temporaries.insert(ops[1]);
11232 emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
11233 flush_all_atomic_capable_variables();
11234 break;
11235 }
11236
11237 case OpAtomicAnd:
11238 {
11239 const char *op = check_atomic_image(ops[2]) ? "imageAtomicAnd" : "atomicAnd";
11240 forced_temporaries.insert(ops[1]);
11241 emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
11242 flush_all_atomic_capable_variables();
11243 break;
11244 }
11245
11246 case OpAtomicOr:
11247 {
11248 const char *op = check_atomic_image(ops[2]) ? "imageAtomicOr" : "atomicOr";
11249 forced_temporaries.insert(ops[1]);
11250 emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
11251 flush_all_atomic_capable_variables();
11252 break;
11253 }
11254
11255 case OpAtomicXor:
11256 {
11257 const char *op = check_atomic_image(ops[2]) ? "imageAtomicXor" : "atomicXor";
11258 forced_temporaries.insert(ops[1]);
11259 emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
11260 flush_all_atomic_capable_variables();
11261 break;
11262 }
11263
11264 // Geometry shaders
11265 case OpEmitVertex:
11266 statement("EmitVertex();");
11267 break;
11268
11269 case OpEndPrimitive:
11270 statement("EndPrimitive();");
11271 break;
11272
11273 case OpEmitStreamVertex:
11274 {
11275 if (options.es)
11276 SPIRV_CROSS_THROW("Multi-stream geometry shaders not supported in ES.");
11277 else if (!options.es && options.version < 400)
11278 SPIRV_CROSS_THROW("Multi-stream geometry shaders only supported in GLSL 400.");
11279
11280 auto stream_expr = to_expression(ops[0]);
11281 if (expression_type(ops[0]).basetype != SPIRType::Int)
11282 stream_expr = join("int(", stream_expr, ")");
11283 statement("EmitStreamVertex(", stream_expr, ");");
11284 break;
11285 }
11286
11287 case OpEndStreamPrimitive:
11288 {
11289 if (options.es)
11290 SPIRV_CROSS_THROW("Multi-stream geometry shaders not supported in ES.");
11291 else if (!options.es && options.version < 400)
11292 SPIRV_CROSS_THROW("Multi-stream geometry shaders only supported in GLSL 400.");
11293
11294 auto stream_expr = to_expression(ops[0]);
11295 if (expression_type(ops[0]).basetype != SPIRType::Int)
11296 stream_expr = join("int(", stream_expr, ")");
11297 statement("EndStreamPrimitive(", stream_expr, ");");
11298 break;
11299 }
11300
11301 // Textures
11302 case OpImageSampleExplicitLod:
11303 case OpImageSampleProjExplicitLod:
11304 case OpImageSampleDrefExplicitLod:
11305 case OpImageSampleProjDrefExplicitLod:
11306 case OpImageSampleImplicitLod:
11307 case OpImageSampleProjImplicitLod:
11308 case OpImageSampleDrefImplicitLod:
11309 case OpImageSampleProjDrefImplicitLod:
11310 case OpImageFetch:
11311 case OpImageGather:
11312 case OpImageDrefGather:
11313 // Gets a bit hairy, so move this to a separate instruction.
11314 emit_texture_op(instruction, false);
11315 break;
11316
11317 case OpImageSparseSampleExplicitLod:
11318 case OpImageSparseSampleProjExplicitLod:
11319 case OpImageSparseSampleDrefExplicitLod:
11320 case OpImageSparseSampleProjDrefExplicitLod:
11321 case OpImageSparseSampleImplicitLod:
11322 case OpImageSparseSampleProjImplicitLod:
11323 case OpImageSparseSampleDrefImplicitLod:
11324 case OpImageSparseSampleProjDrefImplicitLod:
11325 case OpImageSparseFetch:
11326 case OpImageSparseGather:
11327 case OpImageSparseDrefGather:
11328 // Gets a bit hairy, so move this to a separate instruction.
11329 emit_texture_op(instruction, true);
11330 break;
11331
11332 case OpImageSparseTexelsResident:
11333 if (options.es)
11334 SPIRV_CROSS_THROW("Sparse feedback is not supported in OpenGL ES.");
11335 require_extension_internal("GL_ARB_sparse_texture2");
11336 emit_unary_func_op_cast(ops[0], ops[1], ops[2], "sparseTexelsResidentARB", int_type, SPIRType::Boolean);
11337 break;
11338
11339 case OpImage:
11340 {
11341 uint32_t result_type = ops[0];
11342 uint32_t id = ops[1];
11343
11344 // Suppress usage tracking.
11345 auto &e = emit_op(result_type, id, to_expression(ops[2]), true, true);
11346
11347 // When using the image, we need to know which variable it is actually loaded from.
11348 auto *var = maybe_get_backing_variable(ops[2]);
11349 e.loaded_from = var ? var->self : ID(0);
11350 break;
11351 }
11352
11353 case OpImageQueryLod:
11354 {
11355 if (!options.es && options.version < 400)
11356 {
11357 require_extension_internal("GL_ARB_texture_query_lod");
11358 // For some reason, the ARB spec is all-caps.
11359 GLSL_BFOP(textureQueryLOD);
11360 }
11361 else if (options.es)
11362 SPIRV_CROSS_THROW("textureQueryLod not supported in ES profile.");
11363 else
11364 GLSL_BFOP(textureQueryLod);
11365 register_control_dependent_expression(ops[1]);
11366 break;
11367 }
11368
11369 case OpImageQueryLevels:
11370 {
11371 uint32_t result_type = ops[0];
11372 uint32_t id = ops[1];
11373
11374 if (!options.es && options.version < 430)
11375 require_extension_internal("GL_ARB_texture_query_levels");
11376 if (options.es)
11377 SPIRV_CROSS_THROW("textureQueryLevels not supported in ES profile.");
11378
11379 auto expr = join("textureQueryLevels(", convert_separate_image_to_expression(ops[2]), ")");
11380 auto &restype = get<SPIRType>(ops[0]);
11381 expr = bitcast_expression(restype, SPIRType::Int, expr);
11382 emit_op(result_type, id, expr, true);
11383 break;
11384 }
11385
11386 case OpImageQuerySamples:
11387 {
11388 auto &type = expression_type(ops[2]);
11389 uint32_t result_type = ops[0];
11390 uint32_t id = ops[1];
11391
11392 string expr;
11393 if (type.image.sampled == 2)
11394 expr = join("imageSamples(", to_expression(ops[2]), ")");
11395 else
11396 expr = join("textureSamples(", convert_separate_image_to_expression(ops[2]), ")");
11397
11398 auto &restype = get<SPIRType>(ops[0]);
11399 expr = bitcast_expression(restype, SPIRType::Int, expr);
11400 emit_op(result_type, id, expr, true);
11401 break;
11402 }
11403
11404 case OpSampledImage:
11405 {
11406 uint32_t result_type = ops[0];
11407 uint32_t id = ops[1];
11408 emit_sampled_image_op(result_type, id, ops[2], ops[3]);
11409 inherit_expression_dependencies(id, ops[2]);
11410 inherit_expression_dependencies(id, ops[3]);
11411 break;
11412 }
11413
11414 case OpImageQuerySizeLod:
11415 {
11416 uint32_t result_type = ops[0];
11417 uint32_t id = ops[1];
11418 uint32_t img = ops[2];
11419
11420 std::string fname = "textureSize";
11421 if (is_legacy_desktop())
11422 {
11423 auto &type = expression_type(img);
11424 auto &imgtype = get<SPIRType>(type.self);
11425 fname = legacy_tex_op(fname, imgtype, img);
11426 }
11427 else if (is_legacy_es())
11428 SPIRV_CROSS_THROW("textureSize is not supported in ESSL 100.");
11429
11430 auto expr = join(fname, "(", convert_separate_image_to_expression(img), ", ",
11431 bitcast_expression(SPIRType::Int, ops[3]), ")");
11432 auto &restype = get<SPIRType>(ops[0]);
11433 expr = bitcast_expression(restype, SPIRType::Int, expr);
11434 emit_op(result_type, id, expr, true);
11435 break;
11436 }
11437
11438 // Image load/store
11439 case OpImageRead:
11440 case OpImageSparseRead:
11441 {
11442 // We added Nonreadable speculatively to the OpImage variable due to glslangValidator
11443 // not adding the proper qualifiers.
11444 // If it turns out we need to read the image after all, remove the qualifier and recompile.
11445 auto *var = maybe_get_backing_variable(ops[2]);
11446 if (var)
11447 {
11448 auto &flags = ir.meta[var->self].decoration.decoration_flags;
11449 if (flags.get(DecorationNonReadable))
11450 {
11451 flags.clear(DecorationNonReadable);
11452 force_recompile();
11453 }
11454 }
11455
11456 uint32_t result_type = ops[0];
11457 uint32_t id = ops[1];
11458
11459 bool pure;
11460 string imgexpr;
11461 auto &type = expression_type(ops[2]);
11462
11463 if (var && var->remapped_variable) // Remapped input, just read as-is without any op-code
11464 {
11465 if (type.image.ms)
11466 SPIRV_CROSS_THROW("Trying to remap multisampled image to variable, this is not possible.");
11467
11468 auto itr =
11469 find_if(begin(pls_inputs), end(pls_inputs), [var](const PlsRemap &pls) { return pls.id == var->self; });
11470
11471 if (itr == end(pls_inputs))
11472 {
11473 // For non-PLS inputs, we rely on subpass type remapping information to get it right
11474 // since ImageRead always returns 4-component vectors and the backing type is opaque.
11475 if (!var->remapped_components)
11476 SPIRV_CROSS_THROW("subpassInput was remapped, but remap_components is not set correctly.");
11477 imgexpr = remap_swizzle(get<SPIRType>(result_type), var->remapped_components, to_expression(ops[2]));
11478 }
11479 else
11480 {
11481 // PLS input could have a different number of components than what the SPIR-V expects, swizzle to
11482 // the appropriate vector size.
11483 uint32_t components = pls_format_to_components(itr->format);
11484 imgexpr = remap_swizzle(get<SPIRType>(result_type), components, to_expression(ops[2]));
11485 }
11486 pure = true;
11487 }
11488 else if (type.image.dim == DimSubpassData)
11489 {
11490 if (var && subpass_input_is_framebuffer_fetch(var->self))
11491 {
11492 imgexpr = to_expression(var->self);
11493 }
11494 else if (options.vulkan_semantics)
11495 {
11496 // With Vulkan semantics, use the proper Vulkan GLSL construct.
11497 if (type.image.ms)
11498 {
11499 uint32_t operands = ops[4];
11500 if (operands != ImageOperandsSampleMask || length != 6)
11501 SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected "
11502 "operand mask was used.");
11503
11504 uint32_t samples = ops[5];
11505 imgexpr = join("subpassLoad(", to_expression(ops[2]), ", ", to_expression(samples), ")");
11506 }
11507 else
11508 imgexpr = join("subpassLoad(", to_expression(ops[2]), ")");
11509 }
11510 else
11511 {
11512 if (type.image.ms)
11513 {
11514 uint32_t operands = ops[4];
11515 if (operands != ImageOperandsSampleMask || length != 6)
11516 SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected "
11517 "operand mask was used.");
11518
11519 uint32_t samples = ops[5];
11520 imgexpr = join("texelFetch(", to_expression(ops[2]), ", ivec2(gl_FragCoord.xy), ",
11521 to_expression(samples), ")");
11522 }
11523 else
11524 {
11525 // Implement subpass loads via texture barrier style sampling.
11526 imgexpr = join("texelFetch(", to_expression(ops[2]), ", ivec2(gl_FragCoord.xy), 0)");
11527 }
11528 }
11529 imgexpr = remap_swizzle(get<SPIRType>(result_type), 4, imgexpr);
11530 pure = true;
11531 }
11532 else
11533 {
11534 bool sparse = opcode == OpImageSparseRead;
11535 uint32_t sparse_code_id = 0;
11536 uint32_t sparse_texel_id = 0;
11537 if (sparse)
11538 emit_sparse_feedback_temporaries(ops[0], ops[1], sparse_code_id, sparse_texel_id);
11539
11540 // imageLoad only accepts int coords, not uint.
11541 auto coord_expr = to_expression(ops[3]);
11542 auto target_coord_type = expression_type(ops[3]);
11543 target_coord_type.basetype = SPIRType::Int;
11544 coord_expr = bitcast_expression(target_coord_type, expression_type(ops[3]).basetype, coord_expr);
11545
11546 // Plain image load/store.
11547 if (sparse)
11548 {
11549 if (type.image.ms)
11550 {
11551 uint32_t operands = ops[4];
11552 if (operands != ImageOperandsSampleMask || length != 6)
11553 SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected "
11554 "operand mask was used.");
11555
11556 uint32_t samples = ops[5];
11557 statement(to_expression(sparse_code_id), " = sparseImageLoadARB(", to_expression(ops[2]), ", ",
11558 coord_expr, ", ", to_expression(samples), ", ", to_expression(sparse_texel_id), ");");
11559 }
11560 else
11561 {
11562 statement(to_expression(sparse_code_id), " = sparseImageLoadARB(", to_expression(ops[2]), ", ",
11563 coord_expr, ", ", to_expression(sparse_texel_id), ");");
11564 }
11565 imgexpr = join(type_to_glsl(get<SPIRType>(result_type)), "(", to_expression(sparse_code_id), ", ",
11566 to_expression(sparse_texel_id), ")");
11567 }
11568 else
11569 {
11570 if (type.image.ms)
11571 {
11572 uint32_t operands = ops[4];
11573 if (operands != ImageOperandsSampleMask || length != 6)
11574 SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected "
11575 "operand mask was used.");
11576
11577 uint32_t samples = ops[5];
11578 imgexpr =
11579 join("imageLoad(", to_expression(ops[2]), ", ", coord_expr, ", ", to_expression(samples), ")");
11580 }
11581 else
11582 imgexpr = join("imageLoad(", to_expression(ops[2]), ", ", coord_expr, ")");
11583 }
11584
11585 if (!sparse)
11586 imgexpr = remap_swizzle(get<SPIRType>(result_type), 4, imgexpr);
11587 pure = false;
11588 }
11589
11590 if (var && var->forwardable)
11591 {
11592 bool forward = forced_temporaries.find(id) == end(forced_temporaries);
11593 auto &e = emit_op(result_type, id, imgexpr, forward);
11594
11595 // We only need to track dependencies if we're reading from image load/store.
11596 if (!pure)
11597 {
11598 e.loaded_from = var->self;
11599 if (forward)
11600 var->dependees.push_back(id);
11601 }
11602 }
11603 else
11604 emit_op(result_type, id, imgexpr, false);
11605
11606 inherit_expression_dependencies(id, ops[2]);
11607 if (type.image.ms)
11608 inherit_expression_dependencies(id, ops[5]);
11609 break;
11610 }
11611
11612 case OpImageTexelPointer:
11613 {
11614 uint32_t result_type = ops[0];
11615 uint32_t id = ops[1];
11616
11617 auto coord_expr = to_expression(ops[3]);
11618 auto target_coord_type = expression_type(ops[3]);
11619 target_coord_type.basetype = SPIRType::Int;
11620 coord_expr = bitcast_expression(target_coord_type, expression_type(ops[3]).basetype, coord_expr);
11621
11622 auto expr = join(to_expression(ops[2]), ", ", coord_expr);
11623 if (has_decoration(id, DecorationNonUniformEXT) || has_decoration(ops[2], DecorationNonUniformEXT))
11624 convert_non_uniform_expression(expression_type(ops[2]), expr);
11625
11626 auto &e = set<SPIRExpression>(id, expr, result_type, true);
11627
11628 // When using the pointer, we need to know which variable it is actually loaded from.
11629 auto *var = maybe_get_backing_variable(ops[2]);
11630 e.loaded_from = var ? var->self : ID(0);
11631 inherit_expression_dependencies(id, ops[3]);
11632 break;
11633 }
11634
11635 case OpImageWrite:
11636 {
11637 // We added Nonwritable speculatively to the OpImage variable due to glslangValidator
11638 // not adding the proper qualifiers.
11639 // If it turns out we need to write to the image after all, remove the qualifier and recompile.
11640 auto *var = maybe_get_backing_variable(ops[0]);
11641 if (var)
11642 {
11643 auto &flags = ir.meta[var->self].decoration.decoration_flags;
11644 if (flags.get(DecorationNonWritable))
11645 {
11646 flags.clear(DecorationNonWritable);
11647 force_recompile();
11648 }
11649 }
11650
11651 auto &type = expression_type(ops[0]);
11652 auto &value_type = expression_type(ops[2]);
11653 auto store_type = value_type;
11654 store_type.vecsize = 4;
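// imageStore() always takes a 4-component value, so narrower values are widened by the
// swizzle remap below, e.g. a vec2 value v is stored roughly as vec4(v.xyyy).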
11655
11656 // imageStore only accepts int coords, not uint.
11657 auto coord_expr = to_expression(ops[1]);
11658 auto target_coord_type = expression_type(ops[1]);
11659 target_coord_type.basetype = SPIRType::Int;
11660 coord_expr = bitcast_expression(target_coord_type, expression_type(ops[1]).basetype, coord_expr);
11661
11662 if (type.image.ms)
11663 {
11664 uint32_t operands = ops[3];
11665 if (operands != ImageOperandsSampleMask || length != 5)
11666 SPIRV_CROSS_THROW("Multisampled image used in OpImageWrite, but unexpected operand mask was used.");
11667 uint32_t samples = ops[4];
11668 statement("imageStore(", to_expression(ops[0]), ", ", coord_expr, ", ", to_expression(samples), ", ",
11669 remap_swizzle(store_type, value_type.vecsize, to_expression(ops[2])), ");");
11670 }
11671 else
11672 statement("imageStore(", to_expression(ops[0]), ", ", coord_expr, ", ",
11673 remap_swizzle(store_type, value_type.vecsize, to_expression(ops[2])), ");");
11674
11675 if (var && variable_storage_is_aliased(*var))
11676 flush_all_aliased_variables();
11677 break;
11678 }
11679
11680 case OpImageQuerySize:
11681 {
11682 auto &type = expression_type(ops[2]);
11683 uint32_t result_type = ops[0];
11684 uint32_t id = ops[1];
11685
11686 if (type.basetype == SPIRType::Image)
11687 {
11688 string expr;
11689 if (type.image.sampled == 2)
11690 {
11691 if (!options.es && options.version < 430)
11692 require_extension_internal("GL_ARB_shader_image_size");
11693 else if (options.es && options.version < 310)
11694 SPIRV_CROSS_THROW("At least ESSL 3.10 required for imageSize.");
11695
11696 // The size of an image is always constant.
11697 expr = join("imageSize(", to_expression(ops[2]), ")");
11698 }
11699 else
11700 {
11701 // This path is hit for samplerBuffers and multisampled images which do not have LOD.
11702 std::string fname = "textureSize";
11703 if (is_legacy())
11704 {
11705 auto &imgtype = get<SPIRType>(type.self);
11706 fname = legacy_tex_op(fname, imgtype, ops[2]);
11707 }
11708 expr = join(fname, "(", convert_separate_image_to_expression(ops[2]), ")");
11709 }
11710
11711 auto &restype = get<SPIRType>(ops[0]);
11712 expr = bitcast_expression(restype, SPIRType::Int, expr);
11713 emit_op(result_type, id, expr, true);
11714 }
11715 else
11716 SPIRV_CROSS_THROW("Invalid type for OpImageQuerySize.");
11717 break;
11718 }
11719
11720 // Compute
11721 case OpControlBarrier:
11722 case OpMemoryBarrier:
11723 {
11724 uint32_t execution_scope = 0;
11725 uint32_t memory;
11726 uint32_t semantics;
11727
11728 if (opcode == OpMemoryBarrier)
11729 {
11730 memory = evaluate_constant_u32(ops[0]);
11731 semantics = evaluate_constant_u32(ops[1]);
11732 }
11733 else
11734 {
11735 execution_scope = evaluate_constant_u32(ops[0]);
11736 memory = evaluate_constant_u32(ops[1]);
11737 semantics = evaluate_constant_u32(ops[2]);
11738 }
11739
11740 if (execution_scope == ScopeSubgroup || memory == ScopeSubgroup)
11741 {
11742 // OpControlBarrier with ScopeSubgroup is subgroupBarrier()
11743 if (opcode != OpControlBarrier)
11744 {
11745 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMemBarrier);
11746 }
11747 else
11748 {
11749 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBarrier);
11750 }
11751 }
11752
11753 if (execution_scope != ScopeSubgroup && get_entry_point().model == ExecutionModelTessellationControl)
11754 {
11755 // Tessellation control shaders only have barrier(), which implies a memory barrier.
11756 if (opcode == OpControlBarrier)
11757 statement("barrier();");
11758 break;
11759 }
11760
11761 // We only care about these flags, acquire/release and friends are not relevant to GLSL.
11762 semantics = mask_relevant_memory_semantics(semantics);
11763
11764 if (opcode == OpMemoryBarrier)
11765 {
11766 // If we are a memory barrier, and the next instruction is a control barrier, check if the control
11767 // barrier's memory semantics already cover what we need, so we avoid emitting redundant barriers.
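// E.g. an OpMemoryBarrier(Workgroup, WorkgroupMemory) immediately followed by
// OpControlBarrier(Workgroup, Workgroup, WorkgroupMemory) can be elided entirely,
// since the control barrier's implied memory ordering already covers it.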
11768 const Instruction *next = get_next_instruction_in_block(instruction);
11769 if (next && next->op == OpControlBarrier)
11770 {
11771 auto *next_ops = stream(*next);
11772 uint32_t next_memory = evaluate_constant_u32(next_ops[1]);
11773 uint32_t next_semantics = evaluate_constant_u32(next_ops[2]);
11774 next_semantics = mask_relevant_memory_semantics(next_semantics);
11775
11776 bool memory_scope_covered = false;
11777 if (next_memory == memory)
11778 memory_scope_covered = true;
11779 else if (next_semantics == MemorySemanticsWorkgroupMemoryMask)
11780 {
11781 // If we only care about workgroup memory, either Device or Workgroup scope is fine,
11782 // scope does not have to match.
11783 if ((next_memory == ScopeDevice || next_memory == ScopeWorkgroup) &&
11784 (memory == ScopeDevice || memory == ScopeWorkgroup))
11785 {
11786 memory_scope_covered = true;
11787 }
11788 }
11789 else if (memory == ScopeWorkgroup && next_memory == ScopeDevice)
11790 {
11791 // The control barrier has device scope, but the memory barrier just has workgroup scope.
11792 memory_scope_covered = true;
11793 }
11794
11795 // If we have the same memory scope, and all memory types are covered, we're good.
11796 if (memory_scope_covered && (semantics & next_semantics) == semantics)
11797 break;
11798 }
11799 }
11800
11801 // We are synchronizing some memory or syncing execution,
11802 // so we cannot forward any loads beyond the memory barrier.
11803 if (semantics || opcode == OpControlBarrier)
11804 {
11805 assert(current_emitting_block);
11806 flush_control_dependent_expressions(current_emitting_block->self);
11807 flush_all_active_variables();
11808 }
11809
11810 if (memory == ScopeWorkgroup) // Only need to consider memory within a group
11811 {
11812 if (semantics == MemorySemanticsWorkgroupMemoryMask)
11813 {
11814 // OpControlBarrier implies a memory barrier for shared memory as well.
11815 bool implies_shared_barrier = opcode == OpControlBarrier && execution_scope == ScopeWorkgroup;
11816 if (!implies_shared_barrier)
11817 statement("memoryBarrierShared();");
11818 }
11819 else if (semantics != 0)
11820 statement("groupMemoryBarrier();");
11821 }
11822 else if (memory == ScopeSubgroup)
11823 {
11824 const uint32_t all_barriers =
11825 MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | MemorySemanticsImageMemoryMask;
11826
11827 if (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask))
11828 {
11829 // These are not relevant for GLSL, but assume it means memoryBarrier().
11830 // memoryBarrier() does everything, so no need to test anything else.
11831 statement("subgroupMemoryBarrier();");
11832 }
11833 else if ((semantics & all_barriers) == all_barriers)
11834 {
11835 // Short-hand instead of emitting 3 barriers.
11836 statement("subgroupMemoryBarrier();");
11837 }
11838 else
11839 {
11840 // Pick out individual barriers.
11841 if (semantics & MemorySemanticsWorkgroupMemoryMask)
11842 statement("subgroupMemoryBarrierShared();");
11843 if (semantics & MemorySemanticsUniformMemoryMask)
11844 statement("subgroupMemoryBarrierBuffer();");
11845 if (semantics & MemorySemanticsImageMemoryMask)
11846 statement("subgroupMemoryBarrierImage();");
11847 }
11848 }
11849 else
11850 {
11851 const uint32_t all_barriers =
11852 MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | MemorySemanticsImageMemoryMask;
11853
11854 if (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask))
11855 {
11856 // These are not relevant for GLSL, but assume it means memoryBarrier().
11857 // memoryBarrier() does everything, so no need to test anything else.
11858 statement("memoryBarrier();");
11859 }
11860 else if ((semantics & all_barriers) == all_barriers)
11861 {
11862 // Short-hand instead of emitting 3 barriers.
11863 statement("memoryBarrier();");
11864 }
11865 else
11866 {
11867 // Pick out individual barriers.
11868 if (semantics & MemorySemanticsWorkgroupMemoryMask)
11869 statement("memoryBarrierShared();");
11870 if (semantics & MemorySemanticsUniformMemoryMask)
11871 statement("memoryBarrierBuffer();");
11872 if (semantics & MemorySemanticsImageMemoryMask)
11873 statement("memoryBarrierImage();");
11874 }
11875 }
11876
11877 if (opcode == OpControlBarrier)
11878 {
11879 if (execution_scope == ScopeSubgroup)
11880 statement("subgroupBarrier();");
11881 else
11882 statement("barrier();");
11883 }
11884 break;
11885 }
11886
11887 case OpExtInst:
11888 {
11889 uint32_t extension_set = ops[2];
11890
11891 if (get<SPIRExtension>(extension_set).ext == SPIRExtension::GLSL)
11892 {
11893 emit_glsl_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
11894 }
11895 else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_shader_ballot)
11896 {
11897 emit_spv_amd_shader_ballot_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
11898 }
11899 else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_shader_explicit_vertex_parameter)
11900 {
11901 emit_spv_amd_shader_explicit_vertex_parameter_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
11902 }
11903 else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_shader_trinary_minmax)
11904 {
11905 emit_spv_amd_shader_trinary_minmax_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
11906 }
11907 else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_gcn_shader)
11908 {
11909 emit_spv_amd_gcn_shader_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
11910 }
11911 else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_debug_info)
11912 {
11913 break; // Ignore SPIR-V debug information extended instructions.
11914 }
11915 else
11916 {
11917 statement("// unimplemented ext op ", instruction.op);
11918 break;
11919 }
11920
11921 break;
11922 }
11923
11924 // Legacy sub-group stuff ...
11925 case OpSubgroupBallotKHR:
11926 {
11927 uint32_t result_type = ops[0];
11928 uint32_t id = ops[1];
11929 string expr;
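// ballotARB() returns a uint64_t, so unpack it into a uvec2 and zero-pad it up to the
// uvec4 ballot result type that SPIR-V expects.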
11930 expr = join("uvec4(unpackUint2x32(ballotARB(", to_expression(ops[2]), ")), 0u, 0u)");
11931 emit_op(result_type, id, expr, should_forward(ops[2]));
11932
11933 require_extension_internal("GL_ARB_shader_ballot");
11934 inherit_expression_dependencies(id, ops[2]);
11935 register_control_dependent_expression(ops[1]);
11936 break;
11937 }
11938
11939 case OpSubgroupFirstInvocationKHR:
11940 {
11941 uint32_t result_type = ops[0];
11942 uint32_t id = ops[1];
11943 emit_unary_func_op(result_type, id, ops[2], "readFirstInvocationARB");
11944
11945 require_extension_internal("GL_ARB_shader_ballot");
11946 register_control_dependent_expression(ops[1]);
11947 break;
11948 }
11949
11950 case OpSubgroupReadInvocationKHR:
11951 {
11952 uint32_t result_type = ops[0];
11953 uint32_t id = ops[1];
11954 emit_binary_func_op(result_type, id, ops[2], ops[3], "readInvocationARB");
11955
11956 require_extension_internal("GL_ARB_shader_ballot");
11957 register_control_dependent_expression(ops[1]);
11958 break;
11959 }
11960
11961 case OpSubgroupAllKHR:
11962 {
11963 uint32_t result_type = ops[0];
11964 uint32_t id = ops[1];
11965 emit_unary_func_op(result_type, id, ops[2], "allInvocationsARB");
11966
11967 require_extension_internal("GL_ARB_shader_group_vote");
11968 register_control_dependent_expression(ops[1]);
11969 break;
11970 }
11971
11972 case OpSubgroupAnyKHR:
11973 {
11974 uint32_t result_type = ops[0];
11975 uint32_t id = ops[1];
11976 emit_unary_func_op(result_type, id, ops[2], "anyInvocationARB");
11977
11978 require_extension_internal("GL_ARB_shader_group_vote");
11979 register_control_dependent_expression(ops[1]);
11980 break;
11981 }
11982
11983 case OpSubgroupAllEqualKHR:
11984 {
11985 uint32_t result_type = ops[0];
11986 uint32_t id = ops[1];
11987 emit_unary_func_op(result_type, id, ops[2], "allInvocationsEqualARB");
11988
11989 require_extension_internal("GL_ARB_shader_group_vote");
11990 register_control_dependent_expression(ops[1]);
11991 break;
11992 }
11993
11994 case OpGroupIAddNonUniformAMD:
11995 case OpGroupFAddNonUniformAMD:
11996 {
11997 uint32_t result_type = ops[0];
11998 uint32_t id = ops[1];
11999 emit_unary_func_op(result_type, id, ops[4], "addInvocationsNonUniformAMD");
12000
12001 require_extension_internal("GL_AMD_shader_ballot");
12002 register_control_dependent_expression(ops[1]);
12003 break;
12004 }
12005
12006 case OpGroupFMinNonUniformAMD:
12007 case OpGroupUMinNonUniformAMD:
12008 case OpGroupSMinNonUniformAMD:
12009 {
12010 uint32_t result_type = ops[0];
12011 uint32_t id = ops[1];
12012 emit_unary_func_op(result_type, id, ops[4], "minInvocationsNonUniformAMD");
12013
12014 require_extension_internal("GL_AMD_shader_ballot");
12015 register_control_dependent_expression(ops[1]);
12016 break;
12017 }
12018
12019 case OpGroupFMaxNonUniformAMD:
12020 case OpGroupUMaxNonUniformAMD:
12021 case OpGroupSMaxNonUniformAMD:
12022 {
12023 uint32_t result_type = ops[0];
12024 uint32_t id = ops[1];
12025 emit_unary_func_op(result_type, id, ops[4], "maxInvocationsNonUniformAMD");
12026
12027 require_extension_internal("GL_AMD_shader_ballot");
12028 register_control_dependent_expression(ops[1]);
12029 break;
12030 }
12031
12032 case OpFragmentMaskFetchAMD:
12033 {
12034 auto &type = expression_type(ops[2]);
12035 uint32_t result_type = ops[0];
12036 uint32_t id = ops[1];
12037
12038 if (type.image.dim == spv::DimSubpassData)
12039 {
12040 emit_unary_func_op(result_type, id, ops[2], "fragmentMaskFetchAMD");
12041 }
12042 else
12043 {
12044 emit_binary_func_op(result_type, id, ops[2], ops[3], "fragmentMaskFetchAMD");
12045 }
12046
12047 require_extension_internal("GL_AMD_shader_fragment_mask");
12048 break;
12049 }
12050
12051 case OpFragmentFetchAMD:
12052 {
12053 auto &type = expression_type(ops[2]);
12054 uint32_t result_type = ops[0];
12055 uint32_t id = ops[1];
12056
12057 if (type.image.dim == spv::DimSubpassData)
12058 {
12059 emit_binary_func_op(result_type, id, ops[2], ops[4], "fragmentFetchAMD");
12060 }
12061 else
12062 {
12063 emit_trinary_func_op(result_type, id, ops[2], ops[3], ops[4], "fragmentFetchAMD");
12064 }
12065
12066 require_extension_internal("GL_AMD_shader_fragment_mask");
12067 break;
12068 }
12069
12070 // Vulkan 1.1 sub-group stuff ...
12071 case OpGroupNonUniformElect:
12072 case OpGroupNonUniformBroadcast:
12073 case OpGroupNonUniformBroadcastFirst:
12074 case OpGroupNonUniformBallot:
12075 case OpGroupNonUniformInverseBallot:
12076 case OpGroupNonUniformBallotBitExtract:
12077 case OpGroupNonUniformBallotBitCount:
12078 case OpGroupNonUniformBallotFindLSB:
12079 case OpGroupNonUniformBallotFindMSB:
12080 case OpGroupNonUniformShuffle:
12081 case OpGroupNonUniformShuffleXor:
12082 case OpGroupNonUniformShuffleUp:
12083 case OpGroupNonUniformShuffleDown:
12084 case OpGroupNonUniformAll:
12085 case OpGroupNonUniformAny:
12086 case OpGroupNonUniformAllEqual:
12087 case OpGroupNonUniformFAdd:
12088 case OpGroupNonUniformIAdd:
12089 case OpGroupNonUniformFMul:
12090 case OpGroupNonUniformIMul:
12091 case OpGroupNonUniformFMin:
12092 case OpGroupNonUniformFMax:
12093 case OpGroupNonUniformSMin:
12094 case OpGroupNonUniformSMax:
12095 case OpGroupNonUniformUMin:
12096 case OpGroupNonUniformUMax:
12097 case OpGroupNonUniformBitwiseAnd:
12098 case OpGroupNonUniformBitwiseOr:
12099 case OpGroupNonUniformBitwiseXor:
12100 case OpGroupNonUniformQuadSwap:
12101 case OpGroupNonUniformQuadBroadcast:
12102 emit_subgroup_op(instruction);
12103 break;
12104
12105 case OpFUnordEqual:
12106 case OpFUnordNotEqual:
12107 case OpFUnordLessThan:
12108 case OpFUnordGreaterThan:
12109 case OpFUnordLessThanEqual:
12110 case OpFUnordGreaterThanEqual:
12111 {
12112 // GLSL doesn't specify if floating point comparisons are ordered or unordered,
12113 // but glslang always emits ordered floating point compares for GLSL.
12114 // To get unordered compares, we can test the opposite thing and invert the result.
12115 // This way, we force true when there is any NaN present.
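// E.g. FUnordLessThan(a, b) is emitted as !(a >= b): true when a < b, and also true
// when either operand is NaN, because NaN makes the ordered compare false.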
12116 uint32_t op0 = ops[2];
12117 uint32_t op1 = ops[3];
12118
12119 string expr;
12120 if (expression_type(op0).vecsize > 1)
12121 {
12122 const char *comp_op = nullptr;
12123 switch (opcode)
12124 {
12125 case OpFUnordEqual:
12126 comp_op = "notEqual";
12127 break;
12128
12129 case OpFUnordNotEqual:
12130 comp_op = "equal";
12131 break;
12132
12133 case OpFUnordLessThan:
12134 comp_op = "greaterThanEqual";
12135 break;
12136
12137 case OpFUnordLessThanEqual:
12138 comp_op = "greaterThan";
12139 break;
12140
12141 case OpFUnordGreaterThan:
12142 comp_op = "lessThanEqual";
12143 break;
12144
12145 case OpFUnordGreaterThanEqual:
12146 comp_op = "lessThan";
12147 break;
12148
12149 default:
12150 assert(0);
12151 break;
12152 }
12153
12154 expr = join("not(", comp_op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), "))");
12155 }
12156 else
12157 {
12158 const char *comp_op = nullptr;
12159 switch (opcode)
12160 {
12161 case OpFUnordEqual:
12162 comp_op = " != ";
12163 break;
12164
12165 case OpFUnordNotEqual:
12166 comp_op = " == ";
12167 break;
12168
12169 case OpFUnordLessThan:
12170 comp_op = " >= ";
12171 break;
12172
12173 case OpFUnordLessThanEqual:
12174 comp_op = " > ";
12175 break;
12176
12177 case OpFUnordGreaterThan:
12178 comp_op = " <= ";
12179 break;
12180
12181 case OpFUnordGreaterThanEqual:
12182 comp_op = " < ";
12183 break;
12184
12185 default:
12186 assert(0);
12187 break;
12188 }
12189
12190 expr = join("!(", to_enclosed_unpacked_expression(op0), comp_op, to_enclosed_unpacked_expression(op1), ")");
12191 }
12192
12193 emit_op(ops[0], ops[1], expr, should_forward(op0) && should_forward(op1));
12194 inherit_expression_dependencies(ops[1], op0);
12195 inherit_expression_dependencies(ops[1], op1);
12196 break;
12197 }
12198
12199 case OpReportIntersectionKHR:
12200 // NV is same opcode.
12201 forced_temporaries.insert(ops[1]);
12202 if (ray_tracing_is_khr)
12203 GLSL_BFOP(reportIntersectionEXT);
12204 else
12205 GLSL_BFOP(reportIntersectionNV);
12206 flush_control_dependent_expressions(current_emitting_block->self);
12207 break;
12208 case OpIgnoreIntersectionNV:
12209 // KHR variant is a terminator.
12210 statement("ignoreIntersectionNV();");
12211 flush_control_dependent_expressions(current_emitting_block->self);
12212 break;
12213 case OpTerminateRayNV:
12214 // KHR variant is a terminator.
12215 statement("terminateRayNV();");
12216 flush_control_dependent_expressions(current_emitting_block->self);
12217 break;
12218 case OpTraceNV:
12219 statement("traceNV(", to_expression(ops[0]), ", ", to_expression(ops[1]), ", ", to_expression(ops[2]), ", ",
12220 to_expression(ops[3]), ", ", to_expression(ops[4]), ", ", to_expression(ops[5]), ", ",
12221 to_expression(ops[6]), ", ", to_expression(ops[7]), ", ", to_expression(ops[8]), ", ",
12222 to_expression(ops[9]), ", ", to_expression(ops[10]), ");");
12223 flush_control_dependent_expressions(current_emitting_block->self);
12224 break;
12225 case OpTraceRayKHR:
12226 if (!has_decoration(ops[10], DecorationLocation))
12227 SPIRV_CROSS_THROW("A memory declaration object must be used in TraceRayKHR.");
12228 statement("traceRayEXT(", to_expression(ops[0]), ", ", to_expression(ops[1]), ", ", to_expression(ops[2]), ", ",
12229 to_expression(ops[3]), ", ", to_expression(ops[4]), ", ", to_expression(ops[5]), ", ",
12230 to_expression(ops[6]), ", ", to_expression(ops[7]), ", ", to_expression(ops[8]), ", ",
12231 to_expression(ops[9]), ", ", get_decoration(ops[10], DecorationLocation), ");");
12232 flush_control_dependent_expressions(current_emitting_block->self);
12233 break;
12234 case OpExecuteCallableNV:
12235 statement("executeCallableNV(", to_expression(ops[0]), ", ", to_expression(ops[1]), ");");
12236 flush_control_dependent_expressions(current_emitting_block->self);
12237 break;
12238 case OpExecuteCallableKHR:
12239 if (!has_decoration(ops[1], DecorationLocation))
12240 SPIRV_CROSS_THROW("A memory declaration object must be used in ExecuteCallableKHR.");
12241 statement("executeCallableEXT(", to_expression(ops[0]), ", ", get_decoration(ops[1], DecorationLocation), ");");
12242 flush_control_dependent_expressions(current_emitting_block->self);
12243 break;
12244
12245 case OpConvertUToAccelerationStructureKHR:
12246 GLSL_UFOP(accelerationStructureEXT);
12247 break;
12248
12249 case OpConvertUToPtr:
12250 {
12251 auto &type = get<SPIRType>(ops[0]);
12252 if (type.storage != StorageClassPhysicalStorageBufferEXT)
12253 SPIRV_CROSS_THROW("Only StorageClassPhysicalStorageBufferEXT is supported by OpConvertUToPtr.");
12254
12255 auto op = type_to_glsl(type);
12256 emit_unary_func_op(ops[0], ops[1], ops[2], op.c_str());
12257 break;
12258 }
12259
12260 case OpConvertPtrToU:
12261 {
12262 auto &type = get<SPIRType>(ops[0]);
12263 auto &ptr_type = expression_type(ops[2]);
12264 if (ptr_type.storage != StorageClassPhysicalStorageBufferEXT)
12265 SPIRV_CROSS_THROW("Only StorageClassPhysicalStorageBufferEXT is supported by OpConvertPtrToU.");
12266
12267 auto op = type_to_glsl(type);
12268 emit_unary_func_op(ops[0], ops[1], ops[2], op.c_str());
12269 break;
12270 }
12271
12272 case OpUndef:
12273 // Undefined value has been declared.
12274 break;
12275
12276 case OpLine:
12277 {
12278 emit_line_directive(ops[0], ops[1]);
12279 break;
12280 }
12281
12282 case OpNoLine:
12283 break;
12284
12285 case OpDemoteToHelperInvocationEXT:
12286 if (!options.vulkan_semantics)
12287 SPIRV_CROSS_THROW("GL_EXT_demote_to_helper_invocation is only supported in Vulkan GLSL.");
12288 require_extension_internal("GL_EXT_demote_to_helper_invocation");
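// backend.demote_literal is normally "demote" here, so this emits a plain "demote;" statement.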
12289 statement(backend.demote_literal, ";");
12290 break;
12291
12292 case OpIsHelperInvocationEXT:
12293 if (!options.vulkan_semantics)
12294 SPIRV_CROSS_THROW("GL_EXT_demote_to_helper_invocation is only supported in Vulkan GLSL.");
12295 require_extension_internal("GL_EXT_demote_to_helper_invocation");
12296 emit_op(ops[0], ops[1], "helperInvocationEXT()", false);
12297 break;
12298
12299 case OpBeginInvocationInterlockEXT:
12300 // If the interlock is complex, we emit this elsewhere.
12301 if (!interlocked_is_complex)
12302 {
12303 if (options.es)
12304 statement("beginInvocationInterlockNV();");
12305 else
12306 statement("beginInvocationInterlockARB();");
12307
12308 flush_all_active_variables();
12309 // Make sure forwarding doesn't propagate outside interlock region.
12310 }
12311 break;
12312
12313 case OpEndInvocationInterlockEXT:
12314 // If the interlock is complex, we emit this elsewhere.
12315 if (!interlocked_is_complex)
12316 {
12317 if (options.es)
12318 statement("endInvocationInterlockNV();");
12319 else
12320 statement("endInvocationInterlockARB();");
12321
12322 flush_all_active_variables();
12323 // Make sure forwarding doesn't propagate outside interlock region.
12324 }
12325 break;
12326
12327 default:
12328 statement("// unimplemented op ", instruction.op);
12329 break;
12330 }
12331 }
12332
12333 // Appends function arguments, mapped from global variables, beyond the specified arg index.
12334 // This is used when a function call uses fewer arguments than the function defines.
12335 // This situation may occur if the function signature has been dynamically modified to
12336 // extract global variables referenced from within the function, and convert them to
12337 // function arguments. This is necessary for shader languages that do not support global
12338 // access to shader input content from within a function (e.g. Metal). Each additional
12339 // function arg uses the name of the global variable. Function nesting will modify the
12340 // functions and function calls all the way up the nesting chain.
12341 void CompilerGLSL::append_global_func_args(const SPIRFunction &func, uint32_t index, SmallVector<string> &arglist)
12342 {
12343 auto &args = func.arguments;
12344 uint32_t arg_cnt = uint32_t(args.size());
12345 for (uint32_t arg_idx = index; arg_idx < arg_cnt; arg_idx++)
12346 {
12347 auto &arg = args[arg_idx];
12348 assert(arg.alias_global_variable);
12349
12350 // If the underlying variable needs to be declared
12351 // (ie. a local variable with deferred declaration), do so now.
12352 uint32_t var_id = get<SPIRVariable>(arg.id).basevariable;
12353 if (var_id)
12354 flush_variable_declaration(var_id);
12355
12356 arglist.push_back(to_func_call_arg(arg, arg.id));
12357 }
12358 }
12359
12360 string CompilerGLSL::to_member_name(const SPIRType &type, uint32_t index)
12361 {
12362 if (type.type_alias != TypeID(0) &&
12363 !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked))
12364 {
12365 return to_member_name(get<SPIRType>(type.type_alias), index);
12366 }
12367
12368 auto &memb = ir.meta[type.self].members;
12369 if (index < memb.size() && !memb[index].alias.empty())
12370 return memb[index].alias;
12371 else
12372 return join("_m", index);
12373 }
12374
12375 string CompilerGLSL::to_member_reference(uint32_t, const SPIRType &type, uint32_t index, bool)
12376 {
12377 return join(".", to_member_name(type, index));
12378 }
12379
12380 string CompilerGLSL::to_multi_member_reference(const SPIRType &type, const SmallVector<uint32_t> &indices)
12381 {
12382 string ret;
12383 auto *member_type = &type;
12384 for (auto &index : indices)
12385 {
12386 ret += join(".", to_member_name(*member_type, index));
12387 member_type = &get<SPIRType>(member_type->member_types[index]);
12388 }
12389 return ret;
12390 }
12391
12392 void CompilerGLSL::add_member_name(SPIRType &type, uint32_t index)
12393 {
12394 auto &memb = ir.meta[type.self].members;
12395 if (index < memb.size() && !memb[index].alias.empty())
12396 {
12397 auto &name = memb[index].alias;
12398 if (name.empty())
12399 return;
12400
12401 ParsedIR::sanitize_identifier(name, true, true);
12402 update_name_cache(type.member_name_cache, name);
12403 }
12404 }
12405
12406 // Checks whether the ID is a row_major matrix that requires conversion before use
12407 bool CompilerGLSL::is_non_native_row_major_matrix(uint32_t id)
12408 {
12409 // Natively supported row-major matrices do not need to be converted.
12410 // Legacy targets do not support row major.
12411 if (backend.native_row_major_matrix && !is_legacy())
12412 return false;
12413
12414 auto *e = maybe_get<SPIRExpression>(id);
12415 if (e)
12416 return e->need_transpose;
12417 else
12418 return has_decoration(id, DecorationRowMajor);
12419 }
12420
12421 // Checks whether the member is a row_major matrix that requires conversion before use
12422 bool CompilerGLSL::member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index)
12423 {
12424 // Natively supported row-major matrices do not need to be converted.
12425 if (backend.native_row_major_matrix && !is_legacy())
12426 return false;
12427
12428 // Non-matrix or column-major matrix types do not need to be converted.
12429 if (!has_member_decoration(type.self, index, DecorationRowMajor))
12430 return false;
12431
12432 // Only square row-major matrices can be converted at this time.
12433 // Converting non-square matrices will require defining a custom GLSL function that
12434 // swaps matrix elements while retaining the original dimensional form of the matrix.
12435 const auto &mbr_type = get<SPIRType>(type.member_types[index]);
12436 if (mbr_type.columns != mbr_type.vecsize)
12437 SPIRV_CROSS_THROW("Row-major matrices must be square on this platform.");
12438
12439 return true;
12440 }
12441
12442 // Checks if we need to remap physical type IDs when declaring the type in a buffer.
12443 bool CompilerGLSL::member_is_remapped_physical_type(const SPIRType &type, uint32_t index) const
12444 {
12445 return has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPhysicalTypeID);
12446 }
12447
12448 // Checks whether the member is in packed data type, that might need to be unpacked.
12449 bool CompilerGLSL::member_is_packed_physical_type(const SPIRType &type, uint32_t index) const
12450 {
12451 return has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPhysicalTypePacked);
12452 }
12453
12454 // Wraps the expression string in a function call that converts the
12455 // row_major matrix result of the expression to a column_major matrix.
12456 // Base implementation uses the standard library transpose() function.
12457 // Subclasses may override to use a different function.
12458 string CompilerGLSL::convert_row_major_matrix(string exp_str, const SPIRType &exp_type, uint32_t /* physical_type_id */,
12459 bool /*is_packed*/)
12460 {
12461 strip_enclosed_expression(exp_str);
12462 if (!is_matrix(exp_type))
12463 {
12464 auto column_index = exp_str.find_last_of('[');
12465 if (column_index == string::npos)
12466 return exp_str;
12467
12468 auto column_expr = exp_str.substr(column_index);
12469 exp_str.resize(column_index);
12470
12471 auto transposed_expr = type_to_glsl_constructor(exp_type) + "(";
12472
12473 // Loading a column from a row-major matrix. Unroll the load.
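// E.g. reading column 1 of a row-major mat3 M ("M[1]") becomes "vec3(M[0][1], M[1][1], M[2][1])".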
12474 for (uint32_t c = 0; c < exp_type.vecsize; c++)
12475 {
12476 transposed_expr += join(exp_str, '[', c, ']', column_expr);
12477 if (c + 1 < exp_type.vecsize)
12478 transposed_expr += ", ";
12479 }
12480
12481 transposed_expr += ")";
12482 return transposed_expr;
12483 }
12484 else if (options.version < 120)
12485 {
12486 // GLSL 110, ES 100 do not have transpose(), so emulate it. Note that
12487 // these GLSL versions do not support non-square matrices.
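// Setting one of these flags causes a matching helper, e.g. "mat2 spvTranspose(mat2 m)",
// to be emitted elsewhere once the recompile below is triggered.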
12488 if (exp_type.vecsize == 2 && exp_type.columns == 2)
12489 {
12490 if (!requires_transpose_2x2)
12491 {
12492 requires_transpose_2x2 = true;
12493 force_recompile();
12494 }
12495 }
12496 else if (exp_type.vecsize == 3 && exp_type.columns == 3)
12497 {
12498 if (!requires_transpose_3x3)
12499 {
12500 requires_transpose_3x3 = true;
12501 force_recompile();
12502 }
12503 }
12504 else if (exp_type.vecsize == 4 && exp_type.columns == 4)
12505 {
12506 if (!requires_transpose_4x4)
12507 {
12508 requires_transpose_4x4 = true;
12509 force_recompile();
12510 }
12511 }
12512 else
12513 SPIRV_CROSS_THROW("Non-square matrices are not supported in legacy GLSL, cannot transpose.");
12514 return join("spvTranspose(", exp_str, ")");
12515 }
12516 else
12517 return join("transpose(", exp_str, ")");
12518 }
12519
12520 string CompilerGLSL::variable_decl(const SPIRType &type, const string &name, uint32_t id)
12521 {
12522 string type_name = type_to_glsl(type, id);
12523 remap_variable_type_name(type, name, type_name);
12524 return join(type_name, " ", name, type_to_array_glsl(type));
12525 }
12526
12527 // Emit a structure member. Subclasses may override to modify output,
12528 // or to dynamically add a padding member if needed.
12529 void CompilerGLSL::emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index,
12530 const string &qualifier, uint32_t)
12531 {
12532 auto &membertype = get<SPIRType>(member_type_id);
12533
12534 Bitset memberflags;
12535 auto &memb = ir.meta[type.self].members;
12536 if (index < memb.size())
12537 memberflags = memb[index].decoration_flags;
12538
12539 string qualifiers;
12540 bool is_block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) ||
12541 ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
12542
12543 if (is_block)
12544 qualifiers = to_interpolation_qualifiers(memberflags);
12545
12546 statement(layout_for_member(type, index), qualifiers, qualifier, flags_to_qualifiers_glsl(membertype, memberflags),
12547 variable_decl(membertype, to_member_name(type, index)), ";");
12548 }
12549
12550 void CompilerGLSL::emit_struct_padding_target(const SPIRType &)
12551 {
12552 }
12553
12554 const char *CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, const Bitset &flags)
12555 {
12556 // GL_EXT_buffer_reference variables can be marked as restrict.
12557 if (flags.get(DecorationRestrictPointerEXT))
12558 return "restrict ";
12559
12560 // Structs do not have precision qualifiers, neither do doubles (desktop only anyways, so no mediump/highp).
12561 if (type.basetype != SPIRType::Float && type.basetype != SPIRType::Int && type.basetype != SPIRType::UInt &&
12562 type.basetype != SPIRType::Image && type.basetype != SPIRType::SampledImage &&
12563 type.basetype != SPIRType::Sampler)
12564 return "";
12565
12566 if (options.es)
12567 {
12568 auto &execution = get_entry_point();
12569
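// The qualifier can be omitted whenever it matches the stage's default precision,
// e.g. a RelaxedPrecision float in a fragment shader whose default float precision is mediump.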
12570 if (flags.get(DecorationRelaxedPrecision))
12571 {
12572 bool implied_fmediump = type.basetype == SPIRType::Float &&
12573 options.fragment.default_float_precision == Options::Mediump &&
12574 execution.model == ExecutionModelFragment;
12575
12576 bool implied_imediump = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) &&
12577 options.fragment.default_int_precision == Options::Mediump &&
12578 execution.model == ExecutionModelFragment;
12579
12580 return implied_fmediump || implied_imediump ? "" : "mediump ";
12581 }
12582 else
12583 {
12584 bool implied_fhighp =
12585 type.basetype == SPIRType::Float && ((options.fragment.default_float_precision == Options::Highp &&
12586 execution.model == ExecutionModelFragment) ||
12587 (execution.model != ExecutionModelFragment));
12588
12589 bool implied_ihighp = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) &&
12590 ((options.fragment.default_int_precision == Options::Highp &&
12591 execution.model == ExecutionModelFragment) ||
12592 (execution.model != ExecutionModelFragment));
12593
12594 return implied_fhighp || implied_ihighp ? "" : "highp ";
12595 }
12596 }
12597 else if (backend.allow_precision_qualifiers)
12598 {
12599 // Vulkan GLSL supports precision qualifiers, even in desktop profiles, which is convenient.
12600 // The default is highp however, so only emit mediump in the rare case that a shader has these.
12601 if (flags.get(DecorationRelaxedPrecision))
12602 return "mediump ";
12603 else
12604 return "";
12605 }
12606 else
12607 return "";
12608 }
12609
12610 const char *CompilerGLSL::to_precision_qualifiers_glsl(uint32_t id)
12611 {
12612 auto &type = expression_type(id);
12613 bool use_precision_qualifiers = backend.allow_precision_qualifiers || options.es;
12614 if (use_precision_qualifiers && (type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage))
12615 {
12616 // Force mediump for the sampler type. We cannot declare 16-bit or smaller image types.
12617 auto &result_type = get<SPIRType>(type.image.type);
12618 if (result_type.width < 32)
12619 return "mediump ";
12620 }
12621 return flags_to_qualifiers_glsl(type, ir.meta[id].decoration.decoration_flags);
12622 }
12623
12624 void CompilerGLSL::fixup_io_block_patch_qualifiers(const SPIRVariable &var)
12625 {
12626 // Works around weird behavior in glslangValidator where
12627 // a patch out block is translated to just block members getting the decoration.
12628 // To make glslang not complain when we compile again, we have to transform this back to a case where
12629 // the variable itself has Patch decoration, and not members.
12630 auto &type = get<SPIRType>(var.basetype);
12631 if (has_decoration(type.self, DecorationBlock))
12632 {
12633 uint32_t member_count = uint32_t(type.member_types.size());
12634 for (uint32_t i = 0; i < member_count; i++)
12635 {
12636 if (has_member_decoration(type.self, i, DecorationPatch))
12637 {
12638 set_decoration(var.self, DecorationPatch);
12639 break;
12640 }
12641 }
12642
12643 if (has_decoration(var.self, DecorationPatch))
12644 for (uint32_t i = 0; i < member_count; i++)
12645 unset_member_decoration(type.self, i, DecorationPatch);
12646 }
12647 }
12648
to_qualifiers_glsl(uint32_t id)12649 string CompilerGLSL::to_qualifiers_glsl(uint32_t id)
12650 {
12651 auto &flags = ir.meta[id].decoration.decoration_flags;
12652 string res;
12653
12654 auto *var = maybe_get<SPIRVariable>(id);
12655
12656 if (var && var->storage == StorageClassWorkgroup && !backend.shared_is_implied)
12657 res += "shared ";
12658
12659 res += to_interpolation_qualifiers(flags);
12660 if (var)
12661 res += to_storage_qualifiers_glsl(*var);
12662
12663 auto &type = expression_type(id);
12664 if (type.image.dim != DimSubpassData && type.image.sampled == 2)
12665 {
12666 if (flags.get(DecorationCoherent))
12667 res += "coherent ";
12668 if (flags.get(DecorationRestrict))
12669 res += "restrict ";
12670 if (flags.get(DecorationNonWritable))
12671 res += "readonly ";
12672 if (flags.get(DecorationNonReadable))
12673 res += "writeonly ";
12674 }
12675
12676 res += to_precision_qualifiers_glsl(id);
12677
12678 return res;
12679 }
12680
argument_decl(const SPIRFunction::Parameter & arg)12681 string CompilerGLSL::argument_decl(const SPIRFunction::Parameter &arg)
12682 {
12683 // glslangValidator seems to make all arguments pointer no matter what which is rather bizarre ...
12684 auto &type = expression_type(arg.id);
12685 const char *direction = "";
12686
12687 if (type.pointer)
12688 {
12689 if (arg.write_count && arg.read_count)
12690 direction = "inout ";
12691 else if (arg.write_count)
12692 direction = "out ";
12693 }
12694
12695 return join(direction, to_qualifiers_glsl(arg.id), variable_decl(type, to_name(arg.id), arg.id));
12696 }
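
// For example, a pointer parameter that is both read and written is declared as
// "inout vec4 v", a write-only one as "out vec4 v", and a read-only one gets no
// direction qualifier at all (GLSL defaults to "in"). "vec4 v" is hypothetical here.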

string CompilerGLSL::to_initializer_expression(const SPIRVariable &var)
{
	return to_expression(var.initializer);
}

string CompilerGLSL::to_zero_initialized_expression(uint32_t type_id)
{
#ifndef NDEBUG
	auto &type = get<SPIRType>(type_id);
	assert(type.storage == StorageClassPrivate || type.storage == StorageClassFunction ||
	       type.storage == StorageClassGeneric);
#endif
	uint32_t id = ir.increase_bound_by(1);
	ir.make_constant_null(id, type_id, false);
	return constant_expression(get<SPIRConstant>(id));
}

bool CompilerGLSL::type_can_zero_initialize(const SPIRType &type) const
{
	if (type.pointer)
		return false;

	if (!type.array.empty() && options.flatten_multidimensional_arrays)
		return false;

	for (auto &literal : type.array_size_literal)
		if (!literal)
			return false;

	for (auto &memb : type.member_types)
		if (!type_can_zero_initialize(get<SPIRType>(memb)))
			return false;

	return true;
}

string CompilerGLSL::variable_decl(const SPIRVariable &variable)
{
	// Ignore the pointer type since GLSL doesn't have pointers.
	auto &type = get_variable_data_type(variable);

	if (type.pointer_depth > 1)
		SPIRV_CROSS_THROW("Cannot declare pointer-to-pointer types.");

	auto res = join(to_qualifiers_glsl(variable.self), variable_decl(type, to_name(variable.self), variable.self));

	if (variable.loop_variable && variable.static_expression)
	{
		uint32_t expr = variable.static_expression;
		if (ir.ids[expr].get_type() != TypeUndef)
			res += join(" = ", to_expression(variable.static_expression));
		else if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
			res += join(" = ", to_zero_initialized_expression(get_variable_data_type_id(variable)));
	}
	else if (variable.initializer)
	{
		uint32_t expr = variable.initializer;
		if (ir.ids[expr].get_type() != TypeUndef)
			res += join(" = ", to_initializer_expression(variable));
		else if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
			res += join(" = ", to_zero_initialized_expression(get_variable_data_type_id(variable)));
	}

	return res;
}
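
// Putting it together: a loop variable with a usable static expression is declared as
// e.g. "int i = 0", while with force_zero_initialized_variables enabled, a variable
// whose initializer is OpUndef gets a synthesized null constant instead, e.g.
// "int i = 0" or "vec4 v = vec4(0.0)" (names here are hypothetical).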

const char *CompilerGLSL::to_pls_qualifiers_glsl(const SPIRVariable &variable)
{
	auto &flags = ir.meta[variable.self].decoration.decoration_flags;
	if (flags.get(DecorationRelaxedPrecision))
		return "mediump ";
	else
		return "highp ";
}

string CompilerGLSL::pls_decl(const PlsRemap &var)
{
	auto &variable = get<SPIRVariable>(var.id);

	SPIRType type;
	type.vecsize = pls_format_to_components(var.format);
	type.basetype = pls_format_to_basetype(var.format);

	return join(to_pls_layout(var.format), to_pls_qualifiers_glsl(variable), type_to_glsl(type), " ",
	            to_name(variable.self));
}

uint32_t CompilerGLSL::to_array_size_literal(const SPIRType &type) const
{
	return to_array_size_literal(type, uint32_t(type.array.size() - 1));
}

uint32_t CompilerGLSL::to_array_size_literal(const SPIRType &type, uint32_t index) const
{
	assert(type.array.size() == type.array_size_literal.size());

	if (type.array_size_literal[index])
	{
		return type.array[index];
	}
	else
	{
		// Use the default spec constant value.
		// This is the best we can do.
		return evaluate_constant_u32(type.array[index]);
	}
}

string CompilerGLSL::to_array_size(const SPIRType &type, uint32_t index)
{
	assert(type.array.size() == type.array_size_literal.size());

	auto &size = type.array[index];
	if (!type.array_size_literal[index])
		return to_expression(size);
	else if (size)
		return convert_to_string(size);
	else if (!backend.unsized_array_supported)
	{
		// For runtime-sized arrays, we can work around
		// lack of standard support for this by simply having
		// a single element array.
		//
		// Runtime length arrays must always be the last element
		// in an interface block.
		return "1";
	}
	else
		return "";
}

string CompilerGLSL::type_to_array_glsl(const SPIRType &type)
{
	if (type.pointer && type.storage == StorageClassPhysicalStorageBufferEXT && type.basetype != SPIRType::Struct)
	{
		// We are using a wrapped pointer type, and we should not emit any array declarations here.
		return "";
	}

	if (type.array.empty())
		return "";

	if (options.flatten_multidimensional_arrays)
	{
		string res;
		res += "[";
		for (auto i = uint32_t(type.array.size()); i; i--)
		{
			res += enclose_expression(to_array_size(type, i - 1));
			if (i > 1)
				res += " * ";
		}
		res += "]";
		return res;
	}
	else
	{
		if (type.array.size() > 1)
		{
			if (!options.es && options.version < 430)
				require_extension_internal("GL_ARB_arrays_of_arrays");
			else if (options.es && options.version < 310)
				SPIRV_CROSS_THROW("Arrays of arrays not supported before ESSL version 310. "
				                  "Try using --flatten-multidimensional-arrays or set "
				                  "options.flatten_multidimensional_arrays to true.");
		}

		string res;
		for (auto i = uint32_t(type.array.size()); i; i--)
		{
			res += "[";
			res += to_array_size(type, i - 1);
			res += "]";
		}
		return res;
	}
}
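
// Example: an array declared as "float v[3][2]" in GLSL terms round-trips as
// "float v[3][2]" by default, but with flatten_multidimensional_arrays enabled it is
// emitted as "float v[3 * 2]" (outermost dimension first).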

string CompilerGLSL::image_type_glsl(const SPIRType &type, uint32_t id)
{
	auto &imagetype = get<SPIRType>(type.image.type);
	string res;

	switch (imagetype.basetype)
	{
	case SPIRType::Int:
	case SPIRType::Short:
	case SPIRType::SByte:
		res = "i";
		break;
	case SPIRType::UInt:
	case SPIRType::UShort:
	case SPIRType::UByte:
		res = "u";
		break;
	default:
		break;
	}

	// For half image types, we will force mediump for the sampler, and cast to f16 after any sampling operation.
	// We cannot express a true half texture type in GLSL. Neither for short integer formats for that matter.

	if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData && options.vulkan_semantics)
		return res + "subpassInput" + (type.image.ms ? "MS" : "");
	else if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData &&
	         subpass_input_is_framebuffer_fetch(id))
	{
		SPIRType sampled_type = get<SPIRType>(type.image.type);
		sampled_type.vecsize = 4;
		return type_to_glsl(sampled_type);
	}

	// If we're emulating subpassInput with samplers, force sampler2D
	// so we don't have to specify format.
	if (type.basetype == SPIRType::Image && type.image.dim != DimSubpassData)
	{
		// Sampler buffers are always declared as samplerBuffer even though they might be separate images in the SPIR-V.
		if (type.image.dim == DimBuffer && type.image.sampled == 1)
			res += "sampler";
		else
			res += type.image.sampled == 2 ? "image" : "texture";
	}
	else
		res += "sampler";

	switch (type.image.dim)
	{
	case Dim1D:
		res += "1D";
		break;
	case Dim2D:
		res += "2D";
		break;
	case Dim3D:
		res += "3D";
		break;
	case DimCube:
		res += "Cube";
		break;
	case DimRect:
		if (options.es)
			SPIRV_CROSS_THROW("Rectangle textures are not supported on OpenGL ES.");

		if (is_legacy_desktop())
			require_extension_internal("GL_ARB_texture_rectangle");

		res += "2DRect";
		break;

	case DimBuffer:
		if (options.es && options.version < 320)
			require_extension_internal("GL_OES_texture_buffer");
		else if (!options.es && options.version < 300)
			require_extension_internal("GL_EXT_texture_buffer_object");
		res += "Buffer";
		break;

	case DimSubpassData:
		res += "2D";
		break;
	default:
		SPIRV_CROSS_THROW("Only 1D, 2D, 2DRect, 3D, Buffer, InputTarget and Cube textures supported.");
	}

	if (type.image.ms)
		res += "MS";
	if (type.image.arrayed)
	{
		if (is_legacy_desktop())
			require_extension_internal("GL_EXT_texture_array");
		res += "Array";
	}

	// "Shadow" state in GLSL only exists for samplers and combined image samplers.
	if (((type.basetype == SPIRType::SampledImage) || (type.basetype == SPIRType::Sampler)) &&
	    image_is_comparison(type, id))
	{
		res += "Shadow";
	}

	return res;
}
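
// Examples of the assembled names: a combined 2D float sampler becomes "sampler2D",
// a storage image with a signed-integer sampled type "iimage2D", a multisampled
// array combines both suffixes ("sampler2DMSArray"), and a depth-comparison sampler
// gains "Shadow" ("sampler2DShadow").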

string CompilerGLSL::type_to_glsl_constructor(const SPIRType &type)
{
	if (backend.use_array_constructor && type.array.size() > 1)
	{
		if (options.flatten_multidimensional_arrays)
			SPIRV_CROSS_THROW("Cannot flatten constructors of multidimensional array constructors, "
			                  "e.g. float[][]().");
		else if (!options.es && options.version < 430)
			require_extension_internal("GL_ARB_arrays_of_arrays");
		else if (options.es && options.version < 310)
			SPIRV_CROSS_THROW("Arrays of arrays not supported before ESSL version 310.");
	}

	auto e = type_to_glsl(type);
	if (backend.use_array_constructor)
	{
		for (uint32_t i = 0; i < type.array.size(); i++)
			e += "[]";
	}
	return e;
}

// The optional id parameter indicates the object whose type we are trying
// to find the description for. Most type descriptions do not
// depend on a specific object's use of that type.
string CompilerGLSL::type_to_glsl(const SPIRType &type, uint32_t id)
{
	if (type.pointer && type.storage == StorageClassPhysicalStorageBufferEXT && type.basetype != SPIRType::Struct)
	{
		// Need to create a magic type name which compacts the entire type information.
		string name = type_to_glsl(get_pointee_type(type));
		for (size_t i = 0; i < type.array.size(); i++)
		{
			if (type.array_size_literal[i])
				name += join(type.array[i], "_");
			else
				name += join("id", type.array[i], "_");
		}
		name += "Pointer";
		return name;
	}

	switch (type.basetype)
	{
	case SPIRType::Struct:
		// Need OpName lookup here to get a "sensible" name for a struct.
		if (backend.explicit_struct_type)
			return join("struct ", to_name(type.self));
		else
			return to_name(type.self);

	case SPIRType::Image:
	case SPIRType::SampledImage:
		return image_type_glsl(type, id);

	case SPIRType::Sampler:
		// The depth field is set by calling code based on the variable ID of the sampler, effectively reintroducing
		// this distinction into the type system.
		return comparison_ids.count(id) ? "samplerShadow" : "sampler";

	case SPIRType::AccelerationStructure:
		return ray_tracing_is_khr ? "accelerationStructureEXT" : "accelerationStructureNV";

	case SPIRType::Void:
		return "void";

	default:
		break;
	}

	if (type.basetype == SPIRType::UInt && is_legacy())
		SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy targets.");

	if (type.vecsize == 1 && type.columns == 1) // Scalar builtin
	{
		switch (type.basetype)
		{
		case SPIRType::Boolean:
			return "bool";
		case SPIRType::SByte:
			return backend.basic_int8_type;
		case SPIRType::UByte:
			return backend.basic_uint8_type;
		case SPIRType::Short:
			return backend.basic_int16_type;
		case SPIRType::UShort:
			return backend.basic_uint16_type;
		case SPIRType::Int:
			return backend.basic_int_type;
		case SPIRType::UInt:
			return backend.basic_uint_type;
		case SPIRType::AtomicCounter:
			return "atomic_uint";
		case SPIRType::Half:
			return "float16_t";
		case SPIRType::Float:
			return "float";
		case SPIRType::Double:
			return "double";
		case SPIRType::Int64:
			return "int64_t";
		case SPIRType::UInt64:
			return "uint64_t";
		default:
			return "???";
		}
	}
	else if (type.vecsize > 1 && type.columns == 1) // Vector builtin
	{
		switch (type.basetype)
		{
		case SPIRType::Boolean:
			return join("bvec", type.vecsize);
		case SPIRType::SByte:
			return join("i8vec", type.vecsize);
		case SPIRType::UByte:
			return join("u8vec", type.vecsize);
		case SPIRType::Short:
			return join("i16vec", type.vecsize);
		case SPIRType::UShort:
			return join("u16vec", type.vecsize);
		case SPIRType::Int:
			return join("ivec", type.vecsize);
		case SPIRType::UInt:
			return join("uvec", type.vecsize);
		case SPIRType::Half:
			return join("f16vec", type.vecsize);
		case SPIRType::Float:
			return join("vec", type.vecsize);
		case SPIRType::Double:
			return join("dvec", type.vecsize);
		case SPIRType::Int64:
			return join("i64vec", type.vecsize);
		case SPIRType::UInt64:
			return join("u64vec", type.vecsize);
		default:
			return "???";
		}
	}
	else if (type.vecsize == type.columns) // Simple Matrix builtin
	{
		switch (type.basetype)
		{
		case SPIRType::Boolean:
			return join("bmat", type.vecsize);
		case SPIRType::Int:
			return join("imat", type.vecsize);
		case SPIRType::UInt:
			return join("umat", type.vecsize);
		case SPIRType::Half:
			return join("f16mat", type.vecsize);
		case SPIRType::Float:
			return join("mat", type.vecsize);
		case SPIRType::Double:
			return join("dmat", type.vecsize);
		// Matrix types not supported for int64/uint64.
		default:
			return "???";
		}
	}
	else
	{
		switch (type.basetype)
		{
		case SPIRType::Boolean:
			return join("bmat", type.columns, "x", type.vecsize);
		case SPIRType::Int:
			return join("imat", type.columns, "x", type.vecsize);
		case SPIRType::UInt:
			return join("umat", type.columns, "x", type.vecsize);
		case SPIRType::Half:
			return join("f16mat", type.columns, "x", type.vecsize);
		case SPIRType::Float:
			return join("mat", type.columns, "x", type.vecsize);
		case SPIRType::Double:
			return join("dmat", type.columns, "x", type.vecsize);
		// Matrix types not supported for int64/uint64.
		default:
			return "???";
		}
	}
}
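
// A few examples of the resulting type names: a 4-component float vector maps to
// "vec4", a 3x3 double matrix to "dmat3", a 2-column/4-row float matrix to "mat2x4",
// and a 16-bit scalar float to "float16_t".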

void CompilerGLSL::add_variable(unordered_set<string> &variables_primary,
                                const unordered_set<string> &variables_secondary, string &name)
{
	if (name.empty())
		return;

	ParsedIR::sanitize_underscores(name);
	if (ParsedIR::is_globally_reserved_identifier(name, true))
	{
		name.clear();
		return;
	}

	update_name_cache(variables_primary, variables_secondary, name);
}

void CompilerGLSL::add_local_variable_name(uint32_t id)
{
	add_variable(local_variable_names, block_names, ir.meta[id].decoration.alias);
}

void CompilerGLSL::add_resource_name(uint32_t id)
{
	add_variable(resource_names, block_names, ir.meta[id].decoration.alias);
}

void CompilerGLSL::add_header_line(const std::string &line)
{
	header_lines.push_back(line);
}

bool CompilerGLSL::has_extension(const std::string &ext) const
{
	auto itr = find(begin(forced_extensions), end(forced_extensions), ext);
	return itr != end(forced_extensions);
}

void CompilerGLSL::require_extension(const std::string &ext)
{
	if (!has_extension(ext))
		forced_extensions.push_back(ext);
}

void CompilerGLSL::require_extension_internal(const string &ext)
{
	if (backend.supports_extensions && !has_extension(ext))
	{
		forced_extensions.push_back(ext);
		force_recompile();
	}
}

void CompilerGLSL::flatten_buffer_block(VariableID id)
{
	auto &var = get<SPIRVariable>(id);
	auto &type = get<SPIRType>(var.basetype);
	auto name = to_name(type.self, false);
	auto &flags = ir.meta[type.self].decoration.decoration_flags;

	if (!type.array.empty())
		SPIRV_CROSS_THROW(name + " is an array of UBOs.");
	if (type.basetype != SPIRType::Struct)
		SPIRV_CROSS_THROW(name + " is not a struct.");
	if (!flags.get(DecorationBlock))
		SPIRV_CROSS_THROW(name + " is not a block.");
	if (type.member_types.empty())
		SPIRV_CROSS_THROW(name + " is an empty struct.");

	flattened_buffer_blocks.insert(id);
}

bool CompilerGLSL::builtin_translates_to_nonarray(spv::BuiltIn /*builtin*/) const
{
	return false; // GLSL itself does not need to translate array builtin types to non-array builtin types.
}

bool CompilerGLSL::check_atomic_image(uint32_t id)
{
	auto &type = expression_type(id);
	if (type.storage == StorageClassImage)
	{
		if (options.es && options.version < 320)
			require_extension_internal("GL_OES_shader_image_atomic");

		auto *var = maybe_get_backing_variable(id);
		if (var)
		{
			auto &flags = ir.meta[var->self].decoration.decoration_flags;
			if (flags.get(DecorationNonWritable) || flags.get(DecorationNonReadable))
			{
				flags.clear(DecorationNonWritable);
				flags.clear(DecorationNonReadable);
				force_recompile();
			}
		}
		return true;
	}
	else
		return false;
}

void CompilerGLSL::add_function_overload(const SPIRFunction &func)
{
	Hasher hasher;
	for (auto &arg : func.arguments)
	{
		// Parameters can vary with pointer type or not,
		// but that will not change the signature in GLSL/HLSL,
		// so strip the pointer type before hashing.
		uint32_t type_id = get_pointee_type_id(arg.type);
		auto &type = get<SPIRType>(type_id);

		if (!combined_image_samplers.empty())
		{
			// If we have combined image samplers, we cannot really trust the image and sampler arguments
			// we pass down to callees, because they may be shuffled around.
			// Ignore these arguments, to make sure that functions need to differ in some other way
			// to be considered different overloads.
			if (type.basetype == SPIRType::SampledImage ||
			    (type.basetype == SPIRType::Image && type.image.sampled == 1) || type.basetype == SPIRType::Sampler)
			{
				continue;
			}
		}

		hasher.u32(type_id);
	}
	uint64_t types_hash = hasher.get();

	auto function_name = to_name(func.self);
	auto itr = function_overloads.find(function_name);
	if (itr != end(function_overloads))
	{
		// There exists a function with this name already.
		auto &overloads = itr->second;
		if (overloads.count(types_hash) != 0)
		{
			// Overload conflict, assign a new name.
			add_resource_name(func.self);
			function_overloads[to_name(func.self)].insert(types_hash);
		}
		else
		{
			// Can reuse the name.
			overloads.insert(types_hash);
		}
	}
	else
	{
		// First time we see this function name.
		add_resource_name(func.self);
		function_overloads[to_name(func.self)].insert(types_hash);
	}
}
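
// For example, hypothetical overloads "fn(vec4)" and "fn(ivec4)" hash differently
// and may share the name "fn", while a second, distinct function that also hashes
// as "fn(vec4)" collides and is renamed through add_resource_name() (typically by
// appending a uniquifying suffix).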

void CompilerGLSL::emit_function_prototype(SPIRFunction &func, const Bitset &return_flags)
{
	if (func.self != ir.default_entry_point)
		add_function_overload(func);

	// Avoid shadow declarations.
	local_variable_names = resource_names;

	string decl;

	auto &type = get<SPIRType>(func.return_type);
	decl += flags_to_qualifiers_glsl(type, return_flags);
	decl += type_to_glsl(type);
	decl += type_to_array_glsl(type);
	decl += " ";

	if (func.self == ir.default_entry_point)
	{
		// If we need complex fallback in GLSL, we just wrap main() in a function
		// and interlock the entire shader ...
		if (interlocked_is_complex)
			decl += "spvMainInterlockedBody";
		else
			decl += "main";

		processing_entry_point = true;
	}
	else
		decl += to_name(func.self);

	decl += "(";
	SmallVector<string> arglist;
	for (auto &arg : func.arguments)
	{
		// Do not pass in separate images or samplers if we're remapping
		// to combined image samplers.
		if (skip_argument(arg.id))
			continue;

		// Might change the variable name if it already exists in this function.
		// SPIRV OpName doesn't have any semantic effect, so it's valid for an implementation
		// to use the same name for variables.
		// Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates.
		add_local_variable_name(arg.id);

		arglist.push_back(argument_decl(arg));

		// Hold a pointer to the parameter so we can invalidate the readonly field if needed.
		auto *var = maybe_get<SPIRVariable>(arg.id);
		if (var)
			var->parameter = &arg;
	}

	for (auto &arg : func.shadow_arguments)
	{
		// Might change the variable name if it already exists in this function.
		// SPIRV OpName doesn't have any semantic effect, so it's valid for an implementation
		// to use the same name for variables.
		// Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates.
		add_local_variable_name(arg.id);

		arglist.push_back(argument_decl(arg));

		// Hold a pointer to the parameter so we can invalidate the readonly field if needed.
		auto *var = maybe_get<SPIRVariable>(arg.id);
		if (var)
			var->parameter = &arg;
	}

	decl += merge(arglist);
	decl += ")";
	statement(decl);
}

void CompilerGLSL::emit_function(SPIRFunction &func, const Bitset &return_flags)
{
	// Avoid potential cycles.
	if (func.active)
		return;
	func.active = true;

	// If we depend on a function, emit that function before we emit our own function.
	for (auto block : func.blocks)
	{
		auto &b = get<SPIRBlock>(block);
		for (auto &i : b.ops)
		{
			auto ops = stream(i);
			auto op = static_cast<Op>(i.op);

			if (op == OpFunctionCall)
			{
				// Recursively emit functions which are called.
				uint32_t id = ops[2];
				emit_function(get<SPIRFunction>(id), ir.meta[ops[1]].decoration.decoration_flags);
			}
		}
	}

	if (func.entry_line.file_id != 0)
		emit_line_directive(func.entry_line.file_id, func.entry_line.line_literal);
	emit_function_prototype(func, return_flags);
	begin_scope();

	if (func.self == ir.default_entry_point)
		emit_entry_point_declarations();

	current_function = &func;
	auto &entry_block = get<SPIRBlock>(func.entry_block);

	sort(begin(func.constant_arrays_needed_on_stack), end(func.constant_arrays_needed_on_stack));
	for (auto &array : func.constant_arrays_needed_on_stack)
	{
		auto &c = get<SPIRConstant>(array);
		auto &type = get<SPIRType>(c.constant_type);
		statement(variable_decl(type, join("_", array, "_array_copy")), " = ", constant_expression(c), ";");
	}

	for (auto &v : func.local_variables)
	{
		auto &var = get<SPIRVariable>(v);
		var.deferred_declaration = false;

		if (var.storage == StorageClassWorkgroup)
		{
			// Special variable type which cannot have an initializer;
			// it needs to be declared as a standalone variable.
			// Comes from MSL, which can push global variables as local variables in the main function.
			add_local_variable_name(var.self);
			statement(variable_decl(var), ";");
			var.deferred_declaration = false;
		}
		else if (var.storage == StorageClassPrivate)
		{
			// These variables will not have had their CFG usage analyzed, so move it to the entry block.
			// Comes from MSL, which can push global variables as local variables in the main function.
			// We could just declare them right now, but we would miss out on an important initialization case which is
			// LUT declaration in MSL.
			// If we don't declare the variable when it is assigned, we're forced to go through a helper function
			// which copies elements one by one.
			add_local_variable_name(var.self);

			if (var.initializer)
			{
				statement(variable_decl(var), ";");
				var.deferred_declaration = false;
			}
			else
			{
				auto &dominated = entry_block.dominated_variables;
				if (find(begin(dominated), end(dominated), var.self) == end(dominated))
					entry_block.dominated_variables.push_back(var.self);
				var.deferred_declaration = true;
			}
		}
		else if (var.storage == StorageClassFunction && var.remapped_variable && var.static_expression)
		{
			// No need to declare this variable, it has a static expression.
			var.deferred_declaration = false;
		}
		else if (expression_is_lvalue(v))
		{
			add_local_variable_name(var.self);

			// Loop variables should never be declared early, they are explicitly emitted in a loop.
			if (var.initializer && !var.loop_variable)
				statement(variable_decl_function_local(var), ";");
			else
			{
				// Don't declare the variable until first use to declutter the GLSL output quite a lot.
				// If we don't touch the variable before the first branch,
				// declare it then, since we need the variable declaration to be in top scope.
				var.deferred_declaration = true;
			}
		}
		else
		{
			// HACK: SPIR-V in older glslang output likes to use samplers and images as local variables, but GLSL does not allow this.
			// For these types (non-lvalue), we enforce forwarding through a shadowed variable.
			// This means that when we OpStore to these variables, we just write in the expression ID directly.
			// This breaks any kind of branching, since the variable must be statically assigned.
			// Branching on samplers and images would be pretty much impossible to fake in GLSL.
			var.statically_assigned = true;
		}

		var.loop_variable_enable = false;

		// Loop variables are never declared outside their for-loop, so block any implicit declaration.
		if (var.loop_variable)
			var.deferred_declaration = false;
	}

	// Enforce declaration order for regression testing purposes.
	for (auto &block_id : func.blocks)
	{
		auto &block = get<SPIRBlock>(block_id);
		sort(begin(block.dominated_variables), end(block.dominated_variables));
	}

	for (auto &line : current_function->fixup_hooks_in)
		line();

	emit_block_chain(entry_block);

	end_scope();
	processing_entry_point = false;
	statement("");

	// Make sure deferred declaration state for local variables is cleared when we are done with the function.
	// We risk declaring Private/Workgroup variables in places we are not supposed to otherwise.
	for (auto &v : func.local_variables)
	{
		auto &var = get<SPIRVariable>(v);
		var.deferred_declaration = false;
	}
}

void CompilerGLSL::emit_fixup()
{
	if (is_vertex_like_shader())
	{
		if (options.vertex.fixup_clipspace)
		{
			const char *suffix = backend.float_literal_suffix ? "f" : "";
			statement("gl_Position.z = 2.0", suffix, " * gl_Position.z - gl_Position.w;");
		}

		if (options.vertex.flip_vert_y)
			statement("gl_Position.y = -gl_Position.y;");
	}
}

void CompilerGLSL::flush_phi(BlockID from, BlockID to)
{
	auto &child = get<SPIRBlock>(to);
	if (child.ignore_phi_from_block == from)
		return;

	unordered_set<uint32_t> temporary_phi_variables;

	for (auto itr = begin(child.phi_variables); itr != end(child.phi_variables); ++itr)
	{
		auto &phi = *itr;

		if (phi.parent == from)
		{
			auto &var = get<SPIRVariable>(phi.function_variable);

			// A Phi variable might be a loop variable, so flush to static expression.
			if (var.loop_variable && !var.loop_variable_enable)
				var.static_expression = phi.local_variable;
			else
			{
				flush_variable_declaration(phi.function_variable);

				// Check if we are going to write to a Phi variable that another statement will read from
				// as part of another Phi node in our target block.
				// For this case, we will need to copy phi.function_variable to a temporary, and use that for future reads.
				// This is judged to be extremely rare, so deal with it here using a simple, but suboptimal algorithm.
				bool need_saved_temporary =
				    find_if(itr + 1, end(child.phi_variables), [&](const SPIRBlock::Phi &future_phi) -> bool {
					    return future_phi.local_variable == ID(phi.function_variable) && future_phi.parent == from;
				    }) != end(child.phi_variables);

				if (need_saved_temporary)
				{
					// Need to make sure we declare the phi variable with a copy at the right scope.
					// We cannot safely declare a temporary here since we might be inside a continue block.
					if (!var.allocate_temporary_copy)
					{
						var.allocate_temporary_copy = true;
						force_recompile();
					}
					statement("_", phi.function_variable, "_copy", " = ", to_name(phi.function_variable), ";");
					temporary_phi_variables.insert(phi.function_variable);
				}

				// This might be called in continue block, so make sure we
				// use this to emit ESSL 1.0 compliant increments/decrements.
				auto lhs = to_expression(phi.function_variable);

				string rhs;
				if (temporary_phi_variables.count(phi.local_variable))
					rhs = join("_", phi.local_variable, "_copy");
				else
					rhs = to_pointer_expression(phi.local_variable);

				if (!optimize_read_modify_write(get<SPIRType>(var.basetype), lhs, rhs))
					statement(lhs, " = ", rhs, ";");
			}

			register_write(phi.function_variable);
		}
	}
}
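
// The temporary-copy path above is what makes a phi "swap" work. For a hypothetical
// pair of phi writes implementing (a, b) = (b, a), assigning naively would clobber
// an input, so the emitted GLSL reads along the lines of:
//
//   _a_copy = a; // real output uses the variable's numeric ID, e.g. _12_copy
//   a = b;
//   b = _a_copy;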

void CompilerGLSL::branch_to_continue(BlockID from, BlockID to)
{
	auto &to_block = get<SPIRBlock>(to);
	if (from == to)
		return;

	assert(is_continue(to));
	if (to_block.complex_continue)
	{
		// Just emit the whole block chain as is.
		auto usage_counts = expression_usage_counts;

		emit_block_chain(to_block);

		// Expression usage counts are moot after returning from the continue block.
		expression_usage_counts = usage_counts;
	}
	else
	{
		auto &from_block = get<SPIRBlock>(from);
		bool outside_control_flow = false;
		uint32_t loop_dominator = 0;

		// FIXME: Refactor this to not use the old loop_dominator tracking.
		if (from_block.merge_block)
		{
			// If we are a loop header, we don't set the loop dominator,
			// so just use "self" here.
			loop_dominator = from;
		}
		else if (from_block.loop_dominator != BlockID(SPIRBlock::NoDominator))
		{
			loop_dominator = from_block.loop_dominator;
		}

		if (loop_dominator != 0)
		{
			auto &cfg = get_cfg_for_current_function();

			// For non-complex continue blocks, we implicitly branch to the continue block
			// by having the continue block be part of the loop header in for (; ; continue-block).
			outside_control_flow = cfg.node_terminates_control_flow_in_sub_graph(loop_dominator, from);
		}

		// Some simplification for for-loops. We always end up with a useless continue;
		// statement since we branch to a loop block.
		// Walk the CFG: if we unconditionally execute the block calling continue, assuming we're in the loop block,
		// we can avoid writing out an explicit continue statement.
		// Similar optimization to return statements if we know we're outside flow control.
		if (!outside_control_flow)
			statement("continue;");
	}
}

void CompilerGLSL::branch(BlockID from, BlockID to)
{
	flush_phi(from, to);
	flush_control_dependent_expressions(from);

	bool to_is_continue = is_continue(to);

	// This is only a continue if we branch to our loop dominator.
	if ((ir.block_meta[to] & ParsedIR::BLOCK_META_LOOP_HEADER_BIT) != 0 && get<SPIRBlock>(from).loop_dominator == to)
	{
		// This can happen if we had a complex continue block which was emitted.
		// Once the continue block tries to branch to the loop header, just emit continue;
		// and end the chain here.
		statement("continue;");
	}
	else if (from != to && is_break(to))
	{
		// We cannot break to ourselves, so check explicitly for from != to.
		// This case can trigger if a loop header is all three of these things:
		// - Continue block
		// - Loop header
		// - Break merge target all at once ...

		// Very dirty workaround.
		// Switch constructs are able to break, but they cannot break out of a loop at the same time.
		// Only sensible solution is to make a ladder variable, which we declare at the top of the switch block,
		// write to the ladder here, and defer the break.
		// The loop we're breaking out of must dominate the switch block, or there is no ladder breaking case.
		if (current_emitting_switch && is_loop_break(to) &&
		    current_emitting_switch->loop_dominator != BlockID(SPIRBlock::NoDominator) &&
		    get<SPIRBlock>(current_emitting_switch->loop_dominator).merge_block == to)
		{
			if (!current_emitting_switch->need_ladder_break)
			{
				force_recompile();
				current_emitting_switch->need_ladder_break = true;
			}

			statement("_", current_emitting_switch->self, "_ladder_break = true;");
		}
		statement("break;");
	}
	else if (to_is_continue || from == to)
	{
		// The from == to case can happen for a do-while loop which branches into itself.
		// We don't mark these cases as continue blocks, but the only possible way to branch into
		// ourselves is by means of continue blocks.

		// If we are merging to a continue block, there is no need to emit the block chain for continue here.
		// We can branch to the continue block after we merge execution.

		// Here we make use of structured control flow rules from spec:
		// 2.11: - the merge block declared by a header block cannot be a merge block declared by any other header block
		//       - each header block must strictly dominate its merge block, unless the merge block is unreachable in the CFG
		// If we are branching to a merge block, we must be inside a construct which dominates the merge block.
		auto &block_meta = ir.block_meta[to];
		bool branching_to_merge =
		    (block_meta & (ParsedIR::BLOCK_META_SELECTION_MERGE_BIT | ParsedIR::BLOCK_META_MULTISELECT_MERGE_BIT |
		                   ParsedIR::BLOCK_META_LOOP_MERGE_BIT)) != 0;
		if (!to_is_continue || !branching_to_merge)
			branch_to_continue(from, to);
	}
	else if (!is_conditional(to))
		emit_block_chain(get<SPIRBlock>(to));

	// It is important that we check for break before continue.
	// A block might serve two purposes, a break block for the inner scope, and
	// a continue block in the outer scope.
	// Inner scope always takes precedence.
}
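
// Illustrative shape of the ladder-break workaround above (the declaration and the
// final check are emitted where the switch block itself is handled; real output uses
// the switch block's numeric ID):
//
//   bool _3_ladder_break = false;
//   switch (sel)
//   {
//   case 0:
//       _3_ladder_break = true;
//       break;
//   }
//   if (_3_ladder_break)
//       break;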

void CompilerGLSL::branch(BlockID from, uint32_t cond, BlockID true_block, BlockID false_block)
{
	auto &from_block = get<SPIRBlock>(from);
	BlockID merge_block = from_block.merge == SPIRBlock::MergeSelection ? from_block.next_block : BlockID(0);

	// If we branch directly to our selection merge target, we don't need a code path.
	bool true_block_needs_code = true_block != merge_block || flush_phi_required(from, true_block);
	bool false_block_needs_code = false_block != merge_block || flush_phi_required(from, false_block);

	if (!true_block_needs_code && !false_block_needs_code)
		return;

	emit_block_hints(get<SPIRBlock>(from));

	if (true_block_needs_code)
	{
		statement("if (", to_expression(cond), ")");
		begin_scope();
		branch(from, true_block);
		end_scope();

		if (false_block_needs_code)
		{
			statement("else");
			begin_scope();
			branch(from, false_block);
			end_scope();
		}
	}
	else if (false_block_needs_code)
	{
		// Only need false path, use negative conditional.
		statement("if (!", to_enclosed_expression(cond), ")");
		begin_scope();
		branch(from, false_block);
		end_scope();
	}
}

// FIXME: This currently cannot handle complex continue blocks
// as in do-while.
// This should be seen as a "trivial" continue block.
string CompilerGLSL::emit_continue_block(uint32_t continue_block, bool follow_true_block, bool follow_false_block)
{
	auto *block = &get<SPIRBlock>(continue_block);

	// While emitting the continue block, declare_temporary will check this
	// if we have to emit temporaries.
	current_continue_block = block;

	SmallVector<string> statements;

	// Capture all statements into our list.
	auto *old = redirect_statement;
	redirect_statement = &statements;

	// Stamp out all blocks one after each other.
	while ((ir.block_meta[block->self] & ParsedIR::BLOCK_META_LOOP_HEADER_BIT) == 0)
	{
		// Write out all instructions we have in this block.
		emit_block_instructions(*block);

		// For plain branchless for/while continue blocks.
		if (block->next_block)
		{
			flush_phi(continue_block, block->next_block);
			block = &get<SPIRBlock>(block->next_block);
		}
		// For do while blocks. The last block will be a select block.
		else if (block->true_block && follow_true_block)
		{
			flush_phi(continue_block, block->true_block);
			block = &get<SPIRBlock>(block->true_block);
		}
		else if (block->false_block && follow_false_block)
		{
			flush_phi(continue_block, block->false_block);
			block = &get<SPIRBlock>(block->false_block);
		}
		else
		{
			SPIRV_CROSS_THROW("Invalid continue block detected!");
		}
	}

	// Restore old pointer.
	redirect_statement = old;

	// Somewhat ugly, strip off the last ';' since we use ',' instead.
	// Ideally, we should select this behavior in statement().
	for (auto &s : statements)
	{
		if (!s.empty() && s.back() == ';')
			s.erase(s.size() - 1, 1);
	}

	current_continue_block = nullptr;
	return merge(statements);
}
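
// Since the trailing ';' is stripped and the statements are merged with ", ", a
// continue block containing two updates lands in the loop header as e.g.:
//
//   for (int i = 0; i < n; i++, j += 2)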

void CompilerGLSL::emit_while_loop_initializers(const SPIRBlock &block)
{
	// While loops do not take initializers, so declare all of them outside.
	for (auto &loop_var : block.loop_variables)
	{
		auto &var = get<SPIRVariable>(loop_var);
		statement(variable_decl(var), ";");
	}
}

string CompilerGLSL::emit_for_loop_initializers(const SPIRBlock &block)
{
	if (block.loop_variables.empty())
		return "";

	bool same_types = for_loop_initializers_are_same_type(block);
	// We can only declare for loop initializers if all variables are of same type.
	// If we cannot do this, declare individual variables before the loop header.

	// We might have a loop variable candidate which was not assigned to for some reason.
	uint32_t missing_initializers = 0;
	for (auto &variable : block.loop_variables)
	{
		uint32_t expr = get<SPIRVariable>(variable).static_expression;

		// Sometimes loop variables are initialized with OpUndef, but we can just declare
		// a plain variable without initializer in this case.
		if (expr == 0 || ir.ids[expr].get_type() == TypeUndef)
			missing_initializers++;
	}

	if (block.loop_variables.size() == 1 && missing_initializers == 0)
	{
		return variable_decl(get<SPIRVariable>(block.loop_variables.front()));
	}
	else if (!same_types || missing_initializers == uint32_t(block.loop_variables.size()))
	{
		for (auto &loop_var : block.loop_variables)
			statement(variable_decl(get<SPIRVariable>(loop_var)), ";");
		return "";
	}
	else
	{
		// We have a mix of loop variables, either ones with a clear initializer, or ones without.
		// Separate the two streams.
		string expr;

		for (auto &loop_var : block.loop_variables)
		{
			uint32_t static_expr = get<SPIRVariable>(loop_var).static_expression;
			if (static_expr == 0 || ir.ids[static_expr].get_type() == TypeUndef)
			{
				statement(variable_decl(get<SPIRVariable>(loop_var)), ";");
			}
			else
			{
				auto &var = get<SPIRVariable>(loop_var);
				auto &type = get_variable_data_type(var);
				if (expr.empty())
				{
					// For loop initializers are of the form <type id = value, id = value, id = value, etc ...
					expr = join(to_qualifiers_glsl(var.self), type_to_glsl(type), " ");
				}
				else
				{
					expr += ", ";
					// In MSL, being based on C++, the asterisk marking a pointer
					// binds to the identifier, not the type.
					if (type.pointer)
						expr += "* ";
				}

				expr += join(to_name(loop_var), " = ", to_pointer_expression(var.static_expression));
			}
		}
		return expr;
	}
}
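
// For example, two int loop variables with usable initializers fold into
// "for (int i = 0, j = 0; ...)", while a candidate without an initializer is
// declared on its own line just before the loop header.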

bool CompilerGLSL::for_loop_initializers_are_same_type(const SPIRBlock &block)
{
	if (block.loop_variables.size() <= 1)
		return true;

	uint32_t expected = 0;
	Bitset expected_flags;
	for (auto &var : block.loop_variables)
	{
		// Don't care about uninitialized variables as they will not be part of the initializers.
		uint32_t expr = get<SPIRVariable>(var).static_expression;
		if (expr == 0 || ir.ids[expr].get_type() == TypeUndef)
			continue;

		if (expected == 0)
		{
			expected = get<SPIRVariable>(var).basetype;
			expected_flags = get_decoration_bitset(var);
		}
		else if (expected != get<SPIRVariable>(var).basetype)
			return false;

		// Precision flags and things like that must also match.
		if (expected_flags != get_decoration_bitset(var))
			return false;
	}

	return true;
}

bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method method)
{
	SPIRBlock::ContinueBlockType continue_type = continue_block_type(get<SPIRBlock>(block.continue_block));

	if (method == SPIRBlock::MergeToSelectForLoop || method == SPIRBlock::MergeToSelectContinueForLoop)
	{
		uint32_t current_count = statement_count;
		// If we're trying to create a true for loop,
		// we need to make sure that all opcodes before branch statement do not actually emit any code.
		// We can then take the condition expression and create a for (; cond ; ) { body; } structure instead.
		emit_block_instructions(block);

		bool condition_is_temporary = forced_temporaries.find(block.condition) == end(forced_temporaries);

		// This can work! We only did trivial things which could be forwarded in block body!
		if (current_count == statement_count && condition_is_temporary)
		{
			switch (continue_type)
			{
			case SPIRBlock::ForLoop:
			{
				// This block may be a dominating block, so make sure we flush undeclared variables before building the for loop header.
				flush_undeclared_variables(block);

				// Important that we do this in this order because
				// emitting the continue block can invalidate the condition expression.
				auto initializer = emit_for_loop_initializers(block);
				auto condition = to_expression(block.condition);

				// Condition might have to be inverted.
				if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block)))
					condition = join("!", enclose_expression(condition));

				emit_block_hints(block);
				if (method != SPIRBlock::MergeToSelectContinueForLoop)
				{
					auto continue_block = emit_continue_block(block.continue_block, false, false);
					statement("for (", initializer, "; ", condition, "; ", continue_block, ")");
				}
				else
					statement("for (", initializer, "; ", condition, "; )");
				break;
			}

			case SPIRBlock::WhileLoop:
			{
				// This block may be a dominating block, so make sure we flush undeclared variables before building the while loop header.
				flush_undeclared_variables(block);
				emit_while_loop_initializers(block);
				emit_block_hints(block);

				auto condition = to_expression(block.condition);
				// Condition might have to be inverted.
				if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block)))
					condition = join("!", enclose_expression(condition));

				statement("while (", condition, ")");
				break;
			}

			default:
				block.disable_block_optimization = true;
				force_recompile();
				begin_scope(); // We'll see an end_scope() later.
				return false;
			}

			begin_scope();
			return true;
		}
		else
		{
			block.disable_block_optimization = true;
			force_recompile();
			begin_scope(); // We'll see an end_scope() later.
			return false;
		}
	}
	else if (method == SPIRBlock::MergeToDirectForLoop)
	{
		auto &child = get<SPIRBlock>(block.next_block);

		// This block may be a dominating block, so make sure we flush undeclared variables before building the for loop header.
		flush_undeclared_variables(child);

		uint32_t current_count = statement_count;

		// If we're trying to create a true for loop,
		// we need to make sure that all opcodes before branch statement do not actually emit any code.
		// We can then take the condition expression and create a for (; cond ; ) { body; } structure instead.
		emit_block_instructions(child);

		bool condition_is_temporary = forced_temporaries.find(child.condition) == end(forced_temporaries);

		if (current_count == statement_count && condition_is_temporary)
		{
			uint32_t target_block = child.true_block;

			switch (continue_type)
			{
			case SPIRBlock::ForLoop:
			{
				// Important that we do this in this order because
				// emitting the continue block can invalidate the condition expression.
				auto initializer = emit_for_loop_initializers(block);
				auto condition = to_expression(child.condition);

				// Condition might have to be inverted.
				if (execution_is_noop(get<SPIRBlock>(child.true_block), get<SPIRBlock>(block.merge_block)))
				{
					condition = join("!", enclose_expression(condition));
					target_block = child.false_block;
				}

				auto continue_block = emit_continue_block(block.continue_block, false, false);
				emit_block_hints(block);
				statement("for (", initializer, "; ", condition, "; ", continue_block, ")");
				break;
			}

			case SPIRBlock::WhileLoop:
			{
				emit_while_loop_initializers(block);
				emit_block_hints(block);

				auto condition = to_expression(child.condition);
				// Condition might have to be inverted.
				if (execution_is_noop(get<SPIRBlock>(child.true_block), get<SPIRBlock>(block.merge_block)))
				{
					condition = join("!", enclose_expression(condition));
					target_block = child.false_block;
				}

				statement("while (", condition, ")");
				break;
			}

			default:
				block.disable_block_optimization = true;
				force_recompile();
				begin_scope(); // We'll see an end_scope() later.
				return false;
			}

			begin_scope();
			branch(child.self, target_block);
			return true;
		}
		else
		{
			block.disable_block_optimization = true;
			force_recompile();
			begin_scope(); // We'll see an end_scope() later.
			return false;
		}
	}
	else
		return false;
}

void CompilerGLSL::flush_undeclared_variables(SPIRBlock &block)
{
	for (auto &v : block.dominated_variables)
		flush_variable_declaration(v);
}

void CompilerGLSL::emit_hoisted_temporaries(SmallVector<pair<TypeID, ID>> &temporaries)
{
	// If we need to force temporaries for certain IDs due to continue blocks, do it before starting loop header.
	// Need to sort these to ensure that reference output is stable.
	sort(begin(temporaries), end(temporaries),
	     [](const pair<TypeID, ID> &a, const pair<TypeID, ID> &b) { return a.second < b.second; });

	for (auto &tmp : temporaries)
	{
		add_local_variable_name(tmp.second);
		auto &flags = ir.meta[tmp.second].decoration.decoration_flags;
		auto &type = get<SPIRType>(tmp.first);

		// Not all targets support pointer literals, so don't bother with that case.
		string initializer;
		if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
			initializer = join(" = ", to_zero_initialized_expression(tmp.first));

		statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(tmp.second)), initializer, ";");

		hoisted_temporaries.insert(tmp.second);
		forced_temporaries.insert(tmp.second);

		// The temporary might be read from before it's assigned, set up the expression now.
		set<SPIRExpression>(tmp.second, to_name(tmp.second), tmp.first, true);
	}
}
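
// For example, a temporary that is created inside a loop body but consumed after the
// loop must be declared ahead of the loop header; with force_zero_initialized_variables
// it also gets a zero initializer, e.g. "float _25 = 0.0;".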

void CompilerGLSL::emit_block_chain(SPIRBlock &block)
{
	bool select_branch_to_true_block = false;
	bool select_branch_to_false_block = false;
	bool skip_direct_branch = false;
	bool emitted_loop_header_variables = false;
	bool force_complex_continue_block = false;
	ValueSaver<uint32_t> loop_level_saver(current_loop_level);

	if (block.merge == SPIRBlock::MergeLoop)
		add_loop_level();

	emit_hoisted_temporaries(block.declare_temporary);

	SPIRBlock::ContinueBlockType continue_type = SPIRBlock::ContinueNone;
	if (block.continue_block)
	{
		continue_type = continue_block_type(get<SPIRBlock>(block.continue_block));
		// If we know we cannot emit a loop, mark the block early as a complex loop so we don't force unnecessary recompiles.
		if (continue_type == SPIRBlock::ComplexLoop)
			block.complex_continue = true;
	}

	// If we have loop variables, stop masking out access to the variable now.
	for (auto var_id : block.loop_variables)
	{
		auto &var = get<SPIRVariable>(var_id);
		var.loop_variable_enable = true;
		// We're not going to declare the variable directly, so emit a copy here.
		emit_variable_temporary_copies(var);
	}

	// Remember deferred declaration state. We will restore it before returning.
	SmallVector<bool, 64> rearm_dominated_variables(block.dominated_variables.size());
	for (size_t i = 0; i < block.dominated_variables.size(); i++)
	{
		uint32_t var_id = block.dominated_variables[i];
		auto &var = get<SPIRVariable>(var_id);
		rearm_dominated_variables[i] = var.deferred_declaration;
	}

	// This is the method often used by spirv-opt to implement loops.
	// The loop header goes straight into the continue block.
	// However, don't attempt this on ESSL 1.0: its strict rules for loop index variables
	// mean this loop method will not work when a loop variable is used in the continue block.
	if (!is_legacy_es() && block_is_loop_candidate(block, SPIRBlock::MergeToSelectContinueForLoop))
	{
		flush_undeclared_variables(block);
		if (attempt_emit_loop_header(block, SPIRBlock::MergeToSelectContinueForLoop))
		{
			if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block)))
				select_branch_to_false_block = true;
			else
				select_branch_to_true_block = true;

			emitted_loop_header_variables = true;
			force_complex_continue_block = true;
		}
	}
	// This is the older loop behavior in glslang which branches to the loop body directly from the loop header.
	else if (block_is_loop_candidate(block, SPIRBlock::MergeToSelectForLoop))
	{
		flush_undeclared_variables(block);
		if (attempt_emit_loop_header(block, SPIRBlock::MergeToSelectForLoop))
		{
			// The body of the while loop is just the true (or false) block, so always branch there unconditionally.
			if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block)))
				select_branch_to_false_block = true;
			else
				select_branch_to_true_block = true;

			emitted_loop_header_variables = true;
		}
	}
	// This is the newer loop behavior in glslang which branches from the loop header directly to
	// a new block, which in turn has an OpBranchConditional without a selection merge.
	else if (block_is_loop_candidate(block, SPIRBlock::MergeToDirectForLoop))
	{
		flush_undeclared_variables(block);
		if (attempt_emit_loop_header(block, SPIRBlock::MergeToDirectForLoop))
		{
			skip_direct_branch = true;
			emitted_loop_header_variables = true;
		}
	}
	else if (continue_type == SPIRBlock::DoWhileLoop)
	{
		flush_undeclared_variables(block);
		emit_while_loop_initializers(block);
		emitted_loop_header_variables = true;
		// We have some temporaries where the loop header is the dominator.
		// We risk a case where we have code like:
		// for (;;) { create-temporary; break; } consume-temporary;
		// so force-declare temporaries here.
		emit_hoisted_temporaries(block.potential_declare_temporary);
		statement("do");
		begin_scope();

		emit_block_instructions(block);
	}
	else if (block.merge == SPIRBlock::MergeLoop)
	{
		flush_undeclared_variables(block);
		emit_while_loop_initializers(block);
		emitted_loop_header_variables = true;

		// We have a generic loop without any distinguishable pattern like for, while or do while.
		get<SPIRBlock>(block.continue_block).complex_continue = true;
		continue_type = SPIRBlock::ComplexLoop;

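		// The generic fallback emits the loop shape roughly as:
		//   for (;;) { <loop body>; <inlined continue block>; }
		// with explicit break statements inside (a sketch of the shape, not verbatim output).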
		// We have some temporaries where the loop header is the dominator.
		// We risk a case where we have code like:
		// for (;;) { create-temporary; break; } consume-temporary;
		// so force-declare temporaries here.
		emit_hoisted_temporaries(block.potential_declare_temporary);
		statement("for (;;)");
		begin_scope();

		emit_block_instructions(block);
	}
	else
	{
		emit_block_instructions(block);
	}

	// If we didn't successfully emit a loop header and we had loop variable candidates, we have a problem,
	// as writes to said loop variables might have been masked out. We need a recompile.
	if (!emitted_loop_header_variables && !block.loop_variables.empty())
	{
		force_recompile();
		for (auto var : block.loop_variables)
			get<SPIRVariable>(var).loop_variable = false;
		block.loop_variables.clear();
	}

	flush_undeclared_variables(block);
	bool emit_next_block = true;

	// Handle end of block.
	switch (block.terminator)
	{
	case SPIRBlock::Direct:
		// True when emitting a complex continue block.
		if (block.loop_dominator == block.next_block)
		{
			branch(block.self, block.next_block);
			emit_next_block = false;
		}
		// True if MergeToDirectForLoop succeeded.
		else if (skip_direct_branch)
			emit_next_block = false;
		else if (is_continue(block.next_block) || is_break(block.next_block) || is_conditional(block.next_block))
		{
			branch(block.self, block.next_block);
			emit_next_block = false;
		}
		break;

	case SPIRBlock::Select:
		// True if MergeToSelectForLoop or MergeToSelectContinueForLoop succeeded.
		if (select_branch_to_true_block)
		{
			if (force_complex_continue_block)
			{
				assert(block.true_block == block.continue_block);

				// We're going to emit a continue block directly here, so make sure it's marked as complex.
				auto &complex_continue = get<SPIRBlock>(block.continue_block).complex_continue;
				bool old_complex = complex_continue;
				complex_continue = true;
				branch(block.self, block.true_block);
				complex_continue = old_complex;
			}
			else
				branch(block.self, block.true_block);
		}
		else if (select_branch_to_false_block)
		{
			if (force_complex_continue_block)
			{
				assert(block.false_block == block.continue_block);

				// We're going to emit a continue block directly here, so make sure it's marked as complex.
				auto &complex_continue = get<SPIRBlock>(block.continue_block).complex_continue;
				bool old_complex = complex_continue;
				complex_continue = true;
				branch(block.self, block.false_block);
				complex_continue = old_complex;
			}
			else
				branch(block.self, block.false_block);
		}
		else
			branch(block.self, block.condition, block.true_block, block.false_block);
		break;

	case SPIRBlock::MultiSelect:
	{
		auto &type = expression_type(block.condition);
		bool unsigned_case =
		    type.basetype == SPIRType::UInt || type.basetype == SPIRType::UShort || type.basetype == SPIRType::UByte;

		if (block.merge == SPIRBlock::MergeNone)
			SPIRV_CROSS_THROW("Switch statement is not structured");

		if (type.basetype == SPIRType::UInt64 || type.basetype == SPIRType::Int64)
		{
			// The SPIR-V spec suggests this is allowed, but we cannot support it in higher level languages.
			SPIRV_CROSS_THROW("Cannot use 64-bit switch selectors.");
		}

		const char *label_suffix = "";
		if (type.basetype == SPIRType::UInt && backend.uint32_t_literal_suffix)
			label_suffix = "u";
		else if (type.basetype == SPIRType::UShort)
			label_suffix = backend.uint16_t_literal_suffix;
		else if (type.basetype == SPIRType::Short)
			label_suffix = backend.int16_t_literal_suffix;

		SPIRBlock *old_emitting_switch = current_emitting_switch;
		current_emitting_switch = &block;

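		// When a break must escape both the switch and an enclosing loop, a ladder
		// variable is emitted; the resulting GLSL looks roughly like:
		//   bool _10_ladder_break = false;
		//   ... _10_ladder_break = true; break; ...
		//   if (_10_ladder_break) { break; }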
		if (block.need_ladder_break)
			statement("bool _", block.self, "_ladder_break = false;");

		// Find all unique case constructs.
		unordered_map<uint32_t, SmallVector<uint32_t>> case_constructs;
		SmallVector<uint32_t> block_declaration_order;
		SmallVector<uint32_t> literals_to_merge;

		// If a switch case branches to the default block for some reason, we can just remove that literal from consideration
		// and let the default: block handle it.
		// Section 2.11 of the SPIR-V spec states that for fall-through cases there is a very strict declaration order which we can take advantage of here.
		// We only need to consider possible fallthrough if order[i] branches to order[i + 1].
		for (auto &c : block.cases)
		{
			if (c.block != block.next_block && c.block != block.default_block)
			{
				if (!case_constructs.count(c.block))
					block_declaration_order.push_back(c.block);
				case_constructs[c.block].push_back(c.value);
			}
			else if (c.block == block.next_block && block.default_block != block.next_block)
			{
				// We might have to flush phi inside specific case labels.
				// If we can piggyback on default:, do so instead.
				literals_to_merge.push_back(c.value);
			}
		}

		// Empty literal array -> default.
		if (block.default_block != block.next_block)
		{
			auto &default_block = get<SPIRBlock>(block.default_block);

			// We need to slide in the default block somewhere in this chain
			// if there are fall-through scenarios since the default is declared separately in OpSwitch.
			// Only consider trivial fall-through cases here.
			size_t num_blocks = block_declaration_order.size();
			bool injected_block = false;

			for (size_t i = 0; i < num_blocks; i++)
			{
				auto &case_block = get<SPIRBlock>(block_declaration_order[i]);
				if (execution_is_direct_branch(case_block, default_block))
				{
					// Fallthrough to the default block, we must inject the default block here.
					block_declaration_order.insert(begin(block_declaration_order) + i + 1, block.default_block);
					injected_block = true;
					break;
				}
				else if (execution_is_direct_branch(default_block, case_block))
				{
					// The default case is falling through to another case label, we must inject the default block here.
					block_declaration_order.insert(begin(block_declaration_order) + i, block.default_block);
					injected_block = true;
					break;
				}
			}

			// Order does not matter.
			if (!injected_block)
				block_declaration_order.push_back(block.default_block);
			else if (is_legacy_es())
				SPIRV_CROSS_THROW("Default case label fallthrough to other case label is not supported in ESSL 1.0.");

			case_constructs[block.default_block] = {};
		}

		size_t num_blocks = block_declaration_order.size();

		const auto to_case_label = [](uint32_t literal, bool is_unsigned_case) -> string {
			return is_unsigned_case ? convert_to_string(literal) : convert_to_string(int32_t(literal));
		};
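		// For example, the literal 0xFFFFFFFF becomes "4294967295" for an unsigned
		// selector but "-1" for a signed one; the type suffix is appended separately
		// via label_suffix.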

		const auto to_legacy_case_label = [&](uint32_t condition, const SmallVector<uint32_t> &labels,
		                                      const char *suffix) -> string {
			string ret;
			size_t count = labels.size();
			for (size_t i = 0; i < count; i++)
			{
				if (i)
					ret += " || ";
				ret += join(count > 1 ? "(" : "", to_enclosed_expression(condition), " == ", labels[i], suffix,
				            count > 1 ? ")" : "");
			}
			return ret;
		};
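		// For a condition x and labels { 1, 2 }, this builds roughly:
		//   (x == 1) || (x == 2)
		// which stands in for case labels on targets without real switch support.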

		// We need to deal with a complex scenario for OpPhi. If we have case-fallthrough and Phi in the picture,
		// we need to flush phi nodes outside the switch block in a branch,
		// and skip any Phi handling inside the case label to make fall-through work as expected.
		// This kind of code-gen is super awkward and it's a last resort. Normally we would want to handle this
		// inside the case label if at all possible.
		for (size_t i = 1; backend.support_case_fallthrough && i < num_blocks; i++)
		{
			if (flush_phi_required(block.self, block_declaration_order[i]) &&
			    flush_phi_required(block_declaration_order[i - 1], block_declaration_order[i]))
			{
				uint32_t target_block = block_declaration_order[i];

				// Make sure we flush Phi, it might have been marked to be ignored earlier.
				get<SPIRBlock>(target_block).ignore_phi_from_block = 0;

				auto &literals = case_constructs[target_block];

				if (literals.empty())
				{
					// Oh boy, gotta make a complete negative test instead! o.o
					// Find all possible literals that would *not* make us enter the default block.
					// If none of those literals match, we flush Phi ...
					SmallVector<string> conditions;
					for (size_t j = 0; j < num_blocks; j++)
					{
						auto &negative_literals = case_constructs[block_declaration_order[j]];
						for (auto &case_label : negative_literals)
							conditions.push_back(join(to_enclosed_expression(block.condition),
							                          " != ", to_case_label(case_label, unsigned_case)));
					}

					statement("if (", merge(conditions, " && "), ")");
					begin_scope();
					flush_phi(block.self, target_block);
					end_scope();
				}
				else
				{
					SmallVector<string> conditions;
					conditions.reserve(literals.size());
					for (auto &case_label : literals)
						conditions.push_back(join(to_enclosed_expression(block.condition),
						                          " == ", to_case_label(case_label, unsigned_case)));
					statement("if (", merge(conditions, " || "), ")");
					begin_scope();
					flush_phi(block.self, target_block);
					end_scope();
				}

				// Mark the block so that we don't flush Phi from header to case label.
				get<SPIRBlock>(target_block).ignore_phi_from_block = block.self;
			}
		}

		// If there is only one default block, and no cases, this is a case where spirv-opt decided to emulate
		// non-structured exits with the help of a switch block.
		// This is buggy on FXC, so just emit the logical equivalent of a do { } while(false), which is more idiomatic.
		bool degenerate_switch = block.default_block != block.merge_block && block.cases.empty();

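		// In the degenerate case the emitted shape is roughly:
		//   do { <default block> } while(false);
		// or, on ESSL 1.0 where do/while support is not guaranteed:
		//   for (int spvDummyN = 0; spvDummyN < 1; spvDummyN++) { <default block> }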
		if (degenerate_switch || is_legacy_es())
		{
			// ESSL 1.0 is not guaranteed to support do/while.
			if (is_legacy_es())
			{
				uint32_t counter = statement_count;
				statement("for (int spvDummy", counter, " = 0; spvDummy", counter,
				          " < 1; spvDummy", counter, "++)");
			}
			else
				statement("do");
		}
		else
		{
			emit_block_hints(block);
			statement("switch (", to_expression(block.condition), ")");
		}
		begin_scope();

		for (size_t i = 0; i < num_blocks; i++)
		{
			uint32_t target_block = block_declaration_order[i];
			auto &literals = case_constructs[target_block];

			if (literals.empty())
			{
				// Default case.
				if (!degenerate_switch)
				{
					if (is_legacy_es())
						statement("else");
					else
						statement("default:");
				}
			}
			else
			{
				if (is_legacy_es())
				{
					statement((i ? "else " : ""), "if (", to_legacy_case_label(block.condition, literals, label_suffix),
					          ")");
				}
				else
				{
					for (auto &case_literal : literals)
					{
						// The case label value must be sign-extended properly in SPIR-V, so we can assume 32-bit values here.
						statement("case ", to_case_label(case_literal, unsigned_case), label_suffix, ":");
					}
				}
			}

			auto &case_block = get<SPIRBlock>(target_block);
			if (backend.support_case_fallthrough && i + 1 < num_blocks &&
			    execution_is_direct_branch(case_block, get<SPIRBlock>(block_declaration_order[i + 1])))
			{
				// We will fall through here, so just terminate the block chain early.
				// We still need to deal with Phi potentially.
				// No need for a stack-like thing here since we only do fall-through when there is a
				// single trivial branch to the fall-through target.
				current_emitting_switch_fallthrough = true;
			}
			else
				current_emitting_switch_fallthrough = false;

			if (!degenerate_switch)
				begin_scope();
			branch(block.self, target_block);
			if (!degenerate_switch)
				end_scope();

			current_emitting_switch_fallthrough = false;
		}

		// Might still have to flush phi variables if we branch from loop header directly to merge target.
		if (flush_phi_required(block.self, block.next_block))
		{
			if (block.default_block == block.next_block || !literals_to_merge.empty())
			{
				for (auto &case_literal : literals_to_merge)
					statement("case ", to_case_label(case_literal, unsigned_case), label_suffix, ":");

				if (block.default_block == block.next_block)
				{
					if (is_legacy_es())
						statement("else");
					else
						statement("default:");
				}

				begin_scope();
				flush_phi(block.self, block.next_block);
				statement("break;");
				end_scope();
			}
		}

		if (degenerate_switch && !is_legacy_es())
			end_scope_decl("while(false)");
		else
			end_scope();

		if (block.need_ladder_break)
		{
			statement("if (_", block.self, "_ladder_break)");
			begin_scope();
			statement("break;");
			end_scope();
		}

		current_emitting_switch = old_emitting_switch;
		break;
	}

	case SPIRBlock::Return:
	{
		for (auto &line : current_function->fixup_hooks_out)
			line();

		if (processing_entry_point)
			emit_fixup();

		auto &cfg = get_cfg_for_current_function();

		if (block.return_value)
		{
			auto &type = expression_type(block.return_value);
			if (!type.array.empty() && !backend.can_return_array)
			{
				// If we cannot return arrays, we will have a special out argument we can write to instead.
				// The backend is responsible for setting this up, and for redirecting the return values as appropriate.
				if (ir.ids[block.return_value].get_type() != TypeUndef)
				{
					emit_array_copy("spvReturnValue", block.return_value, StorageClassFunction,
					                get_expression_effective_storage_class(block.return_value));
				}

				if (!cfg.node_terminates_control_flow_in_sub_graph(current_function->entry_block, block.self) ||
				    block.loop_dominator != BlockID(SPIRBlock::NoDominator))
				{
					statement("return;");
				}
			}
			else
			{
				// OpReturnValue can return Undef, so don't emit anything for this case.
				if (ir.ids[block.return_value].get_type() != TypeUndef)
					statement("return ", to_expression(block.return_value), ";");
			}
		}
		else if (!cfg.node_terminates_control_flow_in_sub_graph(current_function->entry_block, block.self) ||
		         block.loop_dominator != BlockID(SPIRBlock::NoDominator))
		{
			// If this block is the very final block and not called from control flow,
			// we do not need an explicit return which looks out of place. Just end the function here.
			// In the very weird case of for(;;) { return; } executing return is unconditional,
			// but we actually need a return here ...
			statement("return;");
		}
		break;
	}

	case SPIRBlock::Kill:
		statement(backend.discard_literal, ";");
		break;

	case SPIRBlock::Unreachable:
		emit_next_block = false;
		break;

	case SPIRBlock::IgnoreIntersection:
		statement("ignoreIntersectionEXT;");
		break;

	case SPIRBlock::TerminateRay:
		statement("terminateRayEXT;");
		break;

	default:
		SPIRV_CROSS_THROW("Unimplemented block terminator.");
	}

	if (block.next_block && emit_next_block)
	{
		// If we hit this case, we're dealing with an unconditional branch, which means we will output
		// that block after this. If we had a selection merge, we already flushed phi variables.
		if (block.merge != SPIRBlock::MergeSelection)
		{
			flush_phi(block.self, block.next_block);
			// For a direct branch, need to remember to invalidate expressions in the next linear block instead.
			get<SPIRBlock>(block.next_block).invalidate_expressions = block.invalidate_expressions;
		}

		// For switch fallthrough cases, we terminate the chain here, but we still need to handle Phi.
		if (!current_emitting_switch_fallthrough)
		{
			// For merge selects we might have ignored the fact that a merge target
			// could have been a break; or continue;
			// We will need to deal with it here.
			if (is_loop_break(block.next_block))
			{
				// Cannot check for just break, because switch statements will also use break.
				assert(block.merge == SPIRBlock::MergeSelection);
				statement("break;");
			}
			else if (is_continue(block.next_block))
			{
				assert(block.merge == SPIRBlock::MergeSelection);
				branch_to_continue(block.self, block.next_block);
			}
			else if (BlockID(block.self) != block.next_block)
				emit_block_chain(get<SPIRBlock>(block.next_block));
		}
	}

	if (block.merge == SPIRBlock::MergeLoop)
	{
		if (continue_type == SPIRBlock::DoWhileLoop)
		{
			// Make sure that we run the continue block to get the expressions set, but this
			// should become an empty string.
			// We have no fallbacks if we cannot forward everything to temporaries ...
			const auto &continue_block = get<SPIRBlock>(block.continue_block);
			bool positive_test = execution_is_noop(get<SPIRBlock>(continue_block.true_block),
			                                       get<SPIRBlock>(continue_block.loop_dominator));

			uint32_t current_count = statement_count;
			auto statements = emit_continue_block(block.continue_block, positive_test, !positive_test);
			if (statement_count != current_count)
			{
				// The DoWhile block has side effects, so force the ComplexLoop pattern on the next pass.
				get<SPIRBlock>(block.continue_block).complex_continue = true;
				force_recompile();
			}

			// Might have to invert the do-while test here.
			auto condition = to_expression(continue_block.condition);
			if (!positive_test)
				condition = join("!", enclose_expression(condition));

			end_scope_decl(join("while (", condition, ")"));
		}
		else
			end_scope();

		loop_level_saver.release();

		// We cannot break out of two loops at once, so don't check for break; here.
		// Using block.self as the "from" block isn't quite right, but it has the same scope
		// and dominance structure, so it's fine.
		if (is_continue(block.merge_block))
			branch_to_continue(block.self, block.merge_block);
		else
			emit_block_chain(get<SPIRBlock>(block.merge_block));
	}

	// Forget about control dependent expressions now.
	block.invalidate_expressions.clear();

	// After we return, we must be out of scope, so if we somehow have to re-emit this function,
	// re-declare variables if necessary.
	assert(rearm_dominated_variables.size() == block.dominated_variables.size());
	for (size_t i = 0; i < block.dominated_variables.size(); i++)
	{
		uint32_t var = block.dominated_variables[i];
		get<SPIRVariable>(var).deferred_declaration = rearm_dominated_variables[i];
	}

	// Just like for deferred declaration, we need to forget about loop variable enable
	// if our block chain is reinstantiated later.
	for (auto &var_id : block.loop_variables)
		get<SPIRVariable>(var_id).loop_variable_enable = false;
}

void CompilerGLSL::begin_scope()
{
	statement("{");
	indent++;
}

void CompilerGLSL::end_scope()
{
	if (!indent)
		SPIRV_CROSS_THROW("Popping empty indent stack.");
	indent--;
	statement("}");
}

void CompilerGLSL::end_scope(const string &trailer)
{
	if (!indent)
		SPIRV_CROSS_THROW("Popping empty indent stack.");
	indent--;
	statement("}", trailer);
}

void CompilerGLSL::end_scope_decl()
{
	if (!indent)
		SPIRV_CROSS_THROW("Popping empty indent stack.");
	indent--;
	statement("};");
}

void CompilerGLSL::end_scope_decl(const string &decl)
{
	if (!indent)
		SPIRV_CROSS_THROW("Popping empty indent stack.");
	indent--;
	statement("} ", decl, ";");
}

void CompilerGLSL::check_function_call_constraints(const uint32_t *args, uint32_t length)
{
	// If our variable is remapped, and we rely on type-remapping information as
	// well, then we cannot pass the variable as a function parameter.
	// Fixing this is non-trivial without stamping out variants of the same function,
	// so for now warn about this and suggest workarounds instead.
	for (uint32_t i = 0; i < length; i++)
	{
		auto *var = maybe_get<SPIRVariable>(args[i]);
		if (!var || !var->remapped_variable)
			continue;

		auto &type = get<SPIRType>(var->basetype);
		if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData)
		{
			SPIRV_CROSS_THROW("Tried passing a remapped subpassInput variable to a function. "
			                  "This will not work correctly because type-remapping information is lost. "
			                  "To workaround, please consider not passing the subpass input as a function parameter, "
			                  "or use in/out variables instead which do not need type remapping information.");
		}
	}
}

const Instruction *CompilerGLSL::get_next_instruction_in_block(const Instruction &instr)
{
	// FIXME: This is kind of hacky. There should be a cleaner way.
	auto offset = uint32_t(&instr - current_emitting_block->ops.data());
	if ((offset + 1) < current_emitting_block->ops.size())
		return &current_emitting_block->ops[offset + 1];
	else
		return nullptr;
}

uint32_t CompilerGLSL::mask_relevant_memory_semantics(uint32_t semantics)
{
	return semantics & (MemorySemanticsAtomicCounterMemoryMask | MemorySemanticsImageMemoryMask |
	                    MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask |
	                    MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask);
}

void CompilerGLSL::emit_array_copy(const string &lhs, uint32_t rhs_id, StorageClass, StorageClass)
{
	statement(lhs, " = ", to_expression(rhs_id), ";");
}

void CompilerGLSL::unroll_array_from_complex_load(uint32_t target_id, uint32_t source_id, std::string &expr)
{
	// This path is only relevant for GL backends.
	if (!backend.force_gl_in_out_block)
		return;

	auto *var = maybe_get<SPIRVariable>(source_id);
	if (!var)
		return;

	if (var->storage != StorageClassInput)
		return;

	auto &type = get_variable_data_type(*var);
	if (type.array.empty())
		return;

	auto builtin = BuiltIn(get_decoration(var->self, DecorationBuiltIn));
	bool is_builtin = is_builtin_variable(*var) && (builtin == BuiltInPointSize || builtin == BuiltInPosition);
	bool is_tess = is_tessellation_shader();
	bool is_patch = has_decoration(var->self, DecorationPatch);

	// Tessellation input arrays are special in that they are unsized, so we cannot directly copy from them.
	// We must unroll the array load.
	// For builtins, we couldn't catch this case normally,
	// because this is resolved in the OpAccessChain in most cases.
	// If we load the entire array, we have no choice but to unroll here.
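	// For a builtin like gl_Position in a tessellation stage, the emitted unroll
	// looks roughly like:
	//   vec4 _20_unrolled[<size>];
	//   for (int i = 0; i < int(<size>); i++)
	//       _20_unrolled[i] = gl_in[i].gl_Position;
	// (a sketch; names and the size expression depend on the actual IDs and type).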
	if (!is_patch && (is_builtin || is_tess))
	{
		auto new_expr = join("_", target_id, "_unrolled");
		statement(variable_decl(type, new_expr, target_id), ";");
		string array_expr;
		if (type.array_size_literal.back())
		{
			array_expr = convert_to_string(type.array.back());
			if (type.array.back() == 0)
				SPIRV_CROSS_THROW("Cannot unroll an array copy from an unsized array.");
		}
		else
			array_expr = to_expression(type.array.back());

		// The array size might be a specialization constant, so use a for-loop instead.
		statement("for (int i = 0; i < int(", array_expr, "); i++)");
		begin_scope();
		if (is_builtin)
			statement(new_expr, "[i] = gl_in[i].", expr, ";");
		else
			statement(new_expr, "[i] = ", expr, "[i];");
		end_scope();

		expr = move(new_expr);
	}
}

void CompilerGLSL::cast_from_builtin_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type)
{
	auto *var = maybe_get_backing_variable(source_id);
	if (var)
		source_id = var->self;

	// Only interested in standalone builtin variables.
	if (!has_decoration(source_id, DecorationBuiltIn))
		return;

	auto builtin = static_cast<BuiltIn>(get_decoration(source_id, DecorationBuiltIn));
	auto expected_type = expr_type.basetype;

	// TODO: Fill in for more builtins.
	switch (builtin)
	{
	case BuiltInLayer:
	case BuiltInPrimitiveId:
	case BuiltInViewportIndex:
	case BuiltInInstanceId:
	case BuiltInInstanceIndex:
	case BuiltInVertexId:
	case BuiltInVertexIndex:
	case BuiltInSampleId:
	case BuiltInBaseVertex:
	case BuiltInBaseInstance:
	case BuiltInDrawIndex:
	case BuiltInFragStencilRefEXT:
	case BuiltInInstanceCustomIndexNV:
		expected_type = SPIRType::Int;
		break;

	case BuiltInGlobalInvocationId:
	case BuiltInLocalInvocationId:
	case BuiltInWorkgroupId:
	case BuiltInLocalInvocationIndex:
	case BuiltInWorkgroupSize:
	case BuiltInNumWorkgroups:
	case BuiltInIncomingRayFlagsNV:
	case BuiltInLaunchIdNV:
	case BuiltInLaunchSizeNV:
		expected_type = SPIRType::UInt;
		break;

	default:
		break;
	}

	if (expected_type != expr_type.basetype)
		expr = bitcast_expression(expr_type, expected_type, expr);
}

void CompilerGLSL::cast_to_builtin_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type)
{
	// Only interested in standalone builtin variables.
	if (!has_decoration(target_id, DecorationBuiltIn))
		return;

	auto builtin = static_cast<BuiltIn>(get_decoration(target_id, DecorationBuiltIn));
	auto expected_type = expr_type.basetype;

	// TODO: Fill in for more builtins.
	switch (builtin)
	{
	case BuiltInLayer:
	case BuiltInPrimitiveId:
	case BuiltInViewportIndex:
	case BuiltInFragStencilRefEXT:
		expected_type = SPIRType::Int;
		break;

	default:
		break;
	}

	if (expected_type != expr_type.basetype)
	{
		auto type = expr_type;
		type.basetype = expected_type;
		expr = bitcast_expression(type, expr_type.basetype, expr);
	}
}

void CompilerGLSL::convert_non_uniform_expression(const SPIRType &type, std::string &expr)
{
	if (*backend.nonuniform_qualifier == '\0')
		return;

	// Handle SPV_EXT_descriptor_indexing.
	if (type.basetype == SPIRType::Sampler || type.basetype == SPIRType::SampledImage ||
	    type.basetype == SPIRType::Image)
	{
		// The image/sampler ID must be declared as non-uniform.
		// However, it is not legal GLSL to have
		// nonuniformEXT(samplers[index]), so we must move the nonuniform qualifier
		// to the array indexing, like
		// samplers[nonuniformEXT(index)].
		// While the access chain will generally be nonuniformEXT, it's not necessarily so,
		// so we might have to fix up the OpLoad-ed expression late.

		auto start_array_index = expr.find_first_of('[');

		if (start_array_index == string::npos)
			return;

		// Check for the edge case that a non-arrayed resource was marked to be nonuniform,
		// and the bracket we found is actually part of non-resource related data.
		if (expr.find_first_of(',') < start_array_index)
			return;

		// We've opened a bracket, track expressions until we can close the bracket.
		// This must be our image index.
		size_t end_array_index = string::npos;
		unsigned bracket_count = 1;
		for (size_t index = start_array_index + 1; index < expr.size(); index++)
		{
			if (expr[index] == ']')
			{
				if (--bracket_count == 0)
				{
					end_array_index = index;
					break;
				}
			}
			else if (expr[index] == '[')
				bracket_count++;
		}

		assert(bracket_count == 0);

		// Doesn't really make sense to declare a non-arrayed image with nonuniformEXT, but there's
		// nothing we can do here to express that.
		if (start_array_index == string::npos || end_array_index == string::npos || end_array_index < start_array_index)
			return;

		start_array_index++;

		expr = join(expr.substr(0, start_array_index), backend.nonuniform_qualifier, "(",
		            expr.substr(start_array_index, end_array_index - start_array_index), ")",
		            expr.substr(end_array_index, string::npos));
	}
}

void CompilerGLSL::emit_block_hints(const SPIRBlock &)
{
}

void CompilerGLSL::preserve_alias_on_reset(uint32_t id)
{
	preserved_aliases[id] = get_name(id);
}

void CompilerGLSL::reset_name_caches()
{
	for (auto &preserved : preserved_aliases)
		set_name(preserved.first, preserved.second);

	preserved_aliases.clear();
	resource_names.clear();
	block_input_names.clear();
	block_output_names.clear();
	block_ubo_names.clear();
	block_ssbo_names.clear();
	block_names.clear();
	function_overloads.clear();
}

void CompilerGLSL::fixup_type_alias()
{
	// Due to how some backends work, the "master" type of type_alias must be a block-like type if it exists.
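	// For example, if a block-like struct A currently aliases a plain struct B,
	// the pass below promotes A to master and redirects B (and any other aliases)
	// to point at A instead.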
	ir.for_each_typed_id<SPIRType>([&](uint32_t self, SPIRType &type) {
		if (!type.type_alias)
			return;

		if (has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock))
		{
			// Top-level block types should never alias anything else.
			type.type_alias = 0;
		}
		else if (type_is_block_like(type) && type.self == ID(self))
		{
			// A block-like type is any type which contains Offset decoration, but not top-level blocks,
			// i.e. blocks which are placed inside buffers.
			// Become the master.
			ir.for_each_typed_id<SPIRType>([&](uint32_t other_id, SPIRType &other_type) {
				if (other_id == self)
					return;

				if (other_type.type_alias == type.type_alias)
					other_type.type_alias = self;
			});

			this->get<SPIRType>(type.type_alias).type_alias = self;
			type.type_alias = 0;
		}
	});
}

void CompilerGLSL::reorder_type_alias()
{
	// Reorder declaration of types so that the master of the type alias is always emitted first.
	// We need this in case a type B depends on type A (A must come before B in the vector), but A is an alias
	// of a type ABuffer, which means declaration of A doesn't happen (yet), and the order would be B, ABuffer
	// and not ABuffer, B. Fix this up here.
	auto loop_lock = ir.create_loop_hard_lock();

	auto &type_ids = ir.ids_for_type[TypeType];
	for (auto alias_itr = begin(type_ids); alias_itr != end(type_ids); ++alias_itr)
	{
		auto &type = get<SPIRType>(*alias_itr);
		if (type.type_alias != TypeID(0) &&
		    !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked))
		{
			// We will skip declaring this type, so make sure the type_alias type comes before.
			auto master_itr = find(begin(type_ids), end(type_ids), ID(type.type_alias));
			assert(master_itr != end(type_ids));

			if (alias_itr < master_itr)
			{
				// Must also swap the type order for the constant-type joined array.
				auto &joined_types = ir.ids_for_constant_or_type;
				auto alt_alias_itr = find(begin(joined_types), end(joined_types), *alias_itr);
				auto alt_master_itr = find(begin(joined_types), end(joined_types), *master_itr);
				assert(alt_alias_itr != end(joined_types));
				assert(alt_master_itr != end(joined_types));

				swap(*alias_itr, *master_itr);
				swap(*alt_alias_itr, *alt_master_itr);
			}
		}
	}
}

void CompilerGLSL::emit_line_directive(uint32_t file_id, uint32_t line_literal)
{
	// If we are redirecting statements, ignore the line directive.
	// The common case here is continue blocks.
	if (redirect_statement)
		return;

	if (options.emit_line_directives)
	{
		require_extension_internal("GL_GOOGLE_cpp_style_line_directive");
		statement_no_indent("#line ", line_literal, " \"", get<SPIRString>(file_id).str, "\"");
	}
}

void CompilerGLSL::propagate_nonuniform_qualifier(uint32_t id)
{
	// SPIR-V might only tag the very last ID with NonUniformEXT, but for codegen,
	// we need to know NonUniformEXT a little earlier, when the resource is actually loaded.
	// Back-propagate the qualifier based on the expression dependency chain.

	if (!has_decoration(id, DecorationNonUniformEXT))
	{
		set_decoration(id, DecorationNonUniformEXT);
		force_recompile();
	}

	auto *e = maybe_get<SPIRExpression>(id);
	auto *combined = maybe_get<SPIRCombinedImageSampler>(id);
	auto *chain = maybe_get<SPIRAccessChain>(id);
	if (e)
	{
		for (auto &expr : e->expression_dependencies)
			propagate_nonuniform_qualifier(expr);
		for (auto &expr : e->implied_read_expressions)
			propagate_nonuniform_qualifier(expr);
	}
	else if (combined)
	{
		propagate_nonuniform_qualifier(combined->image);
		propagate_nonuniform_qualifier(combined->sampler);
	}
	else if (chain)
	{
		for (auto &expr : chain->implied_read_expressions)
			propagate_nonuniform_qualifier(expr);
	}
}

void CompilerGLSL::emit_copy_logical_type(uint32_t lhs_id, uint32_t lhs_type_id, uint32_t rhs_id, uint32_t rhs_type_id,
                                          SmallVector<uint32_t> chain)
{
	// Fully unroll all member/array indices one by one.
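	// Copying a struct { float a; vec2 b; } this way results in element-wise stores,
	// roughly:
	//   lhs.a = rhs.a;
	//   lhs.b = rhs.b;
	// with each store routed through emit_store_statement to handle packing fixups.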

	auto &lhs_type = get<SPIRType>(lhs_type_id);
	auto &rhs_type = get<SPIRType>(rhs_type_id);

	if (!lhs_type.array.empty())
	{
		// Could use a loop here to support specialization constants, but it gets rather complicated with nested array types,
		// and this is a rather obscure opcode anyway; keep it simple unless we are forced to.
		uint32_t array_size = to_array_size_literal(lhs_type);
		chain.push_back(0);

		for (uint32_t i = 0; i < array_size; i++)
		{
			chain.back() = i;
			emit_copy_logical_type(lhs_id, lhs_type.parent_type, rhs_id, rhs_type.parent_type, chain);
		}
	}
	else if (lhs_type.basetype == SPIRType::Struct)
	{
		chain.push_back(0);
		uint32_t member_count = uint32_t(lhs_type.member_types.size());
		for (uint32_t i = 0; i < member_count; i++)
		{
			chain.back() = i;
			emit_copy_logical_type(lhs_id, lhs_type.member_types[i], rhs_id, rhs_type.member_types[i], chain);
		}
	}
	else
	{
		// Need to handle unpack/packing fixups since this can differ wildly between the logical types,
		// particularly in MSL.
		// To deal with this, we emit access chains and go through emit_store_statement
		// to deal with all the special cases we can encounter.

		AccessChainMeta lhs_meta, rhs_meta;
		auto lhs = access_chain_internal(lhs_id, chain.data(), uint32_t(chain.size()),
		                                 ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &lhs_meta);
		auto rhs = access_chain_internal(rhs_id, chain.data(), uint32_t(chain.size()),
		                                 ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &rhs_meta);

		uint32_t id = ir.increase_bound_by(2);
		lhs_id = id;
		rhs_id = id + 1;

		{
			auto &lhs_expr = set<SPIRExpression>(lhs_id, move(lhs), lhs_type_id, true);
			lhs_expr.need_transpose = lhs_meta.need_transpose;

			if (lhs_meta.storage_is_packed)
				set_extended_decoration(lhs_id, SPIRVCrossDecorationPhysicalTypePacked);
			if (lhs_meta.storage_physical_type != 0)
				set_extended_decoration(lhs_id, SPIRVCrossDecorationPhysicalTypeID, lhs_meta.storage_physical_type);

			forwarded_temporaries.insert(lhs_id);
			suppressed_usage_tracking.insert(lhs_id);
		}

		{
			auto &rhs_expr = set<SPIRExpression>(rhs_id, move(rhs), rhs_type_id, true);
			rhs_expr.need_transpose = rhs_meta.need_transpose;

			if (rhs_meta.storage_is_packed)
				set_extended_decoration(rhs_id, SPIRVCrossDecorationPhysicalTypePacked);
			if (rhs_meta.storage_physical_type != 0)
				set_extended_decoration(rhs_id, SPIRVCrossDecorationPhysicalTypeID, rhs_meta.storage_physical_type);

			forwarded_temporaries.insert(rhs_id);
			suppressed_usage_tracking.insert(rhs_id);
		}

		emit_store_statement(lhs_id, rhs_id);
	}
}

bool CompilerGLSL::subpass_input_is_framebuffer_fetch(uint32_t id) const
{
	if (!has_decoration(id, DecorationInputAttachmentIndex))
		return false;

	uint32_t input_attachment_index = get_decoration(id, DecorationInputAttachmentIndex);
	for (auto &remap : subpass_to_framebuffer_fetch_attachment)
		if (remap.first == input_attachment_index)
			return true;

	return false;
}

const SPIRVariable *CompilerGLSL::find_subpass_input_by_attachment_index(uint32_t index) const
{
	const SPIRVariable *ret = nullptr;
	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
		if (has_decoration(var.self, DecorationInputAttachmentIndex) &&
		    get_decoration(var.self, DecorationInputAttachmentIndex) == index)
		{
			ret = &var;
		}
	});
	return ret;
}

const SPIRVariable *CompilerGLSL::find_color_output_by_location(uint32_t location) const
{
	const SPIRVariable *ret = nullptr;
	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
		if (var.storage == StorageClassOutput && get_decoration(var.self, DecorationLocation) == location)
			ret = &var;
	});
	return ret;
}

void CompilerGLSL::emit_inout_fragment_outputs_copy_to_subpass_inputs()
{
	for (auto &remap : subpass_to_framebuffer_fetch_attachment)
	{
		auto *subpass_var = find_subpass_input_by_attachment_index(remap.first);
		auto *output_var = find_color_output_by_location(remap.second);
		if (!subpass_var)
			continue;
		if (!output_var)
			SPIRV_CROSS_THROW("Need to declare the corresponding fragment output variable to be able "
			                  "to read from it.");
		if (is_array(get<SPIRType>(output_var->basetype)))
			SPIRV_CROSS_THROW("Cannot use GL_EXT_shader_framebuffer_fetch with arrays of color outputs.");

		auto &func = get<SPIRFunction>(get_entry_point().self);
		func.fixup_hooks_in.push_back([=]() {
			if (is_legacy())
			{
				statement(to_expression(subpass_var->self), " = ", "gl_LastFragData[",
				          get_decoration(output_var->self, DecorationLocation), "];");
			}
			else
			{
				uint32_t num_rt_components = this->get<SPIRType>(output_var->basetype).vecsize;
				statement(to_expression(subpass_var->self), vector_swizzle(num_rt_components, 0), " = ",
				          to_expression(output_var->self), ";");
			}
		});
	}
}

bool CompilerGLSL::variable_is_depth_or_compare(VariableID id) const
{
	return image_is_comparison(get<SPIRType>(get<SPIRVariable>(id).basetype), id);
}

const char *CompilerGLSL::ShaderSubgroupSupportHelper::get_extension_name(Candidate c)
{
	static const char *const retval[CandidateCount] = { "GL_KHR_shader_subgroup_ballot",
		                                                "GL_KHR_shader_subgroup_basic",
		                                                "GL_KHR_shader_subgroup_vote",
		                                                "GL_NV_gpu_shader5",
		                                                "GL_NV_shader_thread_group",
		                                                "GL_NV_shader_thread_shuffle",
		                                                "GL_ARB_shader_ballot",
		                                                "GL_ARB_shader_group_vote",
		                                                "GL_AMD_gcn_shader" };
	return retval[c];
}

SmallVector<std::string> CompilerGLSL::ShaderSubgroupSupportHelper::get_extra_required_extension_names(Candidate c)
{
	switch (c)
	{
	case ARB_shader_ballot:
		return { "GL_ARB_shader_int64" };
	case AMD_gcn_shader:
		return { "GL_AMD_gpu_shader_int64", "GL_NV_gpu_shader5" };
	default:
		return {};
	}
}

const char *CompilerGLSL::ShaderSubgroupSupportHelper::get_extra_required_extension_predicate(Candidate c)
{
	switch (c)
	{
	case ARB_shader_ballot:
		return "defined(GL_ARB_shader_int64)";
	case AMD_gcn_shader:
		return "(defined(GL_AMD_gpu_shader_int64) || defined(GL_NV_gpu_shader5))";
	default:
		return "";
	}
}

CompilerGLSL::ShaderSubgroupSupportHelper::FeatureVector CompilerGLSL::ShaderSubgroupSupportHelper::
    get_feature_dependencies(Feature feature)
{
	switch (feature)
	{
	case SubgroupAllEqualT:
		return { SubgroupBroadcast_First, SubgroupAll_Any_AllEqualBool };
	case SubgroupElect:
		return { SubgroupBallotFindLSB_MSB, SubgroupBallot, SubgroupInvocationID };
	case SubgroupInverseBallot_InclBitCount_ExclBitCout:
		return { SubgroupMask };
	case SubgroupBallotBitCount:
		return { SubgroupBallot };
	default:
		return {};
	}
}

CompilerGLSL::ShaderSubgroupSupportHelper::FeatureMask CompilerGLSL::ShaderSubgroupSupportHelper::
    get_feature_dependency_mask(Feature feature)
{
	return build_mask(get_feature_dependencies(feature));
}

bool CompilerGLSL::ShaderSubgroupSupportHelper::can_feature_be_implemented_without_extensions(Feature feature)
{
	static const bool retval[FeatureCount] = { false, false, false, false, false, false,
		                                       true, // SubgroupBallotFindLSB_MSB
		                                       false, false, false, false,
		                                       true, // SubgroupMemBarrier - replaced with workgroup memory barriers
		                                       false, false, true, false };

	return retval[feature];
}

CompilerGLSL::ShaderSubgroupSupportHelper::Candidate CompilerGLSL::ShaderSubgroupSupportHelper::
    get_KHR_extension_for_feature(Feature feature)
{
	static const Candidate extensions[FeatureCount] = {
		KHR_shader_subgroup_ballot, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic,
		KHR_shader_subgroup_basic, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_vote,
		KHR_shader_subgroup_vote, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic,
		KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot
	};

	return extensions[feature];
}

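// Requesting a feature also pulls in its dependencies: e.g. requesting SubgroupElect
// sets the bits for SubgroupBallotFindLSB_MSB, SubgroupBallot and SubgroupInvocationID
// as well (see get_feature_dependencies above).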
void CompilerGLSL::ShaderSubgroupSupportHelper::request_feature(Feature feature)
{
	feature_mask |= (FeatureMask(1) << feature) | get_feature_dependency_mask(feature);
}

bool CompilerGLSL::ShaderSubgroupSupportHelper::is_feature_requested(Feature feature) const
{
	return (feature_mask & (1u << feature)) != 0;
}

CompilerGLSL::ShaderSubgroupSupportHelper::Result CompilerGLSL::ShaderSubgroupSupportHelper::resolve() const
{
	Result res;

	for (uint32_t i = 0u; i < FeatureCount; ++i)
	{
		if (feature_mask & (1u << i))
		{
			auto feature = static_cast<Feature>(i);
			std::unordered_set<uint32_t> unique_candidates;

			auto candidates = get_candidates_for_feature(feature);
			unique_candidates.insert(candidates.begin(), candidates.end());

			auto deps = get_feature_dependencies(feature);
			for (Feature d : deps)
			{
				candidates = get_candidates_for_feature(d);
				if (!candidates.empty())
					unique_candidates.insert(candidates.begin(), candidates.end());
			}

			for (uint32_t c : unique_candidates)
				++res.weights[static_cast<Candidate>(c)];
		}
	}

	return res;
}

CompilerGLSL::ShaderSubgroupSupportHelper::CandidateVector CompilerGLSL::ShaderSubgroupSupportHelper::
    get_candidates_for_feature(Feature ft, const Result &r)
{
	auto c = get_candidates_for_feature(ft);
	auto cmp = [&r](Candidate a, Candidate b) {
		if (r.weights[a] == r.weights[b])
			return a < b; // Prefer candidates with a lower enum value.
		return r.weights[a] > r.weights[b];
	};
	std::sort(c.begin(), c.end(), cmp);
	return c;
}

CompilerGLSL::ShaderSubgroupSupportHelper::CandidateVector CompilerGLSL::ShaderSubgroupSupportHelper::
    get_candidates_for_feature(Feature feature)
{
	switch (feature)
	{
	case SubgroupMask:
		return { KHR_shader_subgroup_ballot, NV_shader_thread_group, ARB_shader_ballot };
	case SubgroupSize:
		return { KHR_shader_subgroup_basic, NV_shader_thread_group, AMD_gcn_shader, ARB_shader_ballot };
	case SubgroupInvocationID:
		return { KHR_shader_subgroup_basic, NV_shader_thread_group, ARB_shader_ballot };
	case SubgroupID:
		return { KHR_shader_subgroup_basic, NV_shader_thread_group };
	case NumSubgroups:
		return { KHR_shader_subgroup_basic, NV_shader_thread_group };
	case SubgroupBroadcast_First:
		return { KHR_shader_subgroup_ballot, NV_shader_thread_shuffle, ARB_shader_ballot };
	case SubgroupBallotFindLSB_MSB:
		return { KHR_shader_subgroup_ballot, NV_shader_thread_group };
	case SubgroupAll_Any_AllEqualBool:
		return { KHR_shader_subgroup_vote, NV_gpu_shader_5, ARB_shader_group_vote, AMD_gcn_shader };
	case SubgroupAllEqualT:
		return {}; // Depends on other features only.
	case SubgroupElect:
		return {}; // Depends on other features only.
	case SubgroupBallot:
		return { KHR_shader_subgroup_ballot, NV_shader_thread_group, ARB_shader_ballot };
	case SubgroupBarrier:
		return { KHR_shader_subgroup_basic, NV_shader_thread_group, ARB_shader_ballot, AMD_gcn_shader };
	case SubgroupMemBarrier:
		return { KHR_shader_subgroup_basic };
	case SubgroupInverseBallot_InclBitCount_ExclBitCout:
		return {};
	case SubgroupBallotBitExtract:
		return { NV_shader_thread_group };
	case SubgroupBallotBitCount:
		return {};
	default:
		return {};
	}
}

CompilerGLSL::ShaderSubgroupSupportHelper::FeatureMask CompilerGLSL::ShaderSubgroupSupportHelper::build_mask(
    const SmallVector<Feature> &features)
{
	FeatureMask mask = 0;
	for (Feature f : features)
		mask |= FeatureMask(1) << f;
	return mask;
}

CompilerGLSL::ShaderSubgroupSupportHelper::Result::Result()
{
	for (auto &weight : weights)
		weight = 0;

	// Make sure the KHR_shader_subgroup extensions are always preferred.
	const uint32_t big_num = FeatureCount;
	weights[KHR_shader_subgroup_ballot] = big_num;
	weights[KHR_shader_subgroup_basic] = big_num;
	weights[KHR_shader_subgroup_vote] = big_num;
}

void CompilerGLSL::request_workaround_wrapper_overload(TypeID id)
{
	// Must be ordered to maintain deterministic output, so a vector is appropriate.
	if (find(begin(workaround_ubo_load_overload_types), end(workaround_ubo_load_overload_types), id) ==
	    end(workaround_ubo_load_overload_types))
	{
		force_recompile();
		workaround_ubo_load_overload_types.push_back(id);
	}
}

void CompilerGLSL::rewrite_load_for_wrapped_row_major(std::string &expr, TypeID loaded_type, ID ptr)
{
	// Loading row-major matrices from UBOs on older AMD Windows OpenGL drivers is problematic.
	// To load these types correctly, we must first wrap them in a dummy function whose only purpose is to
	// ensure the row_major decoration is actually respected.
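	// The rewrite turns a UBO load like
	//   m = ubo.rowMajorMatrix;
	// into roughly
	//   m = spvWorkaroundRowMajor(ubo.rowMajorMatrix);
	// with the wrapper overload generated on demand via request_workaround_wrapper_overload().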
	auto *var = maybe_get_backing_variable(ptr);
	if (!var)
		return;

	auto &backing_type = get<SPIRType>(var->basetype);
	bool is_ubo = backing_type.basetype == SPIRType::Struct && backing_type.storage == StorageClassUniform &&
	              has_decoration(backing_type.self, DecorationBlock);
	if (!is_ubo)
		return;

	auto *type = &get<SPIRType>(loaded_type);
	bool rewrite = false;

	if (is_matrix(*type))
	{
		// To avoid adding a lot of unnecessary meta tracking to forward the row_major state,
		// we will simply look at the base struct itself. It is exceptionally rare to mix and match row-major/col-major state.
		// If there is any row-major action going on, we apply the workaround.
		// It is harmless to apply the workaround to column-major matrices, so this is still a valid solution.
		// If an access chain occurred, the workaround is not required, so loads of vectors or scalars don't need it.
		type = &backing_type;
	}

	if (type->basetype == SPIRType::Struct)
	{
		// If we're loading a struct where any member is a row-major matrix, apply the workaround.
		for (uint32_t i = 0; i < uint32_t(type->member_types.size()); i++)
		{
			if (combined_decoration_for_member(*type, i).get(DecorationRowMajor))
			{
				rewrite = true;
				break;
			}
		}
	}

	if (rewrite)
	{
		request_workaround_wrapper_overload(loaded_type);
		expr = join("spvWorkaroundRowMajor(", expr, ")");
	}
}