1 /*
2  * Copyright 2015-2021 Arm Limited
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /*
18  * At your option, you may choose to accept this material under either:
19  *  1. The Apache License, Version 2.0, found at <http://www.apache.org/licenses/LICENSE-2.0>, or
20  *  2. The MIT License, found at <http://opensource.org/licenses/MIT>.
21  * SPDX-License-Identifier: Apache-2.0 OR MIT.
22  */
23 
24 #include "spirv_glsl.hpp"
25 #include "GLSL.std.450.h"
26 #include "spirv_common.hpp"
27 #include <algorithm>
28 #include <assert.h>
29 #include <cmath>
30 #include <limits>
31 #include <locale.h>
32 #include <utility>
33 
34 #ifndef _WIN32
35 #include <langinfo.h>
36 #endif
37 #include <locale.h>
38 
39 using namespace spv;
40 using namespace SPIRV_CROSS_NAMESPACE;
41 using namespace std;
42 
43 static bool is_unsigned_opcode(Op op)
44 {
45 	// Don't have to be exhaustive, only relevant for legacy target checking ...
46 	switch (op)
47 	{
48 	case OpShiftRightLogical:
49 	case OpUGreaterThan:
50 	case OpUGreaterThanEqual:
51 	case OpULessThan:
52 	case OpULessThanEqual:
53 	case OpUConvert:
54 	case OpUDiv:
55 	case OpUMod:
56 	case OpUMulExtended:
57 	case OpConvertUToF:
58 	case OpConvertFToU:
59 		return true;
60 
61 	default:
62 		return false;
63 	}
64 }
65 
66 static bool is_unsigned_glsl_opcode(GLSLstd450 op)
67 {
68 	// Don't have to be exhaustive, only relevant for legacy target checking ...
69 	switch (op)
70 	{
71 	case GLSLstd450UClamp:
72 	case GLSLstd450UMin:
73 	case GLSLstd450UMax:
74 	case GLSLstd450FindUMsb:
75 		return true;
76 
77 	default:
78 		return false;
79 	}
80 }
81 
82 static bool packing_is_vec4_padded(BufferPackingStandard packing)
83 {
84 	switch (packing)
85 	{
86 	case BufferPackingHLSLCbuffer:
87 	case BufferPackingHLSLCbufferPackOffset:
88 	case BufferPackingStd140:
89 	case BufferPackingStd140EnhancedLayout:
90 		return true;
91 
92 	default:
93 		return false;
94 	}
95 }
96 
97 static bool packing_is_hlsl(BufferPackingStandard packing)
98 {
99 	switch (packing)
100 	{
101 	case BufferPackingHLSLCbuffer:
102 	case BufferPackingHLSLCbufferPackOffset:
103 		return true;
104 
105 	default:
106 		return false;
107 	}
108 }
109 
110 static bool packing_has_flexible_offset(BufferPackingStandard packing)
111 {
112 	switch (packing)
113 	{
114 	case BufferPackingStd140:
115 	case BufferPackingStd430:
116 	case BufferPackingScalar:
117 	case BufferPackingHLSLCbuffer:
118 		return false;
119 
120 	default:
121 		return true;
122 	}
123 }
124 
125 static bool packing_is_scalar(BufferPackingStandard packing)
126 {
127 	switch (packing)
128 	{
129 	case BufferPackingScalar:
130 	case BufferPackingScalarEnhancedLayout:
131 		return true;
132 
133 	default:
134 		return false;
135 	}
136 }
137 
138 static BufferPackingStandard packing_to_substruct_packing(BufferPackingStandard packing)
139 {
140 	switch (packing)
141 	{
142 	case BufferPackingStd140EnhancedLayout:
143 		return BufferPackingStd140;
144 	case BufferPackingStd430EnhancedLayout:
145 		return BufferPackingStd430;
146 	case BufferPackingHLSLCbufferPackOffset:
147 		return BufferPackingHLSLCbuffer;
148 	case BufferPackingScalarEnhancedLayout:
149 		return BufferPackingScalar;
150 	default:
151 		return packing;
152 	}
153 }
154 
155 void CompilerGLSL::init()
156 {
157 	if (ir.source.known)
158 	{
159 		options.es = ir.source.es;
160 		options.version = ir.source.version;
161 	}
162 
163 	// Query the locale to see what the decimal point is.
164 	// We'll rely on fixing it up ourselves in the rare case we have a comma-as-decimal locale
165 	// rather than setting locales ourselves. Setting locales in a safe and isolated way is rather
166 	// tricky.
167 #ifdef _WIN32
168 	// On Windows, localeconv uses thread-local storage, so it should be fine.
169 	const struct lconv *conv = localeconv();
170 	if (conv && conv->decimal_point)
171 		current_locale_radix_character = *conv->decimal_point;
172 #elif defined(__ANDROID__) && __ANDROID_API__ < 26
173 	// nl_langinfo is not supported on this platform, fall back to the worse alternative.
174 	const struct lconv *conv = localeconv();
175 	if (conv && conv->decimal_point)
176 		current_locale_radix_character = *conv->decimal_point;
177 #else
178 	// localeconv, the portable function, is not MT safe ...
179 	const char *decimal_point = nl_langinfo(RADIXCHAR);
180 	if (decimal_point && *decimal_point != '\0')
181 		current_locale_radix_character = *decimal_point;
182 #endif
183 }
184 
185 static const char *to_pls_layout(PlsFormat format)
186 {
187 	switch (format)
188 	{
189 	case PlsR11FG11FB10F:
190 		return "layout(r11f_g11f_b10f) ";
191 	case PlsR32F:
192 		return "layout(r32f) ";
193 	case PlsRG16F:
194 		return "layout(rg16f) ";
195 	case PlsRGB10A2:
196 		return "layout(rgb10_a2) ";
197 	case PlsRGBA8:
198 		return "layout(rgba8) ";
199 	case PlsRG16:
200 		return "layout(rg16) ";
201 	case PlsRGBA8I:
202 		return "layout(rgba8i) ";
203 	case PlsRG16I:
204 		return "layout(rg16i) ";
205 	case PlsRGB10A2UI:
206 		return "layout(rgb10_a2ui) ";
207 	case PlsRGBA8UI:
208 		return "layout(rgba8ui) ";
209 	case PlsRG16UI:
210 		return "layout(rg16ui) ";
211 	case PlsR32UI:
212 		return "layout(r32ui) ";
213 	default:
214 		return "";
215 	}
216 }
217 
218 static SPIRType::BaseType pls_format_to_basetype(PlsFormat format)
219 {
220 	switch (format)
221 	{
222 	default:
223 	case PlsR11FG11FB10F:
224 	case PlsR32F:
225 	case PlsRG16F:
226 	case PlsRGB10A2:
227 	case PlsRGBA8:
228 	case PlsRG16:
229 		return SPIRType::Float;
230 
231 	case PlsRGBA8I:
232 	case PlsRG16I:
233 		return SPIRType::Int;
234 
235 	case PlsRGB10A2UI:
236 	case PlsRGBA8UI:
237 	case PlsRG16UI:
238 	case PlsR32UI:
239 		return SPIRType::UInt;
240 	}
241 }
242 
243 static uint32_t pls_format_to_components(PlsFormat format)
244 {
245 	switch (format)
246 	{
247 	default:
248 	case PlsR32F:
249 	case PlsR32UI:
250 		return 1;
251 
252 	case PlsRG16F:
253 	case PlsRG16:
254 	case PlsRG16UI:
255 	case PlsRG16I:
256 		return 2;
257 
258 	case PlsR11FG11FB10F:
259 		return 3;
260 
261 	case PlsRGB10A2:
262 	case PlsRGBA8:
263 	case PlsRGBA8I:
264 	case PlsRGB10A2UI:
265 	case PlsRGBA8UI:
266 		return 4;
267 	}
268 }
269 
270 const char *CompilerGLSL::vector_swizzle(int vecsize, int index)
271 {
272 	static const char *const swizzle[4][4] = {
273 		{ ".x", ".y", ".z", ".w" },
274 		{ ".xy", ".yz", ".zw", nullptr },
275 		{ ".xyz", ".yzw", nullptr, nullptr },
276 #if defined(__GNUC__) && (__GNUC__ == 9)
277 		// This works around a GCC 9 bug, see details in https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90947.
278 		// This array ends up being compiled as all nullptrs, tripping the assertions below.
279 		{ "", nullptr, nullptr, "$" },
280 #else
281 		{ "", nullptr, nullptr, nullptr },
282 #endif
283 	};
284 
285 	assert(vecsize >= 1 && vecsize <= 4);
286 	assert(index >= 0 && index < 4);
287 	assert(swizzle[vecsize - 1][index]);
288 
289 	return swizzle[vecsize - 1][index];
290 }
291 
292 void CompilerGLSL::reset()
293 {
294 	// We do some speculative optimizations which should pretty much always work out,
295 	// but just in case the SPIR-V is rather weird, recompile until it's happy.
296 	// This typically only means one extra pass.
297 	clear_force_recompile();
298 
299 	// Clear invalid expression tracking.
300 	invalid_expressions.clear();
301 	current_function = nullptr;
302 
303 	// Clear temporary usage tracking.
304 	expression_usage_counts.clear();
305 	forwarded_temporaries.clear();
306 	suppressed_usage_tracking.clear();
307 
308 	// Ensure that we declare phi-variable copies even if the original declaration isn't deferred
309 	flushed_phi_variables.clear();
310 
311 	reset_name_caches();
312 
313 	ir.for_each_typed_id<SPIRFunction>([&](uint32_t, SPIRFunction &func) {
314 		func.active = false;
315 		func.flush_undeclared = true;
316 	});
317 
318 	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { var.dependees.clear(); });
319 
320 	ir.reset_all_of_type<SPIRExpression>();
321 	ir.reset_all_of_type<SPIRAccessChain>();
322 
323 	statement_count = 0;
324 	indent = 0;
325 	current_loop_level = 0;
326 }
327 
328 void CompilerGLSL::remap_pls_variables()
329 {
330 	for (auto &input : pls_inputs)
331 	{
332 		auto &var = get<SPIRVariable>(input.id);
333 
334 		bool input_is_target = false;
335 		if (var.storage == StorageClassUniformConstant)
336 		{
337 			auto &type = get<SPIRType>(var.basetype);
338 			input_is_target = type.image.dim == DimSubpassData;
339 		}
340 
341 		if (var.storage != StorageClassInput && !input_is_target)
342 			SPIRV_CROSS_THROW("Can only use in and target variables for PLS inputs.");
343 		var.remapped_variable = true;
344 	}
345 
346 	for (auto &output : pls_outputs)
347 	{
348 		auto &var = get<SPIRVariable>(output.id);
349 		if (var.storage != StorageClassOutput)
350 			SPIRV_CROSS_THROW("Can only use out variables for PLS outputs.");
351 		var.remapped_variable = true;
352 	}
353 }
354 
355 void CompilerGLSL::remap_ext_framebuffer_fetch(uint32_t input_attachment_index, uint32_t color_location)
356 {
357 	subpass_to_framebuffer_fetch_attachment.push_back({ input_attachment_index, color_location });
358 	inout_color_attachments.insert(color_location);
359 }
360 
361 void CompilerGLSL::find_static_extensions()
362 {
363 	ir.for_each_typed_id<SPIRType>([&](uint32_t, const SPIRType &type) {
364 		if (type.basetype == SPIRType::Double)
365 		{
366 			if (options.es)
367 				SPIRV_CROSS_THROW("FP64 not supported in ES profile.");
368 			if (!options.es && options.version < 400)
369 				require_extension_internal("GL_ARB_gpu_shader_fp64");
370 		}
371 		else if (type.basetype == SPIRType::Int64 || type.basetype == SPIRType::UInt64)
372 		{
373 			if (options.es)
374 				SPIRV_CROSS_THROW("64-bit integers not supported in ES profile.");
375 			if (!options.es)
376 				require_extension_internal("GL_ARB_gpu_shader_int64");
377 		}
378 		else if (type.basetype == SPIRType::Half)
379 		{
380 			require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_float16");
381 			if (options.vulkan_semantics)
382 				require_extension_internal("GL_EXT_shader_16bit_storage");
383 		}
384 		else if (type.basetype == SPIRType::SByte || type.basetype == SPIRType::UByte)
385 		{
386 			require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_int8");
387 			if (options.vulkan_semantics)
388 				require_extension_internal("GL_EXT_shader_8bit_storage");
389 		}
390 		else if (type.basetype == SPIRType::Short || type.basetype == SPIRType::UShort)
391 		{
392 			require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_int16");
393 			if (options.vulkan_semantics)
394 				require_extension_internal("GL_EXT_shader_16bit_storage");
395 		}
396 	});
397 
398 	auto &execution = get_entry_point();
399 	switch (execution.model)
400 	{
401 	case ExecutionModelGLCompute:
402 		if (!options.es && options.version < 430)
403 			require_extension_internal("GL_ARB_compute_shader");
404 		if (options.es && options.version < 310)
405 			SPIRV_CROSS_THROW("At least ESSL 3.10 required for compute shaders.");
406 		break;
407 
408 	case ExecutionModelGeometry:
409 		if (options.es && options.version < 320)
410 			require_extension_internal("GL_EXT_geometry_shader");
411 		if (!options.es && options.version < 150)
412 			require_extension_internal("GL_ARB_geometry_shader4");
413 
414 		if (execution.flags.get(ExecutionModeInvocations) && execution.invocations != 1)
415 		{
416 			// Instanced GS is part of 400 core or this extension.
417 			if (!options.es && options.version < 400)
418 				require_extension_internal("GL_ARB_gpu_shader5");
419 		}
420 		break;
421 
422 	case ExecutionModelTessellationEvaluation:
423 	case ExecutionModelTessellationControl:
424 		if (options.es && options.version < 320)
425 			require_extension_internal("GL_EXT_tessellation_shader");
426 		if (!options.es && options.version < 400)
427 			require_extension_internal("GL_ARB_tessellation_shader");
428 		break;
429 
430 	case ExecutionModelRayGenerationKHR:
431 	case ExecutionModelIntersectionKHR:
432 	case ExecutionModelAnyHitKHR:
433 	case ExecutionModelClosestHitKHR:
434 	case ExecutionModelMissKHR:
435 	case ExecutionModelCallableKHR:
436 		// NV enums are aliases.
437 		if (options.es || options.version < 460)
438 			SPIRV_CROSS_THROW("Ray tracing shaders require non-es profile with version 460 or above.");
439 		if (!options.vulkan_semantics)
440 			SPIRV_CROSS_THROW("Ray tracing requires Vulkan semantics.");
441 
442 		// Need to figure out if we should target KHR or NV extension based on capabilities.
443 		for (auto &cap : ir.declared_capabilities)
444 		{
445 			if (cap == CapabilityRayTracingKHR || cap == CapabilityRayQueryKHR)
446 			{
447 				ray_tracing_is_khr = true;
448 				break;
449 			}
450 		}
451 
452 		if (ray_tracing_is_khr)
453 		{
454 			// In KHR ray tracing we pass payloads by pointer instead of location,
455 			// so make sure we assign locations properly.
456 			ray_tracing_khr_fixup_locations();
457 			require_extension_internal("GL_EXT_ray_tracing");
458 		}
459 		else
460 			require_extension_internal("GL_NV_ray_tracing");
461 		break;
462 
463 	default:
464 		break;
465 	}
466 
467 	if (!pls_inputs.empty() || !pls_outputs.empty())
468 	{
469 		if (execution.model != ExecutionModelFragment)
470 			SPIRV_CROSS_THROW("Can only use GL_EXT_shader_pixel_local_storage in fragment shaders.");
471 		require_extension_internal("GL_EXT_shader_pixel_local_storage");
472 	}
473 
474 	if (!inout_color_attachments.empty())
475 	{
476 		if (execution.model != ExecutionModelFragment)
477 			SPIRV_CROSS_THROW("Can only use GL_EXT_shader_framebuffer_fetch in fragment shaders.");
478 		if (options.vulkan_semantics)
479 			SPIRV_CROSS_THROW("Cannot use EXT_shader_framebuffer_fetch in Vulkan GLSL.");
480 		require_extension_internal("GL_EXT_shader_framebuffer_fetch");
481 	}
482 
483 	if (options.separate_shader_objects && !options.es && options.version < 410)
484 		require_extension_internal("GL_ARB_separate_shader_objects");
485 
486 	if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
487 	{
488 		if (!options.vulkan_semantics)
489 			SPIRV_CROSS_THROW("GL_EXT_buffer_reference is only supported in Vulkan GLSL.");
490 		if (options.es && options.version < 320)
491 			SPIRV_CROSS_THROW("GL_EXT_buffer_reference requires ESSL 320.");
492 		else if (!options.es && options.version < 450)
493 			SPIRV_CROSS_THROW("GL_EXT_buffer_reference requires GLSL 450.");
494 		require_extension_internal("GL_EXT_buffer_reference");
495 	}
496 	else if (ir.addressing_model != AddressingModelLogical)
497 	{
498 		SPIRV_CROSS_THROW("Only Logical and PhysicalStorageBuffer64EXT addressing models are supported.");
499 	}
500 
501 	// Check for nonuniform qualifier and passthrough.
502 	// Instead of looping over all decorations to find this, just look at capabilities.
503 	for (auto &cap : ir.declared_capabilities)
504 	{
505 		switch (cap)
506 		{
507 		case CapabilityShaderNonUniformEXT:
508 			if (!options.vulkan_semantics)
509 				require_extension_internal("GL_NV_gpu_shader5");
510 			else
511 				require_extension_internal("GL_EXT_nonuniform_qualifier");
512 			break;
513 		case CapabilityRuntimeDescriptorArrayEXT:
514 			if (!options.vulkan_semantics)
515 				SPIRV_CROSS_THROW("GL_EXT_nonuniform_qualifier is only supported in Vulkan GLSL.");
516 			require_extension_internal("GL_EXT_nonuniform_qualifier");
517 			break;
518 
519 		case CapabilityGeometryShaderPassthroughNV:
520 			if (execution.model == ExecutionModelGeometry)
521 			{
522 				require_extension_internal("GL_NV_geometry_shader_passthrough");
523 				execution.geometry_passthrough = true;
524 			}
525 			break;
526 
527 		case CapabilityVariablePointers:
528 		case CapabilityVariablePointersStorageBuffer:
529 			SPIRV_CROSS_THROW("VariablePointers capability is not supported in GLSL.");
530 
531 		default:
532 			break;
533 		}
534 	}
535 }
536 
537 void CompilerGLSL::ray_tracing_khr_fixup_locations()
538 {
539 	uint32_t location = 0;
540 	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
541 		if (var.storage != StorageClassRayPayloadKHR && var.storage != StorageClassCallableDataKHR)
542 			return;
543 		if (!interface_variable_exists_in_entry_point(var.self))
544 			return;
545 		set_decoration(var.self, DecorationLocation, location++);
546 	});
547 }
548 
549 string CompilerGLSL::compile()
550 {
551 	ir.fixup_reserved_names();
552 
553 	if (options.vulkan_semantics)
554 		backend.allow_precision_qualifiers = true;
555 	else
556 	{
557 		// only NV_gpu_shader5 supports divergent indexing on OpenGL, and it does so without extra qualifiers
558 		backend.nonuniform_qualifier = "";
559 		backend.needs_row_major_load_workaround = true;
560 	}
561 	backend.force_gl_in_out_block = true;
562 	backend.supports_extensions = true;
563 	backend.use_array_constructor = true;
564 
565 	if (is_legacy_es())
566 		backend.support_case_fallthrough = false;
567 
568 	// Scan the SPIR-V to find trivial uses of extensions.
569 	fixup_type_alias();
570 	reorder_type_alias();
571 	build_function_control_flow_graphs_and_analyze();
572 	find_static_extensions();
573 	fixup_image_load_store_access();
574 	update_active_builtins();
575 	analyze_image_and_sampler_usage();
576 	analyze_interlocked_resource_usage();
577 	if (!inout_color_attachments.empty())
578 		emit_inout_fragment_outputs_copy_to_subpass_inputs();
579 
580 	// Shaders might cast unrelated data to pointers of non-block types.
581 	// Find all such instances and make sure we can cast the pointers to a synthesized block type.
582 	if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
583 		analyze_non_block_pointer_types();
584 
585 	uint32_t pass_count = 0;
586 	do
587 	{
588 		if (pass_count >= 3)
589 			SPIRV_CROSS_THROW("Over 3 compilation loops detected. Must be a bug!");
590 
591 		reset();
592 
593 		buffer.reset();
594 
595 		emit_header();
596 		emit_resources();
597 		emit_extension_workarounds(get_execution_model());
598 
599 		emit_function(get<SPIRFunction>(ir.default_entry_point), Bitset());
600 
601 		pass_count++;
602 	} while (is_forcing_recompilation());
603 
604 	// Implement the interlocked wrapper function at the end.
605 	// The body was implemented in lieu of main().
606 	if (interlocked_is_complex)
607 	{
608 		statement("void main()");
609 		begin_scope();
610 		statement("// Interlocks were used in a way not compatible with GLSL, this is very slow.");
611 		if (options.es)
612 			statement("beginInvocationInterlockNV();");
613 		else
614 			statement("beginInvocationInterlockARB();");
615 		statement("spvMainInterlockedBody();");
616 		if (options.es)
617 			statement("endInvocationInterlockNV();");
618 		else
619 			statement("endInvocationInterlockARB();");
620 		end_scope();
621 	}
622 
623 	// Entry point in GLSL is always main().
624 	get_entry_point().name = "main";
625 
626 	return buffer.str();
627 }
628 
629 std::string CompilerGLSL::get_partial_source()
630 {
631 	return buffer.str();
632 }
633 
634 void CompilerGLSL::build_workgroup_size(SmallVector<string> &arguments, const SpecializationConstant &wg_x,
635                                         const SpecializationConstant &wg_y, const SpecializationConstant &wg_z)
636 {
637 	auto &execution = get_entry_point();
638 
639 	if (wg_x.id)
640 	{
641 		if (options.vulkan_semantics)
642 			arguments.push_back(join("local_size_x_id = ", wg_x.constant_id));
643 		else
644 			arguments.push_back(join("local_size_x = ", get<SPIRConstant>(wg_x.id).specialization_constant_macro_name));
645 	}
646 	else
647 		arguments.push_back(join("local_size_x = ", execution.workgroup_size.x));
648 
649 	if (wg_y.id)
650 	{
651 		if (options.vulkan_semantics)
652 			arguments.push_back(join("local_size_y_id = ", wg_y.constant_id));
653 		else
654 			arguments.push_back(join("local_size_y = ", get<SPIRConstant>(wg_y.id).specialization_constant_macro_name));
655 	}
656 	else
657 		arguments.push_back(join("local_size_y = ", execution.workgroup_size.y));
658 
659 	if (wg_z.id)
660 	{
661 		if (options.vulkan_semantics)
662 			arguments.push_back(join("local_size_z_id = ", wg_z.constant_id));
663 		else
664 			arguments.push_back(join("local_size_z = ", get<SPIRConstant>(wg_z.id).specialization_constant_macro_name));
665 	}
666 	else
667 		arguments.push_back(join("local_size_z = ", execution.workgroup_size.z));
668 }
669 
670 void CompilerGLSL::request_subgroup_feature(ShaderSubgroupSupportHelper::Feature feature)
671 {
672 	if (options.vulkan_semantics)
673 	{
674 		auto khr_extension = ShaderSubgroupSupportHelper::get_KHR_extension_for_feature(feature);
675 		require_extension_internal(ShaderSubgroupSupportHelper::get_extension_name(khr_extension));
676 	}
677 	else
678 	{
679 		if (!shader_subgroup_supporter.is_feature_requested(feature))
680 			force_recompile();
681 		shader_subgroup_supporter.request_feature(feature);
682 	}
683 }
684 
685 void CompilerGLSL::emit_header()
686 {
687 	auto &execution = get_entry_point();
688 	statement("#version ", options.version, options.es && options.version > 100 ? " es" : "");
689 
690 	if (!options.es && options.version < 420)
691 	{
692 		// Needed for binding = # on UBOs, etc.
693 		if (options.enable_420pack_extension)
694 		{
695 			statement("#ifdef GL_ARB_shading_language_420pack");
696 			statement("#extension GL_ARB_shading_language_420pack : require");
697 			statement("#endif");
698 		}
699 		// Needed for: layout(early_fragment_tests) in;
700 		if (execution.flags.get(ExecutionModeEarlyFragmentTests))
701 			require_extension_internal("GL_ARB_shader_image_load_store");
702 	}
703 
704 	// Needed for: layout(post_depth_coverage) in;
705 	if (execution.flags.get(ExecutionModePostDepthCoverage))
706 		require_extension_internal("GL_ARB_post_depth_coverage");
707 
708 	// Needed for: layout({pixel,sample}_interlock_[un]ordered) in;
709 	if (execution.flags.get(ExecutionModePixelInterlockOrderedEXT) ||
710 	    execution.flags.get(ExecutionModePixelInterlockUnorderedEXT) ||
711 	    execution.flags.get(ExecutionModeSampleInterlockOrderedEXT) ||
712 	    execution.flags.get(ExecutionModeSampleInterlockUnorderedEXT))
713 	{
714 		if (options.es)
715 		{
716 			if (options.version < 310)
717 				SPIRV_CROSS_THROW("At least ESSL 3.10 required for fragment shader interlock.");
718 			require_extension_internal("GL_NV_fragment_shader_interlock");
719 		}
720 		else
721 		{
722 			if (options.version < 420)
723 				require_extension_internal("GL_ARB_shader_image_load_store");
724 			require_extension_internal("GL_ARB_fragment_shader_interlock");
725 		}
726 	}
727 
728 	for (auto &ext : forced_extensions)
729 	{
730 		if (ext == "GL_EXT_shader_explicit_arithmetic_types_float16")
731 		{
732 			// Special case, this extension has a potential fallback to another vendor extension in normal GLSL.
733 			// GL_AMD_gpu_shader_half_float is a superset, so try that first.
734 			statement("#if defined(GL_AMD_gpu_shader_half_float)");
735 			statement("#extension GL_AMD_gpu_shader_half_float : require");
736 			if (!options.vulkan_semantics)
737 			{
738 				statement("#elif defined(GL_NV_gpu_shader5)");
739 				statement("#extension GL_NV_gpu_shader5 : require");
740 			}
741 			else
742 			{
743 				statement("#elif defined(GL_EXT_shader_explicit_arithmetic_types_float16)");
744 				statement("#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require");
745 			}
746 			statement("#else");
747 			statement("#error No extension available for FP16.");
748 			statement("#endif");
749 		}
750 		else if (ext == "GL_EXT_shader_explicit_arithmetic_types_int16")
751 		{
752 			if (options.vulkan_semantics)
753 				statement("#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require");
754 			else
755 			{
756 				statement("#if defined(GL_AMD_gpu_shader_int16)");
757 				statement("#extension GL_AMD_gpu_shader_int16 : require");
758 				statement("#else");
759 				statement("#error No extension available for Int16.");
760 				statement("#endif");
761 			}
762 		}
763 		else if (ext == "GL_ARB_post_depth_coverage")
764 		{
765 			if (options.es)
766 				statement("#extension GL_EXT_post_depth_coverage : require");
767 			else
768 			{
769 				statement("#if defined(GL_ARB_post_depth_coverage)");
770 				statement("#extension GL_ARB_post_depth_coverage : require");
771 				statement("#else");
772 				statement("#extension GL_EXT_post_depth_coverage : require");
773 				statement("#endif");
774 			}
775 		}
776 		else if (!options.vulkan_semantics && ext == "GL_ARB_shader_draw_parameters")
777 		{
778 			// Soft-enable this extension on plain GLSL.
779 			statement("#ifdef ", ext);
780 			statement("#extension ", ext, " : enable");
781 			statement("#endif");
782 		}
783 		else
784 			statement("#extension ", ext, " : require");
785 	}
786 
787 	if (!options.vulkan_semantics)
788 	{
789 		using Supp = ShaderSubgroupSupportHelper;
790 		auto result = shader_subgroup_supporter.resolve();
791 
792 		for (uint32_t feature_index = 0; feature_index < Supp::FeatureCount; feature_index++)
793 		{
794 			auto feature = static_cast<Supp::Feature>(feature_index);
795 			if (!shader_subgroup_supporter.is_feature_requested(feature))
796 				continue;
797 
798 			auto exts = Supp::get_candidates_for_feature(feature, result);
799 			if (exts.empty())
800 				continue;
801 
802 			statement("");
803 
804 			for (auto &ext : exts)
805 			{
806 				const char *name = Supp::get_extension_name(ext);
807 				const char *extra_predicate = Supp::get_extra_required_extension_predicate(ext);
808 				auto extra_names = Supp::get_extra_required_extension_names(ext);
809 				statement(&ext != &exts.front() ? "#elif" : "#if", " defined(", name, ")",
810 				          (*extra_predicate != '\0' ? " && " : ""), extra_predicate);
811 				for (const auto &e : extra_names)
812 					statement("#extension ", e, " : enable");
813 				statement("#extension ", name, " : require");
814 			}
815 
816 			if (!Supp::can_feature_be_implemented_without_extensions(feature))
817 			{
818 				statement("#else");
819 				statement("#error No extensions available to emulate requested subgroup feature.");
820 			}
821 
822 			statement("#endif");
823 		}
824 	}
825 
826 	for (auto &header : header_lines)
827 		statement(header);
828 
829 	SmallVector<string> inputs;
830 	SmallVector<string> outputs;
831 
832 	switch (execution.model)
833 	{
834 	case ExecutionModelGeometry:
835 		if ((execution.flags.get(ExecutionModeInvocations)) && execution.invocations != 1)
836 			inputs.push_back(join("invocations = ", execution.invocations));
837 		if (execution.flags.get(ExecutionModeInputPoints))
838 			inputs.push_back("points");
839 		if (execution.flags.get(ExecutionModeInputLines))
840 			inputs.push_back("lines");
841 		if (execution.flags.get(ExecutionModeInputLinesAdjacency))
842 			inputs.push_back("lines_adjacency");
843 		if (execution.flags.get(ExecutionModeTriangles))
844 			inputs.push_back("triangles");
845 		if (execution.flags.get(ExecutionModeInputTrianglesAdjacency))
846 			inputs.push_back("triangles_adjacency");
847 
848 		if (!execution.geometry_passthrough)
849 		{
850 			// For passthrough, these are implied and cannot be declared in the shader.
851 			outputs.push_back(join("max_vertices = ", execution.output_vertices));
852 			if (execution.flags.get(ExecutionModeOutputTriangleStrip))
853 				outputs.push_back("triangle_strip");
854 			if (execution.flags.get(ExecutionModeOutputPoints))
855 				outputs.push_back("points");
856 			if (execution.flags.get(ExecutionModeOutputLineStrip))
857 				outputs.push_back("line_strip");
858 		}
859 		break;
860 
861 	case ExecutionModelTessellationControl:
862 		if (execution.flags.get(ExecutionModeOutputVertices))
863 			outputs.push_back(join("vertices = ", execution.output_vertices));
864 		break;
865 
866 	case ExecutionModelTessellationEvaluation:
867 		if (execution.flags.get(ExecutionModeQuads))
868 			inputs.push_back("quads");
869 		if (execution.flags.get(ExecutionModeTriangles))
870 			inputs.push_back("triangles");
871 		if (execution.flags.get(ExecutionModeIsolines))
872 			inputs.push_back("isolines");
873 		if (execution.flags.get(ExecutionModePointMode))
874 			inputs.push_back("point_mode");
875 
876 		if (!execution.flags.get(ExecutionModeIsolines))
877 		{
878 			if (execution.flags.get(ExecutionModeVertexOrderCw))
879 				inputs.push_back("cw");
880 			if (execution.flags.get(ExecutionModeVertexOrderCcw))
881 				inputs.push_back("ccw");
882 		}
883 
884 		if (execution.flags.get(ExecutionModeSpacingFractionalEven))
885 			inputs.push_back("fractional_even_spacing");
886 		if (execution.flags.get(ExecutionModeSpacingFractionalOdd))
887 			inputs.push_back("fractional_odd_spacing");
888 		if (execution.flags.get(ExecutionModeSpacingEqual))
889 			inputs.push_back("equal_spacing");
890 		break;
891 
892 	case ExecutionModelGLCompute:
893 	{
894 		if (execution.workgroup_size.constant != 0)
895 		{
896 			SpecializationConstant wg_x, wg_y, wg_z;
897 			get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);
898 
899 			// If there are any spec constants on legacy GLSL, defer declaration; we need to set up macro
900 			// declarations before we can emit the work group size.
901 			if (options.vulkan_semantics ||
902 			    ((wg_x.id == ConstantID(0)) && (wg_y.id == ConstantID(0)) && (wg_z.id == ConstantID(0))))
903 				build_workgroup_size(inputs, wg_x, wg_y, wg_z);
904 		}
905 		else
906 		{
907 			inputs.push_back(join("local_size_x = ", execution.workgroup_size.x));
908 			inputs.push_back(join("local_size_y = ", execution.workgroup_size.y));
909 			inputs.push_back(join("local_size_z = ", execution.workgroup_size.z));
910 		}
911 		break;
912 	}
913 
914 	case ExecutionModelFragment:
915 		if (options.es)
916 		{
917 			switch (options.fragment.default_float_precision)
918 			{
919 			case Options::Lowp:
920 				statement("precision lowp float;");
921 				break;
922 
923 			case Options::Mediump:
924 				statement("precision mediump float;");
925 				break;
926 
927 			case Options::Highp:
928 				statement("precision highp float;");
929 				break;
930 
931 			default:
932 				break;
933 			}
934 
935 			switch (options.fragment.default_int_precision)
936 			{
937 			case Options::Lowp:
938 				statement("precision lowp int;");
939 				break;
940 
941 			case Options::Mediump:
942 				statement("precision mediump int;");
943 				break;
944 
945 			case Options::Highp:
946 				statement("precision highp int;");
947 				break;
948 
949 			default:
950 				break;
951 			}
952 		}
953 
954 		if (execution.flags.get(ExecutionModeEarlyFragmentTests))
955 			inputs.push_back("early_fragment_tests");
956 		if (execution.flags.get(ExecutionModePostDepthCoverage))
957 			inputs.push_back("post_depth_coverage");
958 
959 		if (execution.flags.get(ExecutionModePixelInterlockOrderedEXT))
960 			inputs.push_back("pixel_interlock_ordered");
961 		else if (execution.flags.get(ExecutionModePixelInterlockUnorderedEXT))
962 			inputs.push_back("pixel_interlock_unordered");
963 		else if (execution.flags.get(ExecutionModeSampleInterlockOrderedEXT))
964 			inputs.push_back("sample_interlock_ordered");
965 		else if (execution.flags.get(ExecutionModeSampleInterlockUnorderedEXT))
966 			inputs.push_back("sample_interlock_unordered");
967 
968 		if (!options.es && execution.flags.get(ExecutionModeDepthGreater))
969 			statement("layout(depth_greater) out float gl_FragDepth;");
970 		else if (!options.es && execution.flags.get(ExecutionModeDepthLess))
971 			statement("layout(depth_less) out float gl_FragDepth;");
972 
973 		break;
974 
975 	default:
976 		break;
977 	}
978 
979 	if (!inputs.empty())
980 		statement("layout(", merge(inputs), ") in;");
981 	if (!outputs.empty())
982 		statement("layout(", merge(outputs), ") out;");
983 
984 	statement("");
985 }
986 
987 bool CompilerGLSL::type_is_empty(const SPIRType &type)
988 {
989 	return type.basetype == SPIRType::Struct && type.member_types.empty();
990 }
991 
992 void CompilerGLSL::emit_struct(SPIRType &type)
993 {
994 	// Struct types can be stamped out multiple times
995 	// with just different offsets, matrix layouts, etc ...
996 	// Type-punning with these types is legal, which complicates things
997 	// when we are storing struct and array types in an SSBO for example.
998 	// If the type master is packed however, we can no longer assume that the struct declaration will be redundant.
999 	if (type.type_alias != TypeID(0) &&
1000 	    !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked))
1001 		return;
1002 
1003 	add_resource_name(type.self);
1004 	auto name = type_to_glsl(type);
1005 
1006 	statement(!backend.explicit_struct_type ? "struct " : "", name);
1007 	begin_scope();
1008 
1009 	type.member_name_cache.clear();
1010 
1011 	uint32_t i = 0;
1012 	bool emitted = false;
1013 	for (auto &member : type.member_types)
1014 	{
1015 		add_member_name(type, i);
1016 		emit_struct_member(type, member, i);
1017 		i++;
1018 		emitted = true;
1019 	}
1020 
1021 	// Don't declare empty structs in GLSL; this is not allowed.
1022 	if (type_is_empty(type) && !backend.supports_empty_struct)
1023 	{
1024 		statement("int empty_struct_member;");
1025 		emitted = true;
1026 	}
1027 
1028 	if (has_extended_decoration(type.self, SPIRVCrossDecorationPaddingTarget))
1029 		emit_struct_padding_target(type);
1030 
1031 	end_scope_decl();
1032 
1033 	if (emitted)
1034 		statement("");
1035 }
1036 
1037 string CompilerGLSL::to_interpolation_qualifiers(const Bitset &flags)
1038 {
1039 	string res;
1040 	//if (flags & (1ull << DecorationSmooth))
1041 	//    res += "smooth ";
1042 	if (flags.get(DecorationFlat))
1043 		res += "flat ";
1044 	if (flags.get(DecorationNoPerspective))
1045 		res += "noperspective ";
1046 	if (flags.get(DecorationCentroid))
1047 		res += "centroid ";
1048 	if (flags.get(DecorationPatch))
1049 		res += "patch ";
1050 	if (flags.get(DecorationSample))
1051 		res += "sample ";
1052 	if (flags.get(DecorationInvariant))
1053 		res += "invariant ";
1054 	if (flags.get(DecorationExplicitInterpAMD))
1055 		res += "__explicitInterpAMD ";
1056 
1057 	return res;
1058 }
1059 
1060 string CompilerGLSL::layout_for_member(const SPIRType &type, uint32_t index)
1061 {
1062 	if (is_legacy())
1063 		return "";
1064 
1065 	bool is_block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) ||
1066 	                ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
1067 	if (!is_block)
1068 		return "";
1069 
1070 	auto &memb = ir.meta[type.self].members;
1071 	if (index >= memb.size())
1072 		return "";
1073 	auto &dec = memb[index];
1074 
1075 	SmallVector<string> attr;
1076 
1077 	if (has_member_decoration(type.self, index, DecorationPassthroughNV))
1078 		attr.push_back("passthrough");
1079 
1080 	// We can only apply layouts on members in block interfaces.
1081 	// This is a bit problematic because in SPIR-V decorations are applied on the struct types directly.
1082 	// This is not supported on GLSL, so we have to make the assumption that if a struct within our buffer block struct
1083 	// has a decoration, it was originally caused by a top-level layout() qualifier in GLSL.
1084 	//
1085 	// We would like to go from (SPIR-V style):
1086 	//
1087 	// struct Foo { layout(row_major) mat4 matrix; };
1088 	// buffer UBO { Foo foo; };
1089 	//
1090 	// to
1091 	//
1092 	// struct Foo { mat4 matrix; }; // GLSL doesn't support any layout shenanigans in raw struct declarations.
1093 	// buffer UBO { layout(row_major) Foo foo; }; // Apply the layout on top-level.
1094 	auto flags = combined_decoration_for_member(type, index);
1095 
1096 	if (flags.get(DecorationRowMajor))
1097 		attr.push_back("row_major");
1098 	// We don't emit any global layouts, so column_major is default.
1099 	//if (flags & (1ull << DecorationColMajor))
1100 	//    attr.push_back("column_major");
1101 
1102 	if (dec.decoration_flags.get(DecorationLocation) && can_use_io_location(type.storage, true))
1103 		attr.push_back(join("location = ", dec.location));
1104 
1105 	// Can only declare component if we can declare location.
1106 	if (dec.decoration_flags.get(DecorationComponent) && can_use_io_location(type.storage, true))
1107 	{
1108 		if (!options.es)
1109 		{
1110 			if (options.version < 440 && options.version >= 140)
1111 				require_extension_internal("GL_ARB_enhanced_layouts");
1112 			else if (options.version < 140)
1113 				SPIRV_CROSS_THROW("Component decoration is not supported in targets below GLSL 1.40.");
1114 			attr.push_back(join("component = ", dec.component));
1115 		}
1116 		else
1117 			SPIRV_CROSS_THROW("Component decoration is not supported in ES targets.");
1118 	}
1119 
1120 	// SPIRVCrossDecorationPacked is set by layout_for_variable earlier to mark that we need to emit offset qualifiers.
1121 	// This is only done selectively in GLSL as needed.
1122 	if (has_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset) &&
1123 	    dec.decoration_flags.get(DecorationOffset))
1124 		attr.push_back(join("offset = ", dec.offset));
1125 	else if (type.storage == StorageClassOutput && dec.decoration_flags.get(DecorationOffset))
1126 		attr.push_back(join("xfb_offset = ", dec.offset));
1127 
1128 	if (attr.empty())
1129 		return "";
1130 
1131 	string res = "layout(";
1132 	res += merge(attr);
1133 	res += ") ";
1134 	return res;
1135 }
1136 
1137 const char *CompilerGLSL::format_to_glsl(spv::ImageFormat format)
1138 {
1139 	if (options.es && is_desktop_only_format(format))
1140 		SPIRV_CROSS_THROW("Attempting to use image format not supported in ES profile.");
1141 
1142 	switch (format)
1143 	{
1144 	case ImageFormatRgba32f:
1145 		return "rgba32f";
1146 	case ImageFormatRgba16f:
1147 		return "rgba16f";
1148 	case ImageFormatR32f:
1149 		return "r32f";
1150 	case ImageFormatRgba8:
1151 		return "rgba8";
1152 	case ImageFormatRgba8Snorm:
1153 		return "rgba8_snorm";
1154 	case ImageFormatRg32f:
1155 		return "rg32f";
1156 	case ImageFormatRg16f:
1157 		return "rg16f";
1158 	case ImageFormatRgba32i:
1159 		return "rgba32i";
1160 	case ImageFormatRgba16i:
1161 		return "rgba16i";
1162 	case ImageFormatR32i:
1163 		return "r32i";
1164 	case ImageFormatRgba8i:
1165 		return "rgba8i";
1166 	case ImageFormatRg32i:
1167 		return "rg32i";
1168 	case ImageFormatRg16i:
1169 		return "rg16i";
1170 	case ImageFormatRgba32ui:
1171 		return "rgba32ui";
1172 	case ImageFormatRgba16ui:
1173 		return "rgba16ui";
1174 	case ImageFormatR32ui:
1175 		return "r32ui";
1176 	case ImageFormatRgba8ui:
1177 		return "rgba8ui";
1178 	case ImageFormatRg32ui:
1179 		return "rg32ui";
1180 	case ImageFormatRg16ui:
1181 		return "rg16ui";
1182 	case ImageFormatR11fG11fB10f:
1183 		return "r11f_g11f_b10f";
1184 	case ImageFormatR16f:
1185 		return "r16f";
1186 	case ImageFormatRgb10A2:
1187 		return "rgb10_a2";
1188 	case ImageFormatR8:
1189 		return "r8";
1190 	case ImageFormatRg8:
1191 		return "rg8";
1192 	case ImageFormatR16:
1193 		return "r16";
1194 	case ImageFormatRg16:
1195 		return "rg16";
1196 	case ImageFormatRgba16:
1197 		return "rgba16";
1198 	case ImageFormatR16Snorm:
1199 		return "r16_snorm";
1200 	case ImageFormatRg16Snorm:
1201 		return "rg16_snorm";
1202 	case ImageFormatRgba16Snorm:
1203 		return "rgba16_snorm";
1204 	case ImageFormatR8Snorm:
1205 		return "r8_snorm";
1206 	case ImageFormatRg8Snorm:
1207 		return "rg8_snorm";
1208 	case ImageFormatR8ui:
1209 		return "r8ui";
1210 	case ImageFormatRg8ui:
1211 		return "rg8ui";
1212 	case ImageFormatR16ui:
1213 		return "r16ui";
1214 	case ImageFormatRgb10a2ui:
1215 		return "rgb10_a2ui";
1216 	case ImageFormatR8i:
1217 		return "r8i";
1218 	case ImageFormatRg8i:
1219 		return "rg8i";
1220 	case ImageFormatR16i:
1221 		return "r16i";
1222 	default:
1223 	case ImageFormatUnknown:
1224 		return nullptr;
1225 	}
1226 }
1227 
1228 uint32_t CompilerGLSL::type_to_packed_base_size(const SPIRType &type, BufferPackingStandard)
1229 {
1230 	switch (type.basetype)
1231 	{
1232 	case SPIRType::Double:
1233 	case SPIRType::Int64:
1234 	case SPIRType::UInt64:
1235 		return 8;
1236 	case SPIRType::Float:
1237 	case SPIRType::Int:
1238 	case SPIRType::UInt:
1239 		return 4;
1240 	case SPIRType::Half:
1241 	case SPIRType::Short:
1242 	case SPIRType::UShort:
1243 		return 2;
1244 	case SPIRType::SByte:
1245 	case SPIRType::UByte:
1246 		return 1;
1247 
1248 	default:
1249 		SPIRV_CROSS_THROW("Unrecognized type in type_to_packed_base_size.");
1250 	}
1251 }
1252 
1253 uint32_t CompilerGLSL::type_to_packed_alignment(const SPIRType &type, const Bitset &flags,
1254                                                 BufferPackingStandard packing)
1255 {
1256 	// If using PhysicalStorageBufferEXT storage class, this is a pointer,
1257 	// and is 64-bit.
1258 	if (type.storage == StorageClassPhysicalStorageBufferEXT)
1259 	{
1260 		if (!type.pointer)
1261 			SPIRV_CROSS_THROW("Types in PhysicalStorageBufferEXT must be pointers.");
1262 
1263 		if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
1264 		{
1265 			if (packing_is_vec4_padded(packing) && type_is_array_of_pointers(type))
1266 				return 16;
1267 			else
1268 				return 8;
1269 		}
1270 		else
1271 			SPIRV_CROSS_THROW("AddressingModelPhysicalStorageBuffer64EXT must be used for PhysicalStorageBufferEXT.");
1272 	}
1273 
1274 	if (!type.array.empty())
1275 	{
1276 		uint32_t minimum_alignment = 1;
1277 		if (packing_is_vec4_padded(packing))
1278 			minimum_alignment = 16;
1279 
1280 		auto *tmp = &get<SPIRType>(type.parent_type);
1281 		while (!tmp->array.empty())
1282 			tmp = &get<SPIRType>(tmp->parent_type);
1283 
1284 		// Get the alignment of the base type, then maybe round up.
1285 		return max(minimum_alignment, type_to_packed_alignment(*tmp, flags, packing));
1286 	}
1287 
1288 	if (type.basetype == SPIRType::Struct)
1289 	{
1290 		// Rule 9. A struct's alignment is the maximum alignment of its members.
1291 		uint32_t alignment = 1;
1292 		for (uint32_t i = 0; i < type.member_types.size(); i++)
1293 		{
1294 			auto member_flags = ir.meta[type.self].members[i].decoration_flags;
1295 			alignment =
1296 			    max(alignment, type_to_packed_alignment(get<SPIRType>(type.member_types[i]), member_flags, packing));
1297 		}
1298 
1299 		// In std140, struct alignment is rounded up to 16.
1300 		if (packing_is_vec4_padded(packing))
1301 			alignment = max(alignment, 16u);
1302 
1303 		return alignment;
1304 	}
1305 	else
1306 	{
1307 		const uint32_t base_alignment = type_to_packed_base_size(type, packing);
1308 
1309 		// Alignment requirement for scalar block layout is always the alignment for the most basic component.
1310 		if (packing_is_scalar(packing))
1311 			return base_alignment;
1312 
1313 		// Vectors are *not* aligned in HLSL, but there's an extra rule where vectors cannot straddle
1314 		// a vec4; this is handled outside since that part knows our current offset.
1315 		if (type.columns == 1 && packing_is_hlsl(packing))
1316 			return base_alignment;
1317 
1318 		// From 7.6.2.2 in GL 4.5 core spec.
1319 		// Rule 1
1320 		if (type.vecsize == 1 && type.columns == 1)
1321 			return base_alignment;
1322 
1323 		// Rule 2
1324 		if ((type.vecsize == 2 || type.vecsize == 4) && type.columns == 1)
1325 			return type.vecsize * base_alignment;
1326 
1327 		// Rule 3
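		// e.g. a three-component vector of 32-bit floats aligns to 16 bytes (4 * 4), the same as a vec4.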
1328 		if (type.vecsize == 3 && type.columns == 1)
1329 			return 4 * base_alignment;
1330 
1331 		// Rule 4 implied. Alignment does not change in std430.
1332 
1333 		// Rule 5. Column-major matrices are stored as arrays of
1334 		// vectors.
1335 		if (flags.get(DecorationColMajor) && type.columns > 1)
1336 		{
1337 			if (packing_is_vec4_padded(packing))
1338 				return 4 * base_alignment;
1339 			else if (type.vecsize == 3)
1340 				return 4 * base_alignment;
1341 			else
1342 				return type.vecsize * base_alignment;
1343 		}
1344 
1345 		// Rule 6 implied.
1346 
1347 		// Rule 7.
1348 		if (flags.get(DecorationRowMajor) && type.vecsize > 1)
1349 		{
1350 			if (packing_is_vec4_padded(packing))
1351 				return 4 * base_alignment;
1352 			else if (type.columns == 3)
1353 				return 4 * base_alignment;
1354 			else
1355 				return type.columns * base_alignment;
1356 		}
1357 
1358 		// Rule 8 implied.
1359 	}
1360 
1361 	SPIRV_CROSS_THROW("Did not find suitable rule for type. Bogus decorations?");
1362 }
1363 
1364 uint32_t CompilerGLSL::type_to_packed_array_stride(const SPIRType &type, const Bitset &flags,
1365                                                    BufferPackingStandard packing)
1366 {
1367 	// Array stride is equal to aligned size of the underlying type.
1368 	uint32_t parent = type.parent_type;
1369 	assert(parent);
1370 
1371 	auto &tmp = get<SPIRType>(parent);
1372 
1373 	uint32_t size = type_to_packed_size(tmp, flags, packing);
1374 	uint32_t alignment = type_to_packed_alignment(type, flags, packing);
1375 	return (size + alignment - 1) & ~(alignment - 1);
1376 }
1377 
1378 uint32_t CompilerGLSL::type_to_packed_size(const SPIRType &type, const Bitset &flags, BufferPackingStandard packing)
1379 {
1380 	if (!type.array.empty())
1381 	{
1382 		uint32_t packed_size = to_array_size_literal(type) * type_to_packed_array_stride(type, flags, packing);
1383 
1384 		// For arrays of vectors and matrices in HLSL, the last element has a size which depends on its vector size,
1385 		// so that it is possible to pack other vectors into the last element.
1386 		if (packing_is_hlsl(packing) && type.basetype != SPIRType::Struct)
1387 			packed_size -= (4 - type.vecsize) * (type.width / 8);
1388 
1389 		return packed_size;
1390 	}
1391 
1392 	// If using PhysicalStorageBufferEXT storage class, this is a pointer,
1393 	// and is 64-bit.
1394 	if (type.storage == StorageClassPhysicalStorageBufferEXT)
1395 	{
1396 		if (!type.pointer)
1397 			SPIRV_CROSS_THROW("Types in PhysicalStorageBufferEXT must be pointers.");
1398 
1399 		if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
1400 			return 8;
1401 		else
1402 			SPIRV_CROSS_THROW("AddressingModelPhysicalStorageBuffer64EXT must be used for PhysicalStorageBufferEXT.");
1403 	}
1404 
1405 	uint32_t size = 0;
1406 
1407 	if (type.basetype == SPIRType::Struct)
1408 	{
1409 		uint32_t pad_alignment = 1;
1410 
1411 		for (uint32_t i = 0; i < type.member_types.size(); i++)
1412 		{
1413 			auto member_flags = ir.meta[type.self].members[i].decoration_flags;
1414 			auto &member_type = get<SPIRType>(type.member_types[i]);
1415 
1416 			uint32_t packed_alignment = type_to_packed_alignment(member_type, member_flags, packing);
1417 			uint32_t alignment = max(packed_alignment, pad_alignment);
1418 
1419 			// The next member following a struct member is aligned to the base alignment of the struct that came before.
1420 			// GL 4.5 spec, 7.6.2.2.
1421 			if (member_type.basetype == SPIRType::Struct)
1422 				pad_alignment = packed_alignment;
1423 			else
1424 				pad_alignment = 1;
1425 
1426 			size = (size + alignment - 1) & ~(alignment - 1);
1427 			size += type_to_packed_size(member_type, member_flags, packing);
1428 		}
1429 	}
1430 	else
1431 	{
1432 		const uint32_t base_alignment = type_to_packed_base_size(type, packing);
1433 
1434 		if (packing_is_scalar(packing))
1435 		{
1436 			size = type.vecsize * type.columns * base_alignment;
1437 		}
1438 		else
1439 		{
1440 			if (type.columns == 1)
1441 				size = type.vecsize * base_alignment;
1442 
1443 			if (flags.get(DecorationColMajor) && type.columns > 1)
1444 			{
1445 				if (packing_is_vec4_padded(packing))
1446 					size = type.columns * 4 * base_alignment;
1447 				else if (type.vecsize == 3)
1448 					size = type.columns * 4 * base_alignment;
1449 				else
1450 					size = type.columns * type.vecsize * base_alignment;
1451 			}
1452 
1453 			if (flags.get(DecorationRowMajor) && type.vecsize > 1)
1454 			{
1455 				if (packing_is_vec4_padded(packing))
1456 					size = type.vecsize * 4 * base_alignment;
1457 				else if (type.columns == 3)
1458 					size = type.vecsize * 4 * base_alignment;
1459 				else
1460 					size = type.vecsize * type.columns * base_alignment;
1461 			}
1462 
1463 			// For matrices in HLSL, the last element has a size which depends on its vector size,
1464 			// so that it is possible to pack other vectors into the last element.
1465 			if (packing_is_hlsl(packing) && type.columns > 1)
1466 				size -= (4 - type.vecsize) * (type.width / 8);
1467 		}
1468 	}
1469 
1470 	return size;
1471 }
1472 
1473 bool CompilerGLSL::buffer_is_packing_standard(const SPIRType &type, BufferPackingStandard packing,
1474                                               uint32_t *failed_validation_index, uint32_t start_offset,
1475                                               uint32_t end_offset)
1476 {
1477 	// This is very tricky and error prone, but try to be exhaustive and correct here.
1478 	// SPIR-V doesn't directly say if we're using std430 or std140.
1479 	// SPIR-V communicates this using Offset and ArrayStride decorations (which is what really matters),
1480 	// so we have to try to infer whether or not the original GLSL source was std140 or std430 based on this information.
1481 	// We do not have to consider shared or packed since these layouts are not allowed in Vulkan SPIR-V (they are useless anyways, and custom offsets would do the same thing).
1482 	//
1483 	// It is almost certain that we're using std430, but it gets tricky with arrays in particular.
1484 	// We will assume std430, but infer std140 if we can prove the struct is not compliant with std430.
1485 	//
1486 	// The only two differences between std140 and std430 are related to padding alignment/array stride
1487 	// in arrays and structs. In std140 they take minimum vec4 alignment.
1488 	// std430 only removes the vec4 requirement.
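	// For example, "float a[4];" has an array stride of 16 bytes under std140 (rounded up to vec4 alignment), but only 4 bytes under std430.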
1489 
1490 	uint32_t offset = 0;
1491 	uint32_t pad_alignment = 1;
1492 
1493 	bool is_top_level_block =
1494 	    has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock);
1495 
1496 	for (uint32_t i = 0; i < type.member_types.size(); i++)
1497 	{
1498 		auto &memb_type = get<SPIRType>(type.member_types[i]);
1499 		auto member_flags = ir.meta[type.self].members[i].decoration_flags;
1500 
1501 		// Verify alignment rules.
1502 		uint32_t packed_alignment = type_to_packed_alignment(memb_type, member_flags, packing);
1503 
1504 		// This is a rather dirty workaround to deal with some cases of OpSpecConstantOp used as array size, e.g:
1505 		// layout(constant_id = 0) const int s = 10;
1506 		// const int S = s + 5; // SpecConstantOp
1507 		// buffer Foo { int data[S]; }; // <-- Very hard for us to deduce a fixed value here,
1508 		// we would need full implementation of compile-time constant folding. :(
1509 		// If we are the last member of a struct, there might be cases where the actual size of that member is irrelevant
1510 		// for our analysis (e.g. unsized arrays).
1511 		// This lets us simply ignore that there are spec constant op sized arrays in our buffers.
1512 		// Querying size of this member will fail, so just don't call it unless we have to.
1513 		//
1514 		// This is likely "best effort" we can support without going into unacceptably complicated workarounds.
1515 		bool member_can_be_unsized =
1516 		    is_top_level_block && size_t(i + 1) == type.member_types.size() && !memb_type.array.empty();
1517 
1518 		uint32_t packed_size = 0;
1519 		if (!member_can_be_unsized || packing_is_hlsl(packing))
1520 			packed_size = type_to_packed_size(memb_type, member_flags, packing);
1521 
1522 		// We only need to care about this if we have non-array types which can straddle the vec4 boundary.
1523 		if (packing_is_hlsl(packing))
1524 		{
1525 			// If a member straddles across a vec4 boundary, alignment is actually vec4.
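			// e.g. a float3 starting at offset 8 would cross the 16-byte boundary, so HLSL bumps it to offset 16.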
1526 			uint32_t begin_word = offset / 16;
1527 			uint32_t end_word = (offset + packed_size - 1) / 16;
1528 			if (begin_word != end_word)
1529 				packed_alignment = max(packed_alignment, 16u);
1530 		}
1531 
1532 		uint32_t actual_offset = type_struct_member_offset(type, i);
1533 		// Field is not in the specified range anymore and we can ignore any further fields.
1534 		if (actual_offset >= end_offset)
1535 			break;
1536 
1537 		uint32_t alignment = max(packed_alignment, pad_alignment);
1538 		offset = (offset + alignment - 1) & ~(alignment - 1);
1539 
1540 		// The next member following a struct member is aligned to the base alignment of the struct that came before.
1541 		// GL 4.5 spec, 7.6.2.2.
1542 		if (memb_type.basetype == SPIRType::Struct && !memb_type.pointer)
1543 			pad_alignment = packed_alignment;
1544 		else
1545 			pad_alignment = 1;
1546 
1547 		// Only care about packing if we are in the given range
1548 		if (actual_offset >= start_offset)
1549 		{
1550 			// We only care about offsets in std140, std430, etc ...
1551 			// For EnhancedLayout variants, we have the flexibility to choose our own offsets.
1552 			if (!packing_has_flexible_offset(packing))
1553 			{
1554 				if (actual_offset != offset) // This cannot be the packing we're looking for.
1555 				{
1556 					if (failed_validation_index)
1557 						*failed_validation_index = i;
1558 					return false;
1559 				}
1560 			}
1561 			else if ((actual_offset & (alignment - 1)) != 0)
1562 			{
1563 				// We still need to verify that alignment rules are observed, even if we have explicit offset.
1564 				if (failed_validation_index)
1565 					*failed_validation_index = i;
1566 				return false;
1567 			}
1568 
1569 			// Verify array stride rules.
1570 			if (!memb_type.array.empty() && type_to_packed_array_stride(memb_type, member_flags, packing) !=
1571 			                                    type_struct_member_array_stride(type, i))
1572 			{
1573 				if (failed_validation_index)
1574 					*failed_validation_index = i;
1575 				return false;
1576 			}
1577 
1578 			// Verify that sub-structs also follow packing rules.
1579 			// We cannot use enhanced layouts on substructs, so they better be up to spec.
1580 			auto substruct_packing = packing_to_substruct_packing(packing);
1581 
1582 			if (!memb_type.pointer && !memb_type.member_types.empty() &&
1583 			    !buffer_is_packing_standard(memb_type, substruct_packing))
1584 			{
1585 				if (failed_validation_index)
1586 					*failed_validation_index = i;
1587 				return false;
1588 			}
1589 		}
1590 
1591 		// Bump size.
1592 		offset = actual_offset + packed_size;
1593 	}
1594 
1595 	return true;
1596 }
1597 
1598 bool CompilerGLSL::can_use_io_location(StorageClass storage, bool block)
1599 {
1600 	// Location specifiers are mandatory in SPIR-V, but they aren't really supported in earlier versions of GLSL.
1601 	// Be very explicit here about how to solve the issue.
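	// For example, "layout(location = 0) out vec4 vColor;" in a vertex shader needs GLSL 4.10
	// (4.40 for blocks) or ARB_separate_shader_objects, and ESSL needs 3.10 (names here are illustrative).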
1602 	if ((get_execution_model() != ExecutionModelVertex && storage == StorageClassInput) ||
1603 	    (get_execution_model() != ExecutionModelFragment && storage == StorageClassOutput))
1604 	{
1605 		uint32_t minimum_desktop_version = block ? 440 : 410;
1606 		// ARB_enhanced_layouts vs ARB_separate_shader_objects ...
1607 
1608 		if (!options.es && options.version < minimum_desktop_version && !options.separate_shader_objects)
1609 			return false;
1610 		else if (options.es && options.version < 310)
1611 			return false;
1612 	}
1613 
1614 	if ((get_execution_model() == ExecutionModelVertex && storage == StorageClassInput) ||
1615 	    (get_execution_model() == ExecutionModelFragment && storage == StorageClassOutput))
1616 	{
1617 		if (options.es && options.version < 300)
1618 			return false;
1619 		else if (!options.es && options.version < 330)
1620 			return false;
1621 	}
1622 
1623 	if (storage == StorageClassUniform || storage == StorageClassUniformConstant || storage == StorageClassPushConstant)
1624 	{
1625 		if (options.es && options.version < 310)
1626 			return false;
1627 		else if (!options.es && options.version < 430)
1628 			return false;
1629 	}
1630 
1631 	return true;
1632 }
1633 
1634 string CompilerGLSL::layout_for_variable(const SPIRVariable &var)
1635 {
1636 	// FIXME: Come up with a better solution for when to disable layouts.
1637 	// Whether layouts can be used depends on extensions as well as which
1638 	// kinds of layouts are used. For now, the simple solution is to just disable
1639 	// layouts for legacy versions.
1640 	if (is_legacy())
1641 		return "";
1642 
1643 	if (subpass_input_is_framebuffer_fetch(var.self))
1644 		return "";
1645 
1646 	SmallVector<string> attr;
1647 
1648 	auto &type = get<SPIRType>(var.basetype);
1649 	auto &flags = get_decoration_bitset(var.self);
1650 	auto &typeflags = get_decoration_bitset(type.self);
1651 
1652 	if (flags.get(DecorationPassthroughNV))
1653 		attr.push_back("passthrough");
1654 
1655 	if (options.vulkan_semantics && var.storage == StorageClassPushConstant)
1656 		attr.push_back("push_constant");
1657 	else if (var.storage == StorageClassShaderRecordBufferKHR)
1658 		attr.push_back(ray_tracing_is_khr ? "shaderRecordEXT" : "shaderRecordNV");
1659 
1660 	if (flags.get(DecorationRowMajor))
1661 		attr.push_back("row_major");
1662 	if (flags.get(DecorationColMajor))
1663 		attr.push_back("column_major");
1664 
1665 	if (options.vulkan_semantics)
1666 	{
1667 		if (flags.get(DecorationInputAttachmentIndex))
1668 			attr.push_back(join("input_attachment_index = ", get_decoration(var.self, DecorationInputAttachmentIndex)));
1669 	}
1670 
1671 	bool is_block = has_decoration(type.self, DecorationBlock);
1672 	if (flags.get(DecorationLocation) && can_use_io_location(var.storage, is_block))
1673 	{
1674 		Bitset combined_decoration;
1675 		for (uint32_t i = 0; i < ir.meta[type.self].members.size(); i++)
1676 			combined_decoration.merge_or(combined_decoration_for_member(type, i));
1677 
1678 		// If our members have location decorations, we don't need to
1679 		// emit location decorations at the top as well (looks weird).
1680 		if (!combined_decoration.get(DecorationLocation))
1681 			attr.push_back(join("location = ", get_decoration(var.self, DecorationLocation)));
1682 	}
1683 
1684 	// Transform feedback
1685 	bool uses_enhanced_layouts = false;
1686 	if (is_block && var.storage == StorageClassOutput)
1687 	{
1688 		// For blocks, there is a restriction where xfb_stride/xfb_buffer must only be declared on the block itself,
1689 		// since all members must match the same xfb_buffer. The only thing we will declare for members of the block
1690 		// is the xfb_offset.
1691 		uint32_t member_count = uint32_t(type.member_types.size());
1692 		bool have_xfb_buffer_stride = false;
1693 		bool have_any_xfb_offset = false;
1694 		bool have_geom_stream = false;
1695 		uint32_t xfb_stride = 0, xfb_buffer = 0, geom_stream = 0;
1696 
1697 		if (flags.get(DecorationXfbBuffer) && flags.get(DecorationXfbStride))
1698 		{
1699 			have_xfb_buffer_stride = true;
1700 			xfb_buffer = get_decoration(var.self, DecorationXfbBuffer);
1701 			xfb_stride = get_decoration(var.self, DecorationXfbStride);
1702 		}
1703 
1704 		if (flags.get(DecorationStream))
1705 		{
1706 			have_geom_stream = true;
1707 			geom_stream = get_decoration(var.self, DecorationStream);
1708 		}
1709 
1710 		// Verify that none of the members violate our assumption.
1711 		for (uint32_t i = 0; i < member_count; i++)
1712 		{
1713 			if (has_member_decoration(type.self, i, DecorationStream))
1714 			{
1715 				uint32_t member_geom_stream = get_member_decoration(type.self, i, DecorationStream);
1716 				if (have_geom_stream && member_geom_stream != geom_stream)
1717 					SPIRV_CROSS_THROW("IO block member Stream mismatch.");
1718 				have_geom_stream = true;
1719 				geom_stream = member_geom_stream;
1720 			}
1721 
1722 			// Only members with an Offset decoration participate in XFB.
1723 			if (!has_member_decoration(type.self, i, DecorationOffset))
1724 				continue;
1725 			have_any_xfb_offset = true;
1726 
1727 			if (has_member_decoration(type.self, i, DecorationXfbBuffer))
1728 			{
1729 				uint32_t buffer_index = get_member_decoration(type.self, i, DecorationXfbBuffer);
1730 				if (have_xfb_buffer_stride && buffer_index != xfb_buffer)
1731 					SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch.");
1732 				have_xfb_buffer_stride = true;
1733 				xfb_buffer = buffer_index;
1734 			}
1735 
1736 			if (has_member_decoration(type.self, i, DecorationXfbStride))
1737 			{
1738 				uint32_t stride = get_member_decoration(type.self, i, DecorationXfbStride);
1739 				if (have_xfb_buffer_stride && stride != xfb_stride)
1740 					SPIRV_CROSS_THROW("IO block member XfbStride mismatch.");
1741 				have_xfb_buffer_stride = true;
1742 				xfb_stride = stride;
1743 			}
1744 		}
1745 
1746 		if (have_xfb_buffer_stride && have_any_xfb_offset)
1747 		{
1748 			attr.push_back(join("xfb_buffer = ", xfb_buffer));
1749 			attr.push_back(join("xfb_stride = ", xfb_stride));
1750 			uses_enhanced_layouts = true;
1751 		}
1752 
1753 		if (have_geom_stream)
1754 		{
1755 			if (get_execution_model() != ExecutionModelGeometry)
1756 				SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders.");
1757 			if (options.es)
1758 				SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL.");
1759 			if (options.version < 400)
1760 				require_extension_internal("GL_ARB_transform_feedback3");
1761 			attr.push_back(join("stream = ", get_decoration(var.self, DecorationStream)));
1762 		}
1763 	}
1764 	else if (var.storage == StorageClassOutput)
1765 	{
1766 		if (flags.get(DecorationXfbBuffer) && flags.get(DecorationXfbStride) && flags.get(DecorationOffset))
1767 		{
1768 			// XFB for standalone variables, we can emit all decorations.
1769 			attr.push_back(join("xfb_buffer = ", get_decoration(var.self, DecorationXfbBuffer)));
1770 			attr.push_back(join("xfb_stride = ", get_decoration(var.self, DecorationXfbStride)));
1771 			attr.push_back(join("xfb_offset = ", get_decoration(var.self, DecorationOffset)));
1772 			uses_enhanced_layouts = true;
1773 		}
1774 
1775 		if (flags.get(DecorationStream))
1776 		{
1777 			if (get_execution_model() != ExecutionModelGeometry)
1778 				SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders.");
1779 			if (options.es)
1780 				SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL.");
1781 			if (options.version < 400)
1782 				require_extension_internal("GL_ARB_transform_feedback3");
1783 			attr.push_back(join("stream = ", get_decoration(var.self, DecorationStream)));
1784 		}
1785 	}
1786 
1787 	// Can only declare Component if we can declare location.
1788 	if (flags.get(DecorationComponent) && can_use_io_location(var.storage, is_block))
1789 	{
1790 		uses_enhanced_layouts = true;
1791 		attr.push_back(join("component = ", get_decoration(var.self, DecorationComponent)));
1792 	}
1793 
1794 	if (uses_enhanced_layouts)
1795 	{
1796 		if (!options.es)
1797 		{
1798 			if (options.version < 140)
1799 				SPIRV_CROSS_THROW("GL_ARB_enhanced_layouts is not supported in targets below GLSL 1.40.");
1800 			else if (options.version < 440)
1801 				require_extension_internal("GL_ARB_enhanced_layouts");
1804 		}
1805 		else if (options.es)
1806 			SPIRV_CROSS_THROW("GL_ARB_enhanced_layouts is not supported in ESSL.");
1807 	}
1808 
1809 	if (flags.get(DecorationIndex))
1810 		attr.push_back(join("index = ", get_decoration(var.self, DecorationIndex)));
1811 
1812 	// Do not emit set = decoration in regular GLSL output, but
1813 	// we need to preserve it in Vulkan GLSL mode.
1814 	if (var.storage != StorageClassPushConstant && var.storage != StorageClassShaderRecordBufferKHR)
1815 	{
1816 		if (flags.get(DecorationDescriptorSet) && options.vulkan_semantics)
1817 			attr.push_back(join("set = ", get_decoration(var.self, DecorationDescriptorSet)));
1818 	}
1819 
1820 	bool push_constant_block = options.vulkan_semantics && var.storage == StorageClassPushConstant;
1821 	bool ssbo_block = var.storage == StorageClassStorageBuffer || var.storage == StorageClassShaderRecordBufferKHR ||
1822 	                  (var.storage == StorageClassUniform && typeflags.get(DecorationBufferBlock));
1823 	bool emulated_ubo = var.storage == StorageClassPushConstant && options.emit_push_constant_as_uniform_buffer;
1824 	bool ubo_block = var.storage == StorageClassUniform && typeflags.get(DecorationBlock);
1825 
1826 	// GL 3.0/GLSL 1.30 is not considered legacy, but it doesn't have UBOs ...
1827 	bool can_use_buffer_blocks = (options.es && options.version >= 300) || (!options.es && options.version >= 140);
1828 
1829 	// Pretend we have no UBOs when options say so.
1830 	if (ubo_block && options.emit_uniform_buffer_as_plain_uniforms)
1831 		can_use_buffer_blocks = false;
1832 
1833 	bool can_use_binding;
1834 	if (options.es)
1835 		can_use_binding = options.version >= 310;
1836 	else
1837 		can_use_binding = options.enable_420pack_extension || (options.version >= 420);
1838 
1839 	// Make sure we don't emit binding layout for a classic uniform on GLSL 1.30.
1840 	if (!can_use_buffer_blocks && var.storage == StorageClassUniform)
1841 		can_use_binding = false;
1842 
1843 	if (var.storage == StorageClassShaderRecordBufferKHR)
1844 		can_use_binding = false;
1845 
1846 	if (can_use_binding && flags.get(DecorationBinding))
1847 		attr.push_back(join("binding = ", get_decoration(var.self, DecorationBinding)));
1848 
1849 	if (var.storage != StorageClassOutput && flags.get(DecorationOffset))
1850 		attr.push_back(join("offset = ", get_decoration(var.self, DecorationOffset)));
1851 
1852 	// Instead of adding explicit offsets for every element here, just assume we're using std140 or std430.
1853 	// If SPIR-V does not comply with either layout, we cannot really work around it.
1854 	if (can_use_buffer_blocks && (ubo_block || emulated_ubo))
1855 	{
1856 		attr.push_back(buffer_to_packing_standard(type, false));
1857 	}
1858 	else if (can_use_buffer_blocks && (push_constant_block || ssbo_block))
1859 	{
1860 		attr.push_back(buffer_to_packing_standard(type, true));
1861 	}
1862 
1863 	// For images, the type itself adds a layout qualifier.
1864 	// Only emit the format for storage images.
1865 	if (type.basetype == SPIRType::Image && type.image.sampled == 2)
1866 	{
1867 		const char *fmt = format_to_glsl(type.image.format);
1868 		if (fmt)
1869 			attr.push_back(fmt);
1870 	}
1871 
1872 	if (attr.empty())
1873 		return "";
1874 
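	// As an illustration, the returned prefix might look like "layout(location = 2, component = 1) "
	// or "layout(std140, binding = 0) ", depending on which attributes were gathered above.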
1875 	string res = "layout(";
1876 	res += merge(attr);
1877 	res += ") ";
1878 	return res;
1879 }
1880 
1881 string CompilerGLSL::buffer_to_packing_standard(const SPIRType &type, bool support_std430_without_scalar_layout)
1882 {
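	// A rough sketch of the precedence below: prefer std430 (when allowed), then std140, then scalar
	// packing, and finally the "enhanced layout" fallbacks which need explicit offsets on the block itself.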
1883 	if (support_std430_without_scalar_layout && buffer_is_packing_standard(type, BufferPackingStd430))
1884 		return "std430";
1885 	else if (buffer_is_packing_standard(type, BufferPackingStd140))
1886 		return "std140";
1887 	else if (options.vulkan_semantics && buffer_is_packing_standard(type, BufferPackingScalar))
1888 	{
1889 		require_extension_internal("GL_EXT_scalar_block_layout");
1890 		return "scalar";
1891 	}
1892 	else if (support_std430_without_scalar_layout &&
1893 	         buffer_is_packing_standard(type, BufferPackingStd430EnhancedLayout))
1894 	{
1895 		if (options.es && !options.vulkan_semantics)
1896 			SPIRV_CROSS_THROW("Push constant block cannot be expressed as either std430 or std140. ES-targets do "
1897 			                  "not support GL_ARB_enhanced_layouts.");
1898 		if (!options.es && !options.vulkan_semantics && options.version < 440)
1899 			require_extension_internal("GL_ARB_enhanced_layouts");
1900 
1901 		set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
1902 		return "std430";
1903 	}
1904 	else if (buffer_is_packing_standard(type, BufferPackingStd140EnhancedLayout))
1905 	{
1906 		// Fallback time. We might be able to use the ARB_enhanced_layouts to deal with this difference,
1907 		// however, we can only use layout(offset) on the block itself, not any substructs, so the substructs better be the appropriate layout.
1908 		// Enhanced layouts seem to always work in Vulkan GLSL, so no need for extensions there.
1909 		if (options.es && !options.vulkan_semantics)
1910 			SPIRV_CROSS_THROW("Push constant block cannot be expressed as either std430 or std140. ES-targets do "
1911 			                  "not support GL_ARB_enhanced_layouts.");
1912 		if (!options.es && !options.vulkan_semantics && options.version < 440)
1913 			require_extension_internal("GL_ARB_enhanced_layouts");
1914 
1915 		set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
1916 		return "std140";
1917 	}
1918 	else if (options.vulkan_semantics && buffer_is_packing_standard(type, BufferPackingScalarEnhancedLayout))
1919 	{
1920 		set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
1921 		require_extension_internal("GL_EXT_scalar_block_layout");
1922 		return "scalar";
1923 	}
1924 	else if (!support_std430_without_scalar_layout && options.vulkan_semantics &&
1925 	         buffer_is_packing_standard(type, BufferPackingStd430))
1926 	{
1927 		// UBOs can support std430 with GL_EXT_scalar_block_layout.
1928 		require_extension_internal("GL_EXT_scalar_block_layout");
1929 		return "std430";
1930 	}
1931 	else if (!support_std430_without_scalar_layout && options.vulkan_semantics &&
1932 	         buffer_is_packing_standard(type, BufferPackingStd430EnhancedLayout))
1933 	{
1934 		// UBOs can support std430 with GL_EXT_scalar_block_layout.
1935 		set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
1936 		require_extension_internal("GL_EXT_scalar_block_layout");
1937 		return "std430";
1938 	}
1939 	else
1940 	{
1941 		SPIRV_CROSS_THROW("Buffer block cannot be expressed as any of std430, std140 or scalar, even with enhanced "
1942 		                  "layouts. You can try flattening this block to support a more flexible layout.");
1943 	}
1944 }
1945 
1946 void CompilerGLSL::emit_push_constant_block(const SPIRVariable &var)
1947 {
1948 	if (flattened_buffer_blocks.count(var.self))
1949 		emit_buffer_block_flattened(var);
1950 	else if (options.vulkan_semantics)
1951 		emit_push_constant_block_vulkan(var);
1952 	else if (options.emit_push_constant_as_uniform_buffer)
1953 		emit_buffer_block_native(var);
1954 	else
1955 		emit_push_constant_block_glsl(var);
1956 }
1957 
1958 void CompilerGLSL::emit_push_constant_block_vulkan(const SPIRVariable &var)
1959 {
1960 	emit_buffer_block(var);
1961 }
1962 
1963 void CompilerGLSL::emit_push_constant_block_glsl(const SPIRVariable &var)
1964 {
1965 	// OpenGL has no concept of push constant blocks, implement it as a uniform struct.
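	// A hypothetical block "PushConstants { mat4 mvp; }" with instance name "pc" would roughly be emitted as:
	//   struct PushConstants { mat4 mvp; };
	//   uniform PushConstants pc;
	// (the names here are illustrative; actual names come from the SPIR-V module.)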
1966 	auto &type = get<SPIRType>(var.basetype);
1967 
1968 	auto &flags = ir.meta[var.self].decoration.decoration_flags;
1969 	flags.clear(DecorationBinding);
1970 	flags.clear(DecorationDescriptorSet);
1971 
1972 #if 0
1973     if (flags & ((1ull << DecorationBinding) | (1ull << DecorationDescriptorSet)))
1974         SPIRV_CROSS_THROW("Push constant blocks cannot be compiled to GLSL with Binding or Set syntax. "
1975                             "Remap to location with reflection API first or disable these decorations.");
1976 #endif
1977 
1978 	// We're emitting the push constant block as a regular struct, so disable the block qualifier temporarily.
1979 	// Otherwise, we will end up emitting layout() qualifiers on naked structs which is not allowed.
1980 	auto &block_flags = ir.meta[type.self].decoration.decoration_flags;
1981 	bool block_flag = block_flags.get(DecorationBlock);
1982 	block_flags.clear(DecorationBlock);
1983 
1984 	emit_struct(type);
1985 
1986 	if (block_flag)
1987 		block_flags.set(DecorationBlock);
1988 
1989 	emit_uniform(var);
1990 	statement("");
1991 }
1992 
1993 void CompilerGLSL::emit_buffer_block(const SPIRVariable &var)
1994 {
1995 	auto &type = get<SPIRType>(var.basetype);
1996 	bool ubo_block = var.storage == StorageClassUniform && has_decoration(type.self, DecorationBlock);
1997 
1998 	if (flattened_buffer_blocks.count(var.self))
1999 		emit_buffer_block_flattened(var);
2000 	else if (is_legacy() || (!options.es && options.version == 130) ||
2001 	         (ubo_block && options.emit_uniform_buffer_as_plain_uniforms))
2002 		emit_buffer_block_legacy(var);
2003 	else
2004 		emit_buffer_block_native(var);
2005 }
2006 
2007 void CompilerGLSL::emit_buffer_block_legacy(const SPIRVariable &var)
2008 {
2009 	auto &type = get<SPIRType>(var.basetype);
2010 	bool ssbo = var.storage == StorageClassStorageBuffer ||
2011 	            ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
2012 	if (ssbo)
2013 		SPIRV_CROSS_THROW("SSBOs not supported in legacy targets.");
2014 
2015 	// We're emitting the push constant block as a regular struct, so disable the block qualifier temporarily.
2016 	// Otherwise, we will end up emitting layout() qualifiers on naked structs which is not allowed.
2017 	auto &block_flags = ir.meta[type.self].decoration.decoration_flags;
2018 	bool block_flag = block_flags.get(DecorationBlock);
2019 	block_flags.clear(DecorationBlock);
2020 	emit_struct(type);
2021 	if (block_flag)
2022 		block_flags.set(DecorationBlock);
2023 	emit_uniform(var);
2024 	statement("");
2025 }
2026 
2027 void CompilerGLSL::emit_buffer_reference_block(SPIRType &type, bool forward_declaration)
2028 {
2029 	string buffer_name;
2030 
2031 	if (forward_declaration)
2032 	{
2033 		// Block names should never alias, but from HLSL input they kind of can because block types are reused for UAVs ...
2034 		// Allow an aliased name since we might be declaring the block twice: once forward declared as a buffer reference, and once as the proper declaration.
2035 		// The names must match up.
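		// For example, a forward declaration "layout(buffer_reference) buffer Node;" must use the same
		// name "Node" as the later full "layout(buffer_reference, std430) buffer Node { ... };" declaration
		// (the name and packing here are illustrative).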
2036 		buffer_name = to_name(type.self, false);
2037 
2038 		// Shaders never use the block by interface name, so we don't
2039 		// have to track this other than updating name caches.
2040 		// If we have a collision for any reason, just fallback immediately.
2041 		if (ir.meta[type.self].decoration.alias.empty() ||
2042 		    block_ssbo_names.find(buffer_name) != end(block_ssbo_names) ||
2043 		    resource_names.find(buffer_name) != end(resource_names))
2044 		{
2045 			buffer_name = join("_", type.self);
2046 		}
2047 
2048 		// Make sure we get something unique for both global name scope and block name scope.
2049 		// See GLSL 4.5 spec: section 4.3.9 for details.
2050 		add_variable(block_ssbo_names, resource_names, buffer_name);
2051 
2052 		// If for some reason buffer_name is an illegal name, make a final fallback to a workaround name.
2053 		// This cannot conflict with anything else, so we're safe now.
2054 		// We cannot reuse this fallback name in either global scope (blocked by block_names) or block name scope.
2055 		if (buffer_name.empty())
2056 			buffer_name = join("_", type.self);
2057 
2058 		block_names.insert(buffer_name);
2059 		block_ssbo_names.insert(buffer_name);
2060 
2061 		// Ensure we emit the correct name when emitting non-forward pointer type.
2062 		ir.meta[type.self].decoration.alias = buffer_name;
2063 	}
2064 	else if (type.basetype != SPIRType::Struct)
2065 		buffer_name = type_to_glsl(type);
2066 	else
2067 		buffer_name = to_name(type.self, false);
2068 
2069 	if (!forward_declaration)
2070 	{
2071 		if (type.basetype == SPIRType::Struct)
2072 		{
2073 			auto flags = ir.get_buffer_block_type_flags(type);
2074 			string decorations;
2075 			if (flags.get(DecorationRestrict))
2076 				decorations += " restrict";
2077 			if (flags.get(DecorationCoherent))
2078 				decorations += " coherent";
2079 			if (flags.get(DecorationNonReadable))
2080 				decorations += " writeonly";
2081 			if (flags.get(DecorationNonWritable))
2082 				decorations += " readonly";
2083 			statement("layout(buffer_reference, ", buffer_to_packing_standard(type, true),
2084 			          ")", decorations, " buffer ", buffer_name);
2085 		}
2086 		else
2087 			statement("layout(buffer_reference) buffer ", buffer_name);
2088 
2089 		begin_scope();
2090 
2091 		if (type.basetype == SPIRType::Struct)
2092 		{
2093 			type.member_name_cache.clear();
2094 
2095 			uint32_t i = 0;
2096 			for (auto &member : type.member_types)
2097 			{
2098 				add_member_name(type, i);
2099 				emit_struct_member(type, member, i);
2100 				i++;
2101 			}
2102 		}
2103 		else
2104 		{
2105 			auto &pointee_type = get_pointee_type(type);
2106 			statement(type_to_glsl(pointee_type), " value", type_to_array_glsl(pointee_type), ";");
2107 		}
2108 
2109 		end_scope_decl();
2110 		statement("");
2111 	}
2112 	else
2113 	{
2114 		statement("layout(buffer_reference) buffer ", buffer_name, ";");
2115 	}
2116 }
2117 
2118 void CompilerGLSL::emit_buffer_block_native(const SPIRVariable &var)
2119 {
2120 	auto &type = get<SPIRType>(var.basetype);
2121 
2122 	Bitset flags = ir.get_buffer_block_flags(var);
2123 	bool ssbo = var.storage == StorageClassStorageBuffer || var.storage == StorageClassShaderRecordBufferKHR ||
2124 	            ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
2125 	bool is_restrict = ssbo && flags.get(DecorationRestrict);
2126 	bool is_writeonly = ssbo && flags.get(DecorationNonReadable);
2127 	bool is_readonly = ssbo && flags.get(DecorationNonWritable);
2128 	bool is_coherent = ssbo && flags.get(DecorationCoherent);
2129 
2130 	// Block names should never alias, but from HLSL input they kind of can because block types are reused for UAVs ...
2131 	auto buffer_name = to_name(type.self, false);
2132 
2133 	auto &block_namespace = ssbo ? block_ssbo_names : block_ubo_names;
2134 
2135 	// Shaders never use the block by interface name, so we don't
2136 	// have to track this other than updating name caches.
2137 	// If we have a collision for any reason, just fallback immediately.
2138 	if (ir.meta[type.self].decoration.alias.empty() || block_namespace.find(buffer_name) != end(block_namespace) ||
2139 	    resource_names.find(buffer_name) != end(resource_names))
2140 	{
2141 		buffer_name = get_block_fallback_name(var.self);
2142 	}
2143 
2144 	// Make sure we get something unique for both global name scope and block name scope.
2145 	// See GLSL 4.5 spec: section 4.3.9 for details.
2146 	add_variable(block_namespace, resource_names, buffer_name);
2147 
2148 	// If for some reason buffer_name is an illegal name, make a final fallback to a workaround name.
2149 	// This cannot conflict with anything else, so we're safe now.
2150 	// We cannot reuse this fallback name in either global scope (blocked by block_names) or block name scope.
2151 	if (buffer_name.empty())
2152 		buffer_name = join("_", get<SPIRType>(var.basetype).self, "_", var.self);
2153 
2154 	block_names.insert(buffer_name);
2155 	block_namespace.insert(buffer_name);
2156 
2157 	// Save for post-reflection later.
2158 	declared_block_names[var.self] = buffer_name;
2159 
2160 	statement(layout_for_variable(var), is_coherent ? "coherent " : "", is_restrict ? "restrict " : "",
2161 	          is_writeonly ? "writeonly " : "", is_readonly ? "readonly " : "", ssbo ? "buffer " : "uniform ",
2162 	          buffer_name);
2163 
2164 	begin_scope();
2165 
2166 	type.member_name_cache.clear();
2167 
2168 	uint32_t i = 0;
2169 	for (auto &member : type.member_types)
2170 	{
2171 		add_member_name(type, i);
2172 		emit_struct_member(type, member, i);
2173 		i++;
2174 	}
2175 
2176 	// var.self can be used as a backup name for the block name,
2177 	// so we need to make sure we don't disturb the name here on a recompile.
2178 	// It will need to be reset if we have to recompile.
2179 	preserve_alias_on_reset(var.self);
2180 	add_resource_name(var.self);
2181 	end_scope_decl(to_name(var.self) + type_to_array_glsl(type));
2182 	statement("");
2183 }
2184 
2185 void CompilerGLSL::emit_buffer_block_flattened(const SPIRVariable &var)
2186 {
2187 	auto &type = get<SPIRType>(var.basetype);
2188 
2189 	// Block names should never alias.
2190 	auto buffer_name = to_name(type.self, false);
2191 	size_t buffer_size = (get_declared_struct_size(type) + 15) / 16;
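	// E.g. a hypothetical 64-byte block named "UBO" with float members would be emitted below
	// roughly as "uniform vec4 UBO[4];".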
2192 
2193 	SPIRType::BaseType basic_type;
2194 	if (get_common_basic_type(type, basic_type))
2195 	{
2196 		SPIRType tmp;
2197 		tmp.basetype = basic_type;
2198 		tmp.vecsize = 4;
2199 		if (basic_type != SPIRType::Float && basic_type != SPIRType::Int && basic_type != SPIRType::UInt)
2200 			SPIRV_CROSS_THROW("Basic types in a flattened UBO must be float, int or uint.");
2201 
2202 		auto flags = ir.get_buffer_block_flags(var);
2203 		statement("uniform ", flags_to_qualifiers_glsl(tmp, flags), type_to_glsl(tmp), " ", buffer_name, "[",
2204 		          buffer_size, "];");
2205 	}
2206 	else
2207 		SPIRV_CROSS_THROW("All basic types in a flattened block must be the same.");
2208 }
2209 
2210 const char *CompilerGLSL::to_storage_qualifiers_glsl(const SPIRVariable &var)
2211 {
2212 	auto &execution = get_entry_point();
2213 
2214 	if (subpass_input_is_framebuffer_fetch(var.self))
2215 		return "";
2216 
2217 	if (var.storage == StorageClassInput || var.storage == StorageClassOutput)
2218 	{
2219 		if (is_legacy() && execution.model == ExecutionModelVertex)
2220 			return var.storage == StorageClassInput ? "attribute " : "varying ";
2221 		else if (is_legacy() && execution.model == ExecutionModelFragment)
2222 			return "varying "; // Fragment outputs are renamed so they never hit this case.
2223 		else if (execution.model == ExecutionModelFragment && var.storage == StorageClassOutput)
2224 		{
2225 			if (inout_color_attachments.count(get_decoration(var.self, DecorationLocation)) != 0)
2226 				return "inout ";
2227 			else
2228 				return "out ";
2229 		}
2230 		else
2231 			return var.storage == StorageClassInput ? "in " : "out ";
2232 	}
2233 	else if (var.storage == StorageClassUniformConstant || var.storage == StorageClassUniform ||
2234 	         var.storage == StorageClassPushConstant)
2235 	{
2236 		return "uniform ";
2237 	}
2238 	else if (var.storage == StorageClassRayPayloadKHR)
2239 	{
2240 		return ray_tracing_is_khr ? "rayPayloadEXT " : "rayPayloadNV ";
2241 	}
2242 	else if (var.storage == StorageClassIncomingRayPayloadKHR)
2243 	{
2244 		return ray_tracing_is_khr ? "rayPayloadInEXT " : "rayPayloadInNV ";
2245 	}
2246 	else if (var.storage == StorageClassHitAttributeKHR)
2247 	{
2248 		return ray_tracing_is_khr ? "hitAttributeEXT " : "hitAttributeNV ";
2249 	}
2250 	else if (var.storage == StorageClassCallableDataKHR)
2251 	{
2252 		return ray_tracing_is_khr ? "callableDataEXT " : "callableDataNV ";
2253 	}
2254 	else if (var.storage == StorageClassIncomingCallableDataKHR)
2255 	{
2256 		return ray_tracing_is_khr ? "callableDataInEXT " : "callableDataInNV ";
2257 	}
2258 
2259 	return "";
2260 }
2261 
2262 void CompilerGLSL::emit_flattened_io_block_member(const std::string &basename, const SPIRType &type, const char *qual,
2263                                                   const SmallVector<uint32_t> &indices)
2264 {
2265 	uint32_t member_type_id = type.self;
2266 	const SPIRType *member_type = &type;
2267 	const SPIRType *parent_type = nullptr;
2268 	auto flattened_name = basename;
2269 	for (auto &index : indices)
2270 	{
2271 		flattened_name += "_";
2272 		flattened_name += to_member_name(*member_type, index);
2273 		parent_type = member_type;
2274 		member_type_id = member_type->member_types[index];
2275 		member_type = &get<SPIRType>(member_type_id);
2276 	}
2277 
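	// For instance, member "color" of a block instance named "vout" ends up flattened to a
	// varying named "vout_color" (extra underscores are sanitized further below; names are illustrative).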
2278 	assert(member_type->basetype != SPIRType::Struct);
2279 
2280 	// We're overriding struct member names, so ensure we do so on the primary type.
2281 	if (parent_type->type_alias)
2282 		parent_type = &get<SPIRType>(parent_type->type_alias);
2283 
2284 	// Sanitize underscores because joining the two identifiers might create more than 1 underscore in a row,
2285 	// which is not allowed.
2286 	ParsedIR::sanitize_underscores(flattened_name);
2287 
2288 	uint32_t last_index = indices.back();
2289 
2290 	// Pass in the varying qualifier here so it will appear in the correct declaration order.
2291 	// Replace member name while emitting it so it encodes both struct name and member name.
2292 	auto backup_name = get_member_name(parent_type->self, last_index);
2293 	auto member_name = to_member_name(*parent_type, last_index);
2294 	set_member_name(parent_type->self, last_index, flattened_name);
2295 	emit_struct_member(*parent_type, member_type_id, last_index, qual);
2296 	// Restore member name.
2297 	set_member_name(parent_type->self, last_index, member_name);
2298 }
2299 
2300 void CompilerGLSL::emit_flattened_io_block_struct(const std::string &basename, const SPIRType &type, const char *qual,
2301                                                   const SmallVector<uint32_t> &indices)
2302 {
2303 	auto sub_indices = indices;
2304 	sub_indices.push_back(0);
2305 
2306 	const SPIRType *member_type = &type;
2307 	for (auto &index : indices)
2308 		member_type = &get<SPIRType>(member_type->member_types[index]);
2309 
2310 	assert(member_type->basetype == SPIRType::Struct);
2311 
2312 	if (!member_type->array.empty())
2313 		SPIRV_CROSS_THROW("Cannot flatten array of structs in I/O blocks.");
2314 
2315 	for (uint32_t i = 0; i < uint32_t(member_type->member_types.size()); i++)
2316 	{
2317 		sub_indices.back() = i;
2318 		if (get<SPIRType>(member_type->member_types[i]).basetype == SPIRType::Struct)
2319 			emit_flattened_io_block_struct(basename, type, qual, sub_indices);
2320 		else
2321 			emit_flattened_io_block_member(basename, type, qual, sub_indices);
2322 	}
2323 }
2324 
2325 void CompilerGLSL::emit_flattened_io_block(const SPIRVariable &var, const char *qual)
2326 {
2327 	auto &var_type = get<SPIRType>(var.basetype);
2328 	if (!var_type.array.empty())
2329 		SPIRV_CROSS_THROW("Array of varying structs cannot be flattened to legacy-compatible varyings.");
2330 
2331 	// Emit flattened types based on the type alias. Normally, we are never supposed to emit
2332 	// struct declarations for aliased types.
2333 	auto &type = var_type.type_alias ? get<SPIRType>(var_type.type_alias) : var_type;
2334 
2335 	auto old_flags = ir.meta[type.self].decoration.decoration_flags;
2336 	// Emit the members as if they are part of a block to get all qualifiers.
2337 	ir.meta[type.self].decoration.decoration_flags.set(DecorationBlock);
2338 
2339 	type.member_name_cache.clear();
2340 
2341 	SmallVector<uint32_t> member_indices;
2342 	member_indices.push_back(0);
2343 	auto basename = to_name(var.self);
2344 
2345 	uint32_t i = 0;
2346 	for (auto &member : type.member_types)
2347 	{
2348 		add_member_name(type, i);
2349 		auto &membertype = get<SPIRType>(member);
2350 
2351 		member_indices.back() = i;
2352 		if (membertype.basetype == SPIRType::Struct)
2353 			emit_flattened_io_block_struct(basename, type, qual, member_indices);
2354 		else
2355 			emit_flattened_io_block_member(basename, type, qual, member_indices);
2356 		i++;
2357 	}
2358 
2359 	ir.meta[type.self].decoration.decoration_flags = old_flags;
2360 
2361 	// Treat this variable as fully flattened from now on.
2362 	flattened_structs[var.self] = true;
2363 }
2364 
2365 void CompilerGLSL::emit_interface_block(const SPIRVariable &var)
2366 {
2367 	auto &type = get<SPIRType>(var.basetype);
2368 
2369 	if (var.storage == StorageClassInput && type.basetype == SPIRType::Double &&
2370 	    !options.es && options.version < 410)
2371 	{
2372 		require_extension_internal("GL_ARB_vertex_attrib_64bit");
2373 	}
2374 
2375 	// Either make it plain in/out or in/out blocks depending on what shader is doing ...
2376 	bool block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock);
2377 	const char *qual = to_storage_qualifiers_glsl(var);
2378 
2379 	if (block)
2380 	{
2381 		// ESSL earlier than 310 and GLSL earlier than 150 did not support
2382 		// I/O variables which are struct types.
2383 		// To support this, flatten the struct into separate varyings instead.
2384 		if (options.force_flattened_io_blocks || (options.es && options.version < 310) ||
2385 		    (!options.es && options.version < 150))
2386 		{
2387 			// I/O blocks on ES require version 310 with Android Extension Pack extensions, or core version 320.
2388 			// On desktop, I/O blocks were introduced with geometry shaders in GL 3.2 (GLSL 150).
2389 			emit_flattened_io_block(var, qual);
2390 		}
2391 		else
2392 		{
2393 			if (options.es && options.version < 320)
2394 			{
2395 				// Geometry and tessellation extensions imply this extension.
2396 				if (!has_extension("GL_EXT_geometry_shader") && !has_extension("GL_EXT_tessellation_shader"))
2397 					require_extension_internal("GL_EXT_shader_io_blocks");
2398 			}
2399 
2400 			// Workaround to make sure we can emit "patch in/out" correctly.
2401 			fixup_io_block_patch_qualifiers(var);
2402 
2403 			// Block names should never alias.
2404 			auto block_name = to_name(type.self, false);
2405 
2406 			// The namespace for I/O blocks is separate from other variables in GLSL.
2407 			auto &block_namespace = type.storage == StorageClassInput ? block_input_names : block_output_names;
2408 
2409 			// Shaders never use the block by interface name, so we don't
2410 			// have to track this other than updating name caches.
2411 			if (block_name.empty() || block_namespace.find(block_name) != end(block_namespace))
2412 				block_name = get_fallback_name(type.self);
2413 			else
2414 				block_namespace.insert(block_name);
2415 
2416 			// If for some reason block_name is an illegal name, make a final fallback to a workaround name.
2417 			// This cannot conflict with anything else, so we're safe now.
2418 			if (block_name.empty())
2419 				block_name = join("_", get<SPIRType>(var.basetype).self, "_", var.self);
2420 
2421 			// Instance names cannot alias block names.
2422 			resource_names.insert(block_name);
2423 
2424 			bool is_patch = has_decoration(var.self, DecorationPatch);
2425 			statement(layout_for_variable(var), (is_patch ? "patch " : ""), qual, block_name);
2426 			begin_scope();
2427 
2428 			type.member_name_cache.clear();
2429 
2430 			uint32_t i = 0;
2431 			for (auto &member : type.member_types)
2432 			{
2433 				add_member_name(type, i);
2434 				emit_struct_member(type, member, i);
2435 				i++;
2436 			}
2437 
2438 			add_resource_name(var.self);
2439 			end_scope_decl(join(to_name(var.self), type_to_array_glsl(type)));
2440 			statement("");
2441 		}
2442 	}
2443 	else
2444 	{
2445 		// ESSL earlier than 310 and GLSL earlier than 150 did not support
2446 		// I/O variables which are struct types.
2447 		// To support this, flatten the struct into separate varyings instead.
2448 		if (type.basetype == SPIRType::Struct &&
2449 		    (options.force_flattened_io_blocks || (options.es && options.version < 310) ||
2450 		     (!options.es && options.version < 150)))
2451 		{
2452 			emit_flattened_io_block(var, qual);
2453 		}
2454 		else
2455 		{
2456 			add_resource_name(var.self);
2457 
2458 			// Tessellation control and evaluation shaders must have either gl_MaxPatchVertices or unsized arrays for input arrays.
2459 			// Opt for unsized as it's the more "correct" variant to use.
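			// E.g. a per-control-point input declared as "in vec4 vColor[32];" would instead be
			// emitted as the unsized "in vec4 vColor[];" (names here are illustrative).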
2460 			bool control_point_input_array = type.storage == StorageClassInput && !type.array.empty() &&
2461 			                                 !has_decoration(var.self, DecorationPatch) &&
2462 			                                 (get_entry_point().model == ExecutionModelTessellationControl ||
2463 			                                  get_entry_point().model == ExecutionModelTessellationEvaluation);
2464 
2465 			uint32_t old_array_size = 0;
2466 			bool old_array_size_literal = true;
2467 
2468 			if (control_point_input_array)
2469 			{
2470 				swap(type.array.back(), old_array_size);
2471 				swap(type.array_size_literal.back(), old_array_size_literal);
2472 			}
2473 
2474 			statement(layout_for_variable(var), to_qualifiers_glsl(var.self),
2475 			          variable_decl(type, to_name(var.self), var.self), ";");
2476 
2477 			if (control_point_input_array)
2478 			{
2479 				swap(type.array.back(), old_array_size);
2480 				swap(type.array_size_literal.back(), old_array_size_literal);
2481 			}
2482 		}
2483 	}
2484 }
2485 
2486 void CompilerGLSL::emit_uniform(const SPIRVariable &var)
2487 {
2488 	auto &type = get<SPIRType>(var.basetype);
2489 	if (type.basetype == SPIRType::Image && type.image.sampled == 2 && type.image.dim != DimSubpassData)
2490 	{
2491 		if (!options.es && options.version < 420)
2492 			require_extension_internal("GL_ARB_shader_image_load_store");
2493 		else if (options.es && options.version < 310)
2494 			SPIRV_CROSS_THROW("At least ESSL 3.10 required for shader image load store.");
2495 	}
2496 
2497 	add_resource_name(var.self);
2498 	statement(layout_for_variable(var), variable_decl(var), ";");
2499 }
2500 
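// For example, constant_value_macro_name(3) yields "SPIRV_CROSS_CONSTANT_ID_3".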
2501 string CompilerGLSL::constant_value_macro_name(uint32_t id)
2502 {
2503 	return join("SPIRV_CROSS_CONSTANT_ID_", id);
2504 }
2505 
2506 void CompilerGLSL::emit_specialization_constant_op(const SPIRConstantOp &constant)
2507 {
2508 	auto &type = get<SPIRType>(constant.basetype);
2509 	auto name = to_name(constant.self);
2510 	statement("const ", variable_decl(type, name), " = ", constant_op_expression(constant), ";");
2511 }
2512 
2513 void CompilerGLSL::emit_constant(const SPIRConstant &constant)
2514 {
2515 	auto &type = get<SPIRType>(constant.constant_type);
2516 	auto name = to_name(constant.self);
2517 
2518 	SpecializationConstant wg_x, wg_y, wg_z;
2519 	ID workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);
2520 
2521 	// This specialization constant is implicitly declared by emitting layout() in;
2522 	if (constant.self == workgroup_size_id)
2523 		return;
2524 
2525 	// These specialization constants are implicitly declared by emitting layout() in;
2526 	// In legacy GLSL, we will still need to emit macros for these, so a layout() in; declaration
2527 	// later can use macro overrides for work group size.
2528 	bool is_workgroup_size_constant = ConstantID(constant.self) == wg_x.id || ConstantID(constant.self) == wg_y.id ||
2529 	                                  ConstantID(constant.self) == wg_z.id;
2530 
2531 	if (options.vulkan_semantics && is_workgroup_size_constant)
2532 	{
2533 		// Vulkan GLSL does not need to declare workgroup spec constants explicitly; this is handled in the layout() declaration.
2534 		return;
2535 	}
2536 	else if (!options.vulkan_semantics && is_workgroup_size_constant &&
2537 	         !has_decoration(constant.self, DecorationSpecId))
2538 	{
2539 		// Only bother declaring a workgroup size if it is actually a specialization constant, because we need macros.
2540 		return;
2541 	}
2542 
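	// In the non-Vulkan path below, a spec constant with SpecId 3 and default value 1u is emitted roughly as:
	//   #ifndef SPIRV_CROSS_CONSTANT_ID_3
	//   #define SPIRV_CROSS_CONSTANT_ID_3 1u
	//   #endif
	//   const uint _7 = SPIRV_CROSS_CONSTANT_ID_3;
	// (the identifier name is illustrative).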
2543 	// Only scalars have constant IDs.
2544 	if (has_decoration(constant.self, DecorationSpecId))
2545 	{
2546 		if (options.vulkan_semantics)
2547 		{
2548 			statement("layout(constant_id = ", get_decoration(constant.self, DecorationSpecId), ") const ",
2549 			          variable_decl(type, name), " = ", constant_expression(constant), ";");
2550 		}
2551 		else
2552 		{
2553 			const string &macro_name = constant.specialization_constant_macro_name;
2554 			statement("#ifndef ", macro_name);
2555 			statement("#define ", macro_name, " ", constant_expression(constant));
2556 			statement("#endif");
2557 
2558 			// For workgroup size constants, only emit the macros.
2559 			if (!is_workgroup_size_constant)
2560 				statement("const ", variable_decl(type, name), " = ", macro_name, ";");
2561 		}
2562 	}
2563 	else
2564 	{
2565 		statement("const ", variable_decl(type, name), " = ", constant_expression(constant), ";");
2566 	}
2567 }
2568 
2569 void CompilerGLSL::emit_entry_point_declarations()
2570 {
2571 }
2572 
2573 void CompilerGLSL::replace_illegal_names(const unordered_set<string> &keywords)
2574 {
2575 	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
2576 		if (is_hidden_variable(var))
2577 			return;
2578 
2579 		auto *meta = ir.find_meta(var.self);
2580 		if (!meta)
2581 			return;
2582 
2583 		auto &m = meta->decoration;
2584 		if (keywords.find(m.alias) != end(keywords))
2585 			m.alias = join("_", m.alias);
2586 	});
2587 
2588 	ir.for_each_typed_id<SPIRFunction>([&](uint32_t, const SPIRFunction &func) {
2589 		auto *meta = ir.find_meta(func.self);
2590 		if (!meta)
2591 			return;
2592 
2593 		auto &m = meta->decoration;
2594 		if (keywords.find(m.alias) != end(keywords))
2595 			m.alias = join("_", m.alias);
2596 	});
2597 
2598 	ir.for_each_typed_id<SPIRType>([&](uint32_t, const SPIRType &type) {
2599 		auto *meta = ir.find_meta(type.self);
2600 		if (!meta)
2601 			return;
2602 
2603 		auto &m = meta->decoration;
2604 		if (keywords.find(m.alias) != end(keywords))
2605 			m.alias = join("_", m.alias);
2606 
2607 		for (auto &memb : meta->members)
2608 			if (keywords.find(memb.alias) != end(keywords))
2609 				memb.alias = join("_", memb.alias);
2610 	});
2611 }
2612 
2613 void CompilerGLSL::replace_illegal_names()
2614 {
2615 	// clang-format off
2616 	static const unordered_set<string> keywords = {
2617 		"abs", "acos", "acosh", "all", "any", "asin", "asinh", "atan", "atanh",
2618 		"atomicAdd", "atomicCompSwap", "atomicCounter", "atomicCounterDecrement", "atomicCounterIncrement",
2619 		"atomicExchange", "atomicMax", "atomicMin", "atomicOr", "atomicXor",
2620 		"bitCount", "bitfieldExtract", "bitfieldInsert", "bitfieldReverse",
2621 		"ceil", "cos", "cosh", "cross", "degrees",
2622 		"dFdx", "dFdxCoarse", "dFdxFine",
2623 		"dFdy", "dFdyCoarse", "dFdyFine",
2624 		"distance", "dot", "EmitStreamVertex", "EmitVertex", "EndPrimitive", "EndStreamPrimitive", "equal", "exp", "exp2",
2625 		"faceforward", "findLSB", "findMSB", "float16BitsToInt16", "float16BitsToUint16", "floatBitsToInt", "floatBitsToUint", "floor", "fma", "fract",
2626 		"frexp", "fwidth", "fwidthCoarse", "fwidthFine",
2627 		"greaterThan", "greaterThanEqual", "groupMemoryBarrier",
2628 		"imageAtomicAdd", "imageAtomicAnd", "imageAtomicCompSwap", "imageAtomicExchange", "imageAtomicMax", "imageAtomicMin", "imageAtomicOr", "imageAtomicXor",
2629 		"imageLoad", "imageSamples", "imageSize", "imageStore", "imulExtended", "int16BitsToFloat16", "intBitsToFloat", "interpolateAtOffset", "interpolateAtCentroid", "interpolateAtSample",
2630 		"inverse", "inversesqrt", "isinf", "isnan", "ldexp", "length", "lessThan", "lessThanEqual", "log", "log2",
2631 		"matrixCompMult", "max", "memoryBarrier", "memoryBarrierAtomicCounter", "memoryBarrierBuffer", "memoryBarrierImage", "memoryBarrierShared",
2632 		"min", "mix", "mod", "modf", "noise", "noise1", "noise2", "noise3", "noise4", "normalize", "not", "notEqual",
2633 		"outerProduct", "packDouble2x32", "packHalf2x16", "packInt2x16", "packInt4x16", "packSnorm2x16", "packSnorm4x8",
2634 		"packUint2x16", "packUint4x16", "packUnorm2x16", "packUnorm4x8", "pow",
2635 		"radians", "reflect", "refract", "round", "roundEven", "sign", "sin", "sinh", "smoothstep", "sqrt", "step",
2636 		"tan", "tanh", "texelFetch", "texelFetchOffset", "texture", "textureGather", "textureGatherOffset", "textureGatherOffsets",
2637 		"textureGrad", "textureGradOffset", "textureLod", "textureLodOffset", "textureOffset", "textureProj", "textureProjGrad",
2638 		"textureProjGradOffset", "textureProjLod", "textureProjLodOffset", "textureProjOffset", "textureQueryLevels", "textureQueryLod", "textureSamples", "textureSize",
2639 		"transpose", "trunc", "uaddCarry", "uint16BitsToFloat16", "uintBitsToFloat", "umulExtended", "unpackDouble2x32", "unpackHalf2x16", "unpackInt2x16", "unpackInt4x16",
2640 		"unpackSnorm2x16", "unpackSnorm4x8", "unpackUint2x16", "unpackUint4x16", "unpackUnorm2x16", "unpackUnorm4x8", "usubBorrow",
2641 
2642 		"active", "asm", "atomic_uint", "attribute", "bool", "break", "buffer",
2643 		"bvec2", "bvec3", "bvec4", "case", "cast", "centroid", "class", "coherent", "common", "const", "continue", "default", "discard",
2644 		"dmat2", "dmat2x2", "dmat2x3", "dmat2x4", "dmat3", "dmat3x2", "dmat3x3", "dmat3x4", "dmat4", "dmat4x2", "dmat4x3", "dmat4x4",
2645 		"do", "double", "dvec2", "dvec3", "dvec4", "else", "enum", "extern", "external", "false", "filter", "fixed", "flat", "float",
2646 		"for", "fvec2", "fvec3", "fvec4", "goto", "half", "highp", "hvec2", "hvec3", "hvec4", "if", "iimage1D", "iimage1DArray",
2647 		"iimage2D", "iimage2DArray", "iimage2DMS", "iimage2DMSArray", "iimage2DRect", "iimage3D", "iimageBuffer", "iimageCube",
2648 		"iimageCubeArray", "image1D", "image1DArray", "image2D", "image2DArray", "image2DMS", "image2DMSArray", "image2DRect",
2649 		"image3D", "imageBuffer", "imageCube", "imageCubeArray", "in", "inline", "inout", "input", "int", "interface", "invariant",
2650 		"isampler1D", "isampler1DArray", "isampler2D", "isampler2DArray", "isampler2DMS", "isampler2DMSArray", "isampler2DRect",
2651 		"isampler3D", "isamplerBuffer", "isamplerCube", "isamplerCubeArray", "ivec2", "ivec3", "ivec4", "layout", "long", "lowp",
2652 		"mat2", "mat2x2", "mat2x3", "mat2x4", "mat3", "mat3x2", "mat3x3", "mat3x4", "mat4", "mat4x2", "mat4x3", "mat4x4", "mediump",
2653 		"namespace", "noinline", "noperspective", "out", "output", "packed", "partition", "patch", "precise", "precision", "public", "readonly",
2654 		"resource", "restrict", "return", "sample", "sampler1D", "sampler1DArray", "sampler1DArrayShadow",
2655 		"sampler1DShadow", "sampler2D", "sampler2DArray", "sampler2DArrayShadow", "sampler2DMS", "sampler2DMSArray",
2656 		"sampler2DRect", "sampler2DRectShadow", "sampler2DShadow", "sampler3D", "sampler3DRect", "samplerBuffer",
2657 		"samplerCube", "samplerCubeArray", "samplerCubeArrayShadow", "samplerCubeShadow", "shared", "short", "sizeof", "smooth", "static",
2658 		"struct", "subroutine", "superp", "switch", "template", "this", "true", "typedef", "uimage1D", "uimage1DArray", "uimage2D",
2659 		"uimage2DArray", "uimage2DMS", "uimage2DMSArray", "uimage2DRect", "uimage3D", "uimageBuffer", "uimageCube",
2660 		"uimageCubeArray", "uint", "uniform", "union", "unsigned", "usampler1D", "usampler1DArray", "usampler2D", "usampler2DArray",
2661 		"usampler2DMS", "usampler2DMSArray", "usampler2DRect", "usampler3D", "usamplerBuffer", "usamplerCube",
2662 		"usamplerCubeArray", "using", "uvec2", "uvec3", "uvec4", "varying", "vec2", "vec3", "vec4", "void", "volatile",
2663 		"while", "writeonly",
2664 	};
2665 	// clang-format on
2666 
2667 	replace_illegal_names(keywords);
2668 }
2669 
2670 void CompilerGLSL::replace_fragment_output(SPIRVariable &var)
2671 {
2672 	auto &m = ir.meta[var.self].decoration;
2673 	uint32_t location = 0;
2674 	if (m.decoration_flags.get(DecorationLocation))
2675 		location = m.location;
2676 
2677 	// If our variable is arrayed, we must not emit the array part of this as the SPIR-V will
2678 	// do the access chain part of this for us.
2679 	auto &type = get<SPIRType>(var.basetype);
2680 
2681 	if (type.array.empty())
2682 	{
2683 		// Redirect the write to a specific render target in legacy GLSL.
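		// E.g. an output at location 1 is renamed so that writes to it go to "gl_FragData[1]".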
2684 		m.alias = join("gl_FragData[", location, "]");
2685 
2686 		if (is_legacy_es() && location != 0)
2687 			require_extension_internal("GL_EXT_draw_buffers");
2688 	}
2689 	else if (type.array.size() == 1)
2690 	{
2691 		// If location is non-zero, we probably have to add an offset.
2692 		// This gets really tricky since we'd have to inject an offset in the access chain.
2693 		// FIXME: This seems like an extremely odd-ball case, so it's probably fine to leave it like this for now.
2694 		m.alias = "gl_FragData";
2695 		if (location != 0)
2696 			SPIRV_CROSS_THROW("Arrayed output variable used, but location is not 0. "
2697 			                  "This is unimplemented in SPIRV-Cross.");
2698 
2699 		if (is_legacy_es())
2700 			require_extension_internal("GL_EXT_draw_buffers");
2701 	}
2702 	else
2703 		SPIRV_CROSS_THROW("Array-of-array output variable used. This cannot be implemented in legacy GLSL.");
2704 
2705 	var.compat_builtin = true; // We don't want to declare this variable, but use the name as-is.
2706 }
2707 
2708 void CompilerGLSL::replace_fragment_outputs()
2709 {
2710 	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
2711 		auto &type = this->get<SPIRType>(var.basetype);
2712 
2713 		if (!is_builtin_variable(var) && !var.remapped_variable && type.pointer && var.storage == StorageClassOutput)
2714 			replace_fragment_output(var);
2715 	});
2716 }
2717 
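// For example, remapping a 2-component expression to a 4-component output type yields a
// swizzle such as "expr.xyyy", clamping the extra components to the last available input component.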
2718 string CompilerGLSL::remap_swizzle(const SPIRType &out_type, uint32_t input_components, const string &expr)
2719 {
2720 	if (out_type.vecsize == input_components)
2721 		return expr;
2722 	else if (input_components == 1 && !backend.can_swizzle_scalar)
2723 		return join(type_to_glsl(out_type), "(", expr, ")");
2724 	else
2725 	{
2726 		// FIXME: This will not work with packed expressions.
2727 		auto e = enclose_expression(expr) + ".";
2728 		// Just clamp the swizzle index if we have more outputs than inputs.
2729 		for (uint32_t c = 0; c < out_type.vecsize; c++)
2730 			e += index_to_swizzle(min(c, input_components - 1));
2731 		if (backend.swizzle_is_function && out_type.vecsize > 1)
2732 			e += "()";
2733 
2734 		remove_duplicate_swizzle(e);
2735 		return e;
2736 	}
2737 }
2738 
2739 void CompilerGLSL::emit_pls()
2740 {
2741 	auto &execution = get_entry_point();
2742 	if (execution.model != ExecutionModelFragment)
2743 		SPIRV_CROSS_THROW("Pixel local storage only supported in fragment shaders.");
2744 
2745 	if (!options.es)
2746 		SPIRV_CROSS_THROW("Pixel local storage only supported in OpenGL ES.");
2747 
2748 	if (options.version < 300)
2749 		SPIRV_CROSS_THROW("Pixel local storage only supported in ESSL 3.0 and above.");
2750 
2751 	if (!pls_inputs.empty())
2752 	{
2753 		statement("__pixel_local_inEXT _PLSIn");
2754 		begin_scope();
2755 		for (auto &input : pls_inputs)
2756 			statement(pls_decl(input), ";");
2757 		end_scope_decl();
2758 		statement("");
2759 	}
2760 
2761 	if (!pls_outputs.empty())
2762 	{
2763 		statement("__pixel_local_outEXT _PLSOut");
2764 		begin_scope();
2765 		for (auto &output : pls_outputs)
2766 			statement(pls_decl(output), ";");
2767 		end_scope_decl();
2768 		statement("");
2769 	}
2770 }
2771 
2772 void CompilerGLSL::fixup_image_load_store_access()
2773 {
2774 	if (!options.enable_storage_image_qualifier_deduction)
2775 		return;
2776 
2777 	ir.for_each_typed_id<SPIRVariable>([&](uint32_t var, const SPIRVariable &) {
2778 		auto &vartype = expression_type(var);
2779 		if (vartype.basetype == SPIRType::Image && vartype.image.sampled == 2)
2780 		{
2781 			// Very old glslangValidator and HLSL compilers do not emit required qualifiers here.
2782 			// Solve this by making the image access as restricted as possible, and loosen it up only if needed.
2783 			// If any no-read/no-write flags are actually set, assume that the compiler knows what it's doing.
2784 
2785 			auto &flags = ir.meta[var].decoration.decoration_flags;
2786 			if (!flags.get(DecorationNonWritable) && !flags.get(DecorationNonReadable))
2787 			{
2788 				flags.set(DecorationNonWritable);
2789 				flags.set(DecorationNonReadable);
2790 			}
2791 		}
2792 	});
2793 }
2794 
2795 static bool is_block_builtin(BuiltIn builtin)
2796 {
2797 	return builtin == BuiltInPosition || builtin == BuiltInPointSize || builtin == BuiltInClipDistance ||
2798 	       builtin == BuiltInCullDistance;
2799 }
2800 
2801 bool CompilerGLSL::should_force_emit_builtin_block(StorageClass storage)
2802 {
2803 	// If the builtin block uses XFB, we need to force explicit redeclaration of the builtin block.
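	// E.g. capturing gl_Position with transform feedback requires redeclaring gl_PerVertex with an
	// explicit xfb_offset, so the implicitly declared block cannot be used as-is.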
2804 
2805 	if (storage != StorageClassOutput)
2806 		return false;
2807 	bool should_force = false;
2808 
2809 	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
2810 		if (should_force)
2811 			return;
2812 
2813 		auto &type = this->get<SPIRType>(var.basetype);
2814 		bool block = has_decoration(type.self, DecorationBlock);
2815 		if (var.storage == storage && block && is_builtin_variable(var))
2816 		{
2817 			uint32_t member_count = uint32_t(type.member_types.size());
2818 			for (uint32_t i = 0; i < member_count; i++)
2819 			{
2820 				if (has_member_decoration(type.self, i, DecorationBuiltIn) &&
2821 				    is_block_builtin(BuiltIn(get_member_decoration(type.self, i, DecorationBuiltIn))) &&
2822 				    has_member_decoration(type.self, i, DecorationOffset))
2823 				{
2824 					should_force = true;
2825 				}
2826 			}
2827 		}
2828 		else if (var.storage == storage && !block && is_builtin_variable(var))
2829 		{
2830 			if (is_block_builtin(BuiltIn(get_decoration(type.self, DecorationBuiltIn))) &&
2831 			    has_decoration(var.self, DecorationOffset))
2832 			{
2833 				should_force = true;
2834 			}
2835 		}
2836 	});
2837 
2838 	// If we're declaring clip/cull planes with control points we need to force block declaration.
2839 	if (get_execution_model() == ExecutionModelTessellationControl &&
2840 	    (clip_distance_count || cull_distance_count))
2841 	{
2842 		should_force = true;
2843 	}
2844 
2845 	return should_force;
2846 }
2847 
2848 void CompilerGLSL::fixup_implicit_builtin_block_names()
2849 {
2850 	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
2851 		auto &type = this->get<SPIRType>(var.basetype);
2852 		bool block = has_decoration(type.self, DecorationBlock);
2853 		if ((var.storage == StorageClassOutput || var.storage == StorageClassInput) && block &&
2854 		    is_builtin_variable(var))
2855 		{
2856 			// Make sure the array has a supported name in the code.
2857 			if (var.storage == StorageClassOutput)
2858 				set_name(var.self, "gl_out");
2859 			else if (var.storage == StorageClassInput)
2860 				set_name(var.self, "gl_in");
2861 		}
2862 	});
2863 }
2864 
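// A rough sketch of the kind of block this function emits for a vertex shader that captures
// gl_Position with transform feedback (exact layout qualifiers depend on the decorations present):
//
//   layout(xfb_buffer = 0, xfb_stride = 16) out gl_PerVertex
//   {
//       layout(xfb_offset = 0) vec4 gl_Position;
//   };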
2865 void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionModel model)
2866 {
2867 	Bitset emitted_builtins;
2868 	Bitset global_builtins;
2869 	const SPIRVariable *block_var = nullptr;
2870 	bool emitted_block = false;
2871 	bool builtin_array = false;
2872 
2873 	// Need to use declared size in the type.
2874 	// These variables might have been declared, but not statically used, so we haven't deduced their size yet.
2875 	uint32_t cull_distance_size = 0;
2876 	uint32_t clip_distance_size = 0;
2877 
2878 	bool have_xfb_buffer_stride = false;
2879 	bool have_geom_stream = false;
2880 	bool have_any_xfb_offset = false;
2881 	uint32_t xfb_stride = 0, xfb_buffer = 0, geom_stream = 0;
2882 	std::unordered_map<uint32_t, uint32_t> builtin_xfb_offsets;
2883 
2884 	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
2885 		auto &type = this->get<SPIRType>(var.basetype);
2886 		bool block = has_decoration(type.self, DecorationBlock);
2887 		Bitset builtins;
2888 
2889 		if (var.storage == storage && block && is_builtin_variable(var))
2890 		{
2891 			uint32_t index = 0;
2892 			for (auto &m : ir.meta[type.self].members)
2893 			{
2894 				if (m.builtin)
2895 				{
2896 					builtins.set(m.builtin_type);
2897 					if (m.builtin_type == BuiltInCullDistance)
2898 						cull_distance_size = to_array_size_literal(this->get<SPIRType>(type.member_types[index]));
2899 					else if (m.builtin_type == BuiltInClipDistance)
2900 						clip_distance_size = to_array_size_literal(this->get<SPIRType>(type.member_types[index]));
2901 
2902 					if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationOffset))
2903 					{
2904 						have_any_xfb_offset = true;
2905 						builtin_xfb_offsets[m.builtin_type] = m.offset;
2906 					}
2907 
2908 					if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationStream))
2909 					{
2910 						uint32_t stream = m.stream;
2911 						if (have_geom_stream && geom_stream != stream)
2912 							SPIRV_CROSS_THROW("IO block member Stream mismatch.");
2913 						have_geom_stream = true;
2914 						geom_stream = stream;
2915 					}
2916 				}
2917 				index++;
2918 			}
2919 
2920 			if (storage == StorageClassOutput && has_decoration(var.self, DecorationXfbBuffer) &&
2921 			    has_decoration(var.self, DecorationXfbStride))
2922 			{
2923 				uint32_t buffer_index = get_decoration(var.self, DecorationXfbBuffer);
2924 				uint32_t stride = get_decoration(var.self, DecorationXfbStride);
2925 				if (have_xfb_buffer_stride && buffer_index != xfb_buffer)
2926 					SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch.");
2927 				if (have_xfb_buffer_stride && stride != xfb_stride)
2928 					SPIRV_CROSS_THROW("IO block member XfbStride mismatch.");
2929 				have_xfb_buffer_stride = true;
2930 				xfb_buffer = buffer_index;
2931 				xfb_stride = stride;
2932 			}
2933 
2934 			if (storage == StorageClassOutput && has_decoration(var.self, DecorationStream))
2935 			{
2936 				uint32_t stream = get_decoration(var.self, DecorationStream);
2937 				if (have_geom_stream && geom_stream != stream)
2938 					SPIRV_CROSS_THROW("IO block member Stream mismatch.");
2939 				have_geom_stream = true;
2940 				geom_stream = stream;
2941 			}
2942 		}
2943 		else if (var.storage == storage && !block && is_builtin_variable(var))
2944 		{
2945 			// While we're at it, collect all declared global builtins (HLSL mostly ...).
2946 			auto &m = ir.meta[var.self].decoration;
2947 			if (m.builtin)
2948 			{
2949 				global_builtins.set(m.builtin_type);
2950 				if (m.builtin_type == BuiltInCullDistance)
2951 					cull_distance_size = to_array_size_literal(type);
2952 				else if (m.builtin_type == BuiltInClipDistance)
2953 					clip_distance_size = to_array_size_literal(type);
2954 
2955 				if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationXfbStride) &&
2956 				    m.decoration_flags.get(DecorationXfbBuffer) && m.decoration_flags.get(DecorationOffset))
2957 				{
2958 					have_any_xfb_offset = true;
2959 					builtin_xfb_offsets[m.builtin_type] = m.offset;
2960 					uint32_t buffer_index = m.xfb_buffer;
2961 					uint32_t stride = m.xfb_stride;
2962 					if (have_xfb_buffer_stride && buffer_index != xfb_buffer)
2963 						SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch.");
2964 					if (have_xfb_buffer_stride && stride != xfb_stride)
2965 						SPIRV_CROSS_THROW("IO block member XfbStride mismatch.");
2966 					have_xfb_buffer_stride = true;
2967 					xfb_buffer = buffer_index;
2968 					xfb_stride = stride;
2969 				}
2970 
2971 				if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationStream))
2972 				{
2973 					uint32_t stream = get_decoration(var.self, DecorationStream);
2974 					if (have_geom_stream && geom_stream != stream)
2975 						SPIRV_CROSS_THROW("IO block member Stream mismatch.");
2976 					have_geom_stream = true;
2977 					geom_stream = stream;
2978 				}
2979 			}
2980 		}
2981 
2982 		if (builtins.empty())
2983 			return;
2984 
2985 		if (emitted_block)
2986 			SPIRV_CROSS_THROW("Cannot use more than one builtin I/O block.");
2987 
2988 		emitted_builtins = builtins;
2989 		emitted_block = true;
2990 		builtin_array = !type.array.empty();
2991 		block_var = &var;
2992 	});
2993 
2994 	global_builtins =
2995 	    Bitset(global_builtins.get_lower() & ((1ull << BuiltInPosition) | (1ull << BuiltInPointSize) |
2996 	                                          (1ull << BuiltInClipDistance) | (1ull << BuiltInCullDistance)));
2997 
2998 	// Try to collect all other declared builtins.
2999 	if (!emitted_block)
3000 		emitted_builtins = global_builtins;
3001 
3002 	// Can't declare an empty interface block.
3003 	if (emitted_builtins.empty())
3004 		return;
3005 
3006 	if (storage == StorageClassOutput)
3007 	{
3008 		SmallVector<string> attr;
3009 		if (have_xfb_buffer_stride && have_any_xfb_offset)
3010 		{
3011 			if (!options.es)
3012 			{
3013 				if (options.version < 440 && options.version >= 140)
3014 					require_extension_internal("GL_ARB_enhanced_layouts");
3015 				else if (options.version < 140)
3016 					SPIRV_CROSS_THROW("Transform feedback (xfb_buffer/xfb_stride) is not supported in targets below GLSL 1.40.");
3017 				if (!options.es && options.version < 440)
3018 					require_extension_internal("GL_ARB_enhanced_layouts");
3019 			}
3020 			else if (options.es)
3021 				SPIRV_CROSS_THROW("Need GL_ARB_enhanced_layouts for xfb_stride or xfb_buffer.");
3022 			attr.push_back(join("xfb_buffer = ", xfb_buffer, ", xfb_stride = ", xfb_stride));
3023 		}
3024 
3025 		if (have_geom_stream)
3026 		{
3027 			if (get_execution_model() != ExecutionModelGeometry)
3028 				SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders.");
3029 			if (options.es)
3030 				SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL.");
3031 			if (options.version < 400)
3032 				require_extension_internal("GL_ARB_transform_feedback3");
3033 			attr.push_back(join("stream = ", geom_stream));
3034 		}
3035 
3036 		if (!attr.empty())
3037 			statement("layout(", merge(attr), ") out gl_PerVertex");
3038 		else
3039 			statement("out gl_PerVertex");
3040 	}
3041 	else
3042 	{
3043 		// If geometry passthrough is used, the gl_PerVertex input block must be passthrough as well.
3044 		if (get_entry_point().geometry_passthrough)
3045 			statement("layout(passthrough) in gl_PerVertex");
3046 		else
3047 			statement("in gl_PerVertex");
3048 	}
3049 
3050 	begin_scope();
3051 	if (emitted_builtins.get(BuiltInPosition))
3052 	{
3053 		auto itr = builtin_xfb_offsets.find(BuiltInPosition);
3054 		if (itr != end(builtin_xfb_offsets))
3055 			statement("layout(xfb_offset = ", itr->second, ") vec4 gl_Position;");
3056 		else
3057 			statement("vec4 gl_Position;");
3058 	}
3059 
3060 	if (emitted_builtins.get(BuiltInPointSize))
3061 	{
3062 		auto itr = builtin_xfb_offsets.find(BuiltInPointSize);
3063 		if (itr != end(builtin_xfb_offsets))
3064 			statement("layout(xfb_offset = ", itr->second, ") float gl_PointSize;");
3065 		else
3066 			statement("float gl_PointSize;");
3067 	}
3068 
3069 	if (emitted_builtins.get(BuiltInClipDistance))
3070 	{
3071 		auto itr = builtin_xfb_offsets.find(BuiltInClipDistance);
3072 		if (itr != end(builtin_xfb_offsets))
3073 			statement("layout(xfb_offset = ", itr->second, ") float gl_ClipDistance[", clip_distance_size, "];");
3074 		else
3075 			statement("float gl_ClipDistance[", clip_distance_size, "];");
3076 	}
3077 
3078 	if (emitted_builtins.get(BuiltInCullDistance))
3079 	{
3080 		auto itr = builtin_xfb_offsets.find(BuiltInCullDistance);
3081 		if (itr != end(builtin_xfb_offsets))
3082 			statement("layout(xfb_offset = ", itr->second, ") float gl_CullDistance[", cull_distance_size, "];");
3083 		else
3084 			statement("float gl_CullDistance[", cull_distance_size, "];");
3085 	}
3086 
3087 	if (builtin_array)
3088 	{
3089 		if (model == ExecutionModelTessellationControl && storage == StorageClassOutput)
3090 			end_scope_decl(join(to_name(block_var->self), "[", get_entry_point().output_vertices, "]"));
3091 		else
3092 			end_scope_decl(join(to_name(block_var->self), "[]"));
3093 	}
3094 	else
3095 		end_scope_decl();
3096 	statement("");
3097 }
3098 
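// Emits declarations for OpUndef values. For example (a sketch), an undefined vec4 with id 42 becomes:
//   vec4 _42;
// or, with force_zero_initialized_variables enabled:
//   vec4 _42 = vec4(0.0);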
3099 void CompilerGLSL::declare_undefined_values()
3100 {
3101 	bool emitted = false;
3102 	ir.for_each_typed_id<SPIRUndef>([&](uint32_t, const SPIRUndef &undef) {
3103 		auto &type = this->get<SPIRType>(undef.basetype);
3104 		// OpUndef can be void for some reason ...
3105 		if (type.basetype == SPIRType::Void)
3106 			return;
3107 
3108 		string initializer;
3109 		if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
3110 			initializer = join(" = ", to_zero_initialized_expression(undef.basetype));
3111 
3112 		statement(variable_decl(type, to_name(undef.self), undef.self), initializer, ";");
3113 		emitted = true;
3114 	});
3115 
3116 	if (emitted)
3117 		statement("");
3118 }
3119 
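// Returns true if the variable is only ever used as a constant lookup table,
// which lets us emit it as a constant array instead of a mutable variable, e.g. (a sketch; name illustrative):
//   const float lut[4] = float[](1.0, 2.0, 3.0, 4.0);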
3120 bool CompilerGLSL::variable_is_lut(const SPIRVariable &var) const
3121 {
3122 	bool statically_assigned = var.statically_assigned && var.static_expression != ID(0) && var.remapped_variable;
3123 
3124 	if (statically_assigned)
3125 	{
3126 		auto *constant = maybe_get<SPIRConstant>(var.static_expression);
3127 		if (constant && constant->is_used_as_lut)
3128 			return true;
3129 	}
3130 
3131 	return false;
3132 }
3133 
3134 void CompilerGLSL::emit_resources()
3135 {
3136 	auto &execution = get_entry_point();
3137 
3138 	replace_illegal_names();
3139 
3140 	// Legacy GL uses gl_FragData[], redeclare all fragment outputs
3141 	// with builtins.
3142 	if (execution.model == ExecutionModelFragment && is_legacy())
3143 		replace_fragment_outputs();
3144 
3145 	// Emit PLS blocks if we have such variables.
3146 	if (!pls_inputs.empty() || !pls_outputs.empty())
3147 		emit_pls();
3148 
3149 	switch (execution.model)
3150 	{
3151 	case ExecutionModelGeometry:
3152 	case ExecutionModelTessellationControl:
3153 	case ExecutionModelTessellationEvaluation:
3154 		fixup_implicit_builtin_block_names();
3155 		break;
3156 
3157 	default:
3158 		break;
3159 	}
3160 
3161 	// Emit custom gl_PerVertex for SSO compatibility.
3162 	if (options.separate_shader_objects && !options.es && execution.model != ExecutionModelFragment)
3163 	{
3164 		switch (execution.model)
3165 		{
3166 		case ExecutionModelGeometry:
3167 		case ExecutionModelTessellationControl:
3168 		case ExecutionModelTessellationEvaluation:
3169 			emit_declared_builtin_block(StorageClassInput, execution.model);
3170 			emit_declared_builtin_block(StorageClassOutput, execution.model);
3171 			break;
3172 
3173 		case ExecutionModelVertex:
3174 			emit_declared_builtin_block(StorageClassOutput, execution.model);
3175 			break;
3176 
3177 		default:
3178 			break;
3179 		}
3180 	}
3181 	else if (should_force_emit_builtin_block(StorageClassOutput))
3182 	{
3183 		emit_declared_builtin_block(StorageClassOutput, execution.model);
3184 	}
3185 	else if (execution.geometry_passthrough)
3186 	{
3187 		// Need to declare gl_in with Passthrough.
3188 		// If we're doing passthrough, we cannot emit an output block, so the output block test above will never pass.
3189 		emit_declared_builtin_block(StorageClassInput, execution.model);
3190 	}
3191 	else
3192 	{
3193 		// Need to redeclare clip/cull distance with explicit size to use them.
3194 		// SPIR-V mandates these builtins have a size declared.
3195 		const char *storage = execution.model == ExecutionModelFragment ? "in" : "out";
3196 		if (clip_distance_count != 0)
3197 			statement(storage, " float gl_ClipDistance[", clip_distance_count, "];");
3198 		if (cull_distance_count != 0)
3199 			statement(storage, " float gl_CullDistance[", cull_distance_count, "];");
3200 		if (clip_distance_count != 0 || cull_distance_count != 0)
3201 			statement("");
3202 	}
3203 
3204 	if (position_invariant)
3205 	{
3206 		statement("invariant gl_Position;");
3207 		statement("");
3208 	}
3209 
3210 	bool emitted = false;
3211 
3212 	// If emitting Vulkan GLSL,
3213 	// emit specialization constants as actual constants;
3214 	// spec op expressions will redirect to the constant name.
3215 	//
3216 	{
3217 		auto loop_lock = ir.create_loop_hard_lock();
3218 		for (auto &id_ : ir.ids_for_constant_or_type)
3219 		{
3220 			auto &id = ir.ids[id_];
3221 
3222 			if (id.get_type() == TypeConstant)
3223 			{
3224 				auto &c = id.get<SPIRConstant>();
3225 
3226 				bool needs_declaration = c.specialization || c.is_used_as_lut;
3227 
3228 				if (needs_declaration)
3229 				{
3230 					if (!options.vulkan_semantics && c.specialization)
3231 					{
3232 						c.specialization_constant_macro_name =
3233 						    constant_value_macro_name(get_decoration(c.self, DecorationSpecId));
3234 					}
3235 					emit_constant(c);
3236 					emitted = true;
3237 				}
3238 			}
3239 			else if (id.get_type() == TypeConstantOp)
3240 			{
3241 				emit_specialization_constant_op(id.get<SPIRConstantOp>());
3242 				emitted = true;
3243 			}
3244 			else if (id.get_type() == TypeType)
3245 			{
3246 				auto *type = &id.get<SPIRType>();
3247 
3248 				bool is_natural_struct = type->basetype == SPIRType::Struct && type->array.empty() && !type->pointer &&
3249 				                         (!has_decoration(type->self, DecorationBlock) &&
3250 				                          !has_decoration(type->self, DecorationBufferBlock));
3251 
3252 				// Special case, ray payload and hit attribute blocks are not really blocks, just regular structs.
3253 				if (type->basetype == SPIRType::Struct && type->pointer &&
3254 				    has_decoration(type->self, DecorationBlock) &&
3255 				    (type->storage == StorageClassRayPayloadKHR || type->storage == StorageClassIncomingRayPayloadKHR ||
3256 				     type->storage == StorageClassHitAttributeKHR))
3257 				{
3258 					type = &get<SPIRType>(type->parent_type);
3259 					is_natural_struct = true;
3260 				}
3261 
3262 				if (is_natural_struct)
3263 				{
3264 					if (emitted)
3265 						statement("");
3266 					emitted = false;
3267 
3268 					emit_struct(*type);
3269 				}
3270 			}
3271 		}
3272 	}
3273 
3274 	if (emitted)
3275 		statement("");
3276 
3277 	// If we needed to declare work group size late, check here.
3278 	// If the work group size depends on a specialization constant, we need to declare the layout() block
3279 	// after constants (and their macros) have been declared.
3280 	if (execution.model == ExecutionModelGLCompute && !options.vulkan_semantics &&
3281 	    execution.workgroup_size.constant != 0)
3282 	{
3283 		SpecializationConstant wg_x, wg_y, wg_z;
3284 		get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);
3285 
3286 		if ((wg_x.id != ConstantID(0)) || (wg_y.id != ConstantID(0)) || (wg_z.id != ConstantID(0)))
3287 		{
3288 			SmallVector<string> inputs;
3289 			build_workgroup_size(inputs, wg_x, wg_y, wg_z);
3290 			statement("layout(", merge(inputs), ") in;");
3291 			statement("");
3292 		}
3293 	}
3294 
3295 	emitted = false;
3296 
3297 	if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
3298 	{
3299 		for (auto type : physical_storage_non_block_pointer_types)
3300 		{
3301 			emit_buffer_reference_block(get<SPIRType>(type), false);
3302 		}
3303 
3304 		// Output buffer reference blocks.
3305 		// Do this in two stages, one with forward declaration,
3306 		// and one without. Buffer reference blocks can reference themselves
3307 		// to support things like linked lists.
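		// A sketch of the kind of output this two-stage emission produces for a self-referencing block:
		//   layout(buffer_reference) buffer Node;
		//   layout(buffer_reference, std430) buffer Node { vec4 value; Node next; };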
3308 		ir.for_each_typed_id<SPIRType>([&](uint32_t, SPIRType &type) {
3309 			bool has_block_flags = has_decoration(type.self, DecorationBlock);
3310 			if (has_block_flags && type.pointer && type.pointer_depth == 1 && !type_is_array_of_pointers(type) &&
3311 			    type.storage == StorageClassPhysicalStorageBufferEXT)
3312 			{
3313 				emit_buffer_reference_block(type, true);
3314 			}
3315 		});
3316 
3317 		ir.for_each_typed_id<SPIRType>([&](uint32_t, SPIRType &type) {
3318 			bool has_block_flags = has_decoration(type.self, DecorationBlock);
3319 			if (has_block_flags && type.pointer && type.pointer_depth == 1 && !type_is_array_of_pointers(type) &&
3320 			    type.storage == StorageClassPhysicalStorageBufferEXT)
3321 			{
3322 				emit_buffer_reference_block(type, false);
3323 			}
3324 		});
3325 	}
3326 
3327 	// Output UBOs and SSBOs
3328 	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
3329 		auto &type = this->get<SPIRType>(var.basetype);
3330 
3331 		bool is_block_storage = type.storage == StorageClassStorageBuffer || type.storage == StorageClassUniform ||
3332 		                        type.storage == StorageClassShaderRecordBufferKHR;
3333 		bool has_block_flags = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) ||
3334 		                       ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
3335 
3336 		if (var.storage != StorageClassFunction && type.pointer && is_block_storage && !is_hidden_variable(var) &&
3337 		    has_block_flags)
3338 		{
3339 			emit_buffer_block(var);
3340 		}
3341 	});
3342 
3343 	// Output push constant blocks
3344 	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
3345 		auto &type = this->get<SPIRType>(var.basetype);
3346 		if (var.storage != StorageClassFunction && type.pointer && type.storage == StorageClassPushConstant &&
3347 		    !is_hidden_variable(var))
3348 		{
3349 			emit_push_constant_block(var);
3350 		}
3351 	});
3352 
3353 	bool skip_separate_image_sampler = !combined_image_samplers.empty() || !options.vulkan_semantics;
3354 
3355 	// Output Uniform Constants (values, samplers, images, etc).
3356 	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
3357 		auto &type = this->get<SPIRType>(var.basetype);
3358 
3359 		// If we're remapping separate samplers and images, only emit the combined samplers.
3360 		if (skip_separate_image_sampler)
3361 		{
3362 			// Sampler buffers are always used without a sampler, and they will also work in regular GL.
3363 			bool sampler_buffer = type.basetype == SPIRType::Image && type.image.dim == DimBuffer;
3364 			bool separate_image = type.basetype == SPIRType::Image && type.image.sampled == 1;
3365 			bool separate_sampler = type.basetype == SPIRType::Sampler;
3366 			if (!sampler_buffer && (separate_image || separate_sampler))
3367 				return;
3368 		}
3369 
3370 		if (var.storage != StorageClassFunction && type.pointer &&
3371 		    (type.storage == StorageClassUniformConstant || type.storage == StorageClassAtomicCounter ||
3372 		     type.storage == StorageClassRayPayloadKHR || type.storage == StorageClassIncomingRayPayloadKHR ||
3373 		     type.storage == StorageClassCallableDataKHR || type.storage == StorageClassIncomingCallableDataKHR ||
3374 		     type.storage == StorageClassHitAttributeKHR) &&
3375 		    !is_hidden_variable(var))
3376 		{
3377 			emit_uniform(var);
3378 			emitted = true;
3379 		}
3380 	});
3381 
3382 	if (emitted)
3383 		statement("");
3384 	emitted = false;
3385 
3386 	bool emitted_base_instance = false;
3387 
3388 	// Output in/out interfaces.
3389 	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
3390 		auto &type = this->get<SPIRType>(var.basetype);
3391 
3392 		bool is_hidden = is_hidden_variable(var);
3393 
3394 		// Unused output I/O variables might still be required to implement framebuffer fetch.
3395 		if (var.storage == StorageClassOutput && !is_legacy() &&
3396 		    inout_color_attachments.count(get_decoration(var.self, DecorationLocation)) != 0)
3397 		{
3398 			is_hidden = false;
3399 		}
3400 
3401 		if (var.storage != StorageClassFunction && type.pointer &&
3402 		    (var.storage == StorageClassInput || var.storage == StorageClassOutput) &&
3403 		    interface_variable_exists_in_entry_point(var.self) && !is_hidden)
3404 		{
3405 			emit_interface_block(var);
3406 			emitted = true;
3407 		}
3408 		else if (is_builtin_variable(var))
3409 		{
3410 			auto builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn));
3411 			// For gl_InstanceIndex emulation on GLES, the API user needs to
3412 			// supply this uniform.
3413 
3414 			// The draw parameter extension is soft-enabled on GL with some fallbacks.
3415 			if (!options.vulkan_semantics)
3416 			{
3417 				if (!emitted_base_instance &&
3418 				    ((options.vertex.support_nonzero_base_instance && builtin == BuiltInInstanceIndex) ||
3419 				     (builtin == BuiltInBaseInstance)))
3420 				{
3421 					statement("#ifdef GL_ARB_shader_draw_parameters");
3422 					statement("#define SPIRV_Cross_BaseInstance gl_BaseInstanceARB");
3423 					statement("#else");
3424 					// A crude, but simple workaround which should be good enough for non-indirect draws.
3425 					statement("uniform int SPIRV_Cross_BaseInstance;");
3426 					statement("#endif");
3427 					emitted = true;
3428 					emitted_base_instance = true;
3429 				}
3430 				else if (builtin == BuiltInBaseVertex)
3431 				{
3432 					statement("#ifdef GL_ARB_shader_draw_parameters");
3433 					statement("#define SPIRV_Cross_BaseVertex gl_BaseVertexARB");
3434 					statement("#else");
3435 					// A crude, but simple workaround which should be good enough for non-indirect draws.
3436 					statement("uniform int SPIRV_Cross_BaseVertex;");
3437 					statement("#endif");
3438 				}
3439 				else if (builtin == BuiltInDrawIndex)
3440 				{
3441 					statement("#ifndef GL_ARB_shader_draw_parameters");
3442 					// Cannot really be worked around.
3443 					statement("#error GL_ARB_shader_draw_parameters is not supported.");
3444 					statement("#endif");
3445 				}
3446 			}
3447 		}
3448 	});
3449 
3450 	// Global variables.
3451 	for (auto global : global_variables)
3452 	{
3453 		auto &var = get<SPIRVariable>(global);
3454 		if (var.storage != StorageClassOutput)
3455 		{
3456 			if (!variable_is_lut(var))
3457 			{
3458 				add_resource_name(var.self);
3459 
3460 				string initializer;
3461 				if (options.force_zero_initialized_variables && var.storage == StorageClassPrivate &&
3462 				    !var.initializer && !var.static_expression && type_can_zero_initialize(get_variable_data_type(var)))
3463 				{
3464 					initializer = join(" = ", to_zero_initialized_expression(get_variable_data_type_id(var)));
3465 				}
3466 
3467 				statement(variable_decl(var), initializer, ";");
3468 				emitted = true;
3469 			}
3470 		}
3471 		else if (var.initializer && maybe_get<SPIRConstant>(var.initializer) != nullptr)
3472 		{
3473 			emit_output_variable_initializer(var);
3474 		}
3475 	}
3476 
3477 	if (emitted)
3478 		statement("");
3479 
3480 	declare_undefined_values();
3481 }
3482 
3483 void CompilerGLSL::emit_output_variable_initializer(const SPIRVariable &var)
3484 {
3485 	// If a StorageClassOutput variable has an initializer, we need to initialize it in main().
3486 	auto &entry_func = this->get<SPIRFunction>(ir.default_entry_point);
3487 	auto &type = get<SPIRType>(var.basetype);
3488 	bool is_patch = has_decoration(var.self, DecorationPatch);
3489 	bool is_block = has_decoration(type.self, DecorationBlock);
3490 	bool is_control_point = get_execution_model() == ExecutionModelTessellationControl && !is_patch;
3491 
3492 	if (is_block)
3493 	{
3494 		uint32_t member_count = uint32_t(type.member_types.size());
3495 		bool type_is_array = type.array.size() == 1;
3496 		uint32_t array_size = 1;
3497 		if (type_is_array)
3498 			array_size = to_array_size_literal(type);
3499 		uint32_t iteration_count = is_control_point ? 1 : array_size;
3500 
3501 		// If the initializer is a block, we must initialize each block member one at a time.
3502 		for (uint32_t i = 0; i < member_count; i++)
3503 		{
3504 			// These outputs might not have been properly declared, so don't initialize them in that case.
3505 			if (has_member_decoration(type.self, i, DecorationBuiltIn))
3506 			{
3507 				if (get_member_decoration(type.self, i, DecorationBuiltIn) == BuiltInCullDistance &&
3508 				    !cull_distance_count)
3509 					continue;
3510 
3511 				if (get_member_decoration(type.self, i, DecorationBuiltIn) == BuiltInClipDistance &&
3512 				    !clip_distance_count)
3513 					continue;
3514 			}
3515 
3516 			// We need to build a per-member array first, essentially transposing from AoS to SoA.
3517 			// This code path hits when we have an array of blocks.
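			// For example (a sketch; names are illustrative), an initialized output block array such as
			//   out VertData { vec4 a; float b; } vout[2];
			// gets one constant LUT per member, e.g.
			//   const vec4 _<id>_0_init[2] = vec4[](..., ...);
			// which the fixup hooks below then copy element by element at the top of main().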
3518 			string lut_name;
3519 			if (type_is_array)
3520 			{
3521 				lut_name = join("_", var.self, "_", i, "_init");
3522 				uint32_t member_type_id = get<SPIRType>(var.basetype).member_types[i];
3523 				auto &member_type = get<SPIRType>(member_type_id);
3524 				auto array_type = member_type;
3525 				array_type.parent_type = member_type_id;
3526 				array_type.array.push_back(array_size);
3527 				array_type.array_size_literal.push_back(true);
3528 
3529 				SmallVector<string> exprs;
3530 				exprs.reserve(array_size);
3531 				auto &c = get<SPIRConstant>(var.initializer);
3532 				for (uint32_t j = 0; j < array_size; j++)
3533 					exprs.push_back(to_expression(get<SPIRConstant>(c.subconstants[j]).subconstants[i]));
3534 				statement("const ", type_to_glsl(array_type), " ", lut_name, type_to_array_glsl(array_type), " = ",
3535 				          type_to_glsl_constructor(array_type), "(", merge(exprs, ", "), ");");
3536 			}
3537 
3538 			for (uint32_t j = 0; j < iteration_count; j++)
3539 			{
3540 				entry_func.fixup_hooks_in.push_back([=, &var]() {
3541 					AccessChainMeta meta;
3542 					auto &c = this->get<SPIRConstant>(var.initializer);
3543 
3544 					uint32_t invocation_id = 0;
3545 					uint32_t member_index_id = 0;
3546 					if (is_control_point)
3547 					{
3548 						uint32_t ids = ir.increase_bound_by(3);
3549 						SPIRType uint_type;
3550 						uint_type.basetype = SPIRType::UInt;
3551 						uint_type.width = 32;
3552 						set<SPIRType>(ids, uint_type);
3553 						set<SPIRExpression>(ids + 1, builtin_to_glsl(BuiltInInvocationId, StorageClassInput), ids, true);
3554 						set<SPIRConstant>(ids + 2, ids, i, false);
3555 						invocation_id = ids + 1;
3556 						member_index_id = ids + 2;
3557 					}
3558 
3559 					if (is_patch)
3560 					{
3561 						statement("if (gl_InvocationID == 0)");
3562 						begin_scope();
3563 					}
3564 
3565 					if (type_is_array && !is_control_point)
3566 					{
3567 						uint32_t indices[2] = { j, i };
3568 						auto chain = access_chain_internal(var.self, indices, 2, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &meta);
3569 						statement(chain, " = ", lut_name, "[", j, "];");
3570 					}
3571 					else if (is_control_point)
3572 					{
3573 						uint32_t indices[2] = { invocation_id, member_index_id };
3574 						auto chain = access_chain_internal(var.self, indices, 2, 0, &meta);
3575 						statement(chain, " = ", lut_name, "[", builtin_to_glsl(BuiltInInvocationId, StorageClassInput), "];");
3576 					}
3577 					else
3578 					{
3579 						auto chain =
3580 								access_chain_internal(var.self, &i, 1, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &meta);
3581 						statement(chain, " = ", to_expression(c.subconstants[i]), ";");
3582 					}
3583 
3584 					if (is_patch)
3585 						end_scope();
3586 				});
3587 			}
3588 		}
3589 	}
3590 	else if (is_control_point)
3591 	{
3592 		auto lut_name = join("_", var.self, "_init");
3593 		statement("const ", type_to_glsl(type), " ", lut_name, type_to_array_glsl(type),
3594 		          " = ", to_expression(var.initializer), ";");
3595 		entry_func.fixup_hooks_in.push_back([&, lut_name]() {
3596 			statement(to_expression(var.self), "[gl_InvocationID] = ", lut_name, "[gl_InvocationID];");
3597 		});
3598 	}
3599 	else
3600 	{
3601 		auto lut_name = join("_", var.self, "_init");
3602 		statement("const ", type_to_glsl(type), " ", lut_name,
3603 		          type_to_array_glsl(type), " = ", to_expression(var.initializer), ";");
3604 		entry_func.fixup_hooks_in.push_back([&, lut_name, is_patch]() {
3605 			if (is_patch)
3606 			{
3607 				statement("if (gl_InvocationID == 0)");
3608 				begin_scope();
3609 			}
3610 			statement(to_expression(var.self), " = ", lut_name, ";");
3611 			if (is_patch)
3612 				end_scope();
3613 		});
3614 	}
3615 }
3616 
3617 void CompilerGLSL::emit_extension_workarounds(spv::ExecutionModel model)
3618 {
3619 	static const char *workaround_types[] = { "int",   "ivec2", "ivec3", "ivec4", "uint",   "uvec2", "uvec3", "uvec4",
3620 		                                      "float", "vec2",  "vec3",  "vec4",  "double", "dvec2", "dvec3", "dvec4" };
3621 
3622 	if (!options.vulkan_semantics)
3623 	{
3624 		using Supp = ShaderSubgroupSupportHelper;
3625 		auto result = shader_subgroup_supporter.resolve();
3626 
3627 		if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupMask))
3628 		{
3629 			auto exts = Supp::get_candidates_for_feature(Supp::SubgroupMask, result);
3630 
3631 			for (auto &e : exts)
3632 			{
3633 				const char *name = Supp::get_extension_name(e);
3634 				statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
3635 
3636 				switch (e)
3637 				{
3638 				case Supp::NV_shader_thread_group:
3639 					statement("#define gl_SubgroupEqMask uvec4(gl_ThreadEqMaskNV, 0u, 0u, 0u)");
3640 					statement("#define gl_SubgroupGeMask uvec4(gl_ThreadGeMaskNV, 0u, 0u, 0u)");
3641 					statement("#define gl_SubgroupGtMask uvec4(gl_ThreadGtMaskNV, 0u, 0u, 0u)");
3642 					statement("#define gl_SubgroupLeMask uvec4(gl_ThreadLeMaskNV, 0u, 0u, 0u)");
3643 					statement("#define gl_SubgroupLtMask uvec4(gl_ThreadLtMaskNV, 0u, 0u, 0u)");
3644 					break;
3645 				case Supp::ARB_shader_ballot:
3646 					statement("#define gl_SubgroupEqMask uvec4(unpackUint2x32(gl_SubGroupEqMaskARB), 0u, 0u)");
3647 					statement("#define gl_SubgroupGeMask uvec4(unpackUint2x32(gl_SubGroupGeMaskARB), 0u, 0u)");
3648 					statement("#define gl_SubgroupGtMask uvec4(unpackUint2x32(gl_SubGroupGtMaskARB), 0u, 0u)");
3649 					statement("#define gl_SubgroupLeMask uvec4(unpackUint2x32(gl_SubGroupLeMaskARB), 0u, 0u)");
3650 					statement("#define gl_SubgroupLtMask uvec4(unpackUint2x32(gl_SubGroupLtMaskARB), 0u, 0u)");
3651 					break;
3652 				default:
3653 					break;
3654 				}
3655 			}
3656 			statement("#endif");
3657 			statement("");
3658 		}
3659 
3660 		if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupSize))
3661 		{
3662 			auto exts = Supp::get_candidates_for_feature(Supp::SubgroupSize, result);
3663 
3664 			for (auto &e : exts)
3665 			{
3666 				const char *name = Supp::get_extension_name(e);
3667 				statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
3668 
3669 				switch (e)
3670 				{
3671 				case Supp::NV_shader_thread_group:
3672 					statement("#define gl_SubgroupSize gl_WarpSizeNV");
3673 					break;
3674 				case Supp::ARB_shader_ballot:
3675 					statement("#define gl_SubgroupSize gl_SubGroupSizeARB");
3676 					break;
3677 				case Supp::AMD_gcn_shader:
3678 					statement("#define gl_SubgroupSize uint(gl_SIMDGroupSizeAMD)");
3679 					break;
3680 				default:
3681 					break;
3682 				}
3683 			}
3684 			statement("#endif");
3685 			statement("");
3686 		}
3687 
3688 		if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupInvocationID))
3689 		{
3690 			auto exts = Supp::get_candidates_for_feature(Supp::SubgroupInvocationID, result);
3691 
3692 			for (auto &e : exts)
3693 			{
3694 				const char *name = Supp::get_extension_name(e);
3695 				statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
3696 
3697 				switch (e)
3698 				{
3699 				case Supp::NV_shader_thread_group:
3700 					statement("#define gl_SubgroupInvocationID gl_ThreadInWarpNV");
3701 					break;
3702 				case Supp::ARB_shader_ballot:
3703 					statement("#define gl_SubgroupInvocationID gl_SubGroupInvocationARB");
3704 					break;
3705 				default:
3706 					break;
3707 				}
3708 			}
3709 			statement("#endif");
3710 			statement("");
3711 		}
3712 
3713 		if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupID))
3714 		{
3715 			auto exts = Supp::get_candidates_for_feature(Supp::SubgroupID, result);
3716 
3717 			for (auto &e : exts)
3718 			{
3719 				const char *name = Supp::get_extension_name(e);
3720 				statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
3721 
3722 				switch (e)
3723 				{
3724 				case Supp::NV_shader_thread_group:
3725 					statement("#define gl_SubgroupID gl_WarpIDNV");
3726 					break;
3727 				default:
3728 					break;
3729 				}
3730 			}
3731 			statement("#endif");
3732 			statement("");
3733 		}
3734 
3735 		if (shader_subgroup_supporter.is_feature_requested(Supp::NumSubgroups))
3736 		{
3737 			auto exts = Supp::get_candidates_for_feature(Supp::NumSubgroups, result);
3738 
3739 			for (auto &e : exts)
3740 			{
3741 				const char *name = Supp::get_extension_name(e);
3742 				statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
3743 
3744 				switch (e)
3745 				{
3746 				case Supp::NV_shader_thread_group:
3747 					statement("#define gl_NumSubgroups gl_WarpsPerSMNV");
3748 					break;
3749 				default:
3750 					break;
3751 				}
3752 			}
3753 			statement("#endif");
3754 			statement("");
3755 		}
3756 
3757 		if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBroadcast_First))
3758 		{
3759 			auto exts = Supp::get_candidates_for_feature(Supp::SubgroupBroadcast_First, result);
3760 
3761 			for (auto &e : exts)
3762 			{
3763 				const char *name = Supp::get_extension_name(e);
3764 				statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
3765 
3766 				switch (e)
3767 				{
3768 				case Supp::NV_shader_thread_shuffle:
3769 					for (const char *t : workaround_types)
3770 					{
3771 						statement(t, " subgroupBroadcastFirst(", t,
3772 						          " value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); }");
3773 					}
3774 					for (const char *t : workaround_types)
3775 					{
3776 						statement(t, " subgroupBroadcast(", t,
3777 						          " value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); }");
3778 					}
3779 					break;
3780 				case Supp::ARB_shader_ballot:
3781 					for (const char *t : workaround_types)
3782 					{
3783 						statement(t, " subgroupBroadcastFirst(", t,
3784 						          " value) { return readFirstInvocationARB(value); }");
3785 					}
3786 					for (const char *t : workaround_types)
3787 					{
3788 						statement(t, " subgroupBroadcast(", t,
3789 						          " value, uint id) { return readInvocationARB(value, id); }");
3790 					}
3791 					break;
3792 				default:
3793 					break;
3794 				}
3795 			}
3796 			statement("#endif");
3797 			statement("");
3798 		}
3799 
3800 		if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallotFindLSB_MSB))
3801 		{
3802 			auto exts = Supp::get_candidates_for_feature(Supp::SubgroupBallotFindLSB_MSB, result);
3803 
3804 			for (auto &e : exts)
3805 			{
3806 				const char *name = Supp::get_extension_name(e);
3807 				statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
3808 
3809 				switch (e)
3810 				{
3811 				case Supp::NV_shader_thread_group:
3812 					statement("uint subgroupBallotFindLSB(uvec4 value) { return findLSB(value.x); }");
3813 					statement("uint subgroupBallotFindMSB(uvec4 value) { return findMSB(value.x); }");
3814 					break;
3815 				default:
3816 					break;
3817 				}
3818 			}
3819 			statement("#else");
3820 			statement("uint subgroupBallotFindLSB(uvec4 value)");
3821 			begin_scope();
3822 			statement("int firstLive = findLSB(value.x);");
3823 			statement("return uint(firstLive != -1 ? firstLive : (findLSB(value.y) + 32));");
3824 			end_scope();
3825 			statement("uint subgroupBallotFindMSB(uvec4 value)");
3826 			begin_scope();
3827 			statement("int firstLive = findMSB(value.y);");
3828 			statement("return uint(firstLive != -1 ? (firstLive + 32) : findMSB(value.x));");
3829 			end_scope();
3830 			statement("#endif");
3831 			statement("");
3832 		}
3833 
3834 		if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupAll_Any_AllEqualBool))
3835 		{
3836 			auto exts = Supp::get_candidates_for_feature(Supp::SubgroupAll_Any_AllEqualBool, result);
3837 
3838 			for (auto &e : exts)
3839 			{
3840 				const char *name = Supp::get_extension_name(e);
3841 				statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
3842 
3843 				switch (e)
3844 				{
3845 				case Supp::NV_gpu_shader_5:
3846 					statement("bool subgroupAll(bool value) { return allThreadsNV(value); }");
3847 					statement("bool subgroupAny(bool value) { return anyThreadNV(value); }");
3848 					statement("bool subgroupAllEqual(bool value) { return allThreadsEqualNV(value); }");
3849 					break;
3850 				case Supp::ARB_shader_group_vote:
3851 					statement("bool subgroupAll(bool v) { return allInvocationsARB(v); }");
3852 					statement("bool subgroupAny(bool v) { return anyInvocationARB(v); }");
3853 					statement("bool subgroupAllEqual(bool v) { return allInvocationsEqualARB(v); }");
3854 					break;
3855 				case Supp::AMD_gcn_shader:
3856 					statement("bool subgroupAll(bool value) { return ballotAMD(value) == ballotAMD(true); }");
3857 					statement("bool subgroupAny(bool value) { return ballotAMD(value) != 0ull; }");
3858 					statement("bool subgroupAllEqual(bool value) { uint64_t b = ballotAMD(value); return b == 0ull || "
3859 					          "b == ballotAMD(true); }");
3860 					break;
3861 				default:
3862 					break;
3863 				}
3864 			}
3865 			statement("#endif");
3866 			statement("");
3867 		}
3868 
3869 		if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupAllEqualT))
3870 		{
3871 			statement("#ifndef GL_KHR_shader_subgroup_vote");
3872 			statement(
3873 			    "#define _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(type) bool subgroupAllEqual(type value) { return "
3874 			    "subgroupAllEqual(subgroupBroadcastFirst(value) == value); }");
3875 			for (const char *t : workaround_types)
3876 				statement("_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(", t, ")");
3877 			statement("#undef _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND");
3878 			statement("#endif");
3879 			statement("");
3880 		}
3881 
3882 		if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallot))
3883 		{
3884 			auto exts = Supp::get_candidates_for_feature(Supp::SubgroupBallot, result);
3885 
3886 			for (auto &e : exts)
3887 			{
3888 				const char *name = Supp::get_extension_name(e);
3889 				statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
3890 
3891 				switch (e)
3892 				{
3893 				case Supp::NV_shader_thread_group:
3894 					statement("uvec4 subgroupBallot(bool v) { return uvec4(ballotThreadNV(v), 0u, 0u, 0u); }");
3895 					break;
3896 				case Supp::ARB_shader_ballot:
3897 					statement("uvec4 subgroupBallot(bool v) { return uvec4(unpackUint2x32(ballotARB(v)), 0u, 0u); }");
3898 					break;
3899 				default:
3900 					break;
3901 				}
3902 			}
3903 			statement("#endif");
3904 			statement("");
3905 		}
3906 
3907 		if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupElect))
3908 		{
3909 			statement("#ifndef GL_KHR_shader_subgroup_basic");
3910 			statement("bool subgroupElect()");
3911 			begin_scope();
3912 			statement("uvec4 activeMask = subgroupBallot(true);");
3913 			statement("uint firstLive = subgroupBallotFindLSB(activeMask);");
3914 			statement("return gl_SubgroupInvocationID == firstLive;");
3915 			end_scope();
3916 			statement("#endif");
3917 			statement("");
3918 		}
3919 
3920 		if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBarrier))
3921 		{
3922 			// The extensions we're using in place of GL_KHR_shader_subgroup_basic state
3923 			// that subgroups execute in lockstep, so this barrier is implicit.
3924 			// However, the GL 4.6 spec also states that `barrier` implies a shared memory barrier,
3925 			// and a specific test of optimizing scans by leveraging lock-step invocation execution
3926 			// has shown that a `memoryBarrierShared` is needed in place of a `subgroupBarrier`.
3927 			// https://github.com/buildaworldnet/IrrlichtBAW/commit/d8536857991b89a30a6b65d29441e51b64c2c7ad#diff-9f898d27be1ea6fc79b03d9b361e299334c1a347b6e4dc344ee66110c6aa596aR19
3928 			statement("#ifndef GL_KHR_shader_subgroup_basic");
3929 			statement("void subgroupBarrier() { memoryBarrierShared(); }");
3930 			statement("#endif");
3931 			statement("");
3932 		}
3933 
3934 		if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupMemBarrier))
3935 		{
3936 			if (model == spv::ExecutionModelGLCompute)
3937 			{
3938 				statement("#ifndef GL_KHR_shader_subgroup_basic");
3939 				statement("void subgroupMemoryBarrier() { groupMemoryBarrier(); }");
3940 				statement("void subgroupMemoryBarrierBuffer() { groupMemoryBarrier(); }");
3941 				statement("void subgroupMemoryBarrierShared() { memoryBarrierShared(); }");
3942 				statement("void subgroupMemoryBarrierImage() { groupMemoryBarrier(); }");
3943 				statement("#endif");
3944 			}
3945 			else
3946 			{
3947 				statement("#ifndef GL_KHR_shader_subgroup_basic");
3948 				statement("void subgroupMemoryBarrier() { memoryBarrier(); }");
3949 				statement("void subgroupMemoryBarrierBuffer() { memoryBarrierBuffer(); }");
3950 				statement("void subgroupMemoryBarrierImage() { memoryBarrierImage(); }");
3951 				statement("#endif");
3952 			}
3953 			statement("");
3954 		}
3955 
3956 		if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupInverseBallot_InclBitCount_ExclBitCout))
3957 		{
3958 			statement("#ifndef GL_KHR_shader_subgroup_ballot");
3959 			statement("bool subgroupInverseBallot(uvec4 value)");
3960 			begin_scope();
3961 			statement("return any(notEqual(value.xy & gl_SubgroupEqMask.xy, uvec2(0u)));");
3962 			end_scope();
3963 
3964 			statement("uint subgroupBallotInclusiveBitCount(uvec4 value)");
3965 			begin_scope();
3966 			statement("uvec2 v = value.xy & gl_SubgroupLeMask.xy;");
3967 			statement("ivec2 c = bitCount(v);");
3968 			statement_no_indent("#ifdef GL_NV_shader_thread_group");
3969 			statement("return uint(c.x);");
3970 			statement_no_indent("#else");
3971 			statement("return uint(c.x + c.y);");
3972 			statement_no_indent("#endif");
3973 			end_scope();
3974 
3975 			statement("uint subgroupBallotExclusiveBitCount(uvec4 value)");
3976 			begin_scope();
3977 			statement("uvec2 v = value.xy & gl_SubgroupLtMask.xy;");
3978 			statement("ivec2 c = bitCount(v);");
3979 			statement_no_indent("#ifdef GL_NV_shader_thread_group");
3980 			statement("return uint(c.x);");
3981 			statement_no_indent("#else");
3982 			statement("return uint(c.x + c.y);");
3983 			statement_no_indent("#endif");
3984 			end_scope();
3985 			statement("#endif");
3986 			statement("");
3987 		}
3988 
3989 		if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallotBitCount))
3990 		{
3991 			statement("#ifndef GL_KHR_shader_subgroup_ballot");
3992 			statement("uint subgroupBallotBitCount(uvec4 value)");
3993 			begin_scope();
3994 			statement("ivec2 c = bitCount(value.xy);");
3995 			statement_no_indent("#ifdef GL_NV_shader_thread_group");
3996 			statement("return uint(c.x);");
3997 			statement_no_indent("#else");
3998 			statement("return uint(c.x + c.y);");
3999 			statement_no_indent("#endif");
4000 			end_scope();
4001 			statement("#endif");
4002 			statement("");
4003 		}
4004 
4005 		if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallotBitExtract))
4006 		{
4007 			statement("#ifndef GL_KHR_shader_subgroup_ballot");
4008 			statement("bool subgroupBallotBitExtract(uvec4 value, uint index)");
4009 			begin_scope();
4010 			statement_no_indent("#ifdef GL_NV_shader_thread_group");
4011 			statement("uint shifted = value.x >> index;");
4012 			statement_no_indent("#else");
4013 			statement("uint shifted = value[index >> 5u] >> (index & 0x1fu);");
4014 			statement_no_indent("#endif");
4015 			statement("return (shifted & 1u) != 0u;");
4016 			end_scope();
4017 			statement("#endif");
4018 			statement("");
4019 		}
4020 	}
4021 
4022 	if (!workaround_ubo_load_overload_types.empty())
4023 	{
4024 		for (auto &type_id : workaround_ubo_load_overload_types)
4025 		{
4026 			auto &type = get<SPIRType>(type_id);
4027 			statement(type_to_glsl(type), " spvWorkaroundRowMajor(", type_to_glsl(type),
4028 			          " wrap) { return wrap; }");
4029 		}
4030 		statement("");
4031 	}
4032 
4033 	if (requires_transpose_2x2)
4034 	{
4035 		statement("mat2 spvTranspose(mat2 m)");
4036 		begin_scope();
4037 		statement("return mat2(m[0][0], m[1][0], m[0][1], m[1][1]);");
4038 		end_scope();
4039 		statement("");
4040 	}
4041 
4042 	if (requires_transpose_3x3)
4043 	{
4044 		statement("mat3 spvTranspose(mat3 m)");
4045 		begin_scope();
4046 		statement("return mat3(m[0][0], m[1][0], m[2][0], m[0][1], m[1][1], m[2][1], m[0][2], m[1][2], m[2][2]);");
4047 		end_scope();
4048 		statement("");
4049 	}
4050 
4051 	if (requires_transpose_4x4)
4052 	{
4053 		statement("mat4 spvTranspose(mat4 m)");
4054 		begin_scope();
4055 		statement("return mat4(m[0][0], m[1][0], m[2][0], m[3][0], m[0][1], m[1][1], m[2][1], m[3][1], m[0][2], "
4056 		          "m[1][2], m[2][2], m[3][2], m[0][3], m[1][3], m[2][3], m[3][3]);");
4057 		end_scope();
4058 		statement("");
4059 	}
4060 }
4061 
4062 // Returns a string representation of the ID, usable as a function arg.
4063 // Default is to simply return the expression representation of the arg ID.
4064 // Subclasses may override to modify the return value.
4065 string CompilerGLSL::to_func_call_arg(const SPIRFunction::Parameter &, uint32_t id)
4066 {
4067 	// Make sure that we use the name of the original variable, and not the parameter alias.
4068 	uint32_t name_id = id;
4069 	auto *var = maybe_get<SPIRVariable>(id);
4070 	if (var && var->basevariable)
4071 		name_id = var->basevariable;
4072 	return to_expression(name_id);
4073 }
4074 
4075 void CompilerGLSL::handle_invalid_expression(uint32_t id)
4076 {
4077 	// We tried to read an invalidated expression.
4078 	// This means we need another pass at compilation, but next time, force temporary variables so that they cannot be invalidated.
4079 	forced_temporaries.insert(id);
4080 	force_recompile();
4081 }
4082 
4083 // Converts the format of the current expression from packed to unpacked,
4084 // by wrapping the expression in a constructor of the appropriate type.
4085 // GLSL does not support packed formats, so simply return the expression.
4086 // Subclasses that do will override.
4087 string CompilerGLSL::unpack_expression_type(string expr_str, const SPIRType &, uint32_t, bool, bool)
4088 {
4089 	return expr_str;
4090 }
4091 
4092 // Sometimes we have proactively enclosed an expression where it turns out we might not have needed it after all.
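// For example, "(a + b)" is stripped down to "a + b", while "(a + b) * (c + d)" is left untouched,
// since its outermost parentheses do not enclose the whole expression.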
4093 void CompilerGLSL::strip_enclosed_expression(string &expr)
4094 {
4095 	if (expr.size() < 2 || expr.front() != '(' || expr.back() != ')')
4096 		return;
4097 
4098 	// Have to make sure that our first and last parens actually enclose everything inside it.
4099 	uint32_t paren_count = 0;
4100 	for (auto &c : expr)
4101 	{
4102 		if (c == '(')
4103 			paren_count++;
4104 		else if (c == ')')
4105 		{
4106 			paren_count--;
4107 
4108 			// If we hit 0 and this is not the final char, our first and final parens actually don't
4109 			// enclose the expression, and we cannot strip, e.g.: (a + b) * (c + d).
4110 			if (paren_count == 0 && &c != &expr.back())
4111 				return;
4112 		}
4113 	}
4114 	expr.erase(expr.size() - 1, 1);
4115 	expr.erase(begin(expr));
4116 }
4117 
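// For example, "a + b" becomes "(a + b)" and "-x" becomes "(-x)",
// while a call such as "texture(uSampler, vUV)" is returned unchanged.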
4118 string CompilerGLSL::enclose_expression(const string &expr)
4119 {
4120 	bool need_parens = false;
4121 
4122 	// If the expression starts with a unary operator, we need to enclose it to deal with cases where we have back-to-back
4123 	// unary expressions.
4124 	if (!expr.empty())
4125 	{
4126 		auto c = expr.front();
4127 		if (c == '-' || c == '+' || c == '!' || c == '~' || c == '&' || c == '*')
4128 			need_parens = true;
4129 	}
4130 
4131 	if (!need_parens)
4132 	{
4133 		uint32_t paren_count = 0;
4134 		for (auto c : expr)
4135 		{
4136 			if (c == '(' || c == '[')
4137 				paren_count++;
4138 			else if (c == ')' || c == ']')
4139 			{
4140 				assert(paren_count);
4141 				paren_count--;
4142 			}
4143 			else if (c == ' ' && paren_count == 0)
4144 			{
4145 				need_parens = true;
4146 				break;
4147 			}
4148 		}
4149 		assert(paren_count == 0);
4150 	}
4151 
4152 	// If this expression contains any spaces which are not enclosed by parentheses,
4153 	// we need to enclose it so we can treat the whole string as an expression.
4154 	// This happens when two expressions have been part of a binary op earlier.
4155 	if (need_parens)
4156 		return join('(', expr, ')');
4157 	else
4158 		return expr;
4159 }
4160 
4161 string CompilerGLSL::dereference_expression(const SPIRType &expr_type, const std::string &expr)
4162 {
4163 	// If this expression starts with an address-of operator ('&'), then
4164 	// just return the part after the operator.
4165 	// TODO: Strip parens if unnecessary?
4166 	if (expr.front() == '&')
4167 		return expr.substr(1);
4168 	else if (backend.native_pointers)
4169 		return join('*', expr);
4170 	else if (expr_type.storage == StorageClassPhysicalStorageBufferEXT && expr_type.basetype != SPIRType::Struct &&
4171 	         expr_type.pointer_depth == 1)
4172 	{
4173 		return join(enclose_expression(expr), ".value");
4174 	}
4175 	else
4176 		return expr;
4177 }
4178 
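// For example, "(*foo)" and "*foo" both turn back into "foo", while a plain l-value such as "bar"
// becomes "&bar" (identifiers here are illustrative).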
4179 string CompilerGLSL::address_of_expression(const std::string &expr)
4180 {
4181 	if (expr.size() > 3 && expr[0] == '(' && expr[1] == '*' && expr.back() == ')')
4182 	{
4183 		// If we have an expression which looks like (*foo), taking the address of it is the same as stripping
4184 		// the first two and last characters. We might have to enclose the expression.
4185 		// This doesn't work for cases like (*foo + 10),
4186 		// but this is an r-value expression which we cannot take the address of anyways.
4187 		return enclose_expression(expr.substr(2, expr.size() - 3));
4188 	}
4189 	else if (expr.front() == '*')
4190 	{
4191 		// If this expression starts with a dereference operator ('*'), then
4192 		// just return the part after the operator.
4193 		return expr.substr(1);
4194 	}
4195 	else
4196 		return join('&', enclose_expression(expr));
4197 }
4198 
4199 // Just like to_expression except that we enclose the expression inside parentheses if needed.
4200 string CompilerGLSL::to_enclosed_expression(uint32_t id, bool register_expression_read)
4201 {
4202 	return enclose_expression(to_expression(id, register_expression_read));
4203 }
4204 
4205 // Used explicitly when we want to read a row-major expression, but without any transpose shenanigans.
4206 // need_transpose must be forced to false.
4207 string CompilerGLSL::to_unpacked_row_major_matrix_expression(uint32_t id)
4208 {
4209 	return unpack_expression_type(to_expression(id), expression_type(id),
4210 	                              get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID),
4211 	                              has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked), true);
4212 }
4213 
4214 string CompilerGLSL::to_unpacked_expression(uint32_t id, bool register_expression_read)
4215 {
4216 	// If we need to transpose, it will also take care of unpacking rules.
4217 	auto *e = maybe_get<SPIRExpression>(id);
4218 	bool need_transpose = e && e->need_transpose;
4219 	bool is_remapped = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID);
4220 	bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
4221 
4222 	if (!need_transpose && (is_remapped || is_packed))
4223 	{
4224 		return unpack_expression_type(to_expression(id, register_expression_read),
4225 		                              get_pointee_type(expression_type_id(id)),
4226 		                              get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID),
4227 		                              has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked), false);
4228 	}
4229 	else
4230 		return to_expression(id, register_expression_read);
4231 }
4232 
4233 string CompilerGLSL::to_enclosed_unpacked_expression(uint32_t id, bool register_expression_read)
4234 {
4235 	// If we need to transpose, it will also take care of unpacking rules.
4236 	auto *e = maybe_get<SPIRExpression>(id);
4237 	bool need_transpose = e && e->need_transpose;
4238 	bool is_remapped = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID);
4239 	bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
4240 	if (!need_transpose && (is_remapped || is_packed))
4241 	{
4242 		return unpack_expression_type(to_expression(id, register_expression_read), expression_type(id),
4243 		                              get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID),
4244 		                              has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked), false);
4245 	}
4246 	else
4247 		return to_enclosed_expression(id, register_expression_read);
4248 }
4249 
4250 string CompilerGLSL::to_dereferenced_expression(uint32_t id, bool register_expression_read)
4251 {
4252 	auto &type = expression_type(id);
4253 	if (type.pointer && should_dereference(id))
4254 		return dereference_expression(type, to_enclosed_expression(id, register_expression_read));
4255 	else
4256 		return to_expression(id, register_expression_read);
4257 }
4258 
4259 string CompilerGLSL::to_pointer_expression(uint32_t id, bool register_expression_read)
4260 {
4261 	auto &type = expression_type(id);
4262 	if (type.pointer && expression_is_lvalue(id) && !should_dereference(id))
4263 		return address_of_expression(to_enclosed_expression(id, register_expression_read));
4264 	else
4265 		return to_unpacked_expression(id, register_expression_read);
4266 }
4267 
4268 string CompilerGLSL::to_enclosed_pointer_expression(uint32_t id, bool register_expression_read)
4269 {
4270 	auto &type = expression_type(id);
4271 	if (type.pointer && expression_is_lvalue(id) && !should_dereference(id))
4272 		return address_of_expression(to_enclosed_expression(id, register_expression_read));
4273 	else
4274 		return to_enclosed_unpacked_expression(id, register_expression_read);
4275 }
4276 
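// For example, component index 2 yields "expr.z" for a normal vector,
// or "expr[2]" when the source is physically packed.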
4277 string CompilerGLSL::to_extract_component_expression(uint32_t id, uint32_t index)
4278 {
4279 	auto expr = to_enclosed_expression(id);
4280 	if (has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked))
4281 		return join(expr, "[", index, "]");
4282 	else
4283 		return join(expr, ".", index_to_swizzle(index));
4284 }
4285 
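// Re-rolls an array expression into an explicit initializer list, e.g. arr becomes { arr[0], arr[1] },
// recursing into nested arrays so the result can participate in a composite initializer.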
4286 string CompilerGLSL::to_rerolled_array_expression(const string &base_expr, const SPIRType &type)
4287 {
4288 	uint32_t size = to_array_size_literal(type);
4289 	auto &parent = get<SPIRType>(type.parent_type);
4290 	string expr = "{ ";
4291 
4292 	for (uint32_t i = 0; i < size; i++)
4293 	{
4294 		auto subexpr = join(base_expr, "[", convert_to_string(i), "]");
4295 		if (parent.array.empty())
4296 			expr += subexpr;
4297 		else
4298 			expr += to_rerolled_array_expression(subexpr, parent);
4299 
4300 		if (i + 1 < size)
4301 			expr += ", ";
4302 	}
4303 
4304 	expr += " }";
4305 	return expr;
4306 }
4307 
4308 string CompilerGLSL::to_composite_constructor_expression(uint32_t id, bool uses_buffer_offset)
4309 {
4310 	auto &type = expression_type(id);
4311 
4312 	bool reroll_array = !type.array.empty() && (!backend.array_is_value_type ||
4313 	                                            (uses_buffer_offset && !backend.buffer_offset_array_is_value_type));
4314 
4315 	if (reroll_array)
4316 	{
4317 		// For this case, we need to "re-roll" an array initializer from a temporary.
4318 		// We cannot simply pass the array directly, since it decays to a pointer and it cannot
4319 		// participate in a struct initializer. E.g.
4320 		// float arr[2] = { 1.0, 2.0 };
4321 		// Foo foo = { arr }; must be transformed to
4322 		// Foo foo = { { arr[0], arr[1] } };
4323 		// The array sizes cannot be deduced from specialization constants since we cannot use any loops.
4324 
4325 		// We're only triggering one read of the array expression, but this is fine since arrays have to be declared
4326 		// as temporaries anyway.
4327 		return to_rerolled_array_expression(to_enclosed_expression(id), type);
4328 	}
4329 	else
4330 		return to_unpacked_expression(id);
4331 }
4332 
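// The main entry point for turning any ID (expression, constant, constant op, variable, ...) into a GLSL expression string.
// Expressions which have been invalidated by intervening stores are handled first,
// and the read is optionally registered for usage tracking.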
4333 string CompilerGLSL::to_expression(uint32_t id, bool register_expression_read)
4334 {
4335 	auto itr = invalid_expressions.find(id);
4336 	if (itr != end(invalid_expressions))
4337 		handle_invalid_expression(id);
4338 
4339 	if (ir.ids[id].get_type() == TypeExpression)
4340 	{
4341 		// We might have a more complex chain of dependencies.
4342 		// A possible scenario is that we
4343 		//
4344 		// %1 = OpLoad
4345 		// %2 = OpDoSomething %1 %1. Here %2 will have a dependency on %1.
4346 		// %3 = OpDoSomethingAgain %2 %2. Here %3 will lose the link to %1 since we don't propagate the dependencies like that.
4347 		// OpStore %1 %foo // Here we can invalidate %1, and hence all expressions which depend on %1. Only %2 will know since it's part of invalid_expressions.
4348 		// %4 = OpDoSomethingAnotherTime %3 %3 // If we forward all expressions we will see %1 expression after store, not before.
4349 		//
4350 		// However, we can propagate up a list of dependent expressions when we use %2, so we can check if %2 is invalid when reading %3 after the store,
4351 		// and see that we should not forward reads of the original variable.
4352 		auto &expr = get<SPIRExpression>(id);
4353 		for (uint32_t dep : expr.expression_dependencies)
4354 			if (invalid_expressions.find(dep) != end(invalid_expressions))
4355 				handle_invalid_expression(dep);
4356 	}
4357 
4358 	if (register_expression_read)
4359 		track_expression_read(id);
4360 
4361 	switch (ir.ids[id].get_type())
4362 	{
4363 	case TypeExpression:
4364 	{
4365 		auto &e = get<SPIRExpression>(id);
4366 		if (e.base_expression)
4367 			return to_enclosed_expression(e.base_expression) + e.expression;
4368 		else if (e.need_transpose)
4369 		{
4370 			// This should not be reached for access chains, since we always deal explicitly with transpose state
4371 			// when consuming an access chain expression.
4372 			uint32_t physical_type_id = get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID);
4373 			bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
4374 			return convert_row_major_matrix(e.expression, get<SPIRType>(e.expression_type), physical_type_id,
4375 			                                is_packed);
4376 		}
4377 		else if (flattened_structs.count(id))
4378 		{
4379 			return load_flattened_struct(e.expression, get<SPIRType>(e.expression_type));
4380 		}
4381 		else
4382 		{
4383 			if (is_forcing_recompilation())
4384 			{
4385 				// During first compilation phase, certain expression patterns can trigger exponential growth of memory.
4386 				// Avoid this by returning dummy expressions during this phase.
4387 				// Do not use empty expressions here, because those are sentinels for other cases.
4388 				return "_";
4389 			}
4390 			else
4391 				return e.expression;
4392 		}
4393 	}
4394 
4395 	case TypeConstant:
4396 	{
4397 		auto &c = get<SPIRConstant>(id);
4398 		auto &type = get<SPIRType>(c.constant_type);
4399 
4400 		// WorkGroupSize may be a constant.
4401 		auto &dec = ir.meta[c.self].decoration;
4402 		if (dec.builtin)
4403 			return builtin_to_glsl(dec.builtin_type, StorageClassGeneric);
4404 		else if (c.specialization)
4405 			return to_name(id);
4406 		else if (c.is_used_as_lut)
4407 			return to_name(id);
4408 		else if (type.basetype == SPIRType::Struct && !backend.can_declare_struct_inline)
4409 			return to_name(id);
4410 		else if (!type.array.empty() && !backend.can_declare_arrays_inline)
4411 			return to_name(id);
4412 		else
4413 			return constant_expression(c);
4414 	}
4415 
4416 	case TypeConstantOp:
4417 		return to_name(id);
4418 
4419 	case TypeVariable:
4420 	{
4421 		auto &var = get<SPIRVariable>(id);
4422 		// If we try to use a loop variable before the loop header, we have to redirect it to the static expression,
4423 		// since the variable has not been declared yet.
4424 		if (var.statically_assigned || (var.loop_variable && !var.loop_variable_enable))
4425 			return to_expression(var.static_expression);
4426 		else if (var.deferred_declaration)
4427 		{
4428 			var.deferred_declaration = false;
4429 			return variable_decl(var);
4430 		}
4431 		else if (flattened_structs.count(id))
4432 		{
4433 			return load_flattened_struct(to_name(id), get<SPIRType>(var.basetype));
4434 		}
4435 		else
4436 		{
4437 			auto &dec = ir.meta[var.self].decoration;
4438 			if (dec.builtin)
4439 				return builtin_to_glsl(dec.builtin_type, var.storage);
4440 			else
4441 				return to_name(id);
4442 		}
4443 	}
4444 
4445 	case TypeCombinedImageSampler:
4446 		// The expression of this type should never be taken directly.
4447 		// The intention is that texture sampling functions will extract the image and samplers
4448 		// separately and take their expressions as needed.
4449 		// GLSL does not use this type because OpSampledImage immediately creates a combined image sampler
4450 		// expression, e.g. sampler2D(texture, sampler).
4451 		SPIRV_CROSS_THROW("Combined image samplers have no default expression representation.");
4452 
4453 	case TypeAccessChain:
4454 		// We cannot express this type directly; access chains only have meaning in other OpAccessChain, OpStore or OpLoad instructions.
4455 		SPIRV_CROSS_THROW("Access chains have no default expression representation.");
4456 
4457 	default:
4458 		return to_name(id);
4459 	}
4460 }
4461 
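// Emits an OpSpecConstantOp as a GLSL expression.
// Specialization constant operations cannot be emitted as regular instructions,
// so the supported opcodes are reimplemented here as constant-foldable expressions,
// with bitcasts inserted as needed to fix up operand signedness.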
4462 string CompilerGLSL::constant_op_expression(const SPIRConstantOp &cop)
4463 {
4464 	auto &type = get<SPIRType>(cop.basetype);
4465 	bool binary = false;
4466 	bool unary = false;
4467 	string op;
4468 
4469 	if (is_legacy() && is_unsigned_opcode(cop.opcode))
4470 		SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy targets.");
4471 
4472 	// TODO: Find a clean way to reuse emit_instruction.
4473 	switch (cop.opcode)
4474 	{
4475 	case OpSConvert:
4476 	case OpUConvert:
4477 	case OpFConvert:
4478 		op = type_to_glsl_constructor(type);
4479 		break;
4480 
4481 #define GLSL_BOP(opname, x) \
4482 	case Op##opname:        \
4483 		binary = true;      \
4484 		op = x;             \
4485 		break
4486 
4487 #define GLSL_UOP(opname, x) \
4488 	case Op##opname:        \
4489 		unary = true;       \
4490 		op = x;             \
4491 		break
4492 
4493 		GLSL_UOP(SNegate, "-");
4494 		GLSL_UOP(Not, "~");
4495 		GLSL_BOP(IAdd, "+");
4496 		GLSL_BOP(ISub, "-");
4497 		GLSL_BOP(IMul, "*");
4498 		GLSL_BOP(SDiv, "/");
4499 		GLSL_BOP(UDiv, "/");
4500 		GLSL_BOP(UMod, "%");
4501 		GLSL_BOP(SMod, "%");
4502 		GLSL_BOP(ShiftRightLogical, ">>");
4503 		GLSL_BOP(ShiftRightArithmetic, ">>");
4504 		GLSL_BOP(ShiftLeftLogical, "<<");
4505 		GLSL_BOP(BitwiseOr, "|");
4506 		GLSL_BOP(BitwiseXor, "^");
4507 		GLSL_BOP(BitwiseAnd, "&");
4508 		GLSL_BOP(LogicalOr, "||");
4509 		GLSL_BOP(LogicalAnd, "&&");
4510 		GLSL_UOP(LogicalNot, "!");
4511 		GLSL_BOP(LogicalEqual, "==");
4512 		GLSL_BOP(LogicalNotEqual, "!=");
4513 		GLSL_BOP(IEqual, "==");
4514 		GLSL_BOP(INotEqual, "!=");
4515 		GLSL_BOP(ULessThan, "<");
4516 		GLSL_BOP(SLessThan, "<");
4517 		GLSL_BOP(ULessThanEqual, "<=");
4518 		GLSL_BOP(SLessThanEqual, "<=");
4519 		GLSL_BOP(UGreaterThan, ">");
4520 		GLSL_BOP(SGreaterThan, ">");
4521 		GLSL_BOP(UGreaterThanEqual, ">=");
4522 		GLSL_BOP(SGreaterThanEqual, ">=");
4523 
4524 	case OpSelect:
4525 	{
4526 		if (cop.arguments.size() < 3)
4527 			SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
4528 
4529 		// This one is pretty annoying. It's triggered from
4530 		// uint(bool), int(bool) from spec constants.
4531 		// In order to preserve its compile-time constness in Vulkan GLSL,
4532 		// we need to reduce the OpSelect expression back to this simplified model.
4533 		// If we cannot, fail.
4534 		if (to_trivial_mix_op(type, op, cop.arguments[2], cop.arguments[1], cop.arguments[0]))
4535 		{
4536 			// Implement as a simple cast down below.
4537 		}
4538 		else
4539 		{
4540 			// Implement a ternary and pray the compiler understands it :)
4541 			return to_ternary_expression(type, cop.arguments[0], cop.arguments[1], cop.arguments[2]);
4542 		}
4543 		break;
4544 	}
4545 
4546 	case OpVectorShuffle:
4547 	{
4548 		string expr = type_to_glsl_constructor(type);
4549 		expr += "(";
4550 
4551 		uint32_t left_components = expression_type(cop.arguments[0]).vecsize;
4552 		string left_arg = to_enclosed_expression(cop.arguments[0]);
4553 		string right_arg = to_enclosed_expression(cop.arguments[1]);
4554 
4555 		for (uint32_t i = 2; i < uint32_t(cop.arguments.size()); i++)
4556 		{
4557 			uint32_t index = cop.arguments[i];
4558 			if (index >= left_components)
4559 				expr += right_arg + "." + "xyzw"[index - left_components];
4560 			else
4561 				expr += left_arg + "." + "xyzw"[index];
4562 
4563 			if (i + 1 < uint32_t(cop.arguments.size()))
4564 				expr += ", ";
4565 		}
4566 
4567 		expr += ")";
4568 		return expr;
4569 	}
4570 
4571 	case OpCompositeExtract:
4572 	{
4573 		auto expr = access_chain_internal(cop.arguments[0], &cop.arguments[1], uint32_t(cop.arguments.size() - 1),
4574 		                                  ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr);
4575 		return expr;
4576 	}
4577 
4578 	case OpCompositeInsert:
4579 		SPIRV_CROSS_THROW("OpCompositeInsert spec constant op is not supported.");
4580 
4581 	default:
4582 		// Some opcodes are unimplemented here; they are currently not possible to test via glslang.
4583 		SPIRV_CROSS_THROW("Unimplemented spec constant op.");
4584 	}
4585 
4586 	uint32_t bit_width = 0;
4587 	if (unary || binary || cop.opcode == OpSConvert || cop.opcode == OpUConvert)
4588 		bit_width = expression_type(cop.arguments[0]).width;
4589 
4590 	SPIRType::BaseType input_type;
4591 	bool skip_cast_if_equal_type = opcode_is_sign_invariant(cop.opcode);
4592 
4593 	switch (cop.opcode)
4594 	{
4595 	case OpIEqual:
4596 	case OpINotEqual:
4597 		input_type = to_signed_basetype(bit_width);
4598 		break;
4599 
4600 	case OpSLessThan:
4601 	case OpSLessThanEqual:
4602 	case OpSGreaterThan:
4603 	case OpSGreaterThanEqual:
4604 	case OpSMod:
4605 	case OpSDiv:
4606 	case OpShiftRightArithmetic:
4607 	case OpSConvert:
4608 	case OpSNegate:
4609 		input_type = to_signed_basetype(bit_width);
4610 		break;
4611 
4612 	case OpULessThan:
4613 	case OpULessThanEqual:
4614 	case OpUGreaterThan:
4615 	case OpUGreaterThanEqual:
4616 	case OpUMod:
4617 	case OpUDiv:
4618 	case OpShiftRightLogical:
4619 	case OpUConvert:
4620 		input_type = to_unsigned_basetype(bit_width);
4621 		break;
4622 
4623 	default:
4624 		input_type = type.basetype;
4625 		break;
4626 	}
4627 
4628 #undef GLSL_BOP
4629 #undef GLSL_UOP
4630 	if (binary)
4631 	{
4632 		if (cop.arguments.size() < 2)
4633 			SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
4634 
4635 		string cast_op0;
4636 		string cast_op1;
4637 		auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, cop.arguments[0],
4638 		                                              cop.arguments[1], skip_cast_if_equal_type);
4639 
4640 		if (type.basetype != input_type && type.basetype != SPIRType::Boolean)
4641 		{
4642 			expected_type.basetype = input_type;
4643 			auto expr = bitcast_glsl_op(type, expected_type);
4644 			expr += '(';
4645 			expr += join(cast_op0, " ", op, " ", cast_op1);
4646 			expr += ')';
4647 			return expr;
4648 		}
4649 		else
4650 			return join("(", cast_op0, " ", op, " ", cast_op1, ")");
4651 	}
4652 	else if (unary)
4653 	{
4654 		if (cop.arguments.size() < 1)
4655 			SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
4656 
4657 		// Auto-bitcast to result type as needed.
4658 		// Works around various casting scenarios in glslang as there is no OpBitcast for specialization constants.
4659 		return join("(", op, bitcast_glsl(type, cop.arguments[0]), ")");
4660 	}
4661 	else if (cop.opcode == OpSConvert || cop.opcode == OpUConvert)
4662 	{
4663 		if (cop.arguments.size() < 1)
4664 			SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
4665 
4666 		auto &arg_type = expression_type(cop.arguments[0]);
4667 		if (arg_type.width < type.width && input_type != arg_type.basetype)
4668 		{
4669 			auto expected = arg_type;
4670 			expected.basetype = input_type;
4671 			return join(op, "(", bitcast_glsl(expected, cop.arguments[0]), ")");
4672 		}
4673 		else
4674 			return join(op, "(", to_expression(cop.arguments[0]), ")");
4675 	}
4676 	else
4677 	{
4678 		if (cop.arguments.size() < 1)
4679 			SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
4680 		return join(op, "(", to_expression(cop.arguments[0]), ")");
4681 	}
4682 }
4683 
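// Emits a constant as a GLSL expression, recursing into subconstants for arrays and structs
// and deferring to constant_expression_vector() for per-column vector and scalar data.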
4684 string CompilerGLSL::constant_expression(const SPIRConstant &c)
4685 {
4686 	auto &type = get<SPIRType>(c.constant_type);
4687 
4688 	if (type.pointer)
4689 	{
4690 		return backend.null_pointer_literal;
4691 	}
4692 	else if (!c.subconstants.empty())
4693 	{
4694 		// Handles Arrays and structures.
4695 		string res;
4696 
4697 		// Allow Metal to use the array<T> template to make arrays a value type
4698 		bool needs_trailing_bracket = false;
4699 		if (backend.use_initializer_list && backend.use_typed_initializer_list && type.basetype == SPIRType::Struct &&
4700 		    type.array.empty())
4701 		{
4702 			res = type_to_glsl_constructor(type) + "{ ";
4703 		}
4704 		else if (backend.use_initializer_list && backend.use_typed_initializer_list && backend.array_is_value_type &&
4705 		         !type.array.empty())
4706 		{
4707 			res = type_to_glsl_constructor(type) + "({ ";
4708 			needs_trailing_bracket = true;
4709 		}
4710 		else if (backend.use_initializer_list)
4711 		{
4712 			res = "{ ";
4713 		}
4714 		else
4715 		{
4716 			res = type_to_glsl_constructor(type) + "(";
4717 		}
4718 
4719 		for (auto &elem : c.subconstants)
4720 		{
4721 			auto &subc = get<SPIRConstant>(elem);
4722 			if (subc.specialization)
4723 				res += to_name(elem);
4724 			else
4725 				res += constant_expression(subc);
4726 
4727 			if (&elem != &c.subconstants.back())
4728 				res += ", ";
4729 		}
4730 
4731 		res += backend.use_initializer_list ? " }" : ")";
4732 		if (needs_trailing_bracket)
4733 			res += ")";
4734 
4735 		return res;
4736 	}
4737 	else if (type.basetype == SPIRType::Struct && type.member_types.size() == 0)
4738 	{
4739 		// Metal tessellation likes empty structs which are then constant expressions.
4740 		if (backend.supports_empty_struct)
4741 			return "{ }";
4742 		else if (backend.use_typed_initializer_list)
4743 			return join(type_to_glsl(get<SPIRType>(c.constant_type)), "{ 0 }");
4744 		else if (backend.use_initializer_list)
4745 			return "{ 0 }";
4746 		else
4747 			return join(type_to_glsl(get<SPIRType>(c.constant_type)), "(0)");
4748 	}
4749 	else if (c.columns() == 1)
4750 	{
4751 		return constant_expression_vector(c, 0);
4752 	}
4753 	else
4754 	{
4755 		string res = type_to_glsl(get<SPIRType>(c.constant_type)) + "(";
4756 		for (uint32_t col = 0; col < c.columns(); col++)
4757 		{
4758 			if (c.specialization_constant_id(col) != 0)
4759 				res += to_name(c.specialization_constant_id(col));
4760 			else
4761 				res += constant_expression_vector(c, col);
4762 
4763 			if (col + 1 < c.columns())
4764 				res += ", ";
4765 		}
4766 		res += ")";
4767 		return res;
4768 	}
4769 }
4770 
4771 #ifdef _MSC_VER
4772 // sprintf warning.
4773 // We cannot rely on snprintf existing because, ..., MSVC.
4774 #pragma warning(push)
4775 #pragma warning(disable : 4996)
4776 #endif
4777 
4778 string CompilerGLSL::convert_half_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
4779 {
4780 	string res;
4781 	float float_value = c.scalar_f16(col, row);
4782 
4783 	// There is no literal "hf" in GL_NV_gpu_shader5, so to avoid lots
4784 	// of complicated workarounds, just value-cast to the half type always.
4785 	if (std::isnan(float_value) || std::isinf(float_value))
4786 	{
4787 		SPIRType type;
4788 		type.basetype = SPIRType::Half;
4789 		type.vecsize = 1;
4790 		type.columns = 1;
4791 
4792 		if (float_value == numeric_limits<float>::infinity())
4793 			res = join(type_to_glsl(type), "(1.0 / 0.0)");
4794 		else if (float_value == -numeric_limits<float>::infinity())
4795 			res = join(type_to_glsl(type), "(-1.0 / 0.0)");
4796 		else if (std::isnan(float_value))
4797 			res = join(type_to_glsl(type), "(0.0 / 0.0)");
4798 		else
4799 			SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant.");
4800 	}
4801 	else
4802 	{
4803 		SPIRType type;
4804 		type.basetype = SPIRType::Half;
4805 		type.vecsize = 1;
4806 		type.columns = 1;
4807 		res = join(type_to_glsl(type), "(", convert_to_string(float_value, current_locale_radix_character), ")");
4808 	}
4809 
4810 	return res;
4811 }
4812 
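// Converts a 32-bit float constant to a GLSL literal.
// Non-finite values are emitted as a bitcast from the raw IEEE-754 bits on modern targets,
// or as divisions by zero on legacy targets which lack the bitcast builtins.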
4813 string CompilerGLSL::convert_float_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
4814 {
4815 	string res;
4816 	float float_value = c.scalar_f32(col, row);
4817 
4818 	if (std::isnan(float_value) || std::isinf(float_value))
4819 	{
4820 		// Use special representation.
4821 		if (!is_legacy())
4822 		{
4823 			SPIRType out_type;
4824 			SPIRType in_type;
4825 			out_type.basetype = SPIRType::Float;
4826 			in_type.basetype = SPIRType::UInt;
4827 			out_type.vecsize = 1;
4828 			in_type.vecsize = 1;
4829 			out_type.width = 32;
4830 			in_type.width = 32;
4831 
4832 			char print_buffer[32];
4833 			sprintf(print_buffer, "0x%xu", c.scalar(col, row));
4834 			res = join(bitcast_glsl_op(out_type, in_type), "(", print_buffer, ")");
4835 		}
4836 		else
4837 		{
4838 			if (float_value == numeric_limits<float>::infinity())
4839 			{
4840 				if (backend.float_literal_suffix)
4841 					res = "(1.0f / 0.0f)";
4842 				else
4843 					res = "(1.0 / 0.0)";
4844 			}
4845 			else if (float_value == -numeric_limits<float>::infinity())
4846 			{
4847 				if (backend.float_literal_suffix)
4848 					res = "(-1.0f / 0.0f)";
4849 				else
4850 					res = "(-1.0 / 0.0)";
4851 			}
4852 			else if (std::isnan(float_value))
4853 			{
4854 				if (backend.float_literal_suffix)
4855 					res = "(0.0f / 0.0f)";
4856 				else
4857 					res = "(0.0 / 0.0)";
4858 			}
4859 			else
4860 				SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant.");
4861 		}
4862 	}
4863 	else
4864 	{
4865 		res = convert_to_string(float_value, current_locale_radix_character);
4866 		if (backend.float_literal_suffix)
4867 			res += "f";
4868 	}
4869 
4870 	return res;
4871 }
4872 
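// Same idea as convert_float_to_string(), but for FP64 constants.
// The non-finite bitcast path needs GL_ARB_gpu_shader_int64, and pre-4.00 desktop GL needs GL_ARB_gpu_shader_fp64.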
4873 std::string CompilerGLSL::convert_double_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
4874 {
4875 	string res;
4876 	double double_value = c.scalar_f64(col, row);
4877 
4878 	if (std::isnan(double_value) || std::isinf(double_value))
4879 	{
4880 		// Use special representation.
4881 		if (!is_legacy())
4882 		{
4883 			SPIRType out_type;
4884 			SPIRType in_type;
4885 			out_type.basetype = SPIRType::Double;
4886 			in_type.basetype = SPIRType::UInt64;
4887 			out_type.vecsize = 1;
4888 			in_type.vecsize = 1;
4889 			out_type.width = 64;
4890 			in_type.width = 64;
4891 
4892 			uint64_t u64_value = c.scalar_u64(col, row);
4893 
4894 			if (options.es)
4895 				SPIRV_CROSS_THROW("64-bit integers/float not supported in ES profile.");
4896 			require_extension_internal("GL_ARB_gpu_shader_int64");
4897 
4898 			char print_buffer[64];
4899 			sprintf(print_buffer, "0x%llx%s", static_cast<unsigned long long>(u64_value),
4900 			        backend.long_long_literal_suffix ? "ull" : "ul");
4901 			res = join(bitcast_glsl_op(out_type, in_type), "(", print_buffer, ")");
4902 		}
4903 		else
4904 		{
4905 			if (options.es)
4906 				SPIRV_CROSS_THROW("FP64 not supported in ES profile.");
4907 			if (options.version < 400)
4908 				require_extension_internal("GL_ARB_gpu_shader_fp64");
4909 
4910 			if (double_value == numeric_limits<double>::infinity())
4911 			{
4912 				if (backend.double_literal_suffix)
4913 					res = "(1.0lf / 0.0lf)";
4914 				else
4915 					res = "(1.0 / 0.0)";
4916 			}
4917 			else if (double_value == -numeric_limits<double>::infinity())
4918 			{
4919 				if (backend.double_literal_suffix)
4920 					res = "(-1.0lf / 0.0lf)";
4921 				else
4922 					res = "(-1.0 / 0.0)";
4923 			}
4924 			else if (std::isnan(double_value))
4925 			{
4926 				if (backend.double_literal_suffix)
4927 					res = "(0.0lf / 0.0lf)";
4928 				else
4929 					res = "(0.0 / 0.0)";
4930 			}
4931 			else
4932 				SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant.");
4933 		}
4934 	}
4935 	else
4936 	{
4937 		res = convert_to_string(double_value, current_locale_radix_character);
4938 		if (backend.double_literal_suffix)
4939 			res += "lf";
4940 	}
4941 
4942 	return res;
4943 }
4944 
4945 #ifdef _MSC_VER
4946 #pragma warning(pop)
4947 #endif
4948 
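// Emits a single column of a constant as a scalar or vector expression.
// Identical components are splatted to a single value or swizzle where the backend allows it,
// and specialization constant components are referenced by name instead of by value.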
4949 string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t vector)
4950 {
4951 	auto type = get<SPIRType>(c.constant_type);
4952 	type.columns = 1;
4953 
4954 	auto scalar_type = type;
4955 	scalar_type.vecsize = 1;
4956 
4957 	string res;
4958 	bool splat = backend.use_constructor_splatting && c.vector_size() > 1;
4959 	bool swizzle_splat = backend.can_swizzle_scalar && c.vector_size() > 1;
4960 
4961 	if (!type_is_floating_point(type))
4962 	{
4963 		// As a special case, we cannot swizzle literal integers.
4964 		swizzle_splat = false;
4965 	}
4966 
4967 	if (splat || swizzle_splat)
4968 	{
4969 		// Cannot use constant splatting if we have specialization constants somewhere in the vector.
4970 		for (uint32_t i = 0; i < c.vector_size(); i++)
4971 		{
4972 			if (c.specialization_constant_id(vector, i) != 0)
4973 			{
4974 				splat = false;
4975 				swizzle_splat = false;
4976 				break;
4977 			}
4978 		}
4979 	}
4980 
4981 	if (splat || swizzle_splat)
4982 	{
4983 		if (type.width == 64)
4984 		{
4985 			uint64_t ident = c.scalar_u64(vector, 0);
4986 			for (uint32_t i = 1; i < c.vector_size(); i++)
4987 			{
4988 				if (ident != c.scalar_u64(vector, i))
4989 				{
4990 					splat = false;
4991 					swizzle_splat = false;
4992 					break;
4993 				}
4994 			}
4995 		}
4996 		else
4997 		{
4998 			uint32_t ident = c.scalar(vector, 0);
4999 			for (uint32_t i = 1; i < c.vector_size(); i++)
5000 			{
5001 				if (ident != c.scalar(vector, i))
5002 				{
5003 					splat = false;
5004 					swizzle_splat = false;
5005 				}
5006 			}
5007 		}
5008 	}
5009 
5010 	if (c.vector_size() > 1 && !swizzle_splat)
5011 		res += type_to_glsl(type) + "(";
5012 
5013 	switch (type.basetype)
5014 	{
5015 	case SPIRType::Half:
5016 		if (splat || swizzle_splat)
5017 		{
5018 			res += convert_half_to_string(c, vector, 0);
5019 			if (swizzle_splat)
5020 				res = remap_swizzle(get<SPIRType>(c.constant_type), 1, res);
5021 		}
5022 		else
5023 		{
5024 			for (uint32_t i = 0; i < c.vector_size(); i++)
5025 			{
5026 				if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5027 					res += to_name(c.specialization_constant_id(vector, i));
5028 				else
5029 					res += convert_half_to_string(c, vector, i);
5030 
5031 				if (i + 1 < c.vector_size())
5032 					res += ", ";
5033 			}
5034 		}
5035 		break;
5036 
5037 	case SPIRType::Float:
5038 		if (splat || swizzle_splat)
5039 		{
5040 			res += convert_float_to_string(c, vector, 0);
5041 			if (swizzle_splat)
5042 				res = remap_swizzle(get<SPIRType>(c.constant_type), 1, res);
5043 		}
5044 		else
5045 		{
5046 			for (uint32_t i = 0; i < c.vector_size(); i++)
5047 			{
5048 				if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5049 					res += to_name(c.specialization_constant_id(vector, i));
5050 				else
5051 					res += convert_float_to_string(c, vector, i);
5052 
5053 				if (i + 1 < c.vector_size())
5054 					res += ", ";
5055 			}
5056 		}
5057 		break;
5058 
5059 	case SPIRType::Double:
5060 		if (splat || swizzle_splat)
5061 		{
5062 			res += convert_double_to_string(c, vector, 0);
5063 			if (swizzle_splat)
5064 				res = remap_swizzle(get<SPIRType>(c.constant_type), 1, res);
5065 		}
5066 		else
5067 		{
5068 			for (uint32_t i = 0; i < c.vector_size(); i++)
5069 			{
5070 				if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5071 					res += to_name(c.specialization_constant_id(vector, i));
5072 				else
5073 					res += convert_double_to_string(c, vector, i);
5074 
5075 				if (i + 1 < c.vector_size())
5076 					res += ", ";
5077 			}
5078 		}
5079 		break;
5080 
5081 	case SPIRType::Int64:
5082 		if (splat)
5083 		{
5084 			res += convert_to_string(c.scalar_i64(vector, 0));
5085 			if (backend.long_long_literal_suffix)
5086 				res += "ll";
5087 			else
5088 				res += "l";
5089 		}
5090 		else
5091 		{
5092 			for (uint32_t i = 0; i < c.vector_size(); i++)
5093 			{
5094 				if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5095 					res += to_name(c.specialization_constant_id(vector, i));
5096 				else
5097 				{
5098 					res += convert_to_string(c.scalar_i64(vector, i));
5099 					if (backend.long_long_literal_suffix)
5100 						res += "ll";
5101 					else
5102 						res += "l";
5103 				}
5104 
5105 				if (i + 1 < c.vector_size())
5106 					res += ", ";
5107 			}
5108 		}
5109 		break;
5110 
5111 	case SPIRType::UInt64:
5112 		if (splat)
5113 		{
5114 			res += convert_to_string(c.scalar_u64(vector, 0));
5115 			if (backend.long_long_literal_suffix)
5116 				res += "ull";
5117 			else
5118 				res += "ul";
5119 		}
5120 		else
5121 		{
5122 			for (uint32_t i = 0; i < c.vector_size(); i++)
5123 			{
5124 				if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5125 					res += to_name(c.specialization_constant_id(vector, i));
5126 				else
5127 				{
5128 					res += convert_to_string(c.scalar_u64(vector, i));
5129 					if (backend.long_long_literal_suffix)
5130 						res += "ull";
5131 					else
5132 						res += "ul";
5133 				}
5134 
5135 				if (i + 1 < c.vector_size())
5136 					res += ", ";
5137 			}
5138 		}
5139 		break;
5140 
5141 	case SPIRType::UInt:
5142 		if (splat)
5143 		{
5144 			res += convert_to_string(c.scalar(vector, 0));
5145 			if (is_legacy())
5146 			{
5147 				// Fake unsigned constant literals with signed ones if possible.
5148 				// Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed.
5149 				if (c.scalar_i32(vector, 0) < 0)
5150 					SPIRV_CROSS_THROW("Tried to convert uint literal into int, but this made the literal negative.");
5151 			}
5152 			else if (backend.uint32_t_literal_suffix)
5153 				res += "u";
5154 		}
5155 		else
5156 		{
5157 			for (uint32_t i = 0; i < c.vector_size(); i++)
5158 			{
5159 				if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5160 					res += to_name(c.specialization_constant_id(vector, i));
5161 				else
5162 				{
5163 					res += convert_to_string(c.scalar(vector, i));
5164 					if (is_legacy())
5165 					{
5166 						// Fake unsigned constant literals with signed ones if possible.
5167 						// Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed.
5168 						if (c.scalar_i32(vector, i) < 0)
5169 							SPIRV_CROSS_THROW("Tried to convert uint literal into int, but this made "
5170 							                  "the literal negative.");
5171 					}
5172 					else if (backend.uint32_t_literal_suffix)
5173 						res += "u";
5174 				}
5175 
5176 				if (i + 1 < c.vector_size())
5177 					res += ", ";
5178 			}
5179 		}
5180 		break;
5181 
5182 	case SPIRType::Int:
5183 		if (splat)
5184 			res += convert_to_string(c.scalar_i32(vector, 0));
5185 		else
5186 		{
5187 			for (uint32_t i = 0; i < c.vector_size(); i++)
5188 			{
5189 				if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5190 					res += to_name(c.specialization_constant_id(vector, i));
5191 				else
5192 					res += convert_to_string(c.scalar_i32(vector, i));
5193 				if (i + 1 < c.vector_size())
5194 					res += ", ";
5195 			}
5196 		}
5197 		break;
5198 
5199 	case SPIRType::UShort:
5200 		if (splat)
5201 		{
5202 			res += convert_to_string(c.scalar(vector, 0));
5203 		}
5204 		else
5205 		{
5206 			for (uint32_t i = 0; i < c.vector_size(); i++)
5207 			{
5208 				if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5209 					res += to_name(c.specialization_constant_id(vector, i));
5210 				else
5211 				{
5212 					if (*backend.uint16_t_literal_suffix)
5213 					{
5214 						res += convert_to_string(c.scalar_u16(vector, i));
5215 						res += backend.uint16_t_literal_suffix;
5216 					}
5217 					else
5218 					{
5219 						// If the backend doesn't have a literal suffix, we need to value-cast.
5220 						res += type_to_glsl(scalar_type);
5221 						res += "(";
5222 						res += convert_to_string(c.scalar_u16(vector, i));
5223 						res += ")";
5224 					}
5225 				}
5226 
5227 				if (i + 1 < c.vector_size())
5228 					res += ", ";
5229 			}
5230 		}
5231 		break;
5232 
5233 	case SPIRType::Short:
5234 		if (splat)
5235 		{
5236 			res += convert_to_string(c.scalar_i16(vector, 0));
5237 		}
5238 		else
5239 		{
5240 			for (uint32_t i = 0; i < c.vector_size(); i++)
5241 			{
5242 				if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5243 					res += to_name(c.specialization_constant_id(vector, i));
5244 				else
5245 				{
5246 					if (*backend.int16_t_literal_suffix)
5247 					{
5248 						res += convert_to_string(c.scalar_i16(vector, i));
5249 						res += backend.int16_t_literal_suffix;
5250 					}
5251 					else
5252 					{
5253 						// If the backend doesn't have a literal suffix, we need to value-cast.
5254 						res += type_to_glsl(scalar_type);
5255 						res += "(";
5256 						res += convert_to_string(c.scalar_i16(vector, i));
5257 						res += ")";
5258 					}
5259 				}
5260 
5261 				if (i + 1 < c.vector_size())
5262 					res += ", ";
5263 			}
5264 		}
5265 		break;
5266 
5267 	case SPIRType::UByte:
5268 		if (splat)
5269 		{
5270 			res += convert_to_string(c.scalar_u8(vector, 0));
5271 		}
5272 		else
5273 		{
5274 			for (uint32_t i = 0; i < c.vector_size(); i++)
5275 			{
5276 				if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5277 					res += to_name(c.specialization_constant_id(vector, i));
5278 				else
5279 				{
5280 					res += type_to_glsl(scalar_type);
5281 					res += "(";
5282 					res += convert_to_string(c.scalar_u8(vector, i));
5283 					res += ")";
5284 				}
5285 
5286 				if (i + 1 < c.vector_size())
5287 					res += ", ";
5288 			}
5289 		}
5290 		break;
5291 
5292 	case SPIRType::SByte:
5293 		if (splat)
5294 		{
5295 			res += convert_to_string(c.scalar_i8(vector, 0));
5296 		}
5297 		else
5298 		{
5299 			for (uint32_t i = 0; i < c.vector_size(); i++)
5300 			{
5301 				if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5302 					res += to_name(c.specialization_constant_id(vector, i));
5303 				else
5304 				{
5305 					res += type_to_glsl(scalar_type);
5306 					res += "(";
5307 					res += convert_to_string(c.scalar_i8(vector, i));
5308 					res += ")";
5309 				}
5310 
5311 				if (i + 1 < c.vector_size())
5312 					res += ", ";
5313 			}
5314 		}
5315 		break;
5316 
5317 	case SPIRType::Boolean:
5318 		if (splat)
5319 			res += c.scalar(vector, 0) ? "true" : "false";
5320 		else
5321 		{
5322 			for (uint32_t i = 0; i < c.vector_size(); i++)
5323 			{
5324 				if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5325 					res += to_name(c.specialization_constant_id(vector, i));
5326 				else
5327 					res += c.scalar(vector, i) ? "true" : "false";
5328 
5329 				if (i + 1 < c.vector_size())
5330 					res += ", ";
5331 			}
5332 		}
5333 		break;
5334 
5335 	default:
5336 		SPIRV_CROSS_THROW("Invalid constant expression basetype.");
5337 	}
5338 
5339 	if (c.vector_size() > 1 && !swizzle_splat)
5340 		res += ")";
5341 
5342 	return res;
5343 }
5344 
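// Declares an uninitialized temporary for the given result ID and registers the matching SPIRExpression,
// marking the ID as a forced temporary so it will not be forwarded.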
5345 SPIRExpression &CompilerGLSL::emit_uninitialized_temporary_expression(uint32_t type, uint32_t id)
5346 {
5347 	forced_temporaries.insert(id);
5348 	emit_uninitialized_temporary(type, id);
5349 	return set<SPIRExpression>(id, to_name(id), type, true);
5350 }
5351 
5352 void CompilerGLSL::emit_uninitialized_temporary(uint32_t result_type, uint32_t result_id)
5353 {
5354 	// If we're declaring temporaries inside continue blocks,
5355 	// we must declare the temporary in the loop header so that the continue block can avoid declaring new variables.
5356 	if (current_continue_block && !hoisted_temporaries.count(result_id))
5357 	{
5358 		auto &header = get<SPIRBlock>(current_continue_block->loop_dominator);
5359 		if (find_if(begin(header.declare_temporary), end(header.declare_temporary),
5360 		            [result_type, result_id](const pair<uint32_t, uint32_t> &tmp) {
5361 			            return tmp.first == result_type && tmp.second == result_id;
5362 		            }) == end(header.declare_temporary))
5363 		{
5364 			header.declare_temporary.emplace_back(result_type, result_id);
5365 			hoisted_temporaries.insert(result_id);
5366 			force_recompile();
5367 		}
5368 	}
5369 	else if (hoisted_temporaries.count(result_id) == 0)
5370 	{
5371 		auto &type = get<SPIRType>(result_type);
5372 		auto &flags = ir.meta[result_id].decoration.decoration_flags;
5373 
5374 		// The result_id has not been made into an expression yet, so use flags interface.
5375 		add_local_variable_name(result_id);
5376 
5377 		string initializer;
5378 		if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
5379 			initializer = join(" = ", to_zero_initialized_expression(result_type));
5380 
5381 		statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(result_id)), initializer, ";");
5382 	}
5383 }
5384 
5385 string CompilerGLSL::declare_temporary(uint32_t result_type, uint32_t result_id)
5386 {
5387 	auto &type = get<SPIRType>(result_type);
5388 	auto &flags = ir.meta[result_id].decoration.decoration_flags;
5389 
5390 	// If we're declaring temporaries inside continue blocks,
5391 	// we must declare the temporary in the loop header so that the continue block can avoid declaring new variables.
5392 	if (current_continue_block && !hoisted_temporaries.count(result_id))
5393 	{
5394 		auto &header = get<SPIRBlock>(current_continue_block->loop_dominator);
5395 		if (find_if(begin(header.declare_temporary), end(header.declare_temporary),
5396 		            [result_type, result_id](const pair<uint32_t, uint32_t> &tmp) {
5397 			            return tmp.first == result_type && tmp.second == result_id;
5398 		            }) == end(header.declare_temporary))
5399 		{
5400 			header.declare_temporary.emplace_back(result_type, result_id);
5401 			hoisted_temporaries.insert(result_id);
5402 			force_recompile();
5403 		}
5404 
5405 		return join(to_name(result_id), " = ");
5406 	}
5407 	else if (hoisted_temporaries.count(result_id))
5408 	{
5409 		// The temporary has already been declared earlier, so just "declare" the temporary by writing to it.
5410 		return join(to_name(result_id), " = ");
5411 	}
5412 	else
5413 	{
5414 		// The result_id has not been made into an expression yet, so use flags interface.
5415 		add_local_variable_name(result_id);
5416 		return join(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(result_id)), " = ");
5417 	}
5418 }
5419 
5420 bool CompilerGLSL::expression_is_forwarded(uint32_t id) const
5421 {
5422 	return forwarded_temporaries.count(id) != 0;
5423 }
5424 
5425 bool CompilerGLSL::expression_suppresses_usage_tracking(uint32_t id) const
5426 {
5427 	return suppressed_usage_tracking.count(id) != 0;
5428 }
5429 
5430 bool CompilerGLSL::expression_read_implies_multiple_reads(uint32_t id) const
5431 {
5432 	auto *expr = maybe_get<SPIRExpression>(id);
5433 	if (!expr)
5434 		return false;
5435 
5436 	// If we're emitting code at a deeper loop level than when we emitted the expression,
5437 	// we're probably reading the same expression over and over.
5438 	return current_loop_level > expr->emitted_loop_level;
5439 }
5440 
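// Binds the result of an instruction to an expression string.
// If forwarding is allowed and the ID is not a forced temporary, the expression is forwarded inline;
// otherwise it is flushed to a freshly declared temporary variable.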
5441 SPIRExpression &CompilerGLSL::emit_op(uint32_t result_type, uint32_t result_id, const string &rhs, bool forwarding,
5442                                       bool suppress_usage_tracking)
5443 {
5444 	if (forwarding && (forced_temporaries.find(result_id) == end(forced_temporaries)))
5445 	{
5446 		// Just forward it without temporary.
5447 		// If the forward is trivial, we do not force flushing to temporary for this expression.
5448 		forwarded_temporaries.insert(result_id);
5449 		if (suppress_usage_tracking)
5450 			suppressed_usage_tracking.insert(result_id);
5451 
5452 		return set<SPIRExpression>(result_id, rhs, result_type, true);
5453 	}
5454 	else
5455 	{
5456 		// If expression isn't immutable, bind it to a temporary and make the new temporary immutable (they always are).
5457 		statement(declare_temporary(result_type, result_id), rhs, ";");
5458 		return set<SPIRExpression>(result_id, to_name(result_id), result_type, true);
5459 	}
5460 }
5461 
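// Emits a prefix unary operator expression such as -x, forwarding the result when the operand allows it.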
5462 void CompilerGLSL::emit_unary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op)
5463 {
5464 	bool forward = should_forward(op0);
5465 	emit_op(result_type, result_id, join(op, to_enclosed_unpacked_expression(op0)), forward);
5466 	inherit_expression_dependencies(result_id, op0);
5467 }
5468 
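// Emits an infix binary operator expression such as a + b, forwarding the result when both operands allow it.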
5469 void CompilerGLSL::emit_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op)
5470 {
5471 	bool forward = should_forward(op0) && should_forward(op1);
5472 	emit_op(result_type, result_id,
5473 	        join(to_enclosed_unpacked_expression(op0), " ", op, " ", to_enclosed_unpacked_expression(op1)), forward);
5474 
5475 	inherit_expression_dependencies(result_id, op0);
5476 	inherit_expression_dependencies(result_id, op1);
5477 }
5478 
5479 void CompilerGLSL::emit_unrolled_unary_op(uint32_t result_type, uint32_t result_id, uint32_t operand, const char *op)
5480 {
5481 	auto &type = get<SPIRType>(result_type);
5482 	auto expr = type_to_glsl_constructor(type);
5483 	expr += '(';
5484 	for (uint32_t i = 0; i < type.vecsize; i++)
5485 	{
5486 		// Make sure to call to_expression multiple times to ensure
5487 		// that these expressions are properly flushed to temporaries if needed.
5488 		expr += op;
5489 		expr += to_extract_component_expression(operand, i);
5490 
5491 		if (i + 1 < type.vecsize)
5492 			expr += ", ";
5493 	}
5494 	expr += ')';
5495 	emit_op(result_type, result_id, expr, should_forward(operand));
5496 
5497 	inherit_expression_dependencies(result_id, operand);
5498 }
5499 
5500 void CompilerGLSL::emit_unrolled_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
5501                                            const char *op, bool negate, SPIRType::BaseType expected_type)
5502 {
5503 	auto &type0 = expression_type(op0);
5504 	auto &type1 = expression_type(op1);
5505 
5506 	SPIRType target_type0 = type0;
5507 	SPIRType target_type1 = type1;
5508 	target_type0.basetype = expected_type;
5509 	target_type1.basetype = expected_type;
5510 	target_type0.vecsize = 1;
5511 	target_type1.vecsize = 1;
5512 
5513 	auto &type = get<SPIRType>(result_type);
5514 	auto expr = type_to_glsl_constructor(type);
5515 	expr += '(';
5516 	for (uint32_t i = 0; i < type.vecsize; i++)
5517 	{
5518 		// Make sure to call to_expression multiple times to ensure
5519 		// that these expressions are properly flushed to temporaries if needed.
5520 		if (negate)
5521 			expr += "!(";
5522 
5523 		if (expected_type != SPIRType::Unknown && type0.basetype != expected_type)
5524 			expr += bitcast_expression(target_type0, type0.basetype, to_extract_component_expression(op0, i));
5525 		else
5526 			expr += to_extract_component_expression(op0, i);
5527 
5528 		expr += ' ';
5529 		expr += op;
5530 		expr += ' ';
5531 
5532 		if (expected_type != SPIRType::Unknown && type1.basetype != expected_type)
5533 			expr += bitcast_expression(target_type1, type1.basetype, to_extract_component_expression(op1, i));
5534 		else
5535 			expr += to_extract_component_expression(op1, i);
5536 
5537 		if (negate)
5538 			expr += ")";
5539 
5540 		if (i + 1 < type.vecsize)
5541 			expr += ", ";
5542 	}
5543 	expr += ')';
5544 	emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1));
5545 
5546 	inherit_expression_dependencies(result_id, op0);
5547 	inherit_expression_dependencies(result_id, op1);
5548 }
5549 
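// Prepares both operand expressions for a binary op which expects a particular input signedness,
// bitcasting them to a synthesized expected type when necessary, and returns that expected type
// so callers can bitcast the result back if required.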
5550 SPIRType CompilerGLSL::binary_op_bitcast_helper(string &cast_op0, string &cast_op1, SPIRType::BaseType &input_type,
5551                                                 uint32_t op0, uint32_t op1, bool skip_cast_if_equal_type)
5552 {
5553 	auto &type0 = expression_type(op0);
5554 	auto &type1 = expression_type(op1);
5555 
5556 	// We have to bitcast if our inputs are of different type, or if our types are not equal to expected inputs.
5557 	// For some functions like OpIEqual and INotEqual, we don't care if inputs are of different types than expected
5558 	// since equality test is exactly the same.
5559 	bool cast = (type0.basetype != type1.basetype) || (!skip_cast_if_equal_type && type0.basetype != input_type);
5560 
5561 	// Create a fake type so we can bitcast to it.
5562 	// We only deal with regular arithmetic types here like int, uints and so on.
5563 	SPIRType expected_type;
5564 	expected_type.basetype = input_type;
5565 	expected_type.vecsize = type0.vecsize;
5566 	expected_type.columns = type0.columns;
5567 	expected_type.width = type0.width;
5568 
5569 	if (cast)
5570 	{
5571 		cast_op0 = bitcast_glsl(expected_type, op0);
5572 		cast_op1 = bitcast_glsl(expected_type, op1);
5573 	}
5574 	else
5575 	{
5576 		// If we don't cast, our actual input type is that of the first (or second) argument.
5577 		cast_op0 = to_enclosed_unpacked_expression(op0);
5578 		cast_op1 = to_enclosed_unpacked_expression(op1);
5579 		input_type = type0.basetype;
5580 	}
5581 
5582 	return expected_type;
5583 }
5584 
5585 bool CompilerGLSL::emit_complex_bitcast(uint32_t result_type, uint32_t id, uint32_t op0)
5586 {
5587 	// Some bitcasts may require complex casting sequences, and are implemented here.
5588 	// Otherwise, a simple unary function with bitcast_glsl_op will do.
5589 
5590 	auto &output_type = get<SPIRType>(result_type);
5591 	auto &input_type = expression_type(op0);
5592 	string expr;
5593 
5594 	if (output_type.basetype == SPIRType::Half && input_type.basetype == SPIRType::Float && input_type.vecsize == 1)
5595 		expr = join("unpackFloat2x16(floatBitsToUint(", to_unpacked_expression(op0), "))");
5596 	else if (output_type.basetype == SPIRType::Float && input_type.basetype == SPIRType::Half &&
5597 	         input_type.vecsize == 2)
5598 		expr = join("uintBitsToFloat(packFloat2x16(", to_unpacked_expression(op0), "))");
5599 	else
5600 		return false;
5601 
5602 	emit_op(result_type, id, expr, should_forward(op0));
5603 	return true;
5604 }
5605 
5606 void CompilerGLSL::emit_binary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
5607                                        const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type)
5608 {
5609 	string cast_op0, cast_op1;
5610 	auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type);
5611 	auto &out_type = get<SPIRType>(result_type);
5612 
5613 	// We might have casted away from the result type, so bitcast again.
5614 	// For example, arithmetic right shift with uint inputs.
5615 	// Special case boolean outputs since relational opcodes output booleans instead of int/uint.
5616 	string expr;
5617 	if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean)
5618 	{
5619 		expected_type.basetype = input_type;
5620 		expr = bitcast_glsl_op(out_type, expected_type);
5621 		expr += '(';
5622 		expr += join(cast_op0, " ", op, " ", cast_op1);
5623 		expr += ')';
5624 	}
5625 	else
5626 		expr += join(cast_op0, " ", op, " ", cast_op1);
5627 
5628 	emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1));
5629 	inherit_expression_dependencies(result_id, op0);
5630 	inherit_expression_dependencies(result_id, op1);
5631 }
5632 
5633 void CompilerGLSL::emit_unary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op)
5634 {
5635 	bool forward = should_forward(op0);
5636 	emit_op(result_type, result_id, join(op, "(", to_unpacked_expression(op0), ")"), forward);
5637 	inherit_expression_dependencies(result_id, op0);
5638 }
5639 
5640 void CompilerGLSL::emit_binary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
5641                                        const char *op)
5642 {
5643 	bool forward = should_forward(op0) && should_forward(op1);
5644 	emit_op(result_type, result_id, join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ")"),
5645 	        forward);
5646 	inherit_expression_dependencies(result_id, op0);
5647 	inherit_expression_dependencies(result_id, op1);
5648 }
5649 
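// Like emit_unary_func_op(), but bitcasts the operand to input_type and the result back to the declared
// result type whenever the signedness or base type does not match.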
5650 void CompilerGLSL::emit_unary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op,
5651                                            SPIRType::BaseType input_type, SPIRType::BaseType expected_result_type)
5652 {
5653 	auto &out_type = get<SPIRType>(result_type);
5654 	auto &expr_type = expression_type(op0);
5655 	auto expected_type = out_type;
5656 
5657 	// Bit-widths might be different in unary cases because we use it for SConvert/UConvert and friends.
5658 	expected_type.basetype = input_type;
5659 	expected_type.width = expr_type.width;
5660 	string cast_op = expr_type.basetype != input_type ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0);
5661 
5662 	string expr;
5663 	if (out_type.basetype != expected_result_type)
5664 	{
5665 		expected_type.basetype = expected_result_type;
5666 		expected_type.width = out_type.width;
5667 		expr = bitcast_glsl_op(out_type, expected_type);
5668 		expr += '(';
5669 		expr += join(op, "(", cast_op, ")");
5670 		expr += ')';
5671 	}
5672 	else
5673 	{
5674 		expr += join(op, "(", cast_op, ")");
5675 	}
5676 
5677 	emit_op(result_type, result_id, expr, should_forward(op0));
5678 	inherit_expression_dependencies(result_id, op0);
5679 }
5680 
5681 // Very special case. Handling bitfieldExtract requires us to deal with different bitcasts of different signs
5682 // and different vector sizes all at once. Need a special purpose method here.
5683 void CompilerGLSL::emit_trinary_func_op_bitextract(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
5684                                                    uint32_t op2, const char *op,
5685                                                    SPIRType::BaseType expected_result_type,
5686                                                    SPIRType::BaseType input_type0, SPIRType::BaseType input_type1,
5687                                                    SPIRType::BaseType input_type2)
5688 {
5689 	auto &out_type = get<SPIRType>(result_type);
5690 	auto expected_type = out_type;
5691 	expected_type.basetype = input_type0;
5692 
5693 	string cast_op0 =
5694 	    expression_type(op0).basetype != input_type0 ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0);
5695 
5696 	auto op1_expr = to_unpacked_expression(op1);
5697 	auto op2_expr = to_unpacked_expression(op2);
5698 
5699 	// Use value casts here instead. Input must be exactly int or uint, but SPIR-V might be 16-bit.
5700 	expected_type.basetype = input_type1;
5701 	expected_type.vecsize = 1;
5702 	string cast_op1 = expression_type(op1).basetype != input_type1 ?
5703 	                      join(type_to_glsl_constructor(expected_type), "(", op1_expr, ")") :
5704 	                      op1_expr;
5705 
5706 	expected_type.basetype = input_type2;
5707 	expected_type.vecsize = 1;
5708 	string cast_op2 = expression_type(op2).basetype != input_type2 ?
5709 	                      join(type_to_glsl_constructor(expected_type), "(", op2_expr, ")") :
5710 	                      op2_expr;
5711 
5712 	string expr;
5713 	if (out_type.basetype != expected_result_type)
5714 	{
5715 		expected_type.vecsize = out_type.vecsize;
5716 		expected_type.basetype = expected_result_type;
5717 		expr = bitcast_glsl_op(out_type, expected_type);
5718 		expr += '(';
5719 		expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")");
5720 		expr += ')';
5721 	}
5722 	else
5723 	{
5724 		expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")");
5725 	}
5726 
5727 	emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1) && should_forward(op2));
5728 	inherit_expression_dependencies(result_id, op0);
5729 	inherit_expression_dependencies(result_id, op1);
5730 	inherit_expression_dependencies(result_id, op2);
5731 }
5732 
5733 void CompilerGLSL::emit_trinary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
5734                                              uint32_t op2, const char *op, SPIRType::BaseType input_type)
5735 {
5736 	auto &out_type = get<SPIRType>(result_type);
5737 	auto expected_type = out_type;
5738 	expected_type.basetype = input_type;
5739 	string cast_op0 =
5740 	    expression_type(op0).basetype != input_type ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0);
5741 	string cast_op1 =
5742 	    expression_type(op1).basetype != input_type ? bitcast_glsl(expected_type, op1) : to_unpacked_expression(op1);
5743 	string cast_op2 =
5744 	    expression_type(op2).basetype != input_type ? bitcast_glsl(expected_type, op2) : to_unpacked_expression(op2);
5745 
5746 	string expr;
5747 	if (out_type.basetype != input_type)
5748 	{
5749 		expr = bitcast_glsl_op(out_type, expected_type);
5750 		expr += '(';
5751 		expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")");
5752 		expr += ')';
5753 	}
5754 	else
5755 	{
5756 		expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")");
5757 	}
5758 
5759 	emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1) && should_forward(op2));
5760 	inherit_expression_dependencies(result_id, op0);
5761 	inherit_expression_dependencies(result_id, op1);
5762 	inherit_expression_dependencies(result_id, op2);
5763 }
5764 
5765 void CompilerGLSL::emit_binary_func_op_cast_clustered(uint32_t result_type, uint32_t result_id, uint32_t op0,
5766                                                       uint32_t op1, const char *op, SPIRType::BaseType input_type)
5767 {
5768 	// Special purpose method for implementing clustered subgroup opcodes.
5769 	// The main difference is that op1 does not participate in any casting; it needs to be a literal.
5770 	auto &out_type = get<SPIRType>(result_type);
5771 	auto expected_type = out_type;
5772 	expected_type.basetype = input_type;
5773 	string cast_op0 =
5774 	    expression_type(op0).basetype != input_type ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0);
5775 
5776 	string expr;
5777 	if (out_type.basetype != input_type)
5778 	{
5779 		expr = bitcast_glsl_op(out_type, expected_type);
5780 		expr += '(';
5781 		expr += join(op, "(", cast_op0, ", ", to_expression(op1), ")");
5782 		expr += ')';
5783 	}
5784 	else
5785 	{
5786 		expr += join(op, "(", cast_op0, ", ", to_expression(op1), ")");
5787 	}
5788 
5789 	emit_op(result_type, result_id, expr, should_forward(op0));
5790 	inherit_expression_dependencies(result_id, op0);
5791 }
5792 
5793 void CompilerGLSL::emit_binary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
5794                                             const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type)
5795 {
5796 	string cast_op0, cast_op1;
5797 	auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type);
5798 	auto &out_type = get<SPIRType>(result_type);
5799 
5800 	// Special case boolean outputs since relational opcodes output booleans instead of int/uint.
5801 	string expr;
5802 	if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean)
5803 	{
5804 		expected_type.basetype = input_type;
5805 		expr = bitcast_glsl_op(out_type, expected_type);
5806 		expr += '(';
5807 		expr += join(op, "(", cast_op0, ", ", cast_op1, ")");
5808 		expr += ')';
5809 	}
5810 	else
5811 	{
5812 		expr += join(op, "(", cast_op0, ", ", cast_op1, ")");
5813 	}
5814 
5815 	emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1));
5816 	inherit_expression_dependencies(result_id, op0);
5817 	inherit_expression_dependencies(result_id, op1);
5818 }
5819 
5820 void CompilerGLSL::emit_trinary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
5821                                         uint32_t op2, const char *op)
5822 {
5823 	bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2);
5824 	emit_op(result_type, result_id,
5825 	        join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ", ",
5826 	             to_unpacked_expression(op2), ")"),
5827 	        forward);
5828 
5829 	inherit_expression_dependencies(result_id, op0);
5830 	inherit_expression_dependencies(result_id, op1);
5831 	inherit_expression_dependencies(result_id, op2);
5832 }
5833 
5834 void CompilerGLSL::emit_quaternary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
5835                                            uint32_t op2, uint32_t op3, const char *op)
5836 {
5837 	bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2) && should_forward(op3);
5838 	emit_op(result_type, result_id,
5839 	        join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ", ",
5840 	             to_unpacked_expression(op2), ", ", to_unpacked_expression(op3), ")"),
5841 	        forward);
5842 
5843 	inherit_expression_dependencies(result_id, op0);
5844 	inherit_expression_dependencies(result_id, op1);
5845 	inherit_expression_dependencies(result_id, op2);
5846 	inherit_expression_dependencies(result_id, op3);
5847 }
5848 
5849 void CompilerGLSL::emit_bitfield_insert_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
5850                                            uint32_t op2, uint32_t op3, const char *op,
5851                                            SPIRType::BaseType offset_count_type)
5852 {
5853 	// Only need to cast offset/count arguments. Types of base/insert must be the same as the result type,
5854 	// and bitfieldInsert is sign invariant.
5855 	bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2) && should_forward(op3);
5856 
5857 	auto op0_expr = to_unpacked_expression(op0);
5858 	auto op1_expr = to_unpacked_expression(op1);
5859 	auto op2_expr = to_unpacked_expression(op2);
5860 	auto op3_expr = to_unpacked_expression(op3);
5861 
5862 	SPIRType target_type;
5863 	target_type.vecsize = 1;
5864 	target_type.basetype = offset_count_type;
5865 
5866 	if (expression_type(op2).basetype != offset_count_type)
5867 	{
5868 		// Value-cast here. Input might be 16-bit. GLSL requires int.
5869 		op2_expr = join(type_to_glsl_constructor(target_type), "(", op2_expr, ")");
5870 	}
5871 
5872 	if (expression_type(op3).basetype != offset_count_type)
5873 	{
5874 		// Value-cast here. Input might be 16-bit. GLSL requires int.
5875 		op3_expr = join(type_to_glsl_constructor(target_type), "(", op3_expr, ")");
5876 	}
5877 
5878 	emit_op(result_type, result_id, join(op, "(", op0_expr, ", ", op1_expr, ", ", op2_expr, ", ", op3_expr, ")"),
5879 	        forward);
5880 
5881 	inherit_expression_dependencies(result_id, op0);
5882 	inherit_expression_dependencies(result_id, op1);
5883 	inherit_expression_dependencies(result_id, op2);
5884 	inherit_expression_dependencies(result_id, op3);
5885 }
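// Illustrative note (not in the original source): if offset/count arrive as 16-bit integers,
// the emitted call becomes e.g. bitfieldInsert(base, insert, int(offset), int(count)),
// since GLSL requires plain int for those two arguments.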
5886 
5887 string CompilerGLSL::legacy_tex_op(const std::string &op, const SPIRType &imgtype, uint32_t tex)
5888 {
5889 	const char *type;
5890 	switch (imgtype.image.dim)
5891 	{
5892 	case spv::Dim1D:
5893 		type = (imgtype.image.arrayed && !options.es) ? "1DArray" : "1D";
5894 		break;
5895 	case spv::Dim2D:
5896 		type = (imgtype.image.arrayed && !options.es) ? "2DArray" : "2D";
5897 		break;
5898 	case spv::Dim3D:
5899 		type = "3D";
5900 		break;
5901 	case spv::DimCube:
5902 		type = "Cube";
5903 		break;
5904 	case spv::DimRect:
5905 		type = "2DRect";
5906 		break;
5907 	case spv::DimBuffer:
5908 		type = "Buffer";
5909 		break;
5910 	case spv::DimSubpassData:
5911 		type = "2D";
5912 		break;
5913 	default:
5914 		type = "";
5915 		break;
5916 	}
5917 
5918 	// In legacy GLSL, an extension is required for textureLod in the fragment
5919 	// shader or textureGrad anywhere.
5920 	bool legacy_lod_ext = false;
5921 	auto &execution = get_entry_point();
5922 	if (op == "textureGrad" || op == "textureProjGrad" ||
5923 	    ((op == "textureLod" || op == "textureProjLod") && execution.model != ExecutionModelVertex))
5924 	{
5925 		if (is_legacy_es())
5926 		{
5927 			legacy_lod_ext = true;
5928 			require_extension_internal("GL_EXT_shader_texture_lod");
5929 		}
5930 		else if (is_legacy_desktop())
5931 			require_extension_internal("GL_ARB_shader_texture_lod");
5932 	}
5933 
5934 	if (op == "textureLodOffset" || op == "textureProjLodOffset")
5935 	{
5936 		if (is_legacy_es())
5937 			SPIRV_CROSS_THROW(join(op, " not allowed in legacy ES"));
5938 
5939 		require_extension_internal("GL_EXT_gpu_shader4");
5940 	}
5941 
5942 	// GLES has very limited support for shadow samplers.
5943 	// Basically shadow2D and shadow2DProj work through EXT_shadow_samplers;
5944 	// for everything else we simply throw.
5945 	bool is_comparison = image_is_comparison(imgtype, tex);
5946 	if (is_comparison && is_legacy_es())
5947 	{
5948 		if (op == "texture" || op == "textureProj")
5949 			require_extension_internal("GL_EXT_shadow_samplers");
5950 		else
5951 			SPIRV_CROSS_THROW(join(op, " not allowed on depth samplers in legacy ES"));
5952 	}
5953 
5954 	if (op == "textureSize")
5955 	{
5956 		if (is_legacy_es())
5957 			SPIRV_CROSS_THROW("textureSize not supported in legacy ES");
5958 		if (is_comparison)
5959 			SPIRV_CROSS_THROW("textureSize not supported on shadow sampler in legacy GLSL");
5960 		require_extension_internal("GL_EXT_gpu_shader4");
5961 	}
5962 
5963 	if (op == "texelFetch" && is_legacy_es())
5964 		SPIRV_CROSS_THROW("texelFetch not supported in legacy ES");
5965 
5966 	bool is_es_and_depth = is_legacy_es() && is_comparison;
5967 	std::string type_prefix = is_comparison ? "shadow" : "texture";
5968 
5969 	if (op == "texture")
5970 		return is_es_and_depth ? join(type_prefix, type, "EXT") : join(type_prefix, type);
5971 	else if (op == "textureLod")
5972 		return join(type_prefix, type, legacy_lod_ext ? "LodEXT" : "Lod");
5973 	else if (op == "textureProj")
5974 		return join(type_prefix, type, is_es_and_depth ? "ProjEXT" : "Proj");
5975 	else if (op == "textureGrad")
5976 		return join(type_prefix, type, is_legacy_es() ? "GradEXT" : is_legacy_desktop() ? "GradARB" : "Grad");
5977 	else if (op == "textureProjLod")
5978 		return join(type_prefix, type, legacy_lod_ext ? "ProjLodEXT" : "ProjLod");
5979 	else if (op == "textureLodOffset")
5980 		return join(type_prefix, type, "LodOffset");
5981 	else if (op == "textureProjGrad")
5982 		return join(type_prefix, type,
5983 		            is_legacy_es() ? "ProjGradEXT" : is_legacy_desktop() ? "ProjGradARB" : "ProjGrad");
5984 	else if (op == "textureProjLodOffset")
5985 		return join(type_prefix, type, "ProjLodOffset");
5986 	else if (op == "textureSize")
5987 		return join("textureSize", type);
5988 	else if (op == "texelFetch")
5989 		return join("texelFetch", type);
5990 	else
5991 	{
5992 		SPIRV_CROSS_THROW(join("Unsupported legacy texture op: ", op));
5993 	}
5994 }
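// Illustrative examples of the legacy renaming above (assuming a 2D image):
//   "texture"     -> "texture2D", or "shadow2DEXT" for depth samplers on legacy ES.
//   "textureLod"  -> "texture2DLod", or "texture2DLodEXT" when GL_EXT_shader_texture_lod is required.
//   "textureProj" -> "texture2DProj".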
5995 
5996 bool CompilerGLSL::to_trivial_mix_op(const SPIRType &type, string &op, uint32_t left, uint32_t right, uint32_t lerp)
5997 {
5998 	auto *cleft = maybe_get<SPIRConstant>(left);
5999 	auto *cright = maybe_get<SPIRConstant>(right);
6000 	auto &lerptype = expression_type(lerp);
6001 
6002 	// If our targets aren't constants, we cannot use construction.
6003 	if (!cleft || !cright)
6004 		return false;
6005 
6006 	// If our targets are spec constants, we cannot use construction.
6007 	if (cleft->specialization || cright->specialization)
6008 		return false;
6009 
6010 	// We can only use trivial construction if we have a scalar
6011 	// (should be possible to do it for vectors as well, but that is overkill for now).
6012 	if (lerptype.basetype != SPIRType::Boolean || lerptype.vecsize > 1)
6013 		return false;
6014 
6015 	// If our bool selects between 0 and 1, we can cast from bool instead, turning this into a trivial constructor cast.
6016 	bool ret = false;
6017 	switch (type.basetype)
6018 	{
6019 	case SPIRType::Short:
6020 	case SPIRType::UShort:
6021 		ret = cleft->scalar_u16() == 0 && cright->scalar_u16() == 1;
6022 		break;
6023 
6024 	case SPIRType::Int:
6025 	case SPIRType::UInt:
6026 		ret = cleft->scalar() == 0 && cright->scalar() == 1;
6027 		break;
6028 
6029 	case SPIRType::Half:
6030 		ret = cleft->scalar_f16() == 0.0f && cright->scalar_f16() == 1.0f;
6031 		break;
6032 
6033 	case SPIRType::Float:
6034 		ret = cleft->scalar_f32() == 0.0f && cright->scalar_f32() == 1.0f;
6035 		break;
6036 
6037 	case SPIRType::Double:
6038 		ret = cleft->scalar_f64() == 0.0 && cright->scalar_f64() == 1.0;
6039 		break;
6040 
6041 	case SPIRType::Int64:
6042 	case SPIRType::UInt64:
6043 		ret = cleft->scalar_u64() == 0 && cright->scalar_u64() == 1;
6044 		break;
6045 
6046 	default:
6047 		break;
6048 	}
6049 
6050 	if (ret)
6051 		op = type_to_glsl_constructor(type);
6052 	return ret;
6053 }
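// Illustrative note: with a scalar bool selector, a constant 0 for 'left' and a constant 1 for
// 'right', the select collapses into a plain constructor cast, e.g. int(cond) or uint(cond).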
6054 
6055 string CompilerGLSL::to_ternary_expression(const SPIRType &restype, uint32_t select, uint32_t true_value,
6056                                            uint32_t false_value)
6057 {
6058 	string expr;
6059 	auto &lerptype = expression_type(select);
6060 
6061 	if (lerptype.vecsize == 1)
6062 		expr = join(to_enclosed_expression(select), " ? ", to_enclosed_pointer_expression(true_value), " : ",
6063 		            to_enclosed_pointer_expression(false_value));
6064 	else
6065 	{
6066 		auto swiz = [this](uint32_t expression, uint32_t i) { return to_extract_component_expression(expression, i); };
6067 
6068 		expr = type_to_glsl_constructor(restype);
6069 		expr += "(";
6070 		for (uint32_t i = 0; i < restype.vecsize; i++)
6071 		{
6072 			expr += swiz(select, i);
6073 			expr += " ? ";
6074 			expr += swiz(true_value, i);
6075 			expr += " : ";
6076 			expr += swiz(false_value, i);
6077 			if (i + 1 < restype.vecsize)
6078 				expr += ", ";
6079 		}
6080 		expr += ")";
6081 	}
6082 
6083 	return expr;
6084 }
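// Illustrative note: for a vector selector this expands per component, e.g. for a bvec2:
// vec2(s.x ? t.x : f.x, s.y ? t.y : f.y); for a scalar selector it is a single ?: expression.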
6085 
6086 void CompilerGLSL::emit_mix_op(uint32_t result_type, uint32_t id, uint32_t left, uint32_t right, uint32_t lerp)
6087 {
6088 	auto &lerptype = expression_type(lerp);
6089 	auto &restype = get<SPIRType>(result_type);
6090 
6091 	// If this results in a variable pointer, assume it may be written through.
6092 	if (restype.pointer)
6093 	{
6094 		register_write(left);
6095 		register_write(right);
6096 	}
6097 
6098 	string mix_op;
6099 	bool has_boolean_mix = *backend.boolean_mix_function &&
6100 	                       ((options.es && options.version >= 310) || (!options.es && options.version >= 450));
6101 	bool trivial_mix = to_trivial_mix_op(restype, mix_op, left, right, lerp);
6102 
6103 	// Cannot use boolean mix when the lerp argument is just one boolean;
6104 	// fall back to regular ternary statements.
6105 	if (lerptype.vecsize == 1)
6106 		has_boolean_mix = false;
6107 
6108 	// If we can reduce the mix to a simple cast, do so.
6109 	// This helps for cases like int(bool), uint(bool) which are implemented with
6110 	// OpSelect bool 1 0.
6111 	if (trivial_mix)
6112 	{
6113 		emit_unary_func_op(result_type, id, lerp, mix_op.c_str());
6114 	}
6115 	else if (!has_boolean_mix && lerptype.basetype == SPIRType::Boolean)
6116 	{
6117 		// Boolean mix not supported on desktop without extension.
6118 		// Was added in OpenGL 4.5 with ES 3.1 compat.
6119 		//
6120 		// Could use GL_EXT_shader_integer_mix on desktop at least,
6121 		// but Apple doesn't support it. :(
6122 		// Just implement it as ternary expressions.
6123 		auto expr = to_ternary_expression(get<SPIRType>(result_type), lerp, right, left);
6124 		emit_op(result_type, id, expr, should_forward(left) && should_forward(right) && should_forward(lerp));
6125 		inherit_expression_dependencies(id, left);
6126 		inherit_expression_dependencies(id, right);
6127 		inherit_expression_dependencies(id, lerp);
6128 	}
6129 	else if (lerptype.basetype == SPIRType::Boolean)
6130 		emit_trinary_func_op(result_type, id, left, right, lerp, backend.boolean_mix_function);
6131 	else
6132 		emit_trinary_func_op(result_type, id, left, right, lerp, "mix");
6133 }
6134 
6135 string CompilerGLSL::to_combined_image_sampler(VariableID image_id, VariableID samp_id)
6136 {
6137 	// Keep track of the array indices we have used to load the image.
6138 	// We'll need to use the same array index into the combined image sampler array.
6139 	auto image_expr = to_expression(image_id);
6140 	string array_expr;
6141 	auto array_index = image_expr.find_first_of('[');
6142 	if (array_index != string::npos)
6143 		array_expr = image_expr.substr(array_index, string::npos);
6144 
6145 	auto &args = current_function->arguments;
6146 
6147 	// For GLSL and ESSL targets, we must enumerate all possible combinations of sampler2D(texture2D, sampler) and redirect
6148 	// each combination to a new sampler2D uniform.
6149 	auto *image = maybe_get_backing_variable(image_id);
6150 	auto *samp = maybe_get_backing_variable(samp_id);
6151 	if (image)
6152 		image_id = image->self;
6153 	if (samp)
6154 		samp_id = samp->self;
6155 
6156 	auto image_itr = find_if(begin(args), end(args),
6157 	                         [image_id](const SPIRFunction::Parameter &param) { return image_id == param.id; });
6158 
6159 	auto sampler_itr = find_if(begin(args), end(args),
6160 	                           [samp_id](const SPIRFunction::Parameter &param) { return samp_id == param.id; });
6161 
6162 	if (image_itr != end(args) || sampler_itr != end(args))
6163 	{
6164 		// If either the image or the sampler originates from a function parameter, we will find it in our argument list.
6165 		bool global_image = image_itr == end(args);
6166 		bool global_sampler = sampler_itr == end(args);
6167 		VariableID iid = global_image ? image_id : VariableID(uint32_t(image_itr - begin(args)));
6168 		VariableID sid = global_sampler ? samp_id : VariableID(uint32_t(sampler_itr - begin(args)));
6169 
6170 		auto &combined = current_function->combined_parameters;
6171 		auto itr = find_if(begin(combined), end(combined), [=](const SPIRFunction::CombinedImageSamplerParameter &p) {
6172 			return p.global_image == global_image && p.global_sampler == global_sampler && p.image_id == iid &&
6173 			       p.sampler_id == sid;
6174 		});
6175 
6176 		if (itr != end(combined))
6177 			return to_expression(itr->id) + array_expr;
6178 		else
6179 		{
6180 			SPIRV_CROSS_THROW("Cannot find mapping for combined sampler parameter, was "
6181 			                  "build_combined_image_samplers() used "
6182 			                  "before compile() was called?");
6183 		}
6184 	}
6185 	else
6186 	{
6187 		// For global sampler2D, look directly at the global remapping table.
6188 		auto &mapping = combined_image_samplers;
6189 		auto itr = find_if(begin(mapping), end(mapping), [image_id, samp_id](const CombinedImageSampler &combined) {
6190 			return combined.image_id == image_id && combined.sampler_id == samp_id;
6191 		});
6192 
6193 		if (itr != end(combined_image_samplers))
6194 			return to_expression(itr->combined_id) + array_expr;
6195 		else
6196 		{
6197 			SPIRV_CROSS_THROW("Cannot find mapping for combined sampler, was build_combined_image_samplers() used "
6198 			                  "before compile() was called?");
6199 		}
6200 	}
6201 }
6202 
6203 bool CompilerGLSL::is_supported_subgroup_op_in_opengl(spv::Op op)
6204 {
6205 	switch (op)
6206 	{
6207 	case OpGroupNonUniformElect:
6208 	case OpGroupNonUniformBallot:
6209 	case OpGroupNonUniformBallotFindLSB:
6210 	case OpGroupNonUniformBallotFindMSB:
6211 	case OpGroupNonUniformBroadcast:
6212 	case OpGroupNonUniformBroadcastFirst:
6213 	case OpGroupNonUniformAll:
6214 	case OpGroupNonUniformAny:
6215 	case OpGroupNonUniformAllEqual:
6216 	case OpControlBarrier:
6217 	case OpMemoryBarrier:
6218 	case OpGroupNonUniformBallotBitCount:
6219 	case OpGroupNonUniformBallotBitExtract:
6220 	case OpGroupNonUniformInverseBallot:
6221 		return true;
6222 	default:
6223 		return false;
6224 	}
6225 }
6226 
6227 void CompilerGLSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id)
6228 {
6229 	if (options.vulkan_semantics && combined_image_samplers.empty())
6230 	{
6231 		emit_binary_func_op(result_type, result_id, image_id, samp_id,
6232 		                    type_to_glsl(get<SPIRType>(result_type), result_id).c_str());
6233 	}
6234 	else
6235 	{
6236 		// Make sure to suppress usage tracking. It is illegal to create temporaries of opaque types.
6237 		emit_op(result_type, result_id, to_combined_image_sampler(image_id, samp_id), true, true);
6238 	}
6239 
6240 	// Make sure to suppress usage tracking and any expression invalidation.
6241 	// It is illegal to create temporaries of opaque types.
6242 	forwarded_temporaries.erase(result_id);
6243 }
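// Illustrative note: with Vulkan semantics this emits a constructor expression such as
// sampler2D(tex, samp); otherwise the expression resolves to the name of the remapped
// combined image sampler uniform.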
6244 
6245 static inline bool image_opcode_is_sample_no_dref(Op op)
6246 {
6247 	switch (op)
6248 	{
6249 	case OpImageSampleExplicitLod:
6250 	case OpImageSampleImplicitLod:
6251 	case OpImageSampleProjExplicitLod:
6252 	case OpImageSampleProjImplicitLod:
6253 	case OpImageFetch:
6254 	case OpImageRead:
6255 	case OpImageSparseSampleExplicitLod:
6256 	case OpImageSparseSampleImplicitLod:
6257 	case OpImageSparseSampleProjExplicitLod:
6258 	case OpImageSparseSampleProjImplicitLod:
6259 	case OpImageSparseFetch:
6260 	case OpImageSparseRead:
6261 		return true;
6262 
6263 	default:
6264 		return false;
6265 	}
6266 }
6267 
6268 void CompilerGLSL::emit_sparse_feedback_temporaries(uint32_t result_type_id, uint32_t id, uint32_t &feedback_id,
6269                                                     uint32_t &texel_id)
6270 {
6271 	// Need to allocate two temporaries.
6272 	if (options.es)
6273 		SPIRV_CROSS_THROW("Sparse texture feedback is not supported on ESSL.");
6274 	require_extension_internal("GL_ARB_sparse_texture2");
6275 
6276 	auto &temps = extra_sub_expressions[id];
6277 	if (temps == 0)
6278 		temps = ir.increase_bound_by(2);
6279 
6280 	feedback_id = temps + 0;
6281 	texel_id = temps + 1;
6282 
6283 	auto &return_type = get<SPIRType>(result_type_id);
6284 	if (return_type.basetype != SPIRType::Struct || return_type.member_types.size() != 2)
6285 		SPIRV_CROSS_THROW("Invalid return type for sparse feedback.");
6286 	emit_uninitialized_temporary(return_type.member_types[0], feedback_id);
6287 	emit_uninitialized_temporary(return_type.member_types[1], texel_id);
6288 }
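// Illustrative note: the two temporaries hold the residency code and the texel.
// emit_texture_op() later assigns the code from the sparse*ARB call (its return value) and
// rebuilds the SPIR-V result as a constructor of the result struct type, roughly
// ResultStruct(code, texel), where ResultStruct stands for the generated struct name.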
6289 
6290 uint32_t CompilerGLSL::get_sparse_feedback_texel_id(uint32_t id) const
6291 {
6292 	auto itr = extra_sub_expressions.find(id);
6293 	if (itr == extra_sub_expressions.end())
6294 		return 0;
6295 	else
6296 		return itr->second + 1;
6297 }
6298 
6299 void CompilerGLSL::emit_texture_op(const Instruction &i, bool sparse)
6300 {
6301 	auto *ops = stream(i);
6302 	auto op = static_cast<Op>(i.op);
6303 
6304 	SmallVector<uint32_t> inherited_expressions;
6305 
6306 	uint32_t result_type_id = ops[0];
6307 	uint32_t id = ops[1];
6308 	auto &return_type = get<SPIRType>(result_type_id);
6309 
6310 	uint32_t sparse_code_id = 0;
6311 	uint32_t sparse_texel_id = 0;
6312 	if (sparse)
6313 		emit_sparse_feedback_temporaries(result_type_id, id, sparse_code_id, sparse_texel_id);
6314 
6315 	bool forward = false;
6316 	string expr = to_texture_op(i, sparse, &forward, inherited_expressions);
6317 
6318 	if (sparse)
6319 	{
6320 		statement(to_expression(sparse_code_id), " = ", expr, ";");
6321 		expr = join(type_to_glsl(return_type), "(", to_expression(sparse_code_id), ", ", to_expression(sparse_texel_id),
6322 		            ")");
6323 		forward = true;
6324 		inherited_expressions.clear();
6325 	}
6326 
6327 	emit_op(result_type_id, id, expr, forward);
6328 	for (auto &inherit : inherited_expressions)
6329 		inherit_expression_dependencies(id, inherit);
6330 
6331 	// Do not register sparse ops as control dependent as they are always lowered to a temporary.
6332 	switch (op)
6333 	{
6334 	case OpImageSampleDrefImplicitLod:
6335 	case OpImageSampleImplicitLod:
6336 	case OpImageSampleProjImplicitLod:
6337 	case OpImageSampleProjDrefImplicitLod:
6338 		register_control_dependent_expression(id);
6339 		break;
6340 
6341 	default:
6342 		break;
6343 	}
6344 }
6345 
6346 std::string CompilerGLSL::to_texture_op(const Instruction &i, bool sparse, bool *forward,
6347                                         SmallVector<uint32_t> &inherited_expressions)
6348 {
6349 	auto *ops = stream(i);
6350 	auto op = static_cast<Op>(i.op);
6351 	uint32_t length = i.length;
6352 
6353 	uint32_t result_type_id = ops[0];
6354 	VariableID img = ops[2];
6355 	uint32_t coord = ops[3];
6356 	uint32_t dref = 0;
6357 	uint32_t comp = 0;
6358 	bool gather = false;
6359 	bool proj = false;
6360 	bool fetch = false;
6361 	bool nonuniform_expression = false;
6362 	const uint32_t *opt = nullptr;
6363 
6364 	auto &result_type = get<SPIRType>(result_type_id);
6365 
6366 	inherited_expressions.push_back(coord);
6367 
6368 	// Make sure non-uniform decoration is back-propagated to where it needs to be.
6369 	if (has_decoration(img, DecorationNonUniformEXT))
6370 	{
6371 		// In Vulkan GLSL, we cannot back-propagate nonuniform qualifiers if we
6372 		// use a combined image sampler constructor.
6373 		// We're only interested in back-propagating if we can trace back through access chains.
6374 		// If not, we will apply nonuniform to the sampled image expression itself.
6375 		auto *backing = maybe_get_backing_variable(img);
6376 		if (backing)
6377 			propagate_nonuniform_qualifier(img);
6378 		else
6379 			nonuniform_expression = true;
6380 	}
6381 
6382 	switch (op)
6383 	{
6384 	case OpImageSampleDrefImplicitLod:
6385 	case OpImageSampleDrefExplicitLod:
6386 	case OpImageSparseSampleDrefImplicitLod:
6387 	case OpImageSparseSampleDrefExplicitLod:
6388 		dref = ops[4];
6389 		opt = &ops[5];
6390 		length -= 5;
6391 		break;
6392 
6393 	case OpImageSampleProjDrefImplicitLod:
6394 	case OpImageSampleProjDrefExplicitLod:
6395 	case OpImageSparseSampleProjDrefImplicitLod:
6396 	case OpImageSparseSampleProjDrefExplicitLod:
6397 		dref = ops[4];
6398 		opt = &ops[5];
6399 		length -= 5;
6400 		proj = true;
6401 		break;
6402 
6403 	case OpImageDrefGather:
6404 	case OpImageSparseDrefGather:
6405 		dref = ops[4];
6406 		opt = &ops[5];
6407 		length -= 5;
6408 		gather = true;
6409 		if (options.es && options.version < 310)
6410 			SPIRV_CROSS_THROW("textureGather requires ESSL 310.");
6411 		else if (!options.es && options.version < 400)
6412 			SPIRV_CROSS_THROW("textureGather with depth compare requires GLSL 400.");
6413 		break;
6414 
6415 	case OpImageGather:
6416 	case OpImageSparseGather:
6417 		comp = ops[4];
6418 		opt = &ops[5];
6419 		length -= 5;
6420 		gather = true;
6421 		if (options.es && options.version < 310)
6422 			SPIRV_CROSS_THROW("textureGather requires ESSL 310.");
6423 		else if (!options.es && options.version < 400)
6424 		{
6425 			if (!expression_is_constant_null(comp))
6426 				SPIRV_CROSS_THROW("textureGather with component requires GLSL 400.");
6427 			require_extension_internal("GL_ARB_texture_gather");
6428 		}
6429 		break;
6430 
6431 	case OpImageFetch:
6432 	case OpImageSparseFetch:
6433 	case OpImageRead: // Reads == fetches in Metal (other langs will not get here)
6434 		opt = &ops[4];
6435 		length -= 4;
6436 		fetch = true;
6437 		break;
6438 
6439 	case OpImageSampleProjImplicitLod:
6440 	case OpImageSampleProjExplicitLod:
6441 	case OpImageSparseSampleProjImplicitLod:
6442 	case OpImageSparseSampleProjExplicitLod:
6443 		opt = &ops[4];
6444 		length -= 4;
6445 		proj = true;
6446 		break;
6447 
6448 	default:
6449 		opt = &ops[4];
6450 		length -= 4;
6451 		break;
6452 	}
6453 
6454 	// Bypass pointers because we need the real image struct
6455 	auto &type = expression_type(img);
6456 	auto &imgtype = get<SPIRType>(type.self);
6457 
6458 	uint32_t coord_components = 0;
6459 	switch (imgtype.image.dim)
6460 	{
6461 	case spv::Dim1D:
6462 		coord_components = 1;
6463 		break;
6464 	case spv::Dim2D:
6465 		coord_components = 2;
6466 		break;
6467 	case spv::Dim3D:
6468 		coord_components = 3;
6469 		break;
6470 	case spv::DimCube:
6471 		coord_components = 3;
6472 		break;
6473 	case spv::DimBuffer:
6474 		coord_components = 1;
6475 		break;
6476 	default:
6477 		coord_components = 2;
6478 		break;
6479 	}
6480 
6481 	if (dref)
6482 		inherited_expressions.push_back(dref);
6483 
6484 	if (proj)
6485 		coord_components++;
6486 	if (imgtype.image.arrayed)
6487 		coord_components++;
6488 
6489 	uint32_t bias = 0;
6490 	uint32_t lod = 0;
6491 	uint32_t grad_x = 0;
6492 	uint32_t grad_y = 0;
6493 	uint32_t coffset = 0;
6494 	uint32_t offset = 0;
6495 	uint32_t coffsets = 0;
6496 	uint32_t sample = 0;
6497 	uint32_t minlod = 0;
6498 	uint32_t flags = 0;
6499 
6500 	if (length)
6501 	{
6502 		flags = *opt++;
6503 		length--;
6504 	}
6505 
6506 	auto test = [&](uint32_t &v, uint32_t flag) {
6507 		if (length && (flags & flag))
6508 		{
6509 			v = *opt++;
6510 			inherited_expressions.push_back(v);
6511 			length--;
6512 		}
6513 	};
6514 
6515 	test(bias, ImageOperandsBiasMask);
6516 	test(lod, ImageOperandsLodMask);
6517 	test(grad_x, ImageOperandsGradMask);
6518 	test(grad_y, ImageOperandsGradMask);
6519 	test(coffset, ImageOperandsConstOffsetMask);
6520 	test(offset, ImageOperandsOffsetMask);
6521 	test(coffsets, ImageOperandsConstOffsetsMask);
6522 	test(sample, ImageOperandsSampleMask);
6523 	test(minlod, ImageOperandsMinLodMask);
6524 
6525 	TextureFunctionBaseArguments base_args = {};
6526 	base_args.img = img;
6527 	base_args.imgtype = &imgtype;
6528 	base_args.is_fetch = fetch != 0;
6529 	base_args.is_gather = gather != 0;
6530 	base_args.is_proj = proj != 0;
6531 
6532 	string expr;
6533 	TextureFunctionNameArguments name_args = {};
6534 
6535 	name_args.base = base_args;
6536 	name_args.has_array_offsets = coffsets != 0;
6537 	name_args.has_offset = coffset != 0 || offset != 0;
6538 	name_args.has_grad = grad_x != 0 || grad_y != 0;
6539 	name_args.has_dref = dref != 0;
6540 	name_args.is_sparse_feedback = sparse;
6541 	name_args.has_min_lod = minlod != 0;
6542 	name_args.lod = lod;
6543 	expr += to_function_name(name_args);
6544 	expr += "(";
6545 
6546 	uint32_t sparse_texel_id = 0;
6547 	if (sparse)
6548 		sparse_texel_id = get_sparse_feedback_texel_id(ops[1]);
6549 
6550 	TextureFunctionArguments args = {};
6551 	args.base = base_args;
6552 	args.coord = coord;
6553 	args.coord_components = coord_components;
6554 	args.dref = dref;
6555 	args.grad_x = grad_x;
6556 	args.grad_y = grad_y;
6557 	args.lod = lod;
6558 	args.coffset = coffset;
6559 	args.offset = offset;
6560 	args.bias = bias;
6561 	args.component = comp;
6562 	args.sample = sample;
6563 	args.sparse_texel = sparse_texel_id;
6564 	args.min_lod = minlod;
6565 	args.nonuniform_expression = nonuniform_expression;
6566 	expr += to_function_args(args, forward);
6567 	expr += ")";
6568 
6569 	// texture(samplerXShadow) returns float. shadowX() returns vec4. Swizzle here.
6570 	if (is_legacy() && image_is_comparison(imgtype, img))
6571 		expr += ".r";
6572 
6573 	// Sampling from a texture which was deduced to be a depth image might actually return 1 component here.
6574 	// Remap back to 4 components as sampling opcodes expect.
6575 	if (backend.comparison_image_samples_scalar && image_opcode_is_sample_no_dref(op))
6576 	{
6577 		bool image_is_depth = false;
6578 		const auto *combined = maybe_get<SPIRCombinedImageSampler>(img);
6579 		VariableID image_id = combined ? combined->image : img;
6580 
6581 		if (combined && image_is_comparison(imgtype, combined->image))
6582 			image_is_depth = true;
6583 		else if (image_is_comparison(imgtype, img))
6584 			image_is_depth = true;
6585 
6586 		// We must also check the backing variable for the image.
6587 		// We might have loaded an OpImage, and used that handle for two different purposes.
6588 		// Once with comparison, once without.
6589 		auto *image_variable = maybe_get_backing_variable(image_id);
6590 		if (image_variable && image_is_comparison(get<SPIRType>(image_variable->basetype), image_variable->self))
6591 			image_is_depth = true;
6592 
6593 		if (image_is_depth)
6594 			expr = remap_swizzle(result_type, 1, expr);
6595 	}
6596 
6597 	if (!sparse && !backend.support_small_type_sampling_result && result_type.width < 32)
6598 	{
6599 		// Just value cast (narrowing) to expected type since we cannot rely on narrowing to work automatically.
6600 		// Hopefully compiler picks this up and converts the texturing instruction to the appropriate precision.
6601 		expr = join(type_to_glsl_constructor(result_type), "(", expr, ")");
6602 	}
6603 
6604 	// Deals with reads from MSL. We might need to downconvert to fewer components.
6605 	if (op == OpImageRead)
6606 		expr = remap_swizzle(result_type, 4, expr);
6607 
6608 	return expr;
6609 }
6610 
6611 bool CompilerGLSL::expression_is_constant_null(uint32_t id) const
6612 {
6613 	auto *c = maybe_get<SPIRConstant>(id);
6614 	if (!c)
6615 		return false;
6616 	return c->constant_is_null();
6617 }
6618 
6619 bool CompilerGLSL::expression_is_non_value_type_array(uint32_t ptr)
6620 {
6621 	auto &type = expression_type(ptr);
6622 	if (type.array.empty())
6623 		return false;
6624 
6625 	if (!backend.array_is_value_type)
6626 		return true;
6627 
6628 	auto *var = maybe_get_backing_variable(ptr);
6629 	if (!var)
6630 		return false;
6631 
6632 	auto &backed_type = get<SPIRType>(var->basetype);
6633 	return !backend.buffer_offset_array_is_value_type && backed_type.basetype == SPIRType::Struct &&
6634 	       has_member_decoration(backed_type.self, 0, DecorationOffset);
6635 }
6636 
6637 // Returns the function name for a texture sampling function for the specified image and sampling characteristics.
6638 // For some subclasses, the function is a method on the specified image.
6639 string CompilerGLSL::to_function_name(const TextureFunctionNameArguments &args)
6640 {
6641 	if (args.has_min_lod)
6642 	{
6643 		if (options.es)
6644 			SPIRV_CROSS_THROW("Sparse residency is not supported in ESSL.");
6645 		require_extension_internal("GL_ARB_sparse_texture_clamp");
6646 	}
6647 
6648 	string fname;
6649 	auto &imgtype = *args.base.imgtype;
6650 	VariableID tex = args.base.img;
6651 
6652 	// textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason.
6653 	// To emulate this, we will have to use textureGrad with a constant gradient of 0.
6654 	// The workaround will assert that the LOD is in fact constant 0, or we cannot emit correct code.
6655 	// This happens for HLSL SampleCmpLevelZero on Texture2DArray and TextureCube.
6656 	bool workaround_lod_array_shadow_as_grad = false;
6657 	if (((imgtype.image.arrayed && imgtype.image.dim == Dim2D) || imgtype.image.dim == DimCube) &&
6658 	    image_is_comparison(imgtype, tex) && args.lod)
6659 	{
6660 		if (!expression_is_constant_null(args.lod))
6661 		{
6662 			SPIRV_CROSS_THROW("textureLod on sampler2DArrayShadow is not constant 0.0. This cannot be "
6663 			                  "expressed in GLSL.");
6664 		}
6665 		workaround_lod_array_shadow_as_grad = true;
6666 	}
6667 
6668 	if (args.is_sparse_feedback)
6669 		fname += "sparse";
6670 
6671 	if (args.base.is_fetch)
6672 		fname += args.is_sparse_feedback ? "TexelFetch" : "texelFetch";
6673 	else
6674 	{
6675 		fname += args.is_sparse_feedback ? "Texture" : "texture";
6676 
6677 		if (args.base.is_gather)
6678 			fname += "Gather";
6679 		if (args.has_array_offsets)
6680 			fname += "Offsets";
6681 		if (args.base.is_proj)
6682 			fname += "Proj";
6683 		if (args.has_grad || workaround_lod_array_shadow_as_grad)
6684 			fname += "Grad";
6685 		if (args.lod != 0 && !workaround_lod_array_shadow_as_grad)
6686 			fname += "Lod";
6687 	}
6688 
6689 	if (args.has_offset)
6690 		fname += "Offset";
6691 
6692 	if (args.has_min_lod)
6693 		fname += "Clamp";
6694 
6695 	if (args.is_sparse_feedback || args.has_min_lod)
6696 		fname += "ARB";
6697 
6698 	return (is_legacy() && !args.base.is_gather) ? legacy_tex_op(fname, imgtype, tex) : fname;
6699 }
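// Illustrative examples of the name composition above (assumed, not from the original source):
//   fetch                   -> "texelFetch", or "sparseTexelFetchARB" with sparse feedback.
//   gather + offset         -> "textureGatherOffset".
//   sample + grad + min lod -> "textureGradClampARB".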
6700 
6701 std::string CompilerGLSL::convert_separate_image_to_expression(uint32_t id)
6702 {
6703 	auto *var = maybe_get_backing_variable(id);
6704 
6705 	// If we are fetching from a plain OpTypeImage, we must combine with a dummy sampler in GLSL.
6706 	// In Vulkan GLSL, we can make use of the newer GL_EXT_samplerless_texture_functions.
6707 	if (var)
6708 	{
6709 		auto &type = get<SPIRType>(var->basetype);
6710 		if (type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer)
6711 		{
6712 			if (options.vulkan_semantics)
6713 			{
6714 				if (dummy_sampler_id)
6715 				{
6716 					// Don't need to consider Shadow state since the dummy sampler is always non-shadow.
6717 					auto sampled_type = type;
6718 					sampled_type.basetype = SPIRType::SampledImage;
6719 					return join(type_to_glsl(sampled_type), "(", to_expression(id), ", ",
6720 					            to_expression(dummy_sampler_id), ")");
6721 				}
6722 				else
6723 				{
6724 					// Newer glslang supports this extension to deal with texture2D as argument to texture functions.
6725 					require_extension_internal("GL_EXT_samplerless_texture_functions");
6726 				}
6727 			}
6728 			else
6729 			{
6730 				if (!dummy_sampler_id)
6731 					SPIRV_CROSS_THROW("Cannot find dummy sampler ID. Was "
6732 					                  "build_dummy_sampler_for_combined_images() called?");
6733 
6734 				return to_combined_image_sampler(id, dummy_sampler_id);
6735 			}
6736 		}
6737 	}
6738 
6739 	return to_expression(id);
6740 }
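// Illustrative note: with Vulkan semantics and a dummy sampler built, a separate texture used
// for fetches is combined on the fly, producing e.g. sampler2D(tex, dummySampler), where
// dummySampler stands for whatever the generated dummy sampler variable is named. Without
// Vulkan semantics, the pair is redirected through the combined image sampler remapping instead.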
6741 
6742 // Returns the function args for a texture sampling function for the specified image and sampling characteristics.
6743 string CompilerGLSL::to_function_args(const TextureFunctionArguments &args, bool *p_forward)
6744 {
6745 	VariableID img = args.base.img;
6746 	auto &imgtype = *args.base.imgtype;
6747 
6748 	string farg_str;
6749 	if (args.base.is_fetch)
6750 		farg_str = convert_separate_image_to_expression(img);
6751 	else
6752 		farg_str = to_expression(img);
6753 
6754 	if (args.nonuniform_expression && farg_str.find_first_of('[') != string::npos)
6755 	{
6756 		// Only emit nonuniformEXT() wrapper if the underlying expression is arrayed in some way.
6757 		farg_str = join(backend.nonuniform_qualifier, "(", farg_str, ")");
6758 	}
6759 
6760 	bool swizz_func = backend.swizzle_is_function;
6761 	auto swizzle = [swizz_func](uint32_t comps, uint32_t in_comps) -> const char * {
6762 		if (comps == in_comps)
6763 			return "";
6764 
6765 		switch (comps)
6766 		{
6767 		case 1:
6768 			return ".x";
6769 		case 2:
6770 			return swizz_func ? ".xy()" : ".xy";
6771 		case 3:
6772 			return swizz_func ? ".xyz()" : ".xyz";
6773 		default:
6774 			return "";
6775 		}
6776 	};
6777 
6778 	bool forward = should_forward(args.coord);
6779 
6780 	// The IR can give us more components than we need, so chop them off as needed.
6781 	auto swizzle_expr = swizzle(args.coord_components, expression_type(args.coord).vecsize);
6782 	// Only enclose the UV expression if needed.
6783 	auto coord_expr =
6784 	    (*swizzle_expr == '\0') ? to_expression(args.coord) : (to_enclosed_expression(args.coord) + swizzle_expr);
6785 
6786 	// texelFetch only takes int, not uint.
6787 	auto &coord_type = expression_type(args.coord);
6788 	if (coord_type.basetype == SPIRType::UInt)
6789 	{
6790 		auto expected_type = coord_type;
6791 		expected_type.vecsize = args.coord_components;
6792 		expected_type.basetype = SPIRType::Int;
6793 		coord_expr = bitcast_expression(expected_type, coord_type.basetype, coord_expr);
6794 	}
6795 
6796 	// textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason.
6797 	// To emulate this, we will have to use textureGrad with a constant gradient of 0.
6798 	// The workaround will assert that the LOD is in fact constant 0, or we cannot emit correct code.
6799 	// This happens for HLSL SampleCmpLevelZero on Texture2DArray and TextureCube.
6800 	bool workaround_lod_array_shadow_as_grad =
6801 	    ((imgtype.image.arrayed && imgtype.image.dim == Dim2D) || imgtype.image.dim == DimCube) &&
6802 	    image_is_comparison(imgtype, img) && args.lod != 0;
6803 
6804 	if (args.dref)
6805 	{
6806 		forward = forward && should_forward(args.dref);
6807 
6808 		// SPIR-V splits dref and coordinate.
6809 		if (args.base.is_gather ||
6810 		    args.coord_components == 4) // GLSL also splits the arguments in two. Same for textureGather.
6811 		{
6812 			farg_str += ", ";
6813 			farg_str += to_expression(args.coord);
6814 			farg_str += ", ";
6815 			farg_str += to_expression(args.dref);
6816 		}
6817 		else if (args.base.is_proj)
6818 		{
6819 			// Have to reshuffle so we get vec4(coord, dref, proj), special case.
6820 			// Other shading languages split up the arguments for coord and compare value like SPIR-V.
6821 			// The coordinate type for textureProj shadow is always vec4 even for sampler1DShadow.
6822 			farg_str += ", vec4(";
6823 
6824 			if (imgtype.image.dim == Dim1D)
6825 			{
6826 				// Could reuse coord_expr, but we will mess up the temporary usage checking.
6827 				farg_str += to_enclosed_expression(args.coord) + ".x";
6828 				farg_str += ", ";
6829 				farg_str += "0.0, ";
6830 				farg_str += to_expression(args.dref);
6831 				farg_str += ", ";
6832 				farg_str += to_enclosed_expression(args.coord) + ".y)";
6833 			}
6834 			else if (imgtype.image.dim == Dim2D)
6835 			{
6836 				// Could reuse coord_expr, but we will mess up the temporary usage checking.
6837 				farg_str += to_enclosed_expression(args.coord) + (swizz_func ? ".xy()" : ".xy");
6838 				farg_str += ", ";
6839 				farg_str += to_expression(args.dref);
6840 				farg_str += ", ";
6841 				farg_str += to_enclosed_expression(args.coord) + ".z)";
6842 			}
6843 			else
6844 				SPIRV_CROSS_THROW("Invalid type for textureProj with shadow.");
6845 		}
6846 		else
6847 		{
6848 			// Create a composite which merges coord/dref into a single vector.
6849 			auto type = expression_type(args.coord);
6850 			type.vecsize = args.coord_components + 1;
6851 			farg_str += ", ";
6852 			farg_str += type_to_glsl_constructor(type);
6853 			farg_str += "(";
6854 			farg_str += coord_expr;
6855 			farg_str += ", ";
6856 			farg_str += to_expression(args.dref);
6857 			farg_str += ")";
6858 		}
6859 	}
6860 	else
6861 	{
6862 		farg_str += ", ";
6863 		farg_str += coord_expr;
6864 	}
6865 
6866 	if (args.grad_x || args.grad_y)
6867 	{
6868 		forward = forward && should_forward(args.grad_x);
6869 		forward = forward && should_forward(args.grad_y);
6870 		farg_str += ", ";
6871 		farg_str += to_expression(args.grad_x);
6872 		farg_str += ", ";
6873 		farg_str += to_expression(args.grad_y);
6874 	}
6875 
6876 	if (args.lod)
6877 	{
6878 		if (workaround_lod_array_shadow_as_grad)
6879 		{
6880 			// Implement textureGrad() instead. LOD == 0.0 is implemented as gradient of 0.0.
6881 			// Implementing this as plain texture() is not safe on some implementations.
6882 			if (imgtype.image.dim == Dim2D)
6883 				farg_str += ", vec2(0.0), vec2(0.0)";
6884 			else if (imgtype.image.dim == DimCube)
6885 				farg_str += ", vec3(0.0), vec3(0.0)";
6886 		}
6887 		else
6888 		{
6889 			forward = forward && should_forward(args.lod);
6890 			farg_str += ", ";
6891 
6892 			auto &lod_expr_type = expression_type(args.lod);
6893 
6894 			// Lod expression for TexelFetch in GLSL must be int, and only int.
6895 			if (args.base.is_fetch && imgtype.image.dim != DimBuffer && !imgtype.image.ms &&
6896 			    lod_expr_type.basetype != SPIRType::Int)
6897 			{
6898 				farg_str += join("int(", to_expression(args.lod), ")");
6899 			}
6900 			else
6901 			{
6902 				farg_str += to_expression(args.lod);
6903 			}
6904 		}
6905 	}
6906 	else if (args.base.is_fetch && imgtype.image.dim != DimBuffer && !imgtype.image.ms)
6907 	{
6908 		// Lod argument is optional in OpImageFetch, but we require a LOD value, so pick 0 as the default.
6909 		farg_str += ", 0";
6910 	}
6911 
6912 	if (args.coffset)
6913 	{
6914 		forward = forward && should_forward(args.coffset);
6915 		farg_str += ", ";
6916 		farg_str += to_expression(args.coffset);
6917 	}
6918 	else if (args.offset)
6919 	{
6920 		forward = forward && should_forward(args.offset);
6921 		farg_str += ", ";
6922 		farg_str += to_expression(args.offset);
6923 	}
6924 
6925 	if (args.sample)
6926 	{
6927 		farg_str += ", ";
6928 		farg_str += to_expression(args.sample);
6929 	}
6930 
6931 	if (args.min_lod)
6932 	{
6933 		farg_str += ", ";
6934 		farg_str += to_expression(args.min_lod);
6935 	}
6936 
6937 	if (args.sparse_texel)
6938 	{
6939 		// The sparse texel output parameter comes after everything else, except that it goes before the optional component/bias arguments.
6940 		farg_str += ", ";
6941 		farg_str += to_expression(args.sparse_texel);
6942 	}
6943 
6944 	if (args.bias)
6945 	{
6946 		forward = forward && should_forward(args.bias);
6947 		farg_str += ", ";
6948 		farg_str += to_expression(args.bias);
6949 	}
6950 
6951 	if (args.component && !expression_is_constant_null(args.component))
6952 	{
6953 		forward = forward && should_forward(args.component);
6954 		farg_str += ", ";
6955 		farg_str += to_expression(args.component);
6956 	}
6957 
6958 	*p_forward = forward;
6959 
6960 	return farg_str;
6961 }
6962 
6963 void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t length)
6964 {
6965 	auto op = static_cast<GLSLstd450>(eop);
6966 
6967 	if (is_legacy() && is_unsigned_glsl_opcode(op))
6968 		SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy GLSL targets.");
6969 
6970 	// If we need to do implicit bitcasts, make sure we do it with the correct type.
6971 	uint32_t integer_width = get_integer_width_for_glsl_instruction(op, args, length);
6972 	auto int_type = to_signed_basetype(integer_width);
6973 	auto uint_type = to_unsigned_basetype(integer_width);
6974 
6975 	switch (op)
6976 	{
6977 	// FP fiddling
6978 	case GLSLstd450Round:
6979 		if (!is_legacy())
6980 			emit_unary_func_op(result_type, id, args[0], "round");
6981 		else
6982 		{
6983 			auto op0 = to_enclosed_expression(args[0]);
6984 			auto &op0_type = expression_type(args[0]);
6985 			auto expr = join("floor(", op0, " + ", type_to_glsl_constructor(op0_type), "(0.5))");
6986 			bool forward = should_forward(args[0]);
6987 			emit_op(result_type, id, expr, forward);
6988 			inherit_expression_dependencies(id, args[0]);
6989 		}
6990 		break;
6991 
6992 	case GLSLstd450RoundEven:
6993 		if (!is_legacy())
6994 			emit_unary_func_op(result_type, id, args[0], "roundEven");
6995 		else if (!options.es)
6996 		{
6997 			// This extension provides round() with round-to-even semantics.
6998 			require_extension_internal("GL_EXT_gpu_shader4");
6999 			emit_unary_func_op(result_type, id, args[0], "round");
7000 		}
7001 		else
7002 			SPIRV_CROSS_THROW("roundEven supported only in ESSL 300.");
7003 		break;
7004 
7005 	case GLSLstd450Trunc:
7006 		emit_unary_func_op(result_type, id, args[0], "trunc");
7007 		break;
7008 	case GLSLstd450SAbs:
7009 		emit_unary_func_op_cast(result_type, id, args[0], "abs", int_type, int_type);
7010 		break;
7011 	case GLSLstd450FAbs:
7012 		emit_unary_func_op(result_type, id, args[0], "abs");
7013 		break;
7014 	case GLSLstd450SSign:
7015 		emit_unary_func_op_cast(result_type, id, args[0], "sign", int_type, int_type);
7016 		break;
7017 	case GLSLstd450FSign:
7018 		emit_unary_func_op(result_type, id, args[0], "sign");
7019 		break;
7020 	case GLSLstd450Floor:
7021 		emit_unary_func_op(result_type, id, args[0], "floor");
7022 		break;
7023 	case GLSLstd450Ceil:
7024 		emit_unary_func_op(result_type, id, args[0], "ceil");
7025 		break;
7026 	case GLSLstd450Fract:
7027 		emit_unary_func_op(result_type, id, args[0], "fract");
7028 		break;
7029 	case GLSLstd450Radians:
7030 		emit_unary_func_op(result_type, id, args[0], "radians");
7031 		break;
7032 	case GLSLstd450Degrees:
7033 		emit_unary_func_op(result_type, id, args[0], "degrees");
7034 		break;
7035 	case GLSLstd450Fma:
7036 		if ((!options.es && options.version < 400) || (options.es && options.version < 320))
7037 		{
7038 			auto expr = join(to_enclosed_expression(args[0]), " * ", to_enclosed_expression(args[1]), " + ",
7039 			                 to_enclosed_expression(args[2]));
7040 
7041 			emit_op(result_type, id, expr,
7042 			        should_forward(args[0]) && should_forward(args[1]) && should_forward(args[2]));
7043 			for (uint32_t i = 0; i < 3; i++)
7044 				inherit_expression_dependencies(id, args[i]);
7045 		}
7046 		else
7047 			emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "fma");
7048 		break;
7049 	case GLSLstd450Modf:
7050 		register_call_out_argument(args[1]);
7051 		forced_temporaries.insert(id);
7052 		emit_binary_func_op(result_type, id, args[0], args[1], "modf");
7053 		break;
7054 
7055 	case GLSLstd450ModfStruct:
7056 	{
7057 		auto &type = get<SPIRType>(result_type);
7058 		emit_uninitialized_temporary_expression(result_type, id);
7059 		statement(to_expression(id), ".", to_member_name(type, 0), " = ", "modf(", to_expression(args[0]), ", ",
7060 		          to_expression(id), ".", to_member_name(type, 1), ");");
7061 		break;
7062 	}
7063 
7064 	// Minmax
7065 	case GLSLstd450UMin:
7066 		emit_binary_func_op_cast(result_type, id, args[0], args[1], "min", uint_type, false);
7067 		break;
7068 
7069 	case GLSLstd450SMin:
7070 		emit_binary_func_op_cast(result_type, id, args[0], args[1], "min", int_type, false);
7071 		break;
7072 
7073 	case GLSLstd450FMin:
7074 		emit_binary_func_op(result_type, id, args[0], args[1], "min");
7075 		break;
7076 
7077 	case GLSLstd450FMax:
7078 		emit_binary_func_op(result_type, id, args[0], args[1], "max");
7079 		break;
7080 
7081 	case GLSLstd450UMax:
7082 		emit_binary_func_op_cast(result_type, id, args[0], args[1], "max", uint_type, false);
7083 		break;
7084 
7085 	case GLSLstd450SMax:
7086 		emit_binary_func_op_cast(result_type, id, args[0], args[1], "max", int_type, false);
7087 		break;
7088 
7089 	case GLSLstd450FClamp:
7090 		emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "clamp");
7091 		break;
7092 
7093 	case GLSLstd450UClamp:
7094 		emit_trinary_func_op_cast(result_type, id, args[0], args[1], args[2], "clamp", uint_type);
7095 		break;
7096 
7097 	case GLSLstd450SClamp:
7098 		emit_trinary_func_op_cast(result_type, id, args[0], args[1], args[2], "clamp", int_type);
7099 		break;
7100 
7101 	// Trig
7102 	case GLSLstd450Sin:
7103 		emit_unary_func_op(result_type, id, args[0], "sin");
7104 		break;
7105 	case GLSLstd450Cos:
7106 		emit_unary_func_op(result_type, id, args[0], "cos");
7107 		break;
7108 	case GLSLstd450Tan:
7109 		emit_unary_func_op(result_type, id, args[0], "tan");
7110 		break;
7111 	case GLSLstd450Asin:
7112 		emit_unary_func_op(result_type, id, args[0], "asin");
7113 		break;
7114 	case GLSLstd450Acos:
7115 		emit_unary_func_op(result_type, id, args[0], "acos");
7116 		break;
7117 	case GLSLstd450Atan:
7118 		emit_unary_func_op(result_type, id, args[0], "atan");
7119 		break;
7120 	case GLSLstd450Sinh:
7121 		emit_unary_func_op(result_type, id, args[0], "sinh");
7122 		break;
7123 	case GLSLstd450Cosh:
7124 		emit_unary_func_op(result_type, id, args[0], "cosh");
7125 		break;
7126 	case GLSLstd450Tanh:
7127 		emit_unary_func_op(result_type, id, args[0], "tanh");
7128 		break;
7129 	case GLSLstd450Asinh:
7130 		emit_unary_func_op(result_type, id, args[0], "asinh");
7131 		break;
7132 	case GLSLstd450Acosh:
7133 		emit_unary_func_op(result_type, id, args[0], "acosh");
7134 		break;
7135 	case GLSLstd450Atanh:
7136 		emit_unary_func_op(result_type, id, args[0], "atanh");
7137 		break;
7138 	case GLSLstd450Atan2:
7139 		emit_binary_func_op(result_type, id, args[0], args[1], "atan");
7140 		break;
7141 
7142 	// Exponentials
7143 	case GLSLstd450Pow:
7144 		emit_binary_func_op(result_type, id, args[0], args[1], "pow");
7145 		break;
7146 	case GLSLstd450Exp:
7147 		emit_unary_func_op(result_type, id, args[0], "exp");
7148 		break;
7149 	case GLSLstd450Log:
7150 		emit_unary_func_op(result_type, id, args[0], "log");
7151 		break;
7152 	case GLSLstd450Exp2:
7153 		emit_unary_func_op(result_type, id, args[0], "exp2");
7154 		break;
7155 	case GLSLstd450Log2:
7156 		emit_unary_func_op(result_type, id, args[0], "log2");
7157 		break;
7158 	case GLSLstd450Sqrt:
7159 		emit_unary_func_op(result_type, id, args[0], "sqrt");
7160 		break;
7161 	case GLSLstd450InverseSqrt:
7162 		emit_unary_func_op(result_type, id, args[0], "inversesqrt");
7163 		break;
7164 
7165 	// Matrix math
7166 	case GLSLstd450Determinant:
7167 		emit_unary_func_op(result_type, id, args[0], "determinant");
7168 		break;
7169 	case GLSLstd450MatrixInverse:
7170 		emit_unary_func_op(result_type, id, args[0], "inverse");
7171 		break;
7172 
7173 	// Lerping
7174 	case GLSLstd450FMix:
7175 	case GLSLstd450IMix:
7176 	{
7177 		emit_mix_op(result_type, id, args[0], args[1], args[2]);
7178 		break;
7179 	}
7180 	case GLSLstd450Step:
7181 		emit_binary_func_op(result_type, id, args[0], args[1], "step");
7182 		break;
7183 	case GLSLstd450SmoothStep:
7184 		emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "smoothstep");
7185 		break;
7186 
7187 	// Packing
7188 	case GLSLstd450Frexp:
7189 		register_call_out_argument(args[1]);
7190 		forced_temporaries.insert(id);
7191 		emit_binary_func_op(result_type, id, args[0], args[1], "frexp");
7192 		break;
7193 
7194 	case GLSLstd450FrexpStruct:
7195 	{
7196 		auto &type = get<SPIRType>(result_type);
7197 		emit_uninitialized_temporary_expression(result_type, id);
7198 		statement(to_expression(id), ".", to_member_name(type, 0), " = ", "frexp(", to_expression(args[0]), ", ",
7199 		          to_expression(id), ".", to_member_name(type, 1), ");");
7200 		break;
7201 	}
7202 
7203 	case GLSLstd450Ldexp:
7204 	{
7205 		bool forward = should_forward(args[0]) && should_forward(args[1]);
7206 
7207 		auto op0 = to_unpacked_expression(args[0]);
7208 		auto op1 = to_unpacked_expression(args[1]);
7209 		auto &op1_type = expression_type(args[1]);
7210 		if (op1_type.basetype != SPIRType::Int)
7211 		{
7212 			// Need a value cast here.
7213 			auto target_type = op1_type;
7214 			target_type.basetype = SPIRType::Int;
7215 			op1 = join(type_to_glsl_constructor(target_type), "(", op1, ")");
7216 		}
7217 
7218 		auto expr = join("ldexp(", op0, ", ", op1, ")");
7219 
7220 		emit_op(result_type, id, expr, forward);
7221 		inherit_expression_dependencies(id, args[0]);
7222 		inherit_expression_dependencies(id, args[1]);
7223 		break;
7224 	}
7225 
7226 	case GLSLstd450PackSnorm4x8:
7227 		emit_unary_func_op(result_type, id, args[0], "packSnorm4x8");
7228 		break;
7229 	case GLSLstd450PackUnorm4x8:
7230 		emit_unary_func_op(result_type, id, args[0], "packUnorm4x8");
7231 		break;
7232 	case GLSLstd450PackSnorm2x16:
7233 		emit_unary_func_op(result_type, id, args[0], "packSnorm2x16");
7234 		break;
7235 	case GLSLstd450PackUnorm2x16:
7236 		emit_unary_func_op(result_type, id, args[0], "packUnorm2x16");
7237 		break;
7238 	case GLSLstd450PackHalf2x16:
7239 		emit_unary_func_op(result_type, id, args[0], "packHalf2x16");
7240 		break;
7241 	case GLSLstd450UnpackSnorm4x8:
7242 		emit_unary_func_op(result_type, id, args[0], "unpackSnorm4x8");
7243 		break;
7244 	case GLSLstd450UnpackUnorm4x8:
7245 		emit_unary_func_op(result_type, id, args[0], "unpackUnorm4x8");
7246 		break;
7247 	case GLSLstd450UnpackSnorm2x16:
7248 		emit_unary_func_op(result_type, id, args[0], "unpackSnorm2x16");
7249 		break;
7250 	case GLSLstd450UnpackUnorm2x16:
7251 		emit_unary_func_op(result_type, id, args[0], "unpackUnorm2x16");
7252 		break;
7253 	case GLSLstd450UnpackHalf2x16:
7254 		emit_unary_func_op(result_type, id, args[0], "unpackHalf2x16");
7255 		break;
7256 
7257 	case GLSLstd450PackDouble2x32:
7258 		emit_unary_func_op(result_type, id, args[0], "packDouble2x32");
7259 		break;
7260 	case GLSLstd450UnpackDouble2x32:
7261 		emit_unary_func_op(result_type, id, args[0], "unpackDouble2x32");
7262 		break;
7263 
7264 	// Vector math
7265 	case GLSLstd450Length:
7266 		emit_unary_func_op(result_type, id, args[0], "length");
7267 		break;
7268 	case GLSLstd450Distance:
7269 		emit_binary_func_op(result_type, id, args[0], args[1], "distance");
7270 		break;
7271 	case GLSLstd450Cross:
7272 		emit_binary_func_op(result_type, id, args[0], args[1], "cross");
7273 		break;
7274 	case GLSLstd450Normalize:
7275 		emit_unary_func_op(result_type, id, args[0], "normalize");
7276 		break;
7277 	case GLSLstd450FaceForward:
7278 		emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "faceforward");
7279 		break;
7280 	case GLSLstd450Reflect:
7281 		emit_binary_func_op(result_type, id, args[0], args[1], "reflect");
7282 		break;
7283 	case GLSLstd450Refract:
7284 		emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "refract");
7285 		break;
7286 
7287 	// Bit-fiddling
7288 	case GLSLstd450FindILsb:
7289 		// findLSB always returns int.
7290 		emit_unary_func_op_cast(result_type, id, args[0], "findLSB", expression_type(args[0]).basetype, int_type);
7291 		break;
7292 
7293 	case GLSLstd450FindSMsb:
7294 		emit_unary_func_op_cast(result_type, id, args[0], "findMSB", int_type, int_type);
7295 		break;
7296 
7297 	case GLSLstd450FindUMsb:
7298 		emit_unary_func_op_cast(result_type, id, args[0], "findMSB", uint_type,
7299 		                        int_type); // findMSB always returns int.
7300 		break;
7301 
7302 	// Multisampled varying
7303 	case GLSLstd450InterpolateAtCentroid:
7304 		emit_unary_func_op(result_type, id, args[0], "interpolateAtCentroid");
7305 		break;
7306 	case GLSLstd450InterpolateAtSample:
7307 		emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtSample");
7308 		break;
7309 	case GLSLstd450InterpolateAtOffset:
7310 		emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtOffset");
7311 		break;
7312 
7313 	case GLSLstd450NMin:
7314 	case GLSLstd450NMax:
7315 	{
7316 		emit_nminmax_op(result_type, id, args[0], args[1], op);
7317 		break;
7318 	}
7319 
7320 	case GLSLstd450NClamp:
7321 	{
7322 		// Make sure we have a unique ID here to avoid aliasing the extra sub-expressions between clamp and NMin sub-op.
7323 		// IDs cannot exceed 24 bits, so we can make use of the higher bits for some unique flags.
7324 		uint32_t &max_id = extra_sub_expressions[id | 0x80000000u];
7325 		if (!max_id)
7326 			max_id = ir.increase_bound_by(1);
7327 
7328 		// Inherit precision qualifiers.
7329 		ir.meta[max_id] = ir.meta[id];
7330 
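		// NClamp(x, minVal, maxVal) is decomposed into NMax followed by NMin below:
		// max_id holds NMax(x, minVal), and the final result is NMin(max_id, maxVal).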
7331 		emit_nminmax_op(result_type, max_id, args[0], args[1], GLSLstd450NMax);
7332 		emit_nminmax_op(result_type, id, max_id, args[2], GLSLstd450NMin);
7333 		break;
7334 	}
7335 
7336 	default:
7337 		statement("// unimplemented GLSL op ", eop);
7338 		break;
7339 	}
7340 }
7341 
7342 void CompilerGLSL::emit_nminmax_op(uint32_t result_type, uint32_t id, uint32_t op0, uint32_t op1, GLSLstd450 op)
7343 {
7344 	// Need to emulate this call.
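	// NMin/NMax must return the non-NaN operand when exactly one operand is NaN,
	// which plain min()/max() in GLSL does not guarantee. The emitted expression is roughly:
	//   mix(mix(min(op0, op1), op1, isnan(op0)), op0, isnan(op1))
	// (with max() instead of min() for NMax).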
7345 	uint32_t &ids = extra_sub_expressions[id];
7346 	if (!ids)
7347 	{
7348 		ids = ir.increase_bound_by(5);
7349 		auto btype = get<SPIRType>(result_type);
7350 		btype.basetype = SPIRType::Boolean;
7351 		set<SPIRType>(ids, btype);
7352 	}
7353 
7354 	uint32_t btype_id = ids + 0;
7355 	uint32_t left_nan_id = ids + 1;
7356 	uint32_t right_nan_id = ids + 2;
7357 	uint32_t tmp_id = ids + 3;
7358 	uint32_t mixed_first_id = ids + 4;
7359 
7360 	// Inherit precision qualifiers.
7361 	ir.meta[tmp_id] = ir.meta[id];
7362 	ir.meta[mixed_first_id] = ir.meta[id];
7363 
7364 	emit_unary_func_op(btype_id, left_nan_id, op0, "isnan");
7365 	emit_unary_func_op(btype_id, right_nan_id, op1, "isnan");
7366 	emit_binary_func_op(result_type, tmp_id, op0, op1, op == GLSLstd450NMin ? "min" : "max");
7367 	emit_mix_op(result_type, mixed_first_id, tmp_id, op1, left_nan_id);
7368 	emit_mix_op(result_type, id, mixed_first_id, op0, right_nan_id);
7369 }
7370 
7371 void CompilerGLSL::emit_spv_amd_shader_ballot_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args,
7372                                                  uint32_t)
7373 {
7374 	require_extension_internal("GL_AMD_shader_ballot");
7375 
7376 	enum AMDShaderBallot
7377 	{
7378 		SwizzleInvocationsAMD = 1,
7379 		SwizzleInvocationsMaskedAMD = 2,
7380 		WriteInvocationAMD = 3,
7381 		MbcntAMD = 4
7382 	};
7383 
7384 	auto op = static_cast<AMDShaderBallot>(eop);
7385 
7386 	switch (op)
7387 	{
7388 	case SwizzleInvocationsAMD:
7389 		emit_binary_func_op(result_type, id, args[0], args[1], "swizzleInvocationsAMD");
7390 		register_control_dependent_expression(id);
7391 		break;
7392 
7393 	case SwizzleInvocationsMaskedAMD:
7394 		emit_binary_func_op(result_type, id, args[0], args[1], "swizzleInvocationsMaskedAMD");
7395 		register_control_dependent_expression(id);
7396 		break;
7397 
7398 	case WriteInvocationAMD:
7399 		emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "writeInvocationAMD");
7400 		register_control_dependent_expression(id);
7401 		break;
7402 
7403 	case MbcntAMD:
7404 		emit_unary_func_op(result_type, id, args[0], "mbcntAMD");
7405 		register_control_dependent_expression(id);
7406 		break;
7407 
7408 	default:
7409 		statement("// unimplemented SPV AMD shader ballot op ", eop);
7410 		break;
7411 	}
7412 }
7413 
7414 void CompilerGLSL::emit_spv_amd_shader_explicit_vertex_parameter_op(uint32_t result_type, uint32_t id, uint32_t eop,
7415                                                                     const uint32_t *args, uint32_t)
7416 {
7417 	require_extension_internal("GL_AMD_shader_explicit_vertex_parameter");
7418 
7419 	enum AMDShaderExplicitVertexParameter
7420 	{
7421 		InterpolateAtVertexAMD = 1
7422 	};
7423 
7424 	auto op = static_cast<AMDShaderExplicitVertexParameter>(eop);
7425 
7426 	switch (op)
7427 	{
7428 	case InterpolateAtVertexAMD:
7429 		emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtVertexAMD");
7430 		break;
7431 
7432 	default:
7433 		statement("// unimplemented SPV AMD shader explicit vertex parameter op ", eop);
7434 		break;
7435 	}
7436 }
7437 
7438 void CompilerGLSL::emit_spv_amd_shader_trinary_minmax_op(uint32_t result_type, uint32_t id, uint32_t eop,
7439                                                          const uint32_t *args, uint32_t)
7440 {
7441 	require_extension_internal("GL_AMD_shader_trinary_minmax");
7442 
7443 	enum AMDShaderTrinaryMinMax
7444 	{
7445 		FMin3AMD = 1,
7446 		UMin3AMD = 2,
7447 		SMin3AMD = 3,
7448 		FMax3AMD = 4,
7449 		UMax3AMD = 5,
7450 		SMax3AMD = 6,
7451 		FMid3AMD = 7,
7452 		UMid3AMD = 8,
7453 		SMid3AMD = 9
7454 	};
7455 
7456 	auto op = static_cast<AMDShaderTrinaryMinMax>(eop);
7457 
7458 	switch (op)
7459 	{
7460 	case FMin3AMD:
7461 	case UMin3AMD:
7462 	case SMin3AMD:
7463 		emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "min3");
7464 		break;
7465 
7466 	case FMax3AMD:
7467 	case UMax3AMD:
7468 	case SMax3AMD:
7469 		emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "max3");
7470 		break;
7471 
7472 	case FMid3AMD:
7473 	case UMid3AMD:
7474 	case SMid3AMD:
7475 		emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "mid3");
7476 		break;
7477 
7478 	default:
7479 		statement("// unimplemented SPV AMD shader trinary minmax op ", eop);
7480 		break;
7481 	}
7482 }
7483 
7484 void CompilerGLSL::emit_spv_amd_gcn_shader_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args,
7485                                               uint32_t)
7486 {
7487 	require_extension_internal("GL_AMD_gcn_shader");
7488 
7489 	enum AMDGCNShader
7490 	{
7491 		CubeFaceIndexAMD = 1,
7492 		CubeFaceCoordAMD = 2,
7493 		TimeAMD = 3
7494 	};
7495 
7496 	auto op = static_cast<AMDGCNShader>(eop);
7497 
7498 	switch (op)
7499 	{
7500 	case CubeFaceIndexAMD:
7501 		emit_unary_func_op(result_type, id, args[0], "cubeFaceIndexAMD");
7502 		break;
7503 	case CubeFaceCoordAMD:
7504 		emit_unary_func_op(result_type, id, args[0], "cubeFaceCoordAMD");
7505 		break;
7506 	case TimeAMD:
7507 	{
7508 		string expr = "timeAMD()";
7509 		emit_op(result_type, id, expr, true);
7510 		register_control_dependent_expression(id);
7511 		break;
7512 	}
7513 
7514 	default:
7515 		statement("// unimplemented SPV AMD gcn shader op ", eop);
7516 		break;
7517 	}
7518 }
7519 
7520 void CompilerGLSL::emit_subgroup_op(const Instruction &i)
7521 {
7522 	const uint32_t *ops = stream(i);
7523 	auto op = static_cast<Op>(i.op);
7524 
7525 	if (!options.vulkan_semantics && !is_supported_subgroup_op_in_opengl(op))
7526 		SPIRV_CROSS_THROW("This subgroup operation is only supported in Vulkan semantics.");
7527 
7528 	// If we need to do implicit bitcasts, make sure we do it with the correct type.
7529 	uint32_t integer_width = get_integer_width_for_instruction(i);
7530 	auto int_type = to_signed_basetype(integer_width);
7531 	auto uint_type = to_unsigned_basetype(integer_width);
7532 
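	// First pass: request any subgroup features / extensions this opcode needs
	// (or throw if it cannot be supported). The actual call is emitted by the second switch below.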
7533 	switch (op)
7534 	{
7535 	case OpGroupNonUniformElect:
7536 		request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupElect);
7537 		break;
7538 
7539 	case OpGroupNonUniformBallotBitCount:
7540 	{
7541 		const GroupOperation operation = static_cast<GroupOperation>(ops[3]);
7542 		if (operation == GroupOperationReduce)
7543 			request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallotBitCount);
7544 		else if (operation == GroupOperationInclusiveScan || operation == GroupOperationExclusiveScan)
7545 			request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupInverseBallot_InclBitCount_ExclBitCout);
7546 	}
7547 	break;
7548 
7549 	case OpGroupNonUniformBallotBitExtract:
7550 		request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallotBitExtract);
7551 		break;
7552 
7553 	case OpGroupNonUniformInverseBallot:
7554 		request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupInverseBallot_InclBitCount_ExclBitCout);
7555 		break;
7556 
7557 	case OpGroupNonUniformBallot:
7558 		request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallot);
7559 		break;
7560 
7561 	case OpGroupNonUniformBallotFindLSB:
7562 	case OpGroupNonUniformBallotFindMSB:
7563 		request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallotFindLSB_MSB);
7564 		break;
7565 
7566 	case OpGroupNonUniformBroadcast:
7567 	case OpGroupNonUniformBroadcastFirst:
7568 		request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBroadcast_First);
7569 		break;
7570 
7571 	case OpGroupNonUniformShuffle:
7572 	case OpGroupNonUniformShuffleXor:
7573 		require_extension_internal("GL_KHR_shader_subgroup_shuffle");
7574 		break;
7575 
7576 	case OpGroupNonUniformShuffleUp:
7577 	case OpGroupNonUniformShuffleDown:
7578 		require_extension_internal("GL_KHR_shader_subgroup_shuffle_relative");
7579 		break;
7580 
7581 	case OpGroupNonUniformAll:
7582 	case OpGroupNonUniformAny:
7583 	case OpGroupNonUniformAllEqual:
7584 	{
7585 		const SPIRType &type = expression_type(ops[3]);
7586 		if (type.basetype == SPIRType::BaseType::Boolean && type.vecsize == 1u)
7587 			request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupAll_Any_AllEqualBool);
7588 		else
7589 			request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupAllEqualT);
7590 	}
7591 	break;
7592 
7593 	case OpGroupNonUniformFAdd:
7594 	case OpGroupNonUniformFMul:
7595 	case OpGroupNonUniformFMin:
7596 	case OpGroupNonUniformFMax:
7597 	case OpGroupNonUniformIAdd:
7598 	case OpGroupNonUniformIMul:
7599 	case OpGroupNonUniformSMin:
7600 	case OpGroupNonUniformSMax:
7601 	case OpGroupNonUniformUMin:
7602 	case OpGroupNonUniformUMax:
7603 	case OpGroupNonUniformBitwiseAnd:
7604 	case OpGroupNonUniformBitwiseOr:
7605 	case OpGroupNonUniformBitwiseXor:
7606 	{
7607 		auto operation = static_cast<GroupOperation>(ops[3]);
7608 		if (operation == GroupOperationClusteredReduce)
7609 		{
7610 			require_extension_internal("GL_KHR_shader_subgroup_clustered");
7611 		}
7612 		else if (operation == GroupOperationExclusiveScan || operation == GroupOperationInclusiveScan ||
7613 		         operation == GroupOperationReduce)
7614 		{
7615 			require_extension_internal("GL_KHR_shader_subgroup_arithmetic");
7616 		}
7617 		else
7618 			SPIRV_CROSS_THROW("Invalid group operation.");
7619 		break;
7620 	}
7621 
7622 	case OpGroupNonUniformQuadSwap:
7623 	case OpGroupNonUniformQuadBroadcast:
7624 		require_extension_internal("GL_KHR_shader_subgroup_quad");
7625 		break;
7626 
7627 	default:
7628 		SPIRV_CROSS_THROW("Invalid opcode for subgroup.");
7629 	}
7630 
7631 	uint32_t result_type = ops[0];
7632 	uint32_t id = ops[1];
7633 
7634 	auto scope = static_cast<Scope>(evaluate_constant_u32(ops[2]));
7635 	if (scope != ScopeSubgroup)
7636 		SPIRV_CROSS_THROW("Only subgroup scope is supported.");
7637 
7638 	switch (op)
7639 	{
7640 	case OpGroupNonUniformElect:
7641 		emit_op(result_type, id, "subgroupElect()", true);
7642 		break;
7643 
7644 	case OpGroupNonUniformBroadcast:
7645 		emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupBroadcast");
7646 		break;
7647 
7648 	case OpGroupNonUniformBroadcastFirst:
7649 		emit_unary_func_op(result_type, id, ops[3], "subgroupBroadcastFirst");
7650 		break;
7651 
7652 	case OpGroupNonUniformBallot:
7653 		emit_unary_func_op(result_type, id, ops[3], "subgroupBallot");
7654 		break;
7655 
7656 	case OpGroupNonUniformInverseBallot:
7657 		emit_unary_func_op(result_type, id, ops[3], "subgroupInverseBallot");
7658 		break;
7659 
7660 	case OpGroupNonUniformBallotBitExtract:
7661 		emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupBallotBitExtract");
7662 		break;
7663 
7664 	case OpGroupNonUniformBallotFindLSB:
7665 		emit_unary_func_op(result_type, id, ops[3], "subgroupBallotFindLSB");
7666 		break;
7667 
7668 	case OpGroupNonUniformBallotFindMSB:
7669 		emit_unary_func_op(result_type, id, ops[3], "subgroupBallotFindMSB");
7670 		break;
7671 
7672 	case OpGroupNonUniformBallotBitCount:
7673 	{
7674 		auto operation = static_cast<GroupOperation>(ops[3]);
7675 		if (operation == GroupOperationReduce)
7676 			emit_unary_func_op(result_type, id, ops[4], "subgroupBallotBitCount");
7677 		else if (operation == GroupOperationInclusiveScan)
7678 			emit_unary_func_op(result_type, id, ops[4], "subgroupBallotInclusiveBitCount");
7679 		else if (operation == GroupOperationExclusiveScan)
7680 			emit_unary_func_op(result_type, id, ops[4], "subgroupBallotExclusiveBitCount");
7681 		else
7682 			SPIRV_CROSS_THROW("Invalid BitCount operation.");
7683 		break;
7684 	}
7685 
7686 	case OpGroupNonUniformShuffle:
7687 		emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffle");
7688 		break;
7689 
7690 	case OpGroupNonUniformShuffleXor:
7691 		emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleXor");
7692 		break;
7693 
7694 	case OpGroupNonUniformShuffleUp:
7695 		emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleUp");
7696 		break;
7697 
7698 	case OpGroupNonUniformShuffleDown:
7699 		emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleDown");
7700 		break;
7701 
7702 	case OpGroupNonUniformAll:
7703 		emit_unary_func_op(result_type, id, ops[3], "subgroupAll");
7704 		break;
7705 
7706 	case OpGroupNonUniformAny:
7707 		emit_unary_func_op(result_type, id, ops[3], "subgroupAny");
7708 		break;
7709 
7710 	case OpGroupNonUniformAllEqual:
7711 		emit_unary_func_op(result_type, id, ops[3], "subgroupAllEqual");
7712 		break;
7713 
7714 		// clang-format off
7715 #define GLSL_GROUP_OP(op, glsl_op) \
7716 case OpGroupNonUniform##op: \
7717 	{ \
7718 		auto operation = static_cast<GroupOperation>(ops[3]); \
7719 		if (operation == GroupOperationReduce) \
7720 			emit_unary_func_op(result_type, id, ops[4], "subgroup" #glsl_op); \
7721 		else if (operation == GroupOperationInclusiveScan) \
7722 			emit_unary_func_op(result_type, id, ops[4], "subgroupInclusive" #glsl_op); \
7723 		else if (operation == GroupOperationExclusiveScan) \
7724 			emit_unary_func_op(result_type, id, ops[4], "subgroupExclusive" #glsl_op); \
7725 		else if (operation == GroupOperationClusteredReduce) \
7726 			emit_binary_func_op(result_type, id, ops[4], ops[5], "subgroupClustered" #glsl_op); \
7727 		else \
7728 			SPIRV_CROSS_THROW("Invalid group operation."); \
7729 		break; \
7730 	}
7731 
7732 #define GLSL_GROUP_OP_CAST(op, glsl_op, type) \
7733 case OpGroupNonUniform##op: \
7734 	{ \
7735 		auto operation = static_cast<GroupOperation>(ops[3]); \
7736 		if (operation == GroupOperationReduce) \
7737 			emit_unary_func_op_cast(result_type, id, ops[4], "subgroup" #glsl_op, type, type); \
7738 		else if (operation == GroupOperationInclusiveScan) \
7739 			emit_unary_func_op_cast(result_type, id, ops[4], "subgroupInclusive" #glsl_op, type, type); \
7740 		else if (operation == GroupOperationExclusiveScan) \
7741 			emit_unary_func_op_cast(result_type, id, ops[4], "subgroupExclusive" #glsl_op, type, type); \
7742 		else if (operation == GroupOperationClusteredReduce) \
7743 			emit_binary_func_op_cast_clustered(result_type, id, ops[4], ops[5], "subgroupClustered" #glsl_op, type); \
7744 		else \
7745 			SPIRV_CROSS_THROW("Invalid group operation."); \
7746 		break; \
7747 	}
7748 
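	// Each GLSL_GROUP_OP(FAdd, Add) style invocation below expands into a case label that emits
	// subgroupAdd / subgroupInclusiveAdd / subgroupExclusiveAdd / subgroupClusteredAdd depending
	// on the group operation; the _CAST variant additionally casts through the given integer type
	// so signedness is preserved for SMin/SMax/UMin/UMax.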
7749 	GLSL_GROUP_OP(FAdd, Add)
7750 	GLSL_GROUP_OP(FMul, Mul)
7751 	GLSL_GROUP_OP(FMin, Min)
7752 	GLSL_GROUP_OP(FMax, Max)
7753 	GLSL_GROUP_OP(IAdd, Add)
7754 	GLSL_GROUP_OP(IMul, Mul)
7755 	GLSL_GROUP_OP_CAST(SMin, Min, int_type)
7756 	GLSL_GROUP_OP_CAST(SMax, Max, int_type)
7757 	GLSL_GROUP_OP_CAST(UMin, Min, uint_type)
7758 	GLSL_GROUP_OP_CAST(UMax, Max, uint_type)
7759 	GLSL_GROUP_OP(BitwiseAnd, And)
7760 	GLSL_GROUP_OP(BitwiseOr, Or)
7761 	GLSL_GROUP_OP(BitwiseXor, Xor)
7762 #undef GLSL_GROUP_OP
7763 #undef GLSL_GROUP_OP_CAST
7764 		// clang-format on
7765 
7766 	case OpGroupNonUniformQuadSwap:
7767 	{
7768 		uint32_t direction = evaluate_constant_u32(ops[4]);
7769 		if (direction == 0)
7770 			emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapHorizontal");
7771 		else if (direction == 1)
7772 			emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapVertical");
7773 		else if (direction == 2)
7774 			emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapDiagonal");
7775 		else
7776 			SPIRV_CROSS_THROW("Invalid quad swap direction.");
7777 		break;
7778 	}
7779 
7780 	case OpGroupNonUniformQuadBroadcast:
7781 	{
7782 		emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupQuadBroadcast");
7783 		break;
7784 	}
7785 
7786 	default:
7787 		SPIRV_CROSS_THROW("Invalid opcode for subgroup.");
7788 	}
7789 
7790 	register_control_dependent_expression(id);
7791 }
7792 
7793 string CompilerGLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type)
7794 {
7795 	// OpBitcast can deal with pointers.
7796 	if (out_type.pointer || in_type.pointer)
7797 		return type_to_glsl(out_type);
7798 
7799 	if (out_type.basetype == in_type.basetype)
7800 		return "";
7801 
7802 	assert(out_type.basetype != SPIRType::Boolean);
7803 	assert(in_type.basetype != SPIRType::Boolean);
7804 
7805 	bool integral_cast = type_is_integral(out_type) && type_is_integral(in_type);
7806 	bool same_size_cast = out_type.width == in_type.width;
7807 
7808 	// Trivial bitcast case, casts between integers.
7809 	if (integral_cast && same_size_cast)
7810 		return type_to_glsl(out_type);
7811 
7812 	// Catch-all 8-bit arithmetic casts (GL_EXT_shader_explicit_arithmetic_types).
7813 	if (out_type.width == 8 && in_type.width >= 16 && integral_cast && in_type.vecsize == 1)
7814 		return "unpack8";
7815 	else if (in_type.width == 8 && out_type.width == 16 && integral_cast && out_type.vecsize == 1)
7816 		return "pack16";
7817 	else if (in_type.width == 8 && out_type.width == 32 && integral_cast && out_type.vecsize == 1)
7818 		return "pack32";
7819 
7820 	// Floating <-> Integer special casts. Just have to enumerate all cases. :(
7821 	// 16-bit, 32-bit and 64-bit floats.
7822 	if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Float)
7823 	{
7824 		if (is_legacy_es())
7825 			SPIRV_CROSS_THROW("Float -> Uint bitcast not supported on legacy ESSL.");
7826 		else if (!options.es && options.version < 330)
7827 			require_extension_internal("GL_ARB_shader_bit_encoding");
7828 		return "floatBitsToUint";
7829 	}
7830 	else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Float)
7831 	{
7832 		if (is_legacy_es())
7833 			SPIRV_CROSS_THROW("Float -> Int bitcast not supported on legacy ESSL.");
7834 		else if (!options.es && options.version < 330)
7835 			require_extension_internal("GL_ARB_shader_bit_encoding");
7836 		return "floatBitsToInt";
7837 	}
7838 	else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::UInt)
7839 	{
7840 		if (is_legacy_es())
7841 			SPIRV_CROSS_THROW("Uint -> Float bitcast not supported on legacy ESSL.");
7842 		else if (!options.es && options.version < 330)
7843 			require_extension_internal("GL_ARB_shader_bit_encoding");
7844 		return "uintBitsToFloat";
7845 	}
7846 	else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::Int)
7847 	{
7848 		if (is_legacy_es())
7849 			SPIRV_CROSS_THROW("Int -> Float bitcast not supported on legacy ESSL.");
7850 		else if (!options.es && options.version < 330)
7851 			require_extension_internal("GL_ARB_shader_bit_encoding");
7852 		return "intBitsToFloat";
7853 	}
7854 
7855 	else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Double)
7856 		return "doubleBitsToInt64";
7857 	else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::Double)
7858 		return "doubleBitsToUint64";
7859 	else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::Int64)
7860 		return "int64BitsToDouble";
7861 	else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::UInt64)
7862 		return "uint64BitsToDouble";
7863 	else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Half)
7864 		return "float16BitsToInt16";
7865 	else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::Half)
7866 		return "float16BitsToUint16";
7867 	else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::Short)
7868 		return "int16BitsToFloat16";
7869 	else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UShort)
7870 		return "uint16BitsToFloat16";
7871 
7872 	// And finally, some even more special purpose casts.
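	// For example, bitcasting a uvec2 to uint64_t maps to packUint2x32() and the reverse to
	// unpackUint2x32(); the 16-bit cases below follow the same pack/unpack pattern.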
7873 	if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::UInt && in_type.vecsize == 2)
7874 		return "packUint2x32";
7875 	else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::UInt64 && out_type.vecsize == 2)
7876 		return "unpackUint2x32";
7877 	else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1)
7878 		return "unpackFloat2x16";
7879 	else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Half && in_type.vecsize == 2)
7880 		return "packFloat2x16";
7881 	else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Short && in_type.vecsize == 2)
7882 		return "packInt2x16";
7883 	else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Int && in_type.vecsize == 1)
7884 		return "unpackInt2x16";
7885 	else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::UShort && in_type.vecsize == 2)
7886 		return "packUint2x16";
7887 	else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1)
7888 		return "unpackUint2x16";
7889 	else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Short && in_type.vecsize == 4)
7890 		return "packInt4x16";
7891 	else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Int64 && in_type.vecsize == 1)
7892 		return "unpackInt4x16";
7893 	else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::UShort && in_type.vecsize == 4)
7894 		return "packUint4x16";
7895 	else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::UInt64 && in_type.vecsize == 1)
7896 		return "unpackUint4x16";
7897 
7898 	return "";
7899 }
7900 
7901 string CompilerGLSL::bitcast_glsl(const SPIRType &result_type, uint32_t argument)
7902 {
7903 	auto op = bitcast_glsl_op(result_type, expression_type(argument));
7904 	if (op.empty())
7905 		return to_enclosed_unpacked_expression(argument);
7906 	else
7907 		return join(op, "(", to_unpacked_expression(argument), ")");
7908 }
7909 
7910 std::string CompilerGLSL::bitcast_expression(SPIRType::BaseType target_type, uint32_t arg)
7911 {
7912 	auto expr = to_expression(arg);
7913 	auto &src_type = expression_type(arg);
7914 	if (src_type.basetype != target_type)
7915 	{
7916 		auto target = src_type;
7917 		target.basetype = target_type;
7918 		expr = join(bitcast_glsl_op(target, src_type), "(", expr, ")");
7919 	}
7920 
7921 	return expr;
7922 }
7923 
7924 std::string CompilerGLSL::bitcast_expression(const SPIRType &target_type, SPIRType::BaseType expr_type,
7925                                              const std::string &expr)
7926 {
7927 	if (target_type.basetype == expr_type)
7928 		return expr;
7929 
7930 	auto src_type = target_type;
7931 	src_type.basetype = expr_type;
7932 	return join(bitcast_glsl_op(target_type, src_type), "(", expr, ")");
7933 }
7934 
7935 string CompilerGLSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage)
7936 {
7937 	switch (builtin)
7938 	{
7939 	case BuiltInPosition:
7940 		return "gl_Position";
7941 	case BuiltInPointSize:
7942 		return "gl_PointSize";
7943 	case BuiltInClipDistance:
7944 		return "gl_ClipDistance";
7945 	case BuiltInCullDistance:
7946 		return "gl_CullDistance";
7947 	case BuiltInVertexId:
7948 		if (options.vulkan_semantics)
7949 			SPIRV_CROSS_THROW("Cannot implement gl_VertexID in Vulkan GLSL. This shader was created "
7950 			                  "with GL semantics.");
7951 		return "gl_VertexID";
7952 	case BuiltInInstanceId:
7953 		if (options.vulkan_semantics)
7954 		{
7955 			auto model = get_entry_point().model;
7956 			switch (model)
7957 			{
7958 			case spv::ExecutionModelIntersectionKHR:
7959 			case spv::ExecutionModelAnyHitKHR:
7960 			case spv::ExecutionModelClosestHitKHR:
7961 				// gl_InstanceID is allowed in these shaders.
7962 				break;
7963 
7964 			default:
7965 				SPIRV_CROSS_THROW("Cannot implement gl_InstanceID in Vulkan GLSL. This shader was "
7966 				                  "created with GL semantics.");
7967 			}
7968 		}
7969 		if (!options.es && options.version < 140)
7970 		{
7971 			require_extension_internal("GL_ARB_draw_instanced");
7972 		}
7973 		return "gl_InstanceID";
7974 	case BuiltInVertexIndex:
7975 		if (options.vulkan_semantics)
7976 			return "gl_VertexIndex";
7977 		else
7978 			return "gl_VertexID"; // gl_VertexID already has the base offset applied.
7979 	case BuiltInInstanceIndex:
7980 		if (options.vulkan_semantics)
7981 			return "gl_InstanceIndex";
7982 
7983 		if (!options.es && options.version < 140)
7984 		{
7985 			require_extension_internal("GL_ARB_draw_instanced");
7986 		}
7987 
7988 		if (options.vertex.support_nonzero_base_instance)
7989 		{
7990 			if (!options.vulkan_semantics)
7991 			{
7992 				// This is a soft-enable. We will opt-in to using gl_BaseInstanceARB if supported.
7993 				require_extension_internal("GL_ARB_shader_draw_parameters");
7994 			}
7995 			return "(gl_InstanceID + SPIRV_Cross_BaseInstance)"; // ... but not gl_InstanceID.
7996 		}
7997 		else
7998 			return "gl_InstanceID";
7999 	case BuiltInPrimitiveId:
8000 		if (storage == StorageClassInput && get_entry_point().model == ExecutionModelGeometry)
8001 			return "gl_PrimitiveIDIn";
8002 		else
8003 			return "gl_PrimitiveID";
8004 	case BuiltInInvocationId:
8005 		return "gl_InvocationID";
8006 	case BuiltInLayer:
8007 		return "gl_Layer";
8008 	case BuiltInViewportIndex:
8009 		return "gl_ViewportIndex";
8010 	case BuiltInTessLevelOuter:
8011 		return "gl_TessLevelOuter";
8012 	case BuiltInTessLevelInner:
8013 		return "gl_TessLevelInner";
8014 	case BuiltInTessCoord:
8015 		return "gl_TessCoord";
8016 	case BuiltInFragCoord:
8017 		return "gl_FragCoord";
8018 	case BuiltInPointCoord:
8019 		return "gl_PointCoord";
8020 	case BuiltInFrontFacing:
8021 		return "gl_FrontFacing";
8022 	case BuiltInFragDepth:
8023 		return "gl_FragDepth";
8024 	case BuiltInNumWorkgroups:
8025 		return "gl_NumWorkGroups";
8026 	case BuiltInWorkgroupSize:
8027 		return "gl_WorkGroupSize";
8028 	case BuiltInWorkgroupId:
8029 		return "gl_WorkGroupID";
8030 	case BuiltInLocalInvocationId:
8031 		return "gl_LocalInvocationID";
8032 	case BuiltInGlobalInvocationId:
8033 		return "gl_GlobalInvocationID";
8034 	case BuiltInLocalInvocationIndex:
8035 		return "gl_LocalInvocationIndex";
8036 	case BuiltInHelperInvocation:
8037 		return "gl_HelperInvocation";
8038 
8039 	case BuiltInBaseVertex:
8040 		if (options.es)
8041 			SPIRV_CROSS_THROW("BaseVertex not supported in ES profile.");
8042 
8043 		if (options.vulkan_semantics)
8044 		{
8045 			if (options.version < 460)
8046 			{
8047 				require_extension_internal("GL_ARB_shader_draw_parameters");
8048 				return "gl_BaseVertexARB";
8049 			}
8050 			return "gl_BaseVertex";
8051 		}
8052 		else
8053 		{
8054 			// On regular GL, this is soft-enabled and we emit ifdefs in code.
8055 			require_extension_internal("GL_ARB_shader_draw_parameters");
8056 			return "SPIRV_Cross_BaseVertex";
8057 		}
8058 		break;
8059 
8060 	case BuiltInBaseInstance:
8061 		if (options.es)
8062 			SPIRV_CROSS_THROW("BaseInstance not supported in ES profile.");
8063 
8064 		if (options.vulkan_semantics)
8065 		{
8066 			if (options.version < 460)
8067 			{
8068 				require_extension_internal("GL_ARB_shader_draw_parameters");
8069 				return "gl_BaseInstanceARB";
8070 			}
8071 			return "gl_BaseInstance";
8072 		}
8073 		else
8074 		{
8075 			// On regular GL, this is soft-enabled and we emit ifdefs in code.
8076 			require_extension_internal("GL_ARB_shader_draw_parameters");
8077 			return "SPIRV_Cross_BaseInstance";
8078 		}
8079 		break;
8080 
8081 	case BuiltInDrawIndex:
8082 		if (options.es)
8083 			SPIRV_CROSS_THROW("DrawIndex not supported in ES profile.");
8084 
8085 		if (options.vulkan_semantics)
8086 		{
8087 			if (options.version < 460)
8088 			{
8089 				require_extension_internal("GL_ARB_shader_draw_parameters");
8090 				return "gl_DrawIDARB";
8091 			}
8092 			return "gl_DrawID";
8093 		}
8094 		else
8095 		{
8096 			// On regular GL, this is soft-enabled and we emit ifdefs in code.
8097 			require_extension_internal("GL_ARB_shader_draw_parameters");
8098 			return "gl_DrawIDARB";
8099 		}
8100 		break;
8101 
8102 	case BuiltInSampleId:
8103 		if (options.es && options.version < 320)
8104 			require_extension_internal("GL_OES_sample_variables");
8105 		if (!options.es && options.version < 400)
8106 			SPIRV_CROSS_THROW("gl_SampleID not supported before GLSL 400.");
8107 		return "gl_SampleID";
8108 
8109 	case BuiltInSampleMask:
8110 		if (options.es && options.version < 320)
8111 			require_extension_internal("GL_OES_sample_variables");
8112 		if (!options.es && options.version < 400)
8113 			SPIRV_CROSS_THROW("gl_SampleMask/gl_SampleMaskIn not supported before GLSL 400.");
8114 
8115 		if (storage == StorageClassInput)
8116 			return "gl_SampleMaskIn";
8117 		else
8118 			return "gl_SampleMask";
8119 
8120 	case BuiltInSamplePosition:
8121 		if (options.es && options.version < 320)
8122 			require_extension_internal("GL_OES_sample_variables");
8123 		if (!options.es && options.version < 400)
8124 			SPIRV_CROSS_THROW("gl_SamplePosition not supported before GLSL 400.");
8125 		return "gl_SamplePosition";
8126 
8127 	case BuiltInViewIndex:
8128 		if (options.vulkan_semantics)
8129 		{
8130 			require_extension_internal("GL_EXT_multiview");
8131 			return "gl_ViewIndex";
8132 		}
8133 		else
8134 		{
8135 			require_extension_internal("GL_OVR_multiview2");
8136 			return "gl_ViewID_OVR";
8137 		}
8138 
8139 	case BuiltInNumSubgroups:
8140 		request_subgroup_feature(ShaderSubgroupSupportHelper::NumSubgroups);
8141 		return "gl_NumSubgroups";
8142 
8143 	case BuiltInSubgroupId:
8144 		request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupID);
8145 		return "gl_SubgroupID";
8146 
8147 	case BuiltInSubgroupSize:
8148 		request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupSize);
8149 		return "gl_SubgroupSize";
8150 
8151 	case BuiltInSubgroupLocalInvocationId:
8152 		request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupInvocationID);
8153 		return "gl_SubgroupInvocationID";
8154 
8155 	case BuiltInSubgroupEqMask:
8156 		request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask);
8157 		return "gl_SubgroupEqMask";
8158 
8159 	case BuiltInSubgroupGeMask:
8160 		request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask);
8161 		return "gl_SubgroupGeMask";
8162 
8163 	case BuiltInSubgroupGtMask:
8164 		request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask);
8165 		return "gl_SubgroupGtMask";
8166 
8167 	case BuiltInSubgroupLeMask:
8168 		request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask);
8169 		return "gl_SubgroupLeMask";
8170 
8171 	case BuiltInSubgroupLtMask:
8172 		request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask);
8173 		return "gl_SubgroupLtMask";
8174 
8175 	case BuiltInLaunchIdKHR:
8176 		return ray_tracing_is_khr ? "gl_LaunchIDEXT" : "gl_LaunchIDNV";
8177 	case BuiltInLaunchSizeKHR:
8178 		return ray_tracing_is_khr ? "gl_LaunchSizeEXT" : "gl_LaunchSizeNV";
8179 	case BuiltInWorldRayOriginKHR:
8180 		return ray_tracing_is_khr ? "gl_WorldRayOriginEXT" : "gl_WorldRayOriginNV";
8181 	case BuiltInWorldRayDirectionKHR:
8182 		return ray_tracing_is_khr ? "gl_WorldRayDirectionEXT" : "gl_WorldRayDirectionNV";
8183 	case BuiltInObjectRayOriginKHR:
8184 		return ray_tracing_is_khr ? "gl_ObjectRayOriginEXT" : "gl_ObjectRayOriginNV";
8185 	case BuiltInObjectRayDirectionKHR:
8186 		return ray_tracing_is_khr ? "gl_ObjectRayDirectionEXT" : "gl_ObjectRayDirectionNV";
8187 	case BuiltInRayTminKHR:
8188 		return ray_tracing_is_khr ? "gl_RayTminEXT" : "gl_RayTminNV";
8189 	case BuiltInRayTmaxKHR:
8190 		return ray_tracing_is_khr ? "gl_RayTmaxEXT" : "gl_RayTmaxNV";
8191 	case BuiltInInstanceCustomIndexKHR:
8192 		return ray_tracing_is_khr ? "gl_InstanceCustomIndexEXT" : "gl_InstanceCustomIndexNV";
8193 	case BuiltInObjectToWorldKHR:
8194 		return ray_tracing_is_khr ? "gl_ObjectToWorldEXT" : "gl_ObjectToWorldNV";
8195 	case BuiltInWorldToObjectKHR:
8196 		return ray_tracing_is_khr ? "gl_WorldToObjectEXT" : "gl_WorldToObjectNV";
8197 	case BuiltInHitTNV:
8198 		// gl_HitTEXT is an alias of RayTMax in KHR.
8199 		return "gl_HitTNV";
8200 	case BuiltInHitKindKHR:
8201 		return ray_tracing_is_khr ? "gl_HitKindEXT" : "gl_HitKindNV";
8202 	case BuiltInIncomingRayFlagsKHR:
8203 		return ray_tracing_is_khr ? "gl_IncomingRayFlagsEXT" : "gl_IncomingRayFlagsNV";
8204 
8205 	case BuiltInBaryCoordNV:
8206 	{
8207 		if (options.es && options.version < 320)
8208 			SPIRV_CROSS_THROW("gl_BaryCoordNV requires ESSL 320.");
8209 		else if (!options.es && options.version < 450)
8210 			SPIRV_CROSS_THROW("gl_BaryCoordNV requires GLSL 450.");
8211 		require_extension_internal("GL_NV_fragment_shader_barycentric");
8212 		return "gl_BaryCoordNV";
8213 	}
8214 
8215 	case BuiltInBaryCoordNoPerspNV:
8216 	{
8217 		if (options.es && options.version < 320)
8218 			SPIRV_CROSS_THROW("gl_BaryCoordNoPerspNV requires ESSL 320.");
8219 		else if (!options.es && options.version < 450)
8220 			SPIRV_CROSS_THROW("gl_BaryCoordNoPerspNV requires GLSL 450.");
8221 		require_extension_internal("GL_NV_fragment_shader_barycentric");
8222 		return "gl_BaryCoordNoPerspNV";
8223 	}
8224 
8225 	case BuiltInFragStencilRefEXT:
8226 	{
8227 		if (!options.es)
8228 		{
8229 			require_extension_internal("GL_ARB_shader_stencil_export");
8230 			return "gl_FragStencilRefARB";
8231 		}
8232 		else
8233 			SPIRV_CROSS_THROW("Stencil export not supported in GLES.");
8234 	}
8235 
8236 	case BuiltInDeviceIndex:
8237 		if (!options.vulkan_semantics)
8238 			SPIRV_CROSS_THROW("Need Vulkan semantics for device group support.");
8239 		require_extension_internal("GL_EXT_device_group");
8240 		return "gl_DeviceIndex";
8241 
8242 	default:
8243 		return join("gl_BuiltIn_", convert_to_string(builtin));
8244 	}
8245 }
8246 
8247 const char *CompilerGLSL::index_to_swizzle(uint32_t index)
8248 {
8249 	switch (index)
8250 	{
8251 	case 0:
8252 		return "x";
8253 	case 1:
8254 		return "y";
8255 	case 2:
8256 		return "z";
8257 	case 3:
8258 		return "w";
8259 	default:
8260 		SPIRV_CROSS_THROW("Swizzle index out of range");
8261 	}
8262 }
8263 
8264 void CompilerGLSL::access_chain_internal_append_index(std::string &expr, uint32_t /*base*/, const SPIRType *type,
8265                                                       AccessChainFlags flags, bool & /*access_chain_is_arrayed*/,
8266                                                       uint32_t index)
8267 {
8268 	bool index_is_literal = (flags & ACCESS_CHAIN_INDEX_IS_LITERAL_BIT) != 0;
8269 	bool register_expression_read = (flags & ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT) == 0;
8270 
8271 	expr += "[";
8272 
8273 	// If we are indexing into an array of SSBOs or UBOs, we need to index it with a non-uniform qualifier.
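	// With the GLSL backend this wraps the index expression, e.g. buffers[nonuniformEXT(i)],
	// assuming backend.nonuniform_qualifier is left at its default value.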
8274 	bool nonuniform_index =
8275 	    has_decoration(index, DecorationNonUniformEXT) &&
8276 	    (has_decoration(type->self, DecorationBlock) || has_decoration(type->self, DecorationBufferBlock));
8277 	if (nonuniform_index)
8278 	{
8279 		expr += backend.nonuniform_qualifier;
8280 		expr += "(";
8281 	}
8282 
8283 	if (index_is_literal)
8284 		expr += convert_to_string(index);
8285 	else
8286 		expr += to_expression(index, register_expression_read);
8287 
8288 	if (nonuniform_index)
8289 		expr += ")";
8290 
8291 	expr += "]";
8292 }
8293 
8294 string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count,
8295                                            AccessChainFlags flags, AccessChainMeta *meta)
8296 {
8297 	string expr;
8298 
8299 	bool index_is_literal = (flags & ACCESS_CHAIN_INDEX_IS_LITERAL_BIT) != 0;
8300 	bool msb_is_id = (flags & ACCESS_CHAIN_LITERAL_MSB_FORCE_ID) != 0;
8301 	bool chain_only = (flags & ACCESS_CHAIN_CHAIN_ONLY_BIT) != 0;
8302 	bool ptr_chain = (flags & ACCESS_CHAIN_PTR_CHAIN_BIT) != 0;
8303 	bool register_expression_read = (flags & ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT) == 0;
8304 	bool flatten_member_reference = (flags & ACCESS_CHAIN_FLATTEN_ALL_MEMBERS_BIT) != 0;
8305 
8306 	if (!chain_only)
8307 	{
8308 		// We handle transpose explicitly, so don't resolve that here.
8309 		auto *e = maybe_get<SPIRExpression>(base);
8310 		bool old_transpose = e && e->need_transpose;
8311 		if (e)
8312 			e->need_transpose = false;
8313 		expr = to_enclosed_expression(base, register_expression_read);
8314 		if (e)
8315 			e->need_transpose = old_transpose;
8316 	}
8317 
8318 	// Start traversing type hierarchy at the proper non-pointer types,
8319 	// but keep type_id referencing the original pointer for use below.
8320 	uint32_t type_id = expression_type_id(base);
8321 
8322 	if (!backend.native_pointers)
8323 	{
8324 		if (ptr_chain)
8325 			SPIRV_CROSS_THROW("Backend does not support native pointers and does not support OpPtrAccessChain.");
8326 
8327 		// Wrapped buffer reference pointer types will need to poke into the internal "value" member before
8328 		// continuing the access chain.
8329 		if (should_dereference(base))
8330 		{
8331 			auto &type = get<SPIRType>(type_id);
8332 			expr = dereference_expression(type, expr);
8333 		}
8334 	}
8335 
8336 	const auto *type = &get_pointee_type(type_id);
8337 
8338 	bool access_chain_is_arrayed = expr.find_first_of('[') != string::npos;
8339 	bool row_major_matrix_needs_conversion = is_non_native_row_major_matrix(base);
8340 	bool is_packed = has_extended_decoration(base, SPIRVCrossDecorationPhysicalTypePacked);
8341 	uint32_t physical_type = get_extended_decoration(base, SPIRVCrossDecorationPhysicalTypeID);
8342 	bool is_invariant = has_decoration(base, DecorationInvariant);
8343 	bool pending_array_enclose = false;
8344 	bool dimension_flatten = false;
8345 
8346 	const auto append_index = [&](uint32_t index, bool is_literal) {
8347 		AccessChainFlags mod_flags = flags;
8348 		if (!is_literal)
8349 			mod_flags &= ~ACCESS_CHAIN_INDEX_IS_LITERAL_BIT;
8350 		access_chain_internal_append_index(expr, base, type, mod_flags, access_chain_is_arrayed, index);
8351 	};
8352 
8353 	for (uint32_t i = 0; i < count; i++)
8354 	{
8355 		uint32_t index = indices[i];
8356 
8357 		bool is_literal = index_is_literal;
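		// With ACCESS_CHAIN_LITERAL_MSB_FORCE_ID, a "literal" index with the MSB set actually
		// encodes an ID; strip the flag bit and treat it as a dynamic index instead.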
8358 		if (is_literal && msb_is_id && (index >> 31u) != 0u)
8359 		{
8360 			is_literal = false;
8361 			index &= 0x7fffffffu;
8362 		}
8363 
8364 		// Pointer chains
8365 		if (ptr_chain && i == 0)
8366 		{
8367 			// If we are flattening multidimensional arrays, only create opening bracket on first
8368 			// array index.
8369 			if (options.flatten_multidimensional_arrays)
8370 			{
8371 				dimension_flatten = type->array.size() >= 1;
8372 				pending_array_enclose = dimension_flatten;
8373 				if (pending_array_enclose)
8374 					expr += "[";
8375 			}
8376 
8377 			if (options.flatten_multidimensional_arrays && dimension_flatten)
8378 			{
8379 				// If we are flattening multidimensional arrays, do manual stride computation.
8380 				if (is_literal)
8381 					expr += convert_to_string(index);
8382 				else
8383 					expr += to_enclosed_expression(index, register_expression_read);
8384 
8385 				for (auto j = uint32_t(type->array.size()); j; j--)
8386 				{
8387 					expr += " * ";
8388 					expr += enclose_expression(to_array_size(*type, j - 1));
8389 				}
8390 
8391 				if (type->array.empty())
8392 					pending_array_enclose = false;
8393 				else
8394 					expr += " + ";
8395 
8396 				if (!pending_array_enclose)
8397 					expr += "]";
8398 			}
8399 			else
8400 			{
8401 				append_index(index, is_literal);
8402 			}
8403 
8404 			if (type->basetype == SPIRType::ControlPointArray)
8405 			{
8406 				type_id = type->parent_type;
8407 				type = &get<SPIRType>(type_id);
8408 			}
8409 
8410 			access_chain_is_arrayed = true;
8411 		}
8412 		// Arrays
8413 		else if (!type->array.empty())
8414 		{
8415 			// If we are flattening multidimensional arrays, only create opening bracket on first
8416 			// array index.
8417 			if (options.flatten_multidimensional_arrays && !pending_array_enclose)
8418 			{
8419 				dimension_flatten = type->array.size() > 1;
8420 				pending_array_enclose = dimension_flatten;
8421 				if (pending_array_enclose)
8422 					expr += "[";
8423 			}
8424 
8425 			assert(type->parent_type);
8426 
8427 			auto *var = maybe_get<SPIRVariable>(base);
8428 			if (backend.force_gl_in_out_block && i == 0 && var && is_builtin_variable(*var) &&
8429 			    !has_decoration(type->self, DecorationBlock))
8430 			{
8431 				// This deals with scenarios for tesc/geom where arrays of gl_Position[] are declared.
8432 				// Normally, these variables live in blocks when compiled from GLSL,
8433 				// but HLSL seems to just emit straight arrays here.
8434 				// We must pretend this access goes through gl_in/gl_out arrays
8435 				// to be able to access certain builtins as arrays.
8436 				auto builtin = ir.meta[base].decoration.builtin_type;
8437 				switch (builtin)
8438 				{
8439 				// case BuiltInCullDistance: // These are already arrays, need to figure out rules for these in tess/geom.
8440 				// case BuiltInClipDistance:
8441 				case BuiltInPosition:
8442 				case BuiltInPointSize:
8443 					if (var->storage == StorageClassInput)
8444 						expr = join("gl_in[", to_expression(index, register_expression_read), "].", expr);
8445 					else if (var->storage == StorageClassOutput)
8446 						expr = join("gl_out[", to_expression(index, register_expression_read), "].", expr);
8447 					else
8448 						append_index(index, is_literal);
8449 					break;
8450 
8451 				default:
8452 					append_index(index, is_literal);
8453 					break;
8454 				}
8455 			}
8456 			else if (options.flatten_multidimensional_arrays && dimension_flatten)
8457 			{
8458 				// If we are flattening multidimensional arrays, do manual stride computation.
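				// E.g. for float a[4][3], the chain a[i][j] is emitted as a[i * 3 + j].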
8459 				auto &parent_type = get<SPIRType>(type->parent_type);
8460 
8461 				if (is_literal)
8462 					expr += convert_to_string(index);
8463 				else
8464 					expr += to_enclosed_expression(index, register_expression_read);
8465 
8466 				for (auto j = uint32_t(parent_type.array.size()); j; j--)
8467 				{
8468 					expr += " * ";
8469 					expr += enclose_expression(to_array_size(parent_type, j - 1));
8470 				}
8471 
8472 				if (parent_type.array.empty())
8473 					pending_array_enclose = false;
8474 				else
8475 					expr += " + ";
8476 
8477 				if (!pending_array_enclose)
8478 					expr += "]";
8479 			}
8480 			// Some builtins are arrays in SPIR-V but not in other languages, e.g. gl_SampleMask[] is an array in SPIR-V but not in Metal.
8481 			// By throwing away the index, we imply the index was 0, which it must be for gl_SampleMask.
8482 			else if (!builtin_translates_to_nonarray(BuiltIn(get_decoration(base, DecorationBuiltIn))))
8483 			{
8484 				append_index(index, is_literal);
8485 			}
8486 
8487 			type_id = type->parent_type;
8488 			type = &get<SPIRType>(type_id);
8489 
8490 			access_chain_is_arrayed = true;
8491 		}
8492 		// For structs, the index refers to a constant, which indexes into the members.
8493 		// We also check if this member is a builtin, since we then replace the entire expression with the builtin one.
8494 		else if (type->basetype == SPIRType::Struct)
8495 		{
8496 			if (!is_literal)
8497 				index = evaluate_constant_u32(index);
8498 
8499 			if (index >= type->member_types.size())
8500 				SPIRV_CROSS_THROW("Member index is out of bounds!");
8501 
8502 			BuiltIn builtin;
8503 			if (is_member_builtin(*type, index, &builtin))
8504 			{
8505 				if (access_chain_is_arrayed)
8506 				{
8507 					expr += ".";
8508 					expr += builtin_to_glsl(builtin, type->storage);
8509 				}
8510 				else
8511 					expr = builtin_to_glsl(builtin, type->storage);
8512 			}
8513 			else
8514 			{
8515 				// If the member has a qualified name, use it as the entire chain
8516 				string qual_mbr_name = get_member_qualified_name(type_id, index);
8517 				if (!qual_mbr_name.empty())
8518 					expr = qual_mbr_name;
8519 				else if (flatten_member_reference)
8520 					expr += join("_", to_member_name(*type, index));
8521 				else
8522 					expr += to_member_reference(base, *type, index, ptr_chain);
8523 			}
8524 
8525 			if (has_member_decoration(type->self, index, DecorationInvariant))
8526 				is_invariant = true;
8527 
8528 			is_packed = member_is_packed_physical_type(*type, index);
8529 			if (member_is_remapped_physical_type(*type, index))
8530 				physical_type = get_extended_member_decoration(type->self, index, SPIRVCrossDecorationPhysicalTypeID);
8531 			else
8532 				physical_type = 0;
8533 
8534 			row_major_matrix_needs_conversion = member_is_non_native_row_major_matrix(*type, index);
8535 			type = &get<SPIRType>(type->member_types[index]);
8536 		}
8537 		// Matrix -> Vector
8538 		else if (type->columns > 1)
8539 		{
8540 			// If we have a row-major matrix here, we need to defer any transpose in case this access chain
8541 			// is used to store a column. We can resolve it right here and now if we access a scalar directly,
8542 			// by flipping indexing order of the matrix.
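			// E.g. a chain that reaches a single scalar and would otherwise emit M[col][row] is
			// flipped to M[row][col]: the column index is captured as deferred_index in the vector
			// branch below and re-appended after the row index.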
8543 
8544 			expr += "[";
8545 			if (is_literal)
8546 				expr += convert_to_string(index);
8547 			else
8548 				expr += to_expression(index, register_expression_read);
8549 			expr += "]";
8550 
8551 			type_id = type->parent_type;
8552 			type = &get<SPIRType>(type_id);
8553 		}
8554 		// Vector -> Scalar
8555 		else if (type->vecsize > 1)
8556 		{
8557 			string deferred_index;
8558 			if (row_major_matrix_needs_conversion)
8559 			{
8560 				// Flip indexing order.
8561 				auto column_index = expr.find_last_of('[');
8562 				if (column_index != string::npos)
8563 				{
8564 					deferred_index = expr.substr(column_index);
8565 					expr.resize(column_index);
8566 				}
8567 			}
8568 
8569 			// Internally, access chain implementation can also be used on composites,
8570 			// ignore scalar access workarounds in this case.
8571 			StorageClass effective_storage;
8572 			if (expression_type(base).pointer)
8573 				effective_storage = get_expression_effective_storage_class(base);
8574 			else
8575 				effective_storage = StorageClassGeneric;
8576 
8577 			if (!row_major_matrix_needs_conversion)
8578 			{
8579 				// On some backends, we might not be able to safely access individual scalars in a vector.
8580 				// To work around this, we might have to cast the access chain reference to something which can,
8581 				// like a pointer to scalar, which we can then index into.
8582 				prepare_access_chain_for_scalar_access(expr, get<SPIRType>(type->parent_type), effective_storage,
8583 				                                       is_packed);
8584 			}
8585 
8586 			if (is_literal && !is_packed && !row_major_matrix_needs_conversion)
8587 			{
8588 				expr += ".";
8589 				expr += index_to_swizzle(index);
8590 			}
8591 			else if (ir.ids[index].get_type() == TypeConstant && !is_packed && !row_major_matrix_needs_conversion)
8592 			{
8593 				auto &c = get<SPIRConstant>(index);
8594 				if (c.specialization)
8595 				{
8596 					// If the index is a spec constant, we cannot turn extract into a swizzle.
8597 					expr += join("[", to_expression(index), "]");
8598 				}
8599 				else
8600 				{
8601 					expr += ".";
8602 					expr += index_to_swizzle(c.scalar());
8603 				}
8604 			}
8605 			else if (is_literal)
8606 			{
8607 				// For packed vectors, we can only access them as an array, not by swizzle.
8608 				expr += join("[", index, "]");
8609 			}
8610 			else
8611 			{
8612 				expr += "[";
8613 				expr += to_expression(index, register_expression_read);
8614 				expr += "]";
8615 			}
8616 
8617 			if (row_major_matrix_needs_conversion)
8618 			{
8619 				prepare_access_chain_for_scalar_access(expr, get<SPIRType>(type->parent_type), effective_storage,
8620 				                                       is_packed);
8621 			}
8622 
8623 			expr += deferred_index;
8624 			row_major_matrix_needs_conversion = false;
8625 
8626 			is_packed = false;
8627 			physical_type = 0;
8628 			type_id = type->parent_type;
8629 			type = &get<SPIRType>(type_id);
8630 		}
8631 		else if (!backend.allow_truncated_access_chain)
8632 			SPIRV_CROSS_THROW("Cannot subdivide a scalar value!");
8633 	}
8634 
8635 	if (pending_array_enclose)
8636 	{
8637 		SPIRV_CROSS_THROW("Flattening of multidimensional arrays was enabled, "
8638 		                  "but the access chain was terminated in the middle of a multidimensional array. "
8639 		                  "This is not supported.");
8640 	}
8641 
8642 	if (meta)
8643 	{
8644 		meta->need_transpose = row_major_matrix_needs_conversion;
8645 		meta->storage_is_packed = is_packed;
8646 		meta->storage_is_invariant = is_invariant;
8647 		meta->storage_physical_type = physical_type;
8648 	}
8649 
8650 	return expr;
8651 }
8652 
8653 void CompilerGLSL::prepare_access_chain_for_scalar_access(std::string &, const SPIRType &, spv::StorageClass, bool &)
8654 {
8655 }
8656 
8657 string CompilerGLSL::to_flattened_struct_member(const string &basename, const SPIRType &type, uint32_t index)
8658 {
8659 	auto ret = join(basename, "_", to_member_name(type, index));
8660 	ParsedIR::sanitize_underscores(ret);
8661 	return ret;
8662 }
8663 
8664 string CompilerGLSL::access_chain(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type,
8665                                   AccessChainMeta *meta, bool ptr_chain)
8666 {
8667 	if (flattened_buffer_blocks.count(base))
8668 	{
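		// Flattened buffer blocks are emitted as a plain array of vec4s, so compute the byte offset
		// (plus matrix/array strides) up front and let flattened_access_chain() translate it into
		// vec4 indices and swizzles.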
8669 		uint32_t matrix_stride = 0;
8670 		uint32_t array_stride = 0;
8671 		bool need_transpose = false;
8672 		flattened_access_chain_offset(expression_type(base), indices, count, 0, 16, &need_transpose, &matrix_stride,
8673 		                              &array_stride, ptr_chain);
8674 
8675 		if (meta)
8676 		{
8677 			meta->need_transpose = target_type.columns > 1 && need_transpose;
8678 			meta->storage_is_packed = false;
8679 		}
8680 
8681 		return flattened_access_chain(base, indices, count, target_type, 0, matrix_stride, array_stride,
8682 		                              need_transpose);
8683 	}
8684 	else if (flattened_structs.count(base) && count > 0)
8685 	{
8686 		AccessChainFlags flags = ACCESS_CHAIN_CHAIN_ONLY_BIT | ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT;
8687 		if (ptr_chain)
8688 			flags |= ACCESS_CHAIN_PTR_CHAIN_BIT;
8689 
8690 		if (flattened_structs[base])
8691 		{
8692 			flags |= ACCESS_CHAIN_FLATTEN_ALL_MEMBERS_BIT;
8693 			if (meta)
8694 				meta->flattened_struct = target_type.basetype == SPIRType::Struct;
8695 		}
8696 
8697 		auto chain = access_chain_internal(base, indices, count, flags, nullptr).substr(1);
8698 		if (meta)
8699 		{
8700 			meta->need_transpose = false;
8701 			meta->storage_is_packed = false;
8702 		}
8703 
8704 		auto basename = to_flattened_access_chain_expression(base);
8705 		auto ret = join(basename, "_", chain);
8706 		ParsedIR::sanitize_underscores(ret);
8707 		return ret;
8708 	}
8709 	else
8710 	{
8711 		AccessChainFlags flags = ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT;
8712 		if (ptr_chain)
8713 			flags |= ACCESS_CHAIN_PTR_CHAIN_BIT;
8714 		return access_chain_internal(base, indices, count, flags, meta);
8715 	}
8716 }
8717 
8718 string CompilerGLSL::load_flattened_struct(const string &basename, const SPIRType &type)
8719 {
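	// Reassemble a flattened struct from its individual flattened members.
	// E.g. a struct Foo { vec4 a; vec2 b; } flattened under "foo" is loaded as Foo(foo_a, foo_b).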
8720 	auto expr = type_to_glsl_constructor(type);
8721 	expr += '(';
8722 
8723 	for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
8724 	{
8725 		if (i)
8726 			expr += ", ";
8727 
8728 		auto &member_type = get<SPIRType>(type.member_types[i]);
8729 		if (member_type.basetype == SPIRType::Struct)
8730 			expr += load_flattened_struct(to_flattened_struct_member(basename, type, i), member_type);
8731 		else
8732 			expr += to_flattened_struct_member(basename, type, i);
8733 	}
8734 	expr += ')';
8735 	return expr;
8736 }
8737 
8738 std::string CompilerGLSL::to_flattened_access_chain_expression(uint32_t id)
8739 {
8740 	// Do not use to_expression as that will unflatten access chains.
8741 	string basename;
8742 	if (const auto *var = maybe_get<SPIRVariable>(id))
8743 		basename = to_name(var->self);
8744 	else if (const auto *expr = maybe_get<SPIRExpression>(id))
8745 		basename = expr->expression;
8746 	else
8747 		basename = to_expression(id);
8748 
8749 	return basename;
8750 }
8751 
8752 void CompilerGLSL::store_flattened_struct(const string &basename, uint32_t rhs_id, const SPIRType &type,
8753                                           const SmallVector<uint32_t> &indices)
8754 {
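	// Store into a flattened struct member by member, recursing into nested structs.
	// E.g. for a flattened struct named "foo", this emits statements like foo_a = rhs.a; foo_b = rhs.b;.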
8755 	SmallVector<uint32_t> sub_indices = indices;
8756 	sub_indices.push_back(0);
8757 
8758 	auto *member_type = &type;
8759 	for (auto &index : indices)
8760 		member_type = &get<SPIRType>(member_type->member_types[index]);
8761 
8762 	for (uint32_t i = 0; i < uint32_t(member_type->member_types.size()); i++)
8763 	{
8764 		sub_indices.back() = i;
8765 		auto lhs = join(basename, "_", to_member_name(*member_type, i));
8766 		ParsedIR::sanitize_underscores(lhs);
8767 
8768 		if (get<SPIRType>(member_type->member_types[i]).basetype == SPIRType::Struct)
8769 		{
8770 			store_flattened_struct(lhs, rhs_id, type, sub_indices);
8771 		}
8772 		else
8773 		{
8774 			auto rhs = to_expression(rhs_id) + to_multi_member_reference(type, sub_indices);
8775 			statement(lhs, " = ", rhs, ";");
8776 		}
8777 	}
8778 }
8779 
8780 void CompilerGLSL::store_flattened_struct(uint32_t lhs_id, uint32_t value)
8781 {
8782 	auto &type = expression_type(lhs_id);
8783 	auto basename = to_flattened_access_chain_expression(lhs_id);
8784 	store_flattened_struct(basename, value, type, {});
8785 }
8786 
8787 std::string CompilerGLSL::flattened_access_chain(uint32_t base, const uint32_t *indices, uint32_t count,
8788                                                  const SPIRType &target_type, uint32_t offset, uint32_t matrix_stride,
8789                                                  uint32_t /* array_stride */, bool need_transpose)
8790 {
8791 	if (!target_type.array.empty())
8792 		SPIRV_CROSS_THROW("Access chains that result in an array cannot be flattened");
8793 	else if (target_type.basetype == SPIRType::Struct)
8794 		return flattened_access_chain_struct(base, indices, count, target_type, offset);
8795 	else if (target_type.columns > 1)
8796 		return flattened_access_chain_matrix(base, indices, count, target_type, offset, matrix_stride, need_transpose);
8797 	else
8798 		return flattened_access_chain_vector(base, indices, count, target_type, offset, matrix_stride, need_transpose);
8799 }
8800 
8801 std::string CompilerGLSL::flattened_access_chain_struct(uint32_t base, const uint32_t *indices, uint32_t count,
8802                                                         const SPIRType &target_type, uint32_t offset)
8803 {
8804 	std::string expr;
8805 
8806 	expr += type_to_glsl_constructor(target_type);
8807 	expr += "(";
8808 
8809 	for (uint32_t i = 0; i < uint32_t(target_type.member_types.size()); ++i)
8810 	{
8811 		if (i != 0)
8812 			expr += ", ";
8813 
8814 		const SPIRType &member_type = get<SPIRType>(target_type.member_types[i]);
8815 		uint32_t member_offset = type_struct_member_offset(target_type, i);
8816 
8817 		// The access chain terminates at the struct, so we need to find matrix strides and row-major information
8818 		// ahead of time.
8819 		bool need_transpose = false;
8820 		uint32_t matrix_stride = 0;
8821 		if (member_type.columns > 1)
8822 		{
8823 			need_transpose = combined_decoration_for_member(target_type, i).get(DecorationRowMajor);
8824 			matrix_stride = type_struct_member_matrix_stride(target_type, i);
8825 		}
8826 
8827 		auto tmp = flattened_access_chain(base, indices, count, member_type, offset + member_offset, matrix_stride,
8828 		                                  0 /* array_stride */, need_transpose);
8829 
8830 		// Cannot forward transpositions, so resolve them here.
8831 		if (need_transpose)
8832 			expr += convert_row_major_matrix(tmp, member_type, 0, false);
8833 		else
8834 			expr += tmp;
8835 	}
8836 
8837 	expr += ")";
8838 
8839 	return expr;
8840 }
8841 
8842 std::string CompilerGLSL::flattened_access_chain_matrix(uint32_t base, const uint32_t *indices, uint32_t count,
8843                                                         const SPIRType &target_type, uint32_t offset,
8844                                                         uint32_t matrix_stride, bool need_transpose)
8845 {
8846 	assert(matrix_stride);
8847 	SPIRType tmp_type = target_type;
8848 	if (need_transpose)
8849 		swap(tmp_type.vecsize, tmp_type.columns);
8850 
8851 	std::string expr;
8852 
8853 	expr += type_to_glsl_constructor(tmp_type);
8854 	expr += "(";
8855 
8856 	for (uint32_t i = 0; i < tmp_type.columns; i++)
8857 	{
8858 		if (i != 0)
8859 			expr += ", ";
8860 
8861 		expr += flattened_access_chain_vector(base, indices, count, tmp_type, offset + i * matrix_stride, matrix_stride,
8862 		                                      /* need_transpose= */ false);
8863 	}
8864 
8865 	expr += ")";
8866 
8867 	return expr;
8868 }
8869 
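// Reads a single vector or scalar out of the flattened buffer. The flattened block is assumed
// to be declared as an array of 4-component words, so the byte offset is converted into a word
// index plus a swizzle. As a rough, hypothetical example: loading a vec2 of 32-bit floats at
// byte offset 24 from a block named "UBO" would produce "UBO[1].zw" (float index 6 falls into
// word 1, starting at component z).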
8870 std::string CompilerGLSL::flattened_access_chain_vector(uint32_t base, const uint32_t *indices, uint32_t count,
8871                                                         const SPIRType &target_type, uint32_t offset,
8872                                                         uint32_t matrix_stride, bool need_transpose)
8873 {
8874 	auto result = flattened_access_chain_offset(expression_type(base), indices, count, offset, 16);
8875 
8876 	auto buffer_name = to_name(expression_type(base).self);
8877 
8878 	if (need_transpose)
8879 	{
8880 		std::string expr;
8881 
8882 		if (target_type.vecsize > 1)
8883 		{
8884 			expr += type_to_glsl_constructor(target_type);
8885 			expr += "(";
8886 		}
8887 
8888 		for (uint32_t i = 0; i < target_type.vecsize; ++i)
8889 		{
8890 			if (i != 0)
8891 				expr += ", ";
8892 
8893 			uint32_t component_offset = result.second + i * matrix_stride;
8894 
8895 			assert(component_offset % (target_type.width / 8) == 0);
8896 			uint32_t index = component_offset / (target_type.width / 8);
8897 
8898 			expr += buffer_name;
8899 			expr += "[";
8900 			expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... that is either empty or ends with a +
8901 			expr += convert_to_string(index / 4);
8902 			expr += "]";
8903 
8904 			expr += vector_swizzle(1, index % 4);
8905 		}
8906 
8907 		if (target_type.vecsize > 1)
8908 		{
8909 			expr += ")";
8910 		}
8911 
8912 		return expr;
8913 	}
8914 	else
8915 	{
8916 		assert(result.second % (target_type.width / 8) == 0);
8917 		uint32_t index = result.second / (target_type.width / 8);
8918 
8919 		std::string expr;
8920 
8921 		expr += buffer_name;
8922 		expr += "[";
8923 		expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... that is either empty or ends with a +
8924 		expr += convert_to_string(index / 4);
8925 		expr += "]";
8926 
8927 		expr += vector_swizzle(target_type.vecsize, index % 4);
8928 
8929 		return expr;
8930 	}
8931 }
8932 
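// Walks the access chain and returns a pair of (dynamic index expression, constant byte offset).
// The string part is a sum of terms like "i * stride_in_words + " (or empty when all indices are
// constant), expressed in units of word_stride bytes; callers typically pass 16, i.e. one vec4 word.
// Matrix/array strides and row-major information are threaded through the optional out parameters.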
8933 std::pair<std::string, uint32_t> CompilerGLSL::flattened_access_chain_offset(
8934     const SPIRType &basetype, const uint32_t *indices, uint32_t count, uint32_t offset, uint32_t word_stride,
8935     bool *need_transpose, uint32_t *out_matrix_stride, uint32_t *out_array_stride, bool ptr_chain)
8936 {
8937 	// Start traversing type hierarchy at the proper non-pointer types.
8938 	const auto *type = &get_pointee_type(basetype);
8939 
8940 	std::string expr;
8941 
8942 	// Inherit matrix information in case we are access chaining a vector which might have come from a row major layout.
8943 	bool row_major_matrix_needs_conversion = need_transpose ? *need_transpose : false;
8944 	uint32_t matrix_stride = out_matrix_stride ? *out_matrix_stride : 0;
8945 	uint32_t array_stride = out_array_stride ? *out_array_stride : 0;
8946 
8947 	for (uint32_t i = 0; i < count; i++)
8948 	{
8949 		uint32_t index = indices[i];
8950 
8951 		// Pointers
8952 		if (ptr_chain && i == 0)
8953 		{
8954 			// Here, the pointer type will be decorated with an array stride.
8955 			array_stride = get_decoration(basetype.self, DecorationArrayStride);
8956 			if (!array_stride)
8957 				SPIRV_CROSS_THROW("SPIR-V does not define ArrayStride for buffer block.");
8958 
8959 			auto *constant = maybe_get<SPIRConstant>(index);
8960 			if (constant)
8961 			{
8962 				// Constant array access.
8963 				offset += constant->scalar() * array_stride;
8964 			}
8965 			else
8966 			{
8967 				// Dynamic array access.
8968 				if (array_stride % word_stride)
8969 				{
8970 					SPIRV_CROSS_THROW("Array stride for dynamic indexing must be divisible by the size "
8971 					                  "of a 4-component vector. "
8972 					                  "Likely culprit here is a float or vec2 array inside a push "
8973 					                  "constant block which is std430. "
8974 					                  "This cannot be flattened. Try using std140 layout instead.");
8975 				}
8976 
8977 				expr += to_enclosed_expression(index);
8978 				expr += " * ";
8979 				expr += convert_to_string(array_stride / word_stride);
8980 				expr += " + ";
8981 			}
8982 		}
8983 		// Arrays
8984 		else if (!type->array.empty())
8985 		{
8986 			auto *constant = maybe_get<SPIRConstant>(index);
8987 			if (constant)
8988 			{
8989 				// Constant array access.
8990 				offset += constant->scalar() * array_stride;
8991 			}
8992 			else
8993 			{
8994 				// Dynamic array access.
8995 				if (array_stride % word_stride)
8996 				{
8997 					SPIRV_CROSS_THROW("Array stride for dynamic indexing must be divisible by the size "
8998 					                  "of a 4-component vector. "
8999 					                  "Likely culprit here is a float or vec2 array inside a push "
9000 					                  "constant block which is std430. "
9001 					                  "This cannot be flattened. Try using std140 layout instead.");
9002 				}
9003 
9004 				expr += to_enclosed_expression(index, false);
9005 				expr += " * ";
9006 				expr += convert_to_string(array_stride / word_stride);
9007 				expr += " + ";
9008 			}
9009 
9010 			uint32_t parent_type = type->parent_type;
9011 			type = &get<SPIRType>(parent_type);
9012 
9013 			if (!type->array.empty())
9014 				array_stride = get_decoration(parent_type, DecorationArrayStride);
9015 		}
9016 		// For structs, the index refers to a constant, which indexes into the members.
9017 		// We also check if this member is a builtin, since we then replace the entire expression with the builtin one.
9018 		else if (type->basetype == SPIRType::Struct)
9019 		{
9020 			index = evaluate_constant_u32(index);
9021 
9022 			if (index >= type->member_types.size())
9023 				SPIRV_CROSS_THROW("Member index is out of bounds!");
9024 
9025 			offset += type_struct_member_offset(*type, index);
9026 
9027 			auto &struct_type = *type;
9028 			type = &get<SPIRType>(type->member_types[index]);
9029 
9030 			if (type->columns > 1)
9031 			{
9032 				matrix_stride = type_struct_member_matrix_stride(struct_type, index);
9033 				row_major_matrix_needs_conversion =
9034 				    combined_decoration_for_member(struct_type, index).get(DecorationRowMajor);
9035 			}
9036 			else
9037 				row_major_matrix_needs_conversion = false;
9038 
9039 			if (!type->array.empty())
9040 				array_stride = type_struct_member_array_stride(struct_type, index);
9041 		}
9042 		// Matrix -> Vector
9043 		else if (type->columns > 1)
9044 		{
9045 			auto *constant = maybe_get<SPIRConstant>(index);
9046 			if (constant)
9047 			{
9048 				index = evaluate_constant_u32(index);
9049 				offset += index * (row_major_matrix_needs_conversion ? (type->width / 8) : matrix_stride);
9050 			}
9051 			else
9052 			{
9053 				uint32_t indexing_stride = row_major_matrix_needs_conversion ? (type->width / 8) : matrix_stride;
9054 				// Dynamic array access.
9055 				if (indexing_stride % word_stride)
9056 				{
9057 					SPIRV_CROSS_THROW("Matrix stride for dynamic indexing must be divisible by the size of a "
9058 					                  "4-component vector. "
9059 					                  "Likely culprit here is a row-major matrix being accessed dynamically. "
9060 					                  "This cannot be flattened. Try using std140 layout instead.");
9061 				}
9062 
9063 				expr += to_enclosed_expression(index, false);
9064 				expr += " * ";
9065 				expr += convert_to_string(indexing_stride / word_stride);
9066 				expr += " + ";
9067 			}
9068 
9069 			type = &get<SPIRType>(type->parent_type);
9070 		}
9071 		// Vector -> Scalar
9072 		else if (type->vecsize > 1)
9073 		{
9074 			auto *constant = maybe_get<SPIRConstant>(index);
9075 			if (constant)
9076 			{
9077 				index = evaluate_constant_u32(index);
9078 				offset += index * (row_major_matrix_needs_conversion ? matrix_stride : (type->width / 8));
9079 			}
9080 			else
9081 			{
9082 				uint32_t indexing_stride = row_major_matrix_needs_conversion ? matrix_stride : (type->width / 8);
9083 
9084 				// Dynamic array access.
9085 				if (indexing_stride % word_stride)
9086 				{
9087 					SPIRV_CROSS_THROW("Stride for dynamic vector indexing must be divisible by the "
9088 					                  "size of a 4-component vector. "
9089 					                  "This cannot be flattened in legacy targets.");
9090 				}
9091 
9092 				expr += to_enclosed_expression(index, false);
9093 				expr += " * ";
9094 				expr += convert_to_string(indexing_stride / word_stride);
9095 				expr += " + ";
9096 			}
9097 
9098 			type = &get<SPIRType>(type->parent_type);
9099 		}
9100 		else
9101 			SPIRV_CROSS_THROW("Cannot subdivide a scalar value!");
9102 	}
9103 
9104 	if (need_transpose)
9105 		*need_transpose = row_major_matrix_needs_conversion;
9106 	if (out_matrix_stride)
9107 		*out_matrix_stride = matrix_stride;
9108 	if (out_array_stride)
9109 		*out_array_stride = array_stride;
9110 
9111 	return std::make_pair(expr, offset);
9112 }
9113 
9114 bool CompilerGLSL::should_dereference(uint32_t id)
9115 {
9116 	const auto &type = expression_type(id);
9117 	// Non-pointer expressions don't need to be dereferenced.
9118 	if (!type.pointer)
9119 		return false;
9120 
9121 	// Handles shouldn't be dereferenced either.
9122 	if (!expression_is_lvalue(id))
9123 		return false;
9124 
9125 	// If id is a variable but not a phi variable, we should not dereference it.
9126 	if (auto *var = maybe_get<SPIRVariable>(id))
9127 		return var->phi_variable;
9128 
9129 	// If id is an access chain, we should not dereference it.
9130 	if (auto *expr = maybe_get<SPIRExpression>(id))
9131 		return !expr->access_chain;
9132 
9133 	// Otherwise, we should dereference this pointer expression.
9134 	return true;
9135 }
9136 
9137 bool CompilerGLSL::should_forward(uint32_t id) const
9138 {
9139 	// If id is a variable, we will try to forward it regardless of the force_temporary check below.
9140 	// This is important because otherwise we'll get local sampler copies (highp sampler2D foo = bar) that are invalid in OpenGL GLSL.
9141 	auto *var = maybe_get<SPIRVariable>(id);
9142 	if (var && var->forwardable)
9143 		return true;
9144 
9145 	// For debugging emit temporary variables for all expressions
9146 	if (options.force_temporary)
9147 		return false;
9148 
9149 	// Immutable expression can always be forwarded.
9150 	if (is_immutable(id))
9151 		return true;
9152 
9153 	return false;
9154 }
9155 
9156 bool CompilerGLSL::should_suppress_usage_tracking(uint32_t id) const
9157 {
9158 	// Used only by opcodes which don't do any real "work", they just swizzle data in some fashion.
9159 	return !expression_is_forwarded(id) || expression_suppresses_usage_tracking(id);
9160 }
9161 
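// Bumps the usage count for a forwarded (not yet materialized) expression each time it is read.
// Once an expression is read more than once, it is forced into a real temporary and a recompile
// pass is requested, so potentially expensive code is not stamped out multiple times.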
9162 void CompilerGLSL::track_expression_read(uint32_t id)
9163 {
9164 	switch (ir.ids[id].get_type())
9165 	{
9166 	case TypeExpression:
9167 	{
9168 		auto &e = get<SPIRExpression>(id);
9169 		for (auto implied_read : e.implied_read_expressions)
9170 			track_expression_read(implied_read);
9171 		break;
9172 	}
9173 
9174 	case TypeAccessChain:
9175 	{
9176 		auto &e = get<SPIRAccessChain>(id);
9177 		for (auto implied_read : e.implied_read_expressions)
9178 			track_expression_read(implied_read);
9179 		break;
9180 	}
9181 
9182 	default:
9183 		break;
9184 	}
9185 
9186 	// If we try to read a forwarded temporary more than once we will stamp out possibly complex code twice.
9187 	// In this case, it's better to just bind the complex expression to the temporary and read that temporary twice.
9188 	if (expression_is_forwarded(id) && !expression_suppresses_usage_tracking(id))
9189 	{
9190 		auto &v = expression_usage_counts[id];
9191 		v++;
9192 
9193 		// If we create an expression outside a loop,
9194 		// but access it inside a loop, we're implicitly reading it multiple times.
9195 		// If the expression in question is expensive, we should hoist it out to avoid relying on loop-invariant code motion
9196 		// working inside the backend compiler.
9197 		if (expression_read_implies_multiple_reads(id))
9198 			v++;
9199 
9200 		if (v >= 2)
9201 		{
9202 			//if (v == 2)
9203 			//    fprintf(stderr, "ID %u was forced to temporary due to more than 1 expression use!\n", id);
9204 
9205 			forced_temporaries.insert(id);
9206 			// Force a recompile after this pass to avoid forwarding this variable.
9207 			force_recompile();
9208 		}
9209 	}
9210 }
9211 
9212 bool CompilerGLSL::args_will_forward(uint32_t id, const uint32_t *args, uint32_t num_args, bool pure)
9213 {
9214 	if (forced_temporaries.find(id) != end(forced_temporaries))
9215 		return false;
9216 
9217 	for (uint32_t i = 0; i < num_args; i++)
9218 		if (!should_forward(args[i]))
9219 			return false;
9220 
9221 	// We need to forward globals as well.
9222 	if (!pure)
9223 	{
9224 		for (auto global : global_variables)
9225 			if (!should_forward(global))
9226 				return false;
9227 		for (auto aliased : aliased_variables)
9228 			if (!should_forward(aliased))
9229 				return false;
9230 	}
9231 
9232 	return true;
9233 }
9234 
9235 void CompilerGLSL::register_impure_function_call()
9236 {
9237 	// Impure functions can modify globals and aliased variables, so invalidate them as well.
9238 	for (auto global : global_variables)
9239 		flush_dependees(get<SPIRVariable>(global));
9240 	for (auto aliased : aliased_variables)
9241 		flush_dependees(get<SPIRVariable>(aliased));
9242 }
9243 
9244 void CompilerGLSL::register_call_out_argument(uint32_t id)
9245 {
9246 	register_write(id);
9247 
9248 	auto *var = maybe_get<SPIRVariable>(id);
9249 	if (var)
9250 		flush_variable_declaration(var->self);
9251 }
9252 
9253 string CompilerGLSL::variable_decl_function_local(SPIRVariable &var)
9254 {
9255 	// These variables are always function local,
9256 	// so make sure we emit the variable without storage qualifiers.
9257 	// Some backends will inject custom variables locally in a function
9258 	// with a storage qualifier which is not function-local.
9259 	auto old_storage = var.storage;
9260 	var.storage = StorageClassFunction;
9261 	auto expr = variable_decl(var);
9262 	var.storage = old_storage;
9263 	return expr;
9264 }
9265 
9266 void CompilerGLSL::emit_variable_temporary_copies(const SPIRVariable &var)
9267 {
9268 	// Ensure that we declare phi-variable copies even if the original declaration isn't deferred
9269 	if (var.allocate_temporary_copy && !flushed_phi_variables.count(var.self))
9270 	{
9271 		auto &type = get<SPIRType>(var.basetype);
9272 		auto &flags = get_decoration_bitset(var.self);
9273 		statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, join("_", var.self, "_copy")), ";");
9274 		flushed_phi_variables.insert(var.self);
9275 	}
9276 }
9277 
9278 void CompilerGLSL::flush_variable_declaration(uint32_t id)
9279 {
9280 	// Ensure that we declare phi-variable copies even if the original declaration isn't deferred
9281 	auto *var = maybe_get<SPIRVariable>(id);
9282 	if (var && var->deferred_declaration)
9283 	{
9284 		string initializer;
9285 		if (options.force_zero_initialized_variables &&
9286 		    (var->storage == StorageClassFunction || var->storage == StorageClassGeneric ||
9287 		     var->storage == StorageClassPrivate) &&
9288 		    !var->initializer && type_can_zero_initialize(get_variable_data_type(*var)))
9289 		{
9290 			initializer = join(" = ", to_zero_initialized_expression(get_variable_data_type_id(*var)));
9291 		}
9292 
9293 		statement(variable_decl_function_local(*var), initializer, ";");
9294 		var->deferred_declaration = false;
9295 	}
9296 	if (var)
9297 	{
9298 		emit_variable_temporary_copies(*var);
9299 	}
9300 }
9301 
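// Collapses a redundant identity swizzle that immediately follows another swizzle in a textual
// expression. For example (taken from the comment below), "foobar.wyx.xy" only re-selects the
// first two components, so it is rewritten in place to "foobar.wy". Returns false if the pattern
// does not apply.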
9302 bool CompilerGLSL::remove_duplicate_swizzle(string &op)
9303 {
9304 	auto pos = op.find_last_of('.');
9305 	if (pos == string::npos || pos == 0)
9306 		return false;
9307 
9308 	string final_swiz = op.substr(pos + 1, string::npos);
9309 
9310 	if (backend.swizzle_is_function)
9311 	{
9312 		if (final_swiz.size() < 2)
9313 			return false;
9314 
9315 		if (final_swiz.substr(final_swiz.size() - 2, string::npos) == "()")
9316 			final_swiz.erase(final_swiz.size() - 2, string::npos);
9317 		else
9318 			return false;
9319 	}
9320 
9321 	// Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar.
9322 	// If so, and previous swizzle is of same length,
9323 	// we can drop the final swizzle altogether.
9324 	for (uint32_t i = 0; i < final_swiz.size(); i++)
9325 	{
9326 		static const char expected[] = { 'x', 'y', 'z', 'w' };
9327 		if (i >= 4 || final_swiz[i] != expected[i])
9328 			return false;
9329 	}
9330 
9331 	auto prevpos = op.find_last_of('.', pos - 1);
9332 	if (prevpos == string::npos)
9333 		return false;
9334 
9335 	prevpos++;
9336 
9337 	// Make sure there are only swizzles here ...
9338 	for (auto i = prevpos; i < pos; i++)
9339 	{
9340 		if (op[i] < 'w' || op[i] > 'z')
9341 		{
9342 			// If swizzles are foo.xyz() like in C++ backend for example, check for that.
9343 			if (backend.swizzle_is_function && i + 2 == pos && op[i] == '(' && op[i + 1] == ')')
9344 				break;
9345 			return false;
9346 		}
9347 	}
9348 
9349 	// If original swizzle is large enough, just carve out the components we need.
9350 	// E.g. foobar.wyx.xy will turn into foobar.wy.
9351 	if (pos - prevpos >= final_swiz.size())
9352 	{
9353 		op.erase(prevpos + final_swiz.size(), string::npos);
9354 
9355 		// Add back the function call ...
9356 		if (backend.swizzle_is_function)
9357 			op += "()";
9358 	}
9359 	return true;
9360 }
9361 
9362 // Optimizes away vector swizzles where we have something like
9363 // vec3 foo;
9364 // foo.xyz <-- swizzle expression does nothing.
9365 // This is a very common pattern after OpCompositeConstruct.
9366 bool CompilerGLSL::remove_unity_swizzle(uint32_t base, string &op)
9367 {
9368 	auto pos = op.find_last_of('.');
9369 	if (pos == string::npos || pos == 0)
9370 		return false;
9371 
9372 	string final_swiz = op.substr(pos + 1, string::npos);
9373 
9374 	if (backend.swizzle_is_function)
9375 	{
9376 		if (final_swiz.size() < 2)
9377 			return false;
9378 
9379 		if (final_swiz.substr(final_swiz.size() - 2, string::npos) == "()")
9380 			final_swiz.erase(final_swiz.size() - 2, string::npos);
9381 		else
9382 			return false;
9383 	}
9384 
9385 	// Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar.
9386 	// If so, and previous swizzle is of same length,
9387 	// we can drop the final swizzle altogether.
9388 	for (uint32_t i = 0; i < final_swiz.size(); i++)
9389 	{
9390 		static const char expected[] = { 'x', 'y', 'z', 'w' };
9391 		if (i >= 4 || final_swiz[i] != expected[i])
9392 			return false;
9393 	}
9394 
9395 	auto &type = expression_type(base);
9396 
9397 	// Sanity checking ...
9398 	assert(type.columns == 1 && type.array.empty());
9399 
9400 	if (type.vecsize == final_swiz.size())
9401 		op.erase(pos, string::npos);
9402 	return true;
9403 }
9404 
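// Builds the argument list for a composite constructor, merging scalar extracts that share the
// same base expression back into a single swizzle. For illustration (hypothetical names):
// constructing a vec4 from foo.x, foo.y, foo.z and 1.0 produces "foo.xyz, 1.0", which the caller
// wraps into "vec4(foo.xyz, 1.0)" instead of spelling out each component and forcing "foo" into
// a temporary.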
9405 string CompilerGLSL::build_composite_combiner(uint32_t return_type, const uint32_t *elems, uint32_t length)
9406 {
9407 	ID base = 0;
9408 	string op;
9409 	string subop;
9410 
9411 	// Can only merge swizzles for vectors.
9412 	auto &type = get<SPIRType>(return_type);
9413 	bool can_apply_swizzle_opt = type.basetype != SPIRType::Struct && type.array.empty() && type.columns == 1;
9414 	bool swizzle_optimization = false;
9415 
9416 	for (uint32_t i = 0; i < length; i++)
9417 	{
9418 		auto *e = maybe_get<SPIRExpression>(elems[i]);
9419 
9420 		// If we're merging another scalar which belongs to the same base
9421 		// object, just merge the swizzles so that we avoid triggering more than one read of the base expression.
9422 		if (can_apply_swizzle_opt && e && e->base_expression && e->base_expression == base)
9423 		{
9424 			// Only supposed to be used for vector swizzle -> scalar.
9425 			assert(!e->expression.empty() && e->expression.front() == '.');
9426 			subop += e->expression.substr(1, string::npos);
9427 			swizzle_optimization = true;
9428 		}
9429 		else
9430 		{
9431 			// We'll likely end up with duplicated swizzles, e.g.
9432 			// foobar.xyz.xyz from patterns like
9433 			// OpVectorShuffle
9434 			// OpCompositeExtract x 3
9435 			// OpCompositeConstruct 3x + other scalar.
9436 			// Just modify op in-place.
9437 			if (swizzle_optimization)
9438 			{
9439 				if (backend.swizzle_is_function)
9440 					subop += "()";
9441 
9442 				// Don't attempt to remove unity swizzling if we managed to remove duplicate swizzles.
9443 				// The base "foo" might be vec4, while foo.xyz is vec3 (OpVectorShuffle) and looks like a vec3 due to the .xyz tacked on.
9444 				// We only want to remove the swizzles if we're certain that the resulting base will be the same vecsize.
9445 				// Essentially, we can only remove one set of swizzles, since that's what we have control over ...
9446 				// Case 1:
9447 				//  foo.yxz.xyz: Duplicate swizzle kicks in, giving foo.yxz, we are done.
9448 				//               foo.yxz was the result of OpVectorShuffle and we don't know the type of foo.
9449 				// Case 2:
9450 				//  foo.xyz: Duplicate swizzle won't kick in.
9451 				//           If foo is vec3, we can remove xyz, giving just foo.
9452 				if (!remove_duplicate_swizzle(subop))
9453 					remove_unity_swizzle(base, subop);
9454 
9455 				// Strips away redundant parens if we created them during component extraction.
9456 				strip_enclosed_expression(subop);
9457 				swizzle_optimization = false;
9458 				op += subop;
9459 			}
9460 			else
9461 				op += subop;
9462 
9463 			if (i)
9464 				op += ", ";
9465 
9466 			bool uses_buffer_offset =
9467 			    type.basetype == SPIRType::Struct && has_member_decoration(type.self, i, DecorationOffset);
9468 			subop = to_composite_constructor_expression(elems[i], uses_buffer_offset);
9469 		}
9470 
9471 		base = e ? e->base_expression : ID(0);
9472 	}
9473 
9474 	if (swizzle_optimization)
9475 	{
9476 		if (backend.swizzle_is_function)
9477 			subop += "()";
9478 
9479 		if (!remove_duplicate_swizzle(subop))
9480 			remove_unity_swizzle(base, subop);
9481 		// Strips away redundant parens if we created them during component extraction.
9482 		strip_enclosed_expression(subop);
9483 	}
9484 
9485 	op += subop;
9486 	return op;
9487 }
9488 
9489 bool CompilerGLSL::skip_argument(uint32_t id) const
9490 {
9491 	if (!combined_image_samplers.empty() || !options.vulkan_semantics)
9492 	{
9493 		auto &type = expression_type(id);
9494 		if (type.basetype == SPIRType::Sampler || (type.basetype == SPIRType::Image && type.image.sampled == 1))
9495 			return true;
9496 	}
9497 	return false;
9498 }
9499 
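// Purely textual peephole: if the RHS begins with the LHS followed by a binary operator, emit a
// compound assignment instead of a plain store. Roughly, "a = a + b;" becomes "a += b;" and
// "a = a + 1;" becomes "a++;". Matrices and the logical operators && and || are deliberately
// left alone.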
9500 bool CompilerGLSL::optimize_read_modify_write(const SPIRType &type, const string &lhs, const string &rhs)
9501 {
9502 	// Do this with strings because we have a very clear pattern we can check for and it avoids
9503 	// adding lots of special cases to the code emission.
9504 	if (rhs.size() < lhs.size() + 3)
9505 		return false;
9506 
9507 	// Do not optimize matrices. They are a bit awkward to reason about in general
9508 	// (in which order do the operations happen?), and it does not work on MSL anyway.
9509 	if (type.vecsize > 1 && type.columns > 1)
9510 		return false;
9511 
9512 	auto index = rhs.find(lhs);
9513 	if (index != 0)
9514 		return false;
9515 
9516 	// TODO: Shift operators, but it's not important for now.
9517 	auto op = rhs.find_first_of("+-/*%|&^", lhs.size() + 1);
9518 	if (op != lhs.size() + 1)
9519 		return false;
9520 
9521 	// Check that the op is followed by space. This excludes && and ||.
9522 	if (rhs[op + 1] != ' ')
9523 		return false;
9524 
9525 	char bop = rhs[op];
9526 	auto expr = rhs.substr(lhs.size() + 3);
9527 	// Try to find increments and decrements. Makes it look neater as += 1, -= 1 is fairly rare to see in real code.
9528 	// Find some common patterns which are equivalent.
9529 	if ((bop == '+' || bop == '-') && (expr == "1" || expr == "uint(1)" || expr == "1u" || expr == "int(1u)"))
9530 		statement(lhs, bop, bop, ";");
9531 	else
9532 		statement(lhs, " ", bop, "= ", expr, ";");
9533 	return true;
9534 }
9535 
9536 void CompilerGLSL::register_control_dependent_expression(uint32_t expr)
9537 {
9538 	if (forwarded_temporaries.find(expr) == end(forwarded_temporaries))
9539 		return;
9540 
9541 	assert(current_emitting_block);
9542 	current_emitting_block->invalidate_expressions.push_back(expr);
9543 }
9544 
9545 void CompilerGLSL::emit_block_instructions(SPIRBlock &block)
9546 {
9547 	current_emitting_block = &block;
9548 	for (auto &op : block.ops)
9549 		emit_instruction(op);
9550 	current_emitting_block = nullptr;
9551 }
9552 
9553 void CompilerGLSL::disallow_forwarding_in_expression_chain(const SPIRExpression &expr)
9554 {
9555 	// Allow trivially forwarded expressions like OpLoad or trivial shuffles,
9556 	// these will be marked as having suppressed usage tracking.
9557 	// Our only concern is to make sure arithmetic operations are done in similar ways.
9558 	if (expression_is_forwarded(expr.self) && !expression_suppresses_usage_tracking(expr.self) &&
9559 	    forced_invariant_temporaries.count(expr.self) == 0)
9560 	{
9561 		forced_temporaries.insert(expr.self);
9562 		forced_invariant_temporaries.insert(expr.self);
9563 		force_recompile();
9564 
9565 		for (auto &dependent : expr.expression_dependencies)
9566 			disallow_forwarding_in_expression_chain(get<SPIRExpression>(dependent));
9567 	}
9568 }
9569 
9570 void CompilerGLSL::handle_store_to_invariant_variable(uint32_t store_id, uint32_t value_id)
9571 {
9572 	// Variables or access chains marked invariant are complicated. We will need to make sure the code-gen leading up to
9573 	// this variable is consistent. The failure case for SPIRV-Cross is when an expression is forced to a temporary
9574 	// in one translation unit, but not another, e.g. due to multiple use of an expression.
9575 	// This causes variance despite the output variable being marked invariant, so the solution here is to force all dependent
9576 	// expressions to be temporaries.
9577 	// It is uncertain if this is enough to support invariant in all possible cases, but it should be good enough
9578 	// for all reasonable uses of invariant.
9579 	if (!has_decoration(store_id, DecorationInvariant))
9580 		return;
9581 
9582 	auto *expr = maybe_get<SPIRExpression>(value_id);
9583 	if (!expr)
9584 		return;
9585 
9586 	disallow_forwarding_in_expression_chain(*expr);
9587 }
9588 
9589 void CompilerGLSL::emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression)
9590 {
9591 	auto rhs = to_pointer_expression(rhs_expression);
9592 
9593 	// Statements to OpStore may be empty if it is a struct with zero members. Just forward the store to /dev/null.
9594 	if (!rhs.empty())
9595 	{
9596 		handle_store_to_invariant_variable(lhs_expression, rhs_expression);
9597 
9598 		auto lhs = to_dereferenced_expression(lhs_expression);
9599 
9600 		// We might need to cast in order to store to a builtin.
9601 		cast_to_builtin_store(lhs_expression, rhs, expression_type(rhs_expression));
9602 
9603 		// Tries to optimize assignments like "<lhs> = <lhs> op expr".
9604 		// While this is purely cosmetic, this is important for legacy ESSL where loop
9605 		// variable increments must be in either i++ or i += const-expr.
9606 		// Without this, we end up with i = i + 1, which is correct GLSL, but not correct GLES 2.0.
9607 		if (!optimize_read_modify_write(expression_type(rhs_expression), lhs, rhs))
9608 			statement(lhs, " = ", rhs, ";");
9609 		register_write(lhs_expression);
9610 	}
9611 }
9612 
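// Determines the integer bit-width an instruction operates on so implicit bitcasts use matching
// signed/unsigned types. Comparisons and conversions take the width from their first value operand
// (their result type would be bool or float); other opcodes fall back to the result type, or 32.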
9613 uint32_t CompilerGLSL::get_integer_width_for_instruction(const Instruction &instr) const
9614 {
9615 	if (instr.length < 3)
9616 		return 32;
9617 
9618 	auto *ops = stream(instr);
9619 
9620 	switch (instr.op)
9621 	{
9622 	case OpSConvert:
9623 	case OpConvertSToF:
9624 	case OpUConvert:
9625 	case OpConvertUToF:
9626 	case OpIEqual:
9627 	case OpINotEqual:
9628 	case OpSLessThan:
9629 	case OpSLessThanEqual:
9630 	case OpSGreaterThan:
9631 	case OpSGreaterThanEqual:
9632 	case OpULessThan:
9633 	case OpULessThanEqual:
9634 	case OpUGreaterThan:
9635 	case OpUGreaterThanEqual:
9636 		return expression_type(ops[2]).width;
9637 
9638 	default:
9639 	{
9640 		// We can look at result type which is more robust.
9641 		auto *type = maybe_get<SPIRType>(ops[0]);
9642 		if (type && type_is_integral(*type))
9643 			return type->width;
9644 		else
9645 			return 32;
9646 	}
9647 	}
9648 }
9649 
9650 uint32_t CompilerGLSL::get_integer_width_for_glsl_instruction(GLSLstd450 op, const uint32_t *ops, uint32_t length) const
9651 {
9652 	if (length < 1)
9653 		return 32;
9654 
9655 	switch (op)
9656 	{
9657 	case GLSLstd450SAbs:
9658 	case GLSLstd450SSign:
9659 	case GLSLstd450UMin:
9660 	case GLSLstd450SMin:
9661 	case GLSLstd450UMax:
9662 	case GLSLstd450SMax:
9663 	case GLSLstd450UClamp:
9664 	case GLSLstd450SClamp:
9665 	case GLSLstd450FindSMsb:
9666 	case GLSLstd450FindUMsb:
9667 		return expression_type(ops[0]).width;
9668 
9669 	default:
9670 	{
9671 		// We don't need to care about other opcodes, just return 32.
9672 		return 32;
9673 	}
9674 	}
9675 }
9676 
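// Main per-opcode code generation entry point. The GLSL_*OP helper macros below simply forward
// to the generic unary/binary/trinary emitters with the opcode's textual name; for instance,
// GLSL_BFOP(max) would emit something like "max(a, b)" for the given operands, subject to the
// usual expression-forwarding rules.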
9677 void CompilerGLSL::emit_instruction(const Instruction &instruction)
9678 {
9679 	auto ops = stream(instruction);
9680 	auto opcode = static_cast<Op>(instruction.op);
9681 	uint32_t length = instruction.length;
9682 
9683 #define GLSL_BOP(op) emit_binary_op(ops[0], ops[1], ops[2], ops[3], #op)
9684 #define GLSL_BOP_CAST(op, type) \
9685 	emit_binary_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode))
9686 #define GLSL_UOP(op) emit_unary_op(ops[0], ops[1], ops[2], #op)
9687 #define GLSL_QFOP(op) emit_quaternary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], #op)
9688 #define GLSL_TFOP(op) emit_trinary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], #op)
9689 #define GLSL_BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op)
9690 #define GLSL_BFOP_CAST(op, type) \
9691 	emit_binary_func_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode))
9693 #define GLSL_UFOP(op) emit_unary_func_op(ops[0], ops[1], ops[2], #op)
9694 
9695 	// If we need to do implicit bitcasts, make sure we do it with the correct type.
9696 	uint32_t integer_width = get_integer_width_for_instruction(instruction);
9697 	auto int_type = to_signed_basetype(integer_width);
9698 	auto uint_type = to_unsigned_basetype(integer_width);
9699 
9700 	switch (opcode)
9701 	{
9702 	// Dealing with memory
9703 	case OpLoad:
9704 	{
9705 		uint32_t result_type = ops[0];
9706 		uint32_t id = ops[1];
9707 		uint32_t ptr = ops[2];
9708 
9709 		flush_variable_declaration(ptr);
9710 
9711 		// If we're loading from memory that cannot be changed by the shader,
9712 		// just forward the expression directly to avoid needless temporaries.
9713 		// If an expression is mutable and forwardable, we speculate that it is immutable.
9714 		bool forward = should_forward(ptr) && forced_temporaries.find(id) == end(forced_temporaries);
9715 
9716 		// If loading a non-native row-major matrix, mark the expression as need_transpose.
9717 		bool need_transpose = false;
9718 		bool old_need_transpose = false;
9719 
9720 		auto *ptr_expression = maybe_get<SPIRExpression>(ptr);
9721 
9722 		if (forward)
9723 		{
9724 			// If we're forwarding the load, we're also going to forward transpose state, so don't transpose while
9725 			// taking the expression.
9726 			if (ptr_expression && ptr_expression->need_transpose)
9727 			{
9728 				old_need_transpose = true;
9729 				ptr_expression->need_transpose = false;
9730 				need_transpose = true;
9731 			}
9732 			else if (is_non_native_row_major_matrix(ptr))
9733 				need_transpose = true;
9734 		}
9735 
9736 		// If we are forwarding this load,
9737 		// don't register the read to access chain here, defer that to when we actually use the expression,
9738 		// using the add_implied_read_expression mechanism.
9739 		string expr;
9740 
9741 		bool is_packed = has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypePacked);
9742 		bool is_remapped = has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID);
9743 		if (forward || (!is_packed && !is_remapped))
9744 		{
9745 			// For the simple case, we do not need to deal with repacking.
9746 			expr = to_dereferenced_expression(ptr, false);
9747 		}
9748 		else
9749 		{
9750 			// If we are not forwarding the expression, we need to unpack and resolve any physical type remapping here before
9751 			// storing the expression to a temporary.
9752 			expr = to_unpacked_expression(ptr);
9753 		}
9754 
9755 		auto &type = get<SPIRType>(result_type);
9756 		auto &expr_type = expression_type(ptr);
9757 
9758 		// If the expression has more vector components than the result type, insert
9759 		// a swizzle. This shouldn't happen normally on valid SPIR-V, but it might
9760 		// happen with e.g. the MSL backend replacing the type of an input variable.
9761 		if (expr_type.vecsize > type.vecsize)
9762 			expr = enclose_expression(expr + vector_swizzle(type.vecsize, 0));
9763 
9764 		// We might need to cast in order to load from a builtin.
9765 		cast_from_builtin_load(ptr, expr, type);
9766 
9767 		// We might be trying to load a gl_Position[N], where we should be
9768 		// doing float4[](gl_in[i].gl_Position, ...) instead.
9769 		// Similar workarounds are required for input arrays in tessellation.
9770 		unroll_array_from_complex_load(id, ptr, expr);
9771 
9772 		// Shouldn't need to check for ID, but current glslang codegen requires it in some cases
9773 		// when loading Image/Sampler descriptors. It does not hurt to check ID as well.
9774 		if (has_decoration(id, DecorationNonUniformEXT) || has_decoration(ptr, DecorationNonUniformEXT))
9775 		{
9776 			propagate_nonuniform_qualifier(ptr);
9777 			convert_non_uniform_expression(type, expr);
9778 		}
9779 
9780 		if (forward && ptr_expression)
9781 			ptr_expression->need_transpose = old_need_transpose;
9782 
9783 		bool flattened = ptr_expression && flattened_buffer_blocks.count(ptr_expression->loaded_from) != 0;
9784 
9785 		if (backend.needs_row_major_load_workaround && !is_non_native_row_major_matrix(ptr) && !flattened)
9786 			rewrite_load_for_wrapped_row_major(expr, result_type, ptr);
9787 
9788 		// By default, suppress usage tracking since using same expression multiple times does not imply any extra work.
9789 		// However, if we try to load a complex, composite object from a flattened buffer,
9790 		// we should avoid emitting the same code over and over and lower the result to a temporary.
9791 		bool usage_tracking = flattened && (type.basetype == SPIRType::Struct || (type.columns > 1));
9792 
9793 		SPIRExpression *e = nullptr;
9794 		if (!forward && expression_is_non_value_type_array(ptr))
9795 		{
9796 			// Complicated load case where we need to make a copy of ptr, but we cannot, because
9797 			// it is an array, and our backend does not support arrays as value types.
9798 			// Emit the temporary, and copy it explicitly.
9799 			e = &emit_uninitialized_temporary_expression(result_type, id);
9800 			emit_array_copy(to_expression(id), ptr, StorageClassFunction, get_expression_effective_storage_class(ptr));
9801 		}
9802 		else
9803 			e = &emit_op(result_type, id, expr, forward, !usage_tracking);
9804 
9805 		e->need_transpose = need_transpose;
9806 		register_read(id, ptr, forward);
9807 
9808 		if (forward)
9809 		{
9810 			// Pass through whether the result is of a packed type and the physical type ID.
9811 			if (has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypePacked))
9812 				set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
9813 			if (has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID))
9814 			{
9815 				set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID,
9816 				                        get_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID));
9817 			}
9818 		}
9819 		else
9820 		{
9821 			// This might have been set on an earlier compilation iteration, force it to be unset.
9822 			unset_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
9823 			unset_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID);
9824 		}
9825 
9826 		inherit_expression_dependencies(id, ptr);
9827 		if (forward)
9828 			add_implied_read_expression(*e, ptr);
9829 		break;
9830 	}
9831 
9832 	case OpInBoundsAccessChain:
9833 	case OpAccessChain:
9834 	case OpPtrAccessChain:
9835 	{
9836 		auto *var = maybe_get<SPIRVariable>(ops[2]);
9837 		if (var)
9838 			flush_variable_declaration(var->self);
9839 
9840 		// If the base is immutable, the access chain pointer must also be.
9841 		// If an expression is mutable and forwardable, we speculate that it is immutable.
9842 		AccessChainMeta meta;
9843 		bool ptr_chain = opcode == OpPtrAccessChain;
9844 		auto e = access_chain(ops[2], &ops[3], length - 3, get<SPIRType>(ops[0]), &meta, ptr_chain);
9845 
9846 		auto &expr = set<SPIRExpression>(ops[1], move(e), ops[0], should_forward(ops[2]));
9847 
9848 		auto *backing_variable = maybe_get_backing_variable(ops[2]);
9849 		expr.loaded_from = backing_variable ? backing_variable->self : ID(ops[2]);
9850 		expr.need_transpose = meta.need_transpose;
9851 		expr.access_chain = true;
9852 
9853 		// Mark the result as being packed. Some platforms handle packed vectors differently from non-packed ones.
9854 		if (meta.storage_is_packed)
9855 			set_extended_decoration(ops[1], SPIRVCrossDecorationPhysicalTypePacked);
9856 		if (meta.storage_physical_type != 0)
9857 			set_extended_decoration(ops[1], SPIRVCrossDecorationPhysicalTypeID, meta.storage_physical_type);
9858 		if (meta.storage_is_invariant)
9859 			set_decoration(ops[1], DecorationInvariant);
9860 		if (meta.flattened_struct)
9861 			flattened_structs[ops[1]] = true;
9862 
9863 		// If we have some expression dependencies in our access chain, this access chain is technically a forwarded
9864 		// temporary which could be subject to invalidation.
9865 		// Need to assume we're forwarded while calling inherit_expression_dependencies.
9866 		forwarded_temporaries.insert(ops[1]);
9867 		// The access chain itself is never forced to a temporary, but its dependencies might.
9868 		suppressed_usage_tracking.insert(ops[1]);
9869 
9870 		for (uint32_t i = 2; i < length; i++)
9871 		{
9872 			inherit_expression_dependencies(ops[1], ops[i]);
9873 			add_implied_read_expression(expr, ops[i]);
9874 		}
9875 
9876 		// If it turns out we have no dependencies, i.e., all indices in the access chain are immutable temporaries,
9877 		// we're not forwarded after all.
9878 		if (expr.expression_dependencies.empty())
9879 			forwarded_temporaries.erase(ops[1]);
9880 
9881 		if (has_decoration(ops[1], DecorationNonUniformEXT))
9882 			propagate_nonuniform_qualifier(ops[1]);
9883 
9884 		break;
9885 	}
9886 
9887 	case OpStore:
9888 	{
9889 		auto *var = maybe_get<SPIRVariable>(ops[0]);
9890 
9891 		if (var && var->statically_assigned)
9892 			var->static_expression = ops[1];
9893 		else if (var && var->loop_variable && !var->loop_variable_enable)
9894 			var->static_expression = ops[1];
9895 		else if (var && var->remapped_variable && var->static_expression)
9896 		{
9897 			// Skip the write.
9898 		}
9899 		else if (flattened_structs.count(ops[0]))
9900 		{
9901 			store_flattened_struct(ops[0], ops[1]);
9902 			register_write(ops[0]);
9903 		}
9904 		else
9905 		{
9906 			emit_store_statement(ops[0], ops[1]);
9907 		}
9908 
9909 		// Storing a pointer results in a variable pointer, so we must conservatively assume
9910 		// we can write through it.
9911 		if (expression_type(ops[1]).pointer)
9912 			register_write(ops[1]);
9913 		break;
9914 	}
9915 
9916 	case OpArrayLength:
9917 	{
9918 		uint32_t result_type = ops[0];
9919 		uint32_t id = ops[1];
9920 		auto e = access_chain_internal(ops[2], &ops[3], length - 3, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr);
9921 		set<SPIRExpression>(id, join(type_to_glsl(get<SPIRType>(result_type)), "(", e, ".length())"), result_type,
9922 		                    true);
9923 		break;
9924 	}
9925 
9926 	// Function calls
9927 	case OpFunctionCall:
9928 	{
9929 		uint32_t result_type = ops[0];
9930 		uint32_t id = ops[1];
9931 		uint32_t func = ops[2];
9932 		const auto *arg = &ops[3];
9933 		length -= 3;
9934 
9935 		auto &callee = get<SPIRFunction>(func);
9936 		auto &return_type = get<SPIRType>(callee.return_type);
9937 		bool pure = function_is_pure(callee);
9938 
9939 		bool callee_has_out_variables = false;
9940 		bool emit_return_value_as_argument = false;
9941 
9942 		// Invalidate out variables passed to functions since they can be OpStore'd to.
9943 		for (uint32_t i = 0; i < length; i++)
9944 		{
9945 			if (callee.arguments[i].write_count)
9946 			{
9947 				register_call_out_argument(arg[i]);
9948 				callee_has_out_variables = true;
9949 			}
9950 
9951 			flush_variable_declaration(arg[i]);
9952 		}
9953 
9954 		if (!return_type.array.empty() && !backend.can_return_array)
9955 		{
9956 			callee_has_out_variables = true;
9957 			emit_return_value_as_argument = true;
9958 		}
9959 
9960 		if (!pure)
9961 			register_impure_function_call();
9962 
9963 		string funexpr;
9964 		SmallVector<string> arglist;
9965 		funexpr += to_name(func) + "(";
9966 
9967 		if (emit_return_value_as_argument)
9968 		{
9969 			statement(type_to_glsl(return_type), " ", to_name(id), type_to_array_glsl(return_type), ";");
9970 			arglist.push_back(to_name(id));
9971 		}
9972 
9973 		for (uint32_t i = 0; i < length; i++)
9974 		{
9975 			// Do not pass in separate images or samplers if we're remapping
9976 			// to combined image samplers.
9977 			if (skip_argument(arg[i]))
9978 				continue;
9979 
9980 			arglist.push_back(to_func_call_arg(callee.arguments[i], arg[i]));
9981 		}
9982 
9983 		for (auto &combined : callee.combined_parameters)
9984 		{
9985 			auto image_id = combined.global_image ? combined.image_id : VariableID(arg[combined.image_id]);
9986 			auto sampler_id = combined.global_sampler ? combined.sampler_id : VariableID(arg[combined.sampler_id]);
9987 			arglist.push_back(to_combined_image_sampler(image_id, sampler_id));
9988 		}
9989 
9990 		append_global_func_args(callee, length, arglist);
9991 
9992 		funexpr += merge(arglist);
9993 		funexpr += ")";
9994 
9995 		// Check for function call constraints.
9996 		check_function_call_constraints(arg, length);
9997 
9998 		if (return_type.basetype != SPIRType::Void)
9999 		{
10000 			// If the function actually writes to an out variable,
10001 			// take the conservative route and do not forward.
10002 			// The problem is that we might not read the function
10003 			// result (and emit the function) before an out variable
10004 			// is read (a common case when the return value is ignored!).
10005 			// To avoid having to start tracking invalid variables,
10006 			// just avoid the forwarding problem altogether.
10007 			bool forward = args_will_forward(id, arg, length, pure) && !callee_has_out_variables && pure &&
10008 			               (forced_temporaries.find(id) == end(forced_temporaries));
10009 
10010 			if (emit_return_value_as_argument)
10011 			{
10012 				statement(funexpr, ";");
10013 				set<SPIRExpression>(id, to_name(id), result_type, true);
10014 			}
10015 			else
10016 				emit_op(result_type, id, funexpr, forward);
10017 
10018 			// Function calls are implicit loads from all variables in question.
10019 			// Set dependencies for them.
10020 			for (uint32_t i = 0; i < length; i++)
10021 				register_read(id, arg[i], forward);
10022 
10023 			// If we're going to forward the temporary result,
10024 			// put dependencies on every variable that must not change.
10025 			if (forward)
10026 				register_global_read_dependencies(callee, id);
10027 		}
10028 		else
10029 			statement(funexpr, ";");
10030 
10031 		break;
10032 	}
10033 
10034 	// Composite munging
10035 	case OpCompositeConstruct:
10036 	{
10037 		uint32_t result_type = ops[0];
10038 		uint32_t id = ops[1];
10039 		const auto *const elems = &ops[2];
10040 		length -= 2;
10041 
10042 		bool forward = true;
10043 		for (uint32_t i = 0; i < length; i++)
10044 			forward = forward && should_forward(elems[i]);
10045 
10046 		auto &out_type = get<SPIRType>(result_type);
10047 		auto *in_type = length > 0 ? &expression_type(elems[0]) : nullptr;
10048 
10049 		// Only splat if we have vector constructors.
10050 		// Arrays and structs must be initialized properly in full.
10051 		bool composite = !out_type.array.empty() || out_type.basetype == SPIRType::Struct;
10052 
10053 		bool splat = false;
10054 		bool swizzle_splat = false;
10055 
10056 		if (in_type)
10057 		{
10058 			splat = in_type->vecsize == 1 && in_type->columns == 1 && !composite && backend.use_constructor_splatting;
10059 			swizzle_splat = in_type->vecsize == 1 && in_type->columns == 1 && backend.can_swizzle_scalar;
10060 
10061 			if (ir.ids[elems[0]].get_type() == TypeConstant && !type_is_floating_point(*in_type))
10062 			{
10063 				// As a special case, literal integers cannot be swizzle-splatted.
10064 				swizzle_splat = false;
10065 			}
10066 		}
10067 
10068 		if (splat || swizzle_splat)
10069 		{
10070 			uint32_t input = elems[0];
10071 			for (uint32_t i = 0; i < length; i++)
10072 			{
10073 				if (input != elems[i])
10074 				{
10075 					splat = false;
10076 					swizzle_splat = false;
10077 				}
10078 			}
10079 		}
10080 
10081 		if (out_type.basetype == SPIRType::Struct && !backend.can_declare_struct_inline)
10082 			forward = false;
10083 		if (!out_type.array.empty() && !backend.can_declare_arrays_inline)
10084 			forward = false;
10085 		if (type_is_empty(out_type) && !backend.supports_empty_struct)
10086 			forward = false;
10087 
10088 		string constructor_op;
10089 		if (backend.use_initializer_list && composite)
10090 		{
10091 			bool needs_trailing_bracket = false;
10092 			// Only use this path if we are building composites.
10093 			// This path cannot be used for arithmetic.
10094 			if (backend.use_typed_initializer_list && out_type.basetype == SPIRType::Struct && out_type.array.empty())
10095 				constructor_op += type_to_glsl_constructor(get<SPIRType>(result_type));
10096 			else if (backend.use_typed_initializer_list && backend.array_is_value_type && !out_type.array.empty())
10097 			{
10098 				// MSL path. Array constructor is baked into type here, do not use _constructor variant.
10099 				constructor_op += type_to_glsl_constructor(get<SPIRType>(result_type)) + "(";
10100 				needs_trailing_bracket = true;
10101 			}
10102 			constructor_op += "{ ";
10103 
10104 			if (type_is_empty(out_type) && !backend.supports_empty_struct)
10105 				constructor_op += "0";
10106 			else if (splat)
10107 				constructor_op += to_unpacked_expression(elems[0]);
10108 			else
10109 				constructor_op += build_composite_combiner(result_type, elems, length);
10110 			constructor_op += " }";
10111 			if (needs_trailing_bracket)
10112 				constructor_op += ")";
10113 		}
10114 		else if (swizzle_splat && !composite)
10115 		{
10116 			constructor_op = remap_swizzle(get<SPIRType>(result_type), 1, to_unpacked_expression(elems[0]));
10117 		}
10118 		else
10119 		{
10120 			constructor_op = type_to_glsl_constructor(get<SPIRType>(result_type)) + "(";
10121 			if (type_is_empty(out_type) && !backend.supports_empty_struct)
10122 				constructor_op += "0";
10123 			else if (splat)
10124 				constructor_op += to_unpacked_expression(elems[0]);
10125 			else
10126 				constructor_op += build_composite_combiner(result_type, elems, length);
10127 			constructor_op += ")";
10128 		}
10129 
10130 		if (!constructor_op.empty())
10131 		{
10132 			emit_op(result_type, id, constructor_op, forward);
10133 			for (uint32_t i = 0; i < length; i++)
10134 				inherit_expression_dependencies(id, elems[i]);
10135 		}
10136 		break;
10137 	}
10138 
10139 	case OpVectorInsertDynamic:
10140 	{
10141 		uint32_t result_type = ops[0];
10142 		uint32_t id = ops[1];
10143 		uint32_t vec = ops[2];
10144 		uint32_t comp = ops[3];
10145 		uint32_t index = ops[4];
10146 
10147 		flush_variable_declaration(vec);
10148 
10149 		// Make a copy, then use access chain to store the variable.
10150 		statement(declare_temporary(result_type, id), to_expression(vec), ";");
10151 		set<SPIRExpression>(id, to_name(id), result_type, true);
10152 		auto chain = access_chain_internal(id, &index, 1, 0, nullptr);
10153 		statement(chain, " = ", to_unpacked_expression(comp), ";");
10154 		break;
10155 	}
10156 
10157 	case OpVectorExtractDynamic:
10158 	{
10159 		uint32_t result_type = ops[0];
10160 		uint32_t id = ops[1];
10161 
10162 		auto expr = access_chain_internal(ops[2], &ops[3], 1, 0, nullptr);
10163 		emit_op(result_type, id, expr, should_forward(ops[2]));
10164 		inherit_expression_dependencies(id, ops[2]);
10165 		inherit_expression_dependencies(id, ops[3]);
10166 		break;
10167 	}
10168 
10169 	case OpCompositeExtract:
10170 	{
10171 		uint32_t result_type = ops[0];
10172 		uint32_t id = ops[1];
10173 		length -= 3;
10174 
10175 		auto &type = get<SPIRType>(result_type);
10176 
10177 		// We can only split the expression here if our expression is forwarded as a temporary.
10178 		bool allow_base_expression = forced_temporaries.find(id) == end(forced_temporaries);
10179 
10180 		// Do not allow base expression for struct members. We risk doing "swizzle" optimizations in this case.
10181 		auto &composite_type = expression_type(ops[2]);
10182 		if (composite_type.basetype == SPIRType::Struct || !composite_type.array.empty())
10183 			allow_base_expression = false;
10184 
10185 		// Packed expressions or physical ID mapped expressions cannot be split up.
10186 		if (has_extended_decoration(ops[2], SPIRVCrossDecorationPhysicalTypePacked) ||
10187 		    has_extended_decoration(ops[2], SPIRVCrossDecorationPhysicalTypeID))
10188 			allow_base_expression = false;
10189 
10190 		// Cannot use base expression for row-major matrix row-extraction since we need to interleave access pattern
10191 		// into the base expression.
10192 		if (is_non_native_row_major_matrix(ops[2]))
10193 			allow_base_expression = false;
10194 
10195 		AccessChainMeta meta;
10196 		SPIRExpression *e = nullptr;
10197 
10198 		// Only apply this optimization if result is scalar.
10199 		if (allow_base_expression && should_forward(ops[2]) && type.vecsize == 1 && type.columns == 1 && length == 1)
10200 		{
10201 			// We want to split the access chain from the base.
10202 			// This is so we can later combine different CompositeExtract results
10203 			// with CompositeConstruct without emitting code like
10204 			//
10205 			// vec3 temp = texture(...).xyz
10206 			// vec4(temp.x, temp.y, temp.z, 1.0).
10207 			//
10208 			// when we actually wanted to emit this
10209 			// vec4(texture(...).xyz, 1.0).
10210 			//
10211 			// Including the base will prevent this and would trigger multiple reads
10212 			// from expression causing it to be forced to an actual temporary in GLSL.
10213 			auto expr = access_chain_internal(ops[2], &ops[3], length,
10214 			                                  ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_CHAIN_ONLY_BIT, &meta);
10215 			e = &emit_op(result_type, id, expr, true, should_suppress_usage_tracking(ops[2]));
10216 			inherit_expression_dependencies(id, ops[2]);
10217 			e->base_expression = ops[2];
10218 		}
10219 		else
10220 		{
10221 			auto expr = access_chain_internal(ops[2], &ops[3], length, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &meta);
10222 			e = &emit_op(result_type, id, expr, should_forward(ops[2]), should_suppress_usage_tracking(ops[2]));
10223 			inherit_expression_dependencies(id, ops[2]);
10224 		}
10225 
10226 		// Pass through some meta information to the loaded expression.
10227 		// We can still end up loading a buffer type to a variable, then CompositeExtract from it
10228 		// instead of loading everything through an access chain.
10229 		e->need_transpose = meta.need_transpose;
10230 		if (meta.storage_is_packed)
10231 			set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
10232 		if (meta.storage_physical_type != 0)
10233 			set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID, meta.storage_physical_type);
10234 		if (meta.storage_is_invariant)
10235 			set_decoration(id, DecorationInvariant);
10236 
10237 		break;
10238 	}
10239 
10240 	case OpCompositeInsert:
10241 	{
10242 		uint32_t result_type = ops[0];
10243 		uint32_t id = ops[1];
10244 		uint32_t obj = ops[2];
10245 		uint32_t composite = ops[3];
10246 		const auto *elems = &ops[4];
10247 		length -= 4;
10248 
10249 		flush_variable_declaration(composite);
10250 
10251 		// Make a copy, then use access chain to store the variable.
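		// For example, inserting into component 2 of a vector roughly lowers to:
		//   T _id = composite; _id.z = obj;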
10252 		statement(declare_temporary(result_type, id), to_expression(composite), ";");
10253 		set<SPIRExpression>(id, to_name(id), result_type, true);
10254 		auto chain = access_chain_internal(id, elems, length, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr);
10255 		statement(chain, " = ", to_unpacked_expression(obj), ";");
10256 
10257 		break;
10258 	}
10259 
10260 	case OpCopyMemory:
10261 	{
10262 		uint32_t lhs = ops[0];
10263 		uint32_t rhs = ops[1];
10264 		if (lhs != rhs)
10265 		{
10266 			flush_variable_declaration(lhs);
10267 			flush_variable_declaration(rhs);
10268 			statement(to_expression(lhs), " = ", to_unpacked_expression(rhs), ";");
10269 			register_write(lhs);
10270 		}
10271 		break;
10272 	}
10273 
10274 	case OpCopyLogical:
10275 	{
		// This is used for copying objects of different types, arrays and structs.
		// We need to unroll the copy element by element.
10278 		uint32_t result_type = ops[0];
10279 		uint32_t id = ops[1];
10280 		uint32_t rhs = ops[2];
10281 
10282 		emit_uninitialized_temporary_expression(result_type, id);
10283 		emit_copy_logical_type(id, result_type, rhs, expression_type_id(rhs), {});
10284 		break;
10285 	}
10286 
10287 	case OpCopyObject:
10288 	{
10289 		uint32_t result_type = ops[0];
10290 		uint32_t id = ops[1];
10291 		uint32_t rhs = ops[2];
10292 		bool pointer = get<SPIRType>(result_type).pointer;
10293 
10294 		auto *chain = maybe_get<SPIRAccessChain>(rhs);
10295 		auto *imgsamp = maybe_get<SPIRCombinedImageSampler>(rhs);
10296 		if (chain)
10297 		{
10298 			// Cannot lower to a SPIRExpression, just copy the object.
10299 			auto &e = set<SPIRAccessChain>(id, *chain);
10300 			e.self = id;
10301 		}
10302 		else if (imgsamp)
10303 		{
10304 			// Cannot lower to a SPIRExpression, just copy the object.
10305 			// GLSL does not currently use this type and will never get here, but MSL does.
10306 			// Handled here instead of CompilerMSL for better integration and general handling,
10307 			// and in case GLSL or other subclasses require it in the future.
10308 			auto &e = set<SPIRCombinedImageSampler>(id, *imgsamp);
10309 			e.self = id;
10310 		}
10311 		else if (expression_is_lvalue(rhs) && !pointer)
10312 		{
10313 			// Need a copy.
10314 			// For pointer types, we copy the pointer itself.
10315 			statement(declare_temporary(result_type, id), to_unpacked_expression(rhs), ";");
10316 			set<SPIRExpression>(id, to_name(id), result_type, true);
10317 		}
10318 		else
10319 		{
			// The RHS expression is immutable, so just forward it.
			// Copying it really makes no sense, but it seems to be allowed anyway.
10323 			auto &e = set<SPIRExpression>(id, to_expression(rhs), result_type, true);
10324 			if (pointer)
10325 			{
10326 				auto *var = maybe_get_backing_variable(rhs);
10327 				e.loaded_from = var ? var->self : ID(0);
10328 			}
10329 
10330 			// If we're copying an access chain, need to inherit the read expressions.
10331 			auto *rhs_expr = maybe_get<SPIRExpression>(rhs);
10332 			if (rhs_expr)
10333 			{
10334 				e.implied_read_expressions = rhs_expr->implied_read_expressions;
10335 				e.expression_dependencies = rhs_expr->expression_dependencies;
10336 			}
10337 		}
10338 		break;
10339 	}
10340 
10341 	case OpVectorShuffle:
10342 	{
10343 		uint32_t result_type = ops[0];
10344 		uint32_t id = ops[1];
10345 		uint32_t vec0 = ops[2];
10346 		uint32_t vec1 = ops[3];
10347 		const auto *elems = &ops[4];
10348 		length -= 4;
10349 
10350 		auto &type0 = expression_type(vec0);
10351 
10352 		// If we have the undefined swizzle index -1, we need to swizzle in undefined data,
10353 		// or in our case, T(0).
10354 		bool shuffle = false;
10355 		for (uint32_t i = 0; i < length; i++)
10356 			if (elems[i] >= type0.vecsize || elems[i] == 0xffffffffu)
10357 				shuffle = true;
10358 
10359 		// Cannot use swizzles with packed expressions, force shuffle path.
10360 		if (!shuffle && has_extended_decoration(vec0, SPIRVCrossDecorationPhysicalTypePacked))
10361 			shuffle = true;
10362 
10363 		string expr;
10364 		bool should_fwd, trivial_forward;
10365 
10366 		if (shuffle)
10367 		{
10368 			should_fwd = should_forward(vec0) && should_forward(vec1);
10369 			trivial_forward = should_suppress_usage_tracking(vec0) && should_suppress_usage_tracking(vec1);
10370 
10371 			// Constructor style and shuffling from two different vectors.
10372 			SmallVector<string> args;
10373 			for (uint32_t i = 0; i < length; i++)
10374 			{
10375 				if (elems[i] == 0xffffffffu)
10376 				{
10377 					// Use a constant 0 here.
10378 					// We could use the first component or similar, but then we risk propagating
10379 					// a value we might not need, and bog down codegen.
10380 					SPIRConstant c;
10381 					c.constant_type = type0.parent_type;
10382 					assert(type0.parent_type != ID(0));
10383 					args.push_back(constant_expression(c));
10384 				}
10385 				else if (elems[i] >= type0.vecsize)
10386 					args.push_back(to_extract_component_expression(vec1, elems[i] - type0.vecsize));
10387 				else
10388 					args.push_back(to_extract_component_expression(vec0, elems[i]));
10389 			}
10390 			expr += join(type_to_glsl_constructor(get<SPIRType>(result_type)), "(", merge(args), ")");
10391 		}
10392 		else
10393 		{
10394 			should_fwd = should_forward(vec0);
10395 			trivial_forward = should_suppress_usage_tracking(vec0);
10396 
10397 			// We only source from first vector, so can use swizzle.
10398 			// If the vector is packed, unpack it before applying a swizzle (needed for MSL)
10399 			expr += to_enclosed_unpacked_expression(vec0);
10400 			expr += ".";
10401 			for (uint32_t i = 0; i < length; i++)
10402 			{
10403 				assert(elems[i] != 0xffffffffu);
10404 				expr += index_to_swizzle(elems[i]);
10405 			}
10406 
10407 			if (backend.swizzle_is_function && length > 1)
10408 				expr += "()";
10409 		}
10410 
10411 		// A shuffle is trivial in that it doesn't actually *do* anything.
10412 		// We inherit the forwardedness from our arguments to avoid flushing out to temporaries when it's not really needed.
10413 
10414 		emit_op(result_type, id, expr, should_fwd, trivial_forward);
10415 
10416 		inherit_expression_dependencies(id, vec0);
10417 		if (vec0 != vec1)
10418 			inherit_expression_dependencies(id, vec1);
10419 		break;
10420 	}
10421 
10422 	// ALU
10423 	case OpIsNan:
10424 		GLSL_UFOP(isnan);
10425 		break;
10426 
10427 	case OpIsInf:
10428 		GLSL_UFOP(isinf);
10429 		break;
10430 
10431 	case OpSNegate:
10432 	case OpFNegate:
10433 		GLSL_UOP(-);
10434 		break;
10435 
10436 	case OpIAdd:
10437 	{
10438 		// For simple arith ops, prefer the output type if there's a mismatch to avoid extra bitcasts.
10439 		auto type = get<SPIRType>(ops[0]).basetype;
10440 		GLSL_BOP_CAST(+, type);
10441 		break;
10442 	}
10443 
10444 	case OpFAdd:
10445 		GLSL_BOP(+);
10446 		break;
10447 
10448 	case OpISub:
10449 	{
10450 		auto type = get<SPIRType>(ops[0]).basetype;
10451 		GLSL_BOP_CAST(-, type);
10452 		break;
10453 	}
10454 
10455 	case OpFSub:
10456 		GLSL_BOP(-);
10457 		break;
10458 
10459 	case OpIMul:
10460 	{
10461 		auto type = get<SPIRType>(ops[0]).basetype;
10462 		GLSL_BOP_CAST(*, type);
10463 		break;
10464 	}
10465 
10466 	case OpVectorTimesMatrix:
10467 	case OpMatrixTimesVector:
10468 	{
10469 		// If the matrix needs transpose, just flip the multiply order.
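		// This relies on transpose(M) * v == v * M and v * transpose(M) == M * v,
		// so no explicit transpose() has to be emitted.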
10470 		auto *e = maybe_get<SPIRExpression>(ops[opcode == OpMatrixTimesVector ? 2 : 3]);
10471 		if (e && e->need_transpose)
10472 		{
10473 			e->need_transpose = false;
10474 			string expr;
10475 
10476 			if (opcode == OpMatrixTimesVector)
10477 				expr = join(to_enclosed_unpacked_expression(ops[3]), " * ",
10478 				            enclose_expression(to_unpacked_row_major_matrix_expression(ops[2])));
10479 			else
10480 				expr = join(enclose_expression(to_unpacked_row_major_matrix_expression(ops[3])), " * ",
10481 				            to_enclosed_unpacked_expression(ops[2]));
10482 
10483 			bool forward = should_forward(ops[2]) && should_forward(ops[3]);
10484 			emit_op(ops[0], ops[1], expr, forward);
10485 			e->need_transpose = true;
10486 			inherit_expression_dependencies(ops[1], ops[2]);
10487 			inherit_expression_dependencies(ops[1], ops[3]);
10488 		}
10489 		else
10490 			GLSL_BOP(*);
10491 		break;
10492 	}
10493 
10494 	case OpMatrixTimesMatrix:
10495 	{
10496 		auto *a = maybe_get<SPIRExpression>(ops[2]);
10497 		auto *b = maybe_get<SPIRExpression>(ops[3]);
10498 
10499 		// If both matrices need transpose, we can multiply in flipped order and tag the expression as transposed.
10500 		// a^T * b^T = (b * a)^T.
10501 		if (a && b && a->need_transpose && b->need_transpose)
10502 		{
10503 			a->need_transpose = false;
10504 			b->need_transpose = false;
10505 			auto expr = join(enclose_expression(to_unpacked_row_major_matrix_expression(ops[3])), " * ",
10506 			                 enclose_expression(to_unpacked_row_major_matrix_expression(ops[2])));
10507 			bool forward = should_forward(ops[2]) && should_forward(ops[3]);
10508 			auto &e = emit_op(ops[0], ops[1], expr, forward);
10509 			e.need_transpose = true;
10510 			a->need_transpose = true;
10511 			b->need_transpose = true;
10512 			inherit_expression_dependencies(ops[1], ops[2]);
10513 			inherit_expression_dependencies(ops[1], ops[3]);
10514 		}
10515 		else
10516 			GLSL_BOP(*);
10517 
10518 		break;
10519 	}
10520 
10521 	case OpFMul:
10522 	case OpMatrixTimesScalar:
10523 	case OpVectorTimesScalar:
10524 		GLSL_BOP(*);
10525 		break;
10526 
10527 	case OpOuterProduct:
10528 		GLSL_BFOP(outerProduct);
10529 		break;
10530 
10531 	case OpDot:
10532 		GLSL_BFOP(dot);
10533 		break;
10534 
10535 	case OpTranspose:
10536 		if (options.version < 120) // Matches GLSL 1.10 / ESSL 1.00
10537 		{
10538 			// transpose() is not available, so instead, flip need_transpose,
10539 			// which can later be turned into an emulated transpose op by
10540 			// convert_row_major_matrix(), if necessary.
10541 			uint32_t result_type = ops[0];
10542 			uint32_t result_id = ops[1];
10543 			uint32_t input = ops[2];
10544 
10545 			// Force need_transpose to false temporarily to prevent
10546 			// to_expression() from doing the transpose.
10547 			bool need_transpose = false;
10548 			auto *input_e = maybe_get<SPIRExpression>(input);
10549 			if (input_e)
10550 				swap(need_transpose, input_e->need_transpose);
10551 
10552 			bool forward = should_forward(input);
10553 			auto &e = emit_op(result_type, result_id, to_expression(input), forward);
10554 			e.need_transpose = !need_transpose;
10555 
10556 			// Restore the old need_transpose flag.
10557 			if (input_e)
10558 				input_e->need_transpose = need_transpose;
10559 		}
10560 		else
10561 			GLSL_UFOP(transpose);
10562 		break;
10563 
10564 	case OpSRem:
10565 	{
10566 		uint32_t result_type = ops[0];
10567 		uint32_t result_id = ops[1];
10568 		uint32_t op0 = ops[2];
10569 		uint32_t op1 = ops[3];
10570 
		// Needs special handling: expand to op0 - op1 * (op0 / op1) so the remainder
		// takes the sign of the dividend, matching SPIR-V SRem semantics.
10572 		bool forward = should_forward(op0) && should_forward(op1);
10573 		auto expr = join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ", "(",
10574 		                 to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), ")");
10575 
10576 		emit_op(result_type, result_id, expr, forward);
10577 		inherit_expression_dependencies(result_id, op0);
10578 		inherit_expression_dependencies(result_id, op1);
10579 		break;
10580 	}
10581 
10582 	case OpSDiv:
10583 		GLSL_BOP_CAST(/, int_type);
10584 		break;
10585 
10586 	case OpUDiv:
10587 		GLSL_BOP_CAST(/, uint_type);
10588 		break;
10589 
10590 	case OpIAddCarry:
10591 	case OpISubBorrow:
10592 	{
10593 		if (options.es && options.version < 310)
10594 			SPIRV_CROSS_THROW("Extended arithmetic is only available from ESSL 310.");
10595 		else if (!options.es && options.version < 400)
10596 			SPIRV_CROSS_THROW("Extended arithmetic is only available from GLSL 400.");
10597 
10598 		uint32_t result_type = ops[0];
10599 		uint32_t result_id = ops[1];
10600 		uint32_t op0 = ops[2];
10601 		uint32_t op1 = ops[3];
10602 		auto &type = get<SPIRType>(result_type);
10603 		emit_uninitialized_temporary_expression(result_type, result_id);
10604 		const char *op = opcode == OpIAddCarry ? "uaddCarry" : "usubBorrow";
10605 
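		// The result is a two-member struct; this roughly emits
		// "res.<member0> = uaddCarry(a, b, res.<member1>);", with the carry/borrow
		// written to the second member through the out parameter.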
10606 		statement(to_expression(result_id), ".", to_member_name(type, 0), " = ", op, "(", to_expression(op0), ", ",
10607 		          to_expression(op1), ", ", to_expression(result_id), ".", to_member_name(type, 1), ");");
10608 		break;
10609 	}
10610 
10611 	case OpUMulExtended:
10612 	case OpSMulExtended:
10613 	{
10614 		if (options.es && options.version < 310)
10615 			SPIRV_CROSS_THROW("Extended arithmetic is only available from ESSL 310.");
10616 		else if (!options.es && options.version < 400)
			SPIRV_CROSS_THROW("Extended arithmetic is only available from GLSL 400.");
10618 
10619 		uint32_t result_type = ops[0];
10620 		uint32_t result_id = ops[1];
10621 		uint32_t op0 = ops[2];
10622 		uint32_t op1 = ops[3];
10623 		auto &type = get<SPIRType>(result_type);
10624 		emit_uninitialized_temporary_expression(result_type, result_id);
10625 		const char *op = opcode == OpUMulExtended ? "umulExtended" : "imulExtended";
10626 
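		// umulExtended()/imulExtended() return both halves through out parameters, so emit
		// a bare call that writes the high bits to member 1 and the low bits to member 0.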
10627 		statement(op, "(", to_expression(op0), ", ", to_expression(op1), ", ", to_expression(result_id), ".",
10628 		          to_member_name(type, 1), ", ", to_expression(result_id), ".", to_member_name(type, 0), ");");
10629 		break;
10630 	}
10631 
10632 	case OpFDiv:
10633 		GLSL_BOP(/);
10634 		break;
10635 
10636 	case OpShiftRightLogical:
10637 		GLSL_BOP_CAST(>>, uint_type);
10638 		break;
10639 
10640 	case OpShiftRightArithmetic:
10641 		GLSL_BOP_CAST(>>, int_type);
10642 		break;
10643 
10644 	case OpShiftLeftLogical:
10645 	{
10646 		auto type = get<SPIRType>(ops[0]).basetype;
10647 		GLSL_BOP_CAST(<<, type);
10648 		break;
10649 	}
10650 
10651 	case OpBitwiseOr:
10652 	{
10653 		auto type = get<SPIRType>(ops[0]).basetype;
10654 		GLSL_BOP_CAST(|, type);
10655 		break;
10656 	}
10657 
10658 	case OpBitwiseXor:
10659 	{
10660 		auto type = get<SPIRType>(ops[0]).basetype;
10661 		GLSL_BOP_CAST(^, type);
10662 		break;
10663 	}
10664 
10665 	case OpBitwiseAnd:
10666 	{
10667 		auto type = get<SPIRType>(ops[0]).basetype;
10668 		GLSL_BOP_CAST(&, type);
10669 		break;
10670 	}
10671 
10672 	case OpNot:
10673 		GLSL_UOP(~);
10674 		break;
10675 
10676 	case OpUMod:
10677 		GLSL_BOP_CAST(%, uint_type);
10678 		break;
10679 
10680 	case OpSMod:
10681 		GLSL_BOP_CAST(%, int_type);
10682 		break;
10683 
10684 	case OpFMod:
10685 		GLSL_BFOP(mod);
10686 		break;
10687 
10688 	case OpFRem:
10689 	{
10690 		if (is_legacy())
10691 			SPIRV_CROSS_THROW("OpFRem requires trunc() and is only supported on non-legacy targets. A workaround is "
10692 			                  "needed for legacy.");
10693 
10694 		uint32_t result_type = ops[0];
10695 		uint32_t result_id = ops[1];
10696 		uint32_t op0 = ops[2];
10697 		uint32_t op1 = ops[3];
10698 
		// Needs special handling: GLSL's mod() implements OpFMod (floor-based), so build
		// op0 - op1 * trunc(op0 / op1), which takes the sign of the dividend as FRem requires.
10700 		bool forward = should_forward(op0) && should_forward(op1);
10701 		auto expr = join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ", "trunc(",
10702 		                 to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), ")");
10703 
10704 		emit_op(result_type, result_id, expr, forward);
10705 		inherit_expression_dependencies(result_id, op0);
10706 		inherit_expression_dependencies(result_id, op1);
10707 		break;
10708 	}
10709 
10710 	// Relational
10711 	case OpAny:
10712 		GLSL_UFOP(any);
10713 		break;
10714 
10715 	case OpAll:
10716 		GLSL_UFOP(all);
10717 		break;
10718 
10719 	case OpSelect:
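		// SPIR-V operand order is (condition, object-if-true, object-if-false),
		// while mix() takes (false value, true value, condition), hence the reordering.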
10720 		emit_mix_op(ops[0], ops[1], ops[4], ops[3], ops[2]);
10721 		break;
10722 
10723 	case OpLogicalOr:
10724 	{
10725 		// No vector variant in GLSL for logical OR.
10726 		auto result_type = ops[0];
10727 		auto id = ops[1];
10728 		auto &type = get<SPIRType>(result_type);
10729 
10730 		if (type.vecsize > 1)
10731 			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "||", false, SPIRType::Unknown);
10732 		else
10733 			GLSL_BOP(||);
10734 		break;
10735 	}
10736 
10737 	case OpLogicalAnd:
10738 	{
10739 		// No vector variant in GLSL for logical AND.
10740 		auto result_type = ops[0];
10741 		auto id = ops[1];
10742 		auto &type = get<SPIRType>(result_type);
10743 
10744 		if (type.vecsize > 1)
10745 			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "&&", false, SPIRType::Unknown);
10746 		else
10747 			GLSL_BOP(&&);
10748 		break;
10749 	}
10750 
10751 	case OpLogicalNot:
10752 	{
10753 		auto &type = get<SPIRType>(ops[0]);
10754 		if (type.vecsize > 1)
10755 			GLSL_UFOP(not );
10756 		else
10757 			GLSL_UOP(!);
10758 		break;
10759 	}
10760 
10761 	case OpIEqual:
10762 	{
10763 		if (expression_type(ops[2]).vecsize > 1)
10764 			GLSL_BFOP_CAST(equal, int_type);
10765 		else
10766 			GLSL_BOP_CAST(==, int_type);
10767 		break;
10768 	}
10769 
10770 	case OpLogicalEqual:
10771 	case OpFOrdEqual:
10772 	{
10773 		if (expression_type(ops[2]).vecsize > 1)
10774 			GLSL_BFOP(equal);
10775 		else
10776 			GLSL_BOP(==);
10777 		break;
10778 	}
10779 
10780 	case OpINotEqual:
10781 	{
10782 		if (expression_type(ops[2]).vecsize > 1)
10783 			GLSL_BFOP_CAST(notEqual, int_type);
10784 		else
10785 			GLSL_BOP_CAST(!=, int_type);
10786 		break;
10787 	}
10788 
10789 	case OpLogicalNotEqual:
10790 	case OpFOrdNotEqual:
10791 	{
10792 		if (expression_type(ops[2]).vecsize > 1)
10793 			GLSL_BFOP(notEqual);
10794 		else
10795 			GLSL_BOP(!=);
10796 		break;
10797 	}
10798 
10799 	case OpUGreaterThan:
10800 	case OpSGreaterThan:
10801 	{
10802 		auto type = opcode == OpUGreaterThan ? uint_type : int_type;
10803 		if (expression_type(ops[2]).vecsize > 1)
10804 			GLSL_BFOP_CAST(greaterThan, type);
10805 		else
10806 			GLSL_BOP_CAST(>, type);
10807 		break;
10808 	}
10809 
10810 	case OpFOrdGreaterThan:
10811 	{
10812 		if (expression_type(ops[2]).vecsize > 1)
10813 			GLSL_BFOP(greaterThan);
10814 		else
10815 			GLSL_BOP(>);
10816 		break;
10817 	}
10818 
10819 	case OpUGreaterThanEqual:
10820 	case OpSGreaterThanEqual:
10821 	{
10822 		auto type = opcode == OpUGreaterThanEqual ? uint_type : int_type;
10823 		if (expression_type(ops[2]).vecsize > 1)
10824 			GLSL_BFOP_CAST(greaterThanEqual, type);
10825 		else
10826 			GLSL_BOP_CAST(>=, type);
10827 		break;
10828 	}
10829 
10830 	case OpFOrdGreaterThanEqual:
10831 	{
10832 		if (expression_type(ops[2]).vecsize > 1)
10833 			GLSL_BFOP(greaterThanEqual);
10834 		else
10835 			GLSL_BOP(>=);
10836 		break;
10837 	}
10838 
10839 	case OpULessThan:
10840 	case OpSLessThan:
10841 	{
10842 		auto type = opcode == OpULessThan ? uint_type : int_type;
10843 		if (expression_type(ops[2]).vecsize > 1)
10844 			GLSL_BFOP_CAST(lessThan, type);
10845 		else
10846 			GLSL_BOP_CAST(<, type);
10847 		break;
10848 	}
10849 
10850 	case OpFOrdLessThan:
10851 	{
10852 		if (expression_type(ops[2]).vecsize > 1)
10853 			GLSL_BFOP(lessThan);
10854 		else
10855 			GLSL_BOP(<);
10856 		break;
10857 	}
10858 
10859 	case OpULessThanEqual:
10860 	case OpSLessThanEqual:
10861 	{
10862 		auto type = opcode == OpULessThanEqual ? uint_type : int_type;
10863 		if (expression_type(ops[2]).vecsize > 1)
10864 			GLSL_BFOP_CAST(lessThanEqual, type);
10865 		else
10866 			GLSL_BOP_CAST(<=, type);
10867 		break;
10868 	}
10869 
10870 	case OpFOrdLessThanEqual:
10871 	{
10872 		if (expression_type(ops[2]).vecsize > 1)
10873 			GLSL_BFOP(lessThanEqual);
10874 		else
10875 			GLSL_BOP(<=);
10876 		break;
10877 	}
10878 
10879 	// Conversion
10880 	case OpSConvert:
10881 	case OpConvertSToF:
10882 	case OpUConvert:
10883 	case OpConvertUToF:
10884 	{
10885 		auto input_type = opcode == OpSConvert || opcode == OpConvertSToF ? int_type : uint_type;
10886 		uint32_t result_type = ops[0];
10887 		uint32_t id = ops[1];
10888 
10889 		auto &type = get<SPIRType>(result_type);
10890 		auto &arg_type = expression_type(ops[2]);
10891 		auto func = type_to_glsl_constructor(type);
10892 
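		// Widening integer conversions and int-to-float conversions must see the input with
		// the right signedness (sign- vs. zero-extension), so bitcast the argument first in
		// those cases; otherwise the constructor alone is enough.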
10893 		if (arg_type.width < type.width || type_is_floating_point(type))
10894 			emit_unary_func_op_cast(result_type, id, ops[2], func.c_str(), input_type, type.basetype);
10895 		else
10896 			emit_unary_func_op(result_type, id, ops[2], func.c_str());
10897 		break;
10898 	}
10899 
10900 	case OpConvertFToU:
10901 	case OpConvertFToS:
10902 	{
10903 		// Cast to expected arithmetic type, then potentially bitcast away to desired signedness.
10904 		uint32_t result_type = ops[0];
10905 		uint32_t id = ops[1];
10906 		auto &type = get<SPIRType>(result_type);
10907 		auto expected_type = type;
10908 		auto &float_type = expression_type(ops[2]);
10909 		expected_type.basetype =
10910 		    opcode == OpConvertFToS ? to_signed_basetype(type.width) : to_unsigned_basetype(type.width);
10911 
10912 		auto func = type_to_glsl_constructor(expected_type);
10913 		emit_unary_func_op_cast(result_type, id, ops[2], func.c_str(), float_type.basetype, expected_type.basetype);
10914 		break;
10915 	}
10916 
10917 	case OpFConvert:
10918 	{
10919 		uint32_t result_type = ops[0];
10920 		uint32_t id = ops[1];
10921 
10922 		auto func = type_to_glsl_constructor(get<SPIRType>(result_type));
10923 		emit_unary_func_op(result_type, id, ops[2], func.c_str());
10924 		break;
10925 	}
10926 
10927 	case OpBitcast:
10928 	{
10929 		uint32_t result_type = ops[0];
10930 		uint32_t id = ops[1];
10931 		uint32_t arg = ops[2];
10932 
10933 		if (!emit_complex_bitcast(result_type, id, arg))
10934 		{
10935 			auto op = bitcast_glsl_op(get<SPIRType>(result_type), expression_type(arg));
10936 			emit_unary_func_op(result_type, id, arg, op.c_str());
10937 		}
10938 		break;
10939 	}
10940 
10941 	case OpQuantizeToF16:
10942 	{
10943 		uint32_t result_type = ops[0];
10944 		uint32_t id = ops[1];
10945 		uint32_t arg = ops[2];
10946 
10947 		string op;
10948 		auto &type = get<SPIRType>(result_type);
10949 
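		// Quantize by round-tripping through packHalf2x16()/unpackHalf2x16();
		// scalars are widened to a vec2 and wider vectors are split into vec2 chunks.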
10950 		switch (type.vecsize)
10951 		{
10952 		case 1:
10953 			op = join("unpackHalf2x16(packHalf2x16(vec2(", to_expression(arg), "))).x");
10954 			break;
10955 		case 2:
10956 			op = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), "))");
10957 			break;
10958 		case 3:
10959 		{
10960 			auto op0 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".xy))");
10961 			auto op1 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".zz)).x");
10962 			op = join("vec3(", op0, ", ", op1, ")");
10963 			break;
10964 		}
10965 		case 4:
10966 		{
10967 			auto op0 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".xy))");
10968 			auto op1 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".zw))");
10969 			op = join("vec4(", op0, ", ", op1, ")");
10970 			break;
10971 		}
10972 		default:
10973 			SPIRV_CROSS_THROW("Illegal argument to OpQuantizeToF16.");
10974 		}
10975 
10976 		emit_op(result_type, id, op, should_forward(arg));
10977 		inherit_expression_dependencies(id, arg);
10978 		break;
10979 	}
10980 
10981 	// Derivatives
10982 	case OpDPdx:
10983 		GLSL_UFOP(dFdx);
10984 		if (is_legacy_es())
10985 			require_extension_internal("GL_OES_standard_derivatives");
10986 		register_control_dependent_expression(ops[1]);
10987 		break;
10988 
10989 	case OpDPdy:
10990 		GLSL_UFOP(dFdy);
10991 		if (is_legacy_es())
10992 			require_extension_internal("GL_OES_standard_derivatives");
10993 		register_control_dependent_expression(ops[1]);
10994 		break;
10995 
10996 	case OpDPdxFine:
10997 		GLSL_UFOP(dFdxFine);
10998 		if (options.es)
10999 		{
11000 			SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
11001 		}
11002 		if (options.version < 450)
11003 			require_extension_internal("GL_ARB_derivative_control");
11004 		register_control_dependent_expression(ops[1]);
11005 		break;
11006 
11007 	case OpDPdyFine:
11008 		GLSL_UFOP(dFdyFine);
11009 		if (options.es)
11010 		{
11011 			SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
11012 		}
11013 		if (options.version < 450)
11014 			require_extension_internal("GL_ARB_derivative_control");
11015 		register_control_dependent_expression(ops[1]);
11016 		break;
11017 
11018 	case OpDPdxCoarse:
11019 		if (options.es)
11020 		{
11021 			SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
11022 		}
11023 		GLSL_UFOP(dFdxCoarse);
11024 		if (options.version < 450)
11025 			require_extension_internal("GL_ARB_derivative_control");
11026 		register_control_dependent_expression(ops[1]);
11027 		break;
11028 
11029 	case OpDPdyCoarse:
11030 		GLSL_UFOP(dFdyCoarse);
11031 		if (options.es)
11032 		{
11033 			SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
11034 		}
11035 		if (options.version < 450)
11036 			require_extension_internal("GL_ARB_derivative_control");
11037 		register_control_dependent_expression(ops[1]);
11038 		break;
11039 
11040 	case OpFwidth:
11041 		GLSL_UFOP(fwidth);
11042 		if (is_legacy_es())
11043 			require_extension_internal("GL_OES_standard_derivatives");
11044 		register_control_dependent_expression(ops[1]);
11045 		break;
11046 
11047 	case OpFwidthCoarse:
11048 		GLSL_UFOP(fwidthCoarse);
11049 		if (options.es)
11050 		{
11051 			SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
11052 		}
11053 		if (options.version < 450)
11054 			require_extension_internal("GL_ARB_derivative_control");
11055 		register_control_dependent_expression(ops[1]);
11056 		break;
11057 
11058 	case OpFwidthFine:
11059 		GLSL_UFOP(fwidthFine);
11060 		if (options.es)
11061 		{
11062 			SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
11063 		}
11064 		if (options.version < 450)
11065 			require_extension_internal("GL_ARB_derivative_control");
11066 		register_control_dependent_expression(ops[1]);
11067 		break;
11068 
11069 	// Bitfield
11070 	case OpBitFieldInsert:
11071 	{
11072 		emit_bitfield_insert_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], "bitfieldInsert", SPIRType::Int);
11073 		break;
11074 	}
11075 
11076 	case OpBitFieldSExtract:
11077 	{
11078 		emit_trinary_func_op_bitextract(ops[0], ops[1], ops[2], ops[3], ops[4], "bitfieldExtract", int_type, int_type,
11079 		                                SPIRType::Int, SPIRType::Int);
11080 		break;
11081 	}
11082 
11083 	case OpBitFieldUExtract:
11084 	{
11085 		emit_trinary_func_op_bitextract(ops[0], ops[1], ops[2], ops[3], ops[4], "bitfieldExtract", uint_type, uint_type,
11086 		                                SPIRType::Int, SPIRType::Int);
11087 		break;
11088 	}
11089 
11090 	case OpBitReverse:
11091 		// BitReverse does not have issues with sign since result type must match input type.
11092 		GLSL_UFOP(bitfieldReverse);
11093 		break;
11094 
11095 	case OpBitCount:
11096 	{
11097 		auto basetype = expression_type(ops[2]).basetype;
11098 		emit_unary_func_op_cast(ops[0], ops[1], ops[2], "bitCount", basetype, int_type);
11099 		break;
11100 	}
11101 
11102 	// Atomics
11103 	case OpAtomicExchange:
11104 	{
11105 		uint32_t result_type = ops[0];
11106 		uint32_t id = ops[1];
11107 		uint32_t ptr = ops[2];
11108 		// Ignore semantics for now, probably only relevant to CL.
11109 		uint32_t val = ops[5];
11110 		const char *op = check_atomic_image(ptr) ? "imageAtomicExchange" : "atomicExchange";
11111 		forced_temporaries.insert(id);
11112 		emit_binary_func_op(result_type, id, ptr, val, op);
11113 		flush_all_atomic_capable_variables();
11114 		break;
11115 	}
11116 
11117 	case OpAtomicCompareExchange:
11118 	{
11119 		uint32_t result_type = ops[0];
11120 		uint32_t id = ops[1];
11121 		uint32_t ptr = ops[2];
11122 		uint32_t val = ops[6];
11123 		uint32_t comp = ops[7];
11124 		const char *op = check_atomic_image(ptr) ? "imageAtomicCompSwap" : "atomicCompSwap";
11125 
11126 		forced_temporaries.insert(id);
11127 		emit_trinary_func_op(result_type, id, ptr, comp, val, op);
11128 		flush_all_atomic_capable_variables();
11129 		break;
11130 	}
11131 
11132 	case OpAtomicLoad:
11133 	{
		// In plain GLSL, we have no atomic loads, so emulate this by fetch-adding 0 and hoping the compiler figures it out.
11135 		// Alternatively, we could rely on KHR_memory_model, but that's not very helpful for GL.
11136 		auto &type = expression_type(ops[2]);
11137 		forced_temporaries.insert(ops[1]);
11138 		bool atomic_image = check_atomic_image(ops[2]);
11139 		bool unsigned_type = (type.basetype == SPIRType::UInt) ||
11140 		                     (atomic_image && get<SPIRType>(type.image.type).basetype == SPIRType::UInt);
11141 		const char *op = atomic_image ? "imageAtomicAdd" : "atomicAdd";
11142 		const char *increment = unsigned_type ? "0u" : "0";
11143 		emit_op(ops[0], ops[1], join(op, "(", to_expression(ops[2]), ", ", increment, ")"), false);
11144 		flush_all_atomic_capable_variables();
11145 		break;
11146 	}
11147 
11148 	case OpAtomicStore:
11149 	{
11150 		// In plain GLSL, we have no atomic stores, so emulate this with an atomic exchange where we don't consume the result.
11151 		// Alternatively, we could rely on KHR_memory_model, but that's not very helpful for GL.
11152 		uint32_t ptr = ops[0];
11153 		// Ignore semantics for now, probably only relevant to CL.
11154 		uint32_t val = ops[3];
11155 		const char *op = check_atomic_image(ptr) ? "imageAtomicExchange" : "atomicExchange";
11156 		statement(op, "(", to_expression(ptr), ", ", to_expression(val), ");");
11157 		flush_all_atomic_capable_variables();
11158 		break;
11159 	}
11160 
11161 	case OpAtomicIIncrement:
11162 	case OpAtomicIDecrement:
11163 	{
11164 		forced_temporaries.insert(ops[1]);
11165 		auto &type = expression_type(ops[2]);
11166 		if (type.storage == StorageClassAtomicCounter)
11167 		{
11168 			// Legacy GLSL stuff, not sure if this is relevant to support.
11169 			if (opcode == OpAtomicIIncrement)
11170 				GLSL_UFOP(atomicCounterIncrement);
11171 			else
11172 				GLSL_UFOP(atomicCounterDecrement);
11173 		}
11174 		else
11175 		{
11176 			bool atomic_image = check_atomic_image(ops[2]);
11177 			bool unsigned_type = (type.basetype == SPIRType::UInt) ||
11178 			                     (atomic_image && get<SPIRType>(type.image.type).basetype == SPIRType::UInt);
11179 			const char *op = atomic_image ? "imageAtomicAdd" : "atomicAdd";
11180 
11181 			const char *increment = nullptr;
11182 			if (opcode == OpAtomicIIncrement && unsigned_type)
11183 				increment = "1u";
11184 			else if (opcode == OpAtomicIIncrement)
11185 				increment = "1";
11186 			else if (unsigned_type)
11187 				increment = "uint(-1)";
11188 			else
11189 				increment = "-1";
11190 
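			// E.g. this emits "atomicAdd(ptr, 1u)" / "atomicAdd(ptr, uint(-1))"
			// (or the imageAtomicAdd() equivalents) for increment and decrement.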
11191 			emit_op(ops[0], ops[1], join(op, "(", to_expression(ops[2]), ", ", increment, ")"), false);
11192 		}
11193 
11194 		flush_all_atomic_capable_variables();
11195 		break;
11196 	}
11197 
11198 	case OpAtomicIAdd:
11199 	{
11200 		const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd";
11201 		forced_temporaries.insert(ops[1]);
11202 		emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
11203 		flush_all_atomic_capable_variables();
11204 		break;
11205 	}
11206 
11207 	case OpAtomicISub:
11208 	{
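		// GLSL has no atomicSub(), so negate the operand and use atomicAdd()
		// (or imageAtomicAdd()) instead.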
11209 		const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd";
11210 		forced_temporaries.insert(ops[1]);
11211 		auto expr = join(op, "(", to_expression(ops[2]), ", -", to_enclosed_expression(ops[5]), ")");
11212 		emit_op(ops[0], ops[1], expr, should_forward(ops[2]) && should_forward(ops[5]));
11213 		flush_all_atomic_capable_variables();
11214 		break;
11215 	}
11216 
11217 	case OpAtomicSMin:
11218 	case OpAtomicUMin:
11219 	{
11220 		const char *op = check_atomic_image(ops[2]) ? "imageAtomicMin" : "atomicMin";
11221 		forced_temporaries.insert(ops[1]);
11222 		emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
11223 		flush_all_atomic_capable_variables();
11224 		break;
11225 	}
11226 
11227 	case OpAtomicSMax:
11228 	case OpAtomicUMax:
11229 	{
11230 		const char *op = check_atomic_image(ops[2]) ? "imageAtomicMax" : "atomicMax";
11231 		forced_temporaries.insert(ops[1]);
11232 		emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
11233 		flush_all_atomic_capable_variables();
11234 		break;
11235 	}
11236 
11237 	case OpAtomicAnd:
11238 	{
11239 		const char *op = check_atomic_image(ops[2]) ? "imageAtomicAnd" : "atomicAnd";
11240 		forced_temporaries.insert(ops[1]);
11241 		emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
11242 		flush_all_atomic_capable_variables();
11243 		break;
11244 	}
11245 
11246 	case OpAtomicOr:
11247 	{
11248 		const char *op = check_atomic_image(ops[2]) ? "imageAtomicOr" : "atomicOr";
11249 		forced_temporaries.insert(ops[1]);
11250 		emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
11251 		flush_all_atomic_capable_variables();
11252 		break;
11253 	}
11254 
11255 	case OpAtomicXor:
11256 	{
11257 		const char *op = check_atomic_image(ops[2]) ? "imageAtomicXor" : "atomicXor";
11258 		forced_temporaries.insert(ops[1]);
11259 		emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
11260 		flush_all_atomic_capable_variables();
11261 		break;
11262 	}
11263 
11264 	// Geometry shaders
11265 	case OpEmitVertex:
11266 		statement("EmitVertex();");
11267 		break;
11268 
11269 	case OpEndPrimitive:
11270 		statement("EndPrimitive();");
11271 		break;
11272 
11273 	case OpEmitStreamVertex:
11274 	{
11275 		if (options.es)
11276 			SPIRV_CROSS_THROW("Multi-stream geometry shaders not supported in ES.");
11277 		else if (!options.es && options.version < 400)
11278 			SPIRV_CROSS_THROW("Multi-stream geometry shaders only supported in GLSL 400.");
11279 
11280 		auto stream_expr = to_expression(ops[0]);
11281 		if (expression_type(ops[0]).basetype != SPIRType::Int)
11282 			stream_expr = join("int(", stream_expr, ")");
11283 		statement("EmitStreamVertex(", stream_expr, ");");
11284 		break;
11285 	}
11286 
11287 	case OpEndStreamPrimitive:
11288 	{
11289 		if (options.es)
11290 			SPIRV_CROSS_THROW("Multi-stream geometry shaders not supported in ES.");
11291 		else if (!options.es && options.version < 400)
11292 			SPIRV_CROSS_THROW("Multi-stream geometry shaders only supported in GLSL 400.");
11293 
11294 		auto stream_expr = to_expression(ops[0]);
11295 		if (expression_type(ops[0]).basetype != SPIRType::Int)
11296 			stream_expr = join("int(", stream_expr, ")");
11297 		statement("EndStreamPrimitive(", stream_expr, ");");
11298 		break;
11299 	}
11300 
11301 	// Textures
11302 	case OpImageSampleExplicitLod:
11303 	case OpImageSampleProjExplicitLod:
11304 	case OpImageSampleDrefExplicitLod:
11305 	case OpImageSampleProjDrefExplicitLod:
11306 	case OpImageSampleImplicitLod:
11307 	case OpImageSampleProjImplicitLod:
11308 	case OpImageSampleDrefImplicitLod:
11309 	case OpImageSampleProjDrefImplicitLod:
11310 	case OpImageFetch:
11311 	case OpImageGather:
11312 	case OpImageDrefGather:
11313 		// Gets a bit hairy, so move this to a separate instruction.
11314 		emit_texture_op(instruction, false);
11315 		break;
11316 
11317 	case OpImageSparseSampleExplicitLod:
11318 	case OpImageSparseSampleProjExplicitLod:
11319 	case OpImageSparseSampleDrefExplicitLod:
11320 	case OpImageSparseSampleProjDrefExplicitLod:
11321 	case OpImageSparseSampleImplicitLod:
11322 	case OpImageSparseSampleProjImplicitLod:
11323 	case OpImageSparseSampleDrefImplicitLod:
11324 	case OpImageSparseSampleProjDrefImplicitLod:
11325 	case OpImageSparseFetch:
11326 	case OpImageSparseGather:
11327 	case OpImageSparseDrefGather:
11328 		// Gets a bit hairy, so move this to a separate instruction.
11329 		emit_texture_op(instruction, true);
11330 		break;
11331 
11332 	case OpImageSparseTexelsResident:
11333 		if (options.es)
			SPIRV_CROSS_THROW("Sparse feedback is not supported in OpenGL ES.");
11335 		require_extension_internal("GL_ARB_sparse_texture2");
11336 		emit_unary_func_op_cast(ops[0], ops[1], ops[2], "sparseTexelsResidentARB", int_type, SPIRType::Boolean);
11337 		break;
11338 
11339 	case OpImage:
11340 	{
11341 		uint32_t result_type = ops[0];
11342 		uint32_t id = ops[1];
11343 
11344 		// Suppress usage tracking.
11345 		auto &e = emit_op(result_type, id, to_expression(ops[2]), true, true);
11346 
11347 		// When using the image, we need to know which variable it is actually loaded from.
11348 		auto *var = maybe_get_backing_variable(ops[2]);
11349 		e.loaded_from = var ? var->self : ID(0);
11350 		break;
11351 	}
11352 
11353 	case OpImageQueryLod:
11354 	{
11355 		if (!options.es && options.version < 400)
11356 		{
11357 			require_extension_internal("GL_ARB_texture_query_lod");
11358 			// For some reason, the ARB spec is all-caps.
11359 			GLSL_BFOP(textureQueryLOD);
11360 		}
11361 		else if (options.es)
11362 			SPIRV_CROSS_THROW("textureQueryLod not supported in ES profile.");
11363 		else
11364 			GLSL_BFOP(textureQueryLod);
11365 		register_control_dependent_expression(ops[1]);
11366 		break;
11367 	}
11368 
11369 	case OpImageQueryLevels:
11370 	{
11371 		uint32_t result_type = ops[0];
11372 		uint32_t id = ops[1];
11373 
11374 		if (!options.es && options.version < 430)
11375 			require_extension_internal("GL_ARB_texture_query_levels");
11376 		if (options.es)
11377 			SPIRV_CROSS_THROW("textureQueryLevels not supported in ES profile.");
11378 
11379 		auto expr = join("textureQueryLevels(", convert_separate_image_to_expression(ops[2]), ")");
11380 		auto &restype = get<SPIRType>(ops[0]);
11381 		expr = bitcast_expression(restype, SPIRType::Int, expr);
11382 		emit_op(result_type, id, expr, true);
11383 		break;
11384 	}
11385 
11386 	case OpImageQuerySamples:
11387 	{
11388 		auto &type = expression_type(ops[2]);
11389 		uint32_t result_type = ops[0];
11390 		uint32_t id = ops[1];
11391 
11392 		string expr;
11393 		if (type.image.sampled == 2)
11394 			expr = join("imageSamples(", to_expression(ops[2]), ")");
11395 		else
11396 			expr = join("textureSamples(", convert_separate_image_to_expression(ops[2]), ")");
11397 
11398 		auto &restype = get<SPIRType>(ops[0]);
11399 		expr = bitcast_expression(restype, SPIRType::Int, expr);
11400 		emit_op(result_type, id, expr, true);
11401 		break;
11402 	}
11403 
11404 	case OpSampledImage:
11405 	{
11406 		uint32_t result_type = ops[0];
11407 		uint32_t id = ops[1];
11408 		emit_sampled_image_op(result_type, id, ops[2], ops[3]);
11409 		inherit_expression_dependencies(id, ops[2]);
11410 		inherit_expression_dependencies(id, ops[3]);
11411 		break;
11412 	}
11413 
11414 	case OpImageQuerySizeLod:
11415 	{
11416 		uint32_t result_type = ops[0];
11417 		uint32_t id = ops[1];
11418 		uint32_t img = ops[2];
11419 
11420 		std::string fname = "textureSize";
11421 		if (is_legacy_desktop())
11422 		{
11423 			auto &type = expression_type(img);
11424 			auto &imgtype = get<SPIRType>(type.self);
11425 			fname = legacy_tex_op(fname, imgtype, img);
11426 		}
11427 		else if (is_legacy_es())
11428 			SPIRV_CROSS_THROW("textureSize is not supported in ESSL 100.");
11429 
11430 		auto expr = join(fname, "(", convert_separate_image_to_expression(img), ", ",
11431 		                 bitcast_expression(SPIRType::Int, ops[3]), ")");
11432 		auto &restype = get<SPIRType>(ops[0]);
11433 		expr = bitcast_expression(restype, SPIRType::Int, expr);
11434 		emit_op(result_type, id, expr, true);
11435 		break;
11436 	}
11437 
11438 	// Image load/store
11439 	case OpImageRead:
11440 	case OpImageSparseRead:
11441 	{
11442 		// We added Nonreadable speculatively to the OpImage variable due to glslangValidator
11443 		// not adding the proper qualifiers.
11444 		// If it turns out we need to read the image after all, remove the qualifier and recompile.
11445 		auto *var = maybe_get_backing_variable(ops[2]);
11446 		if (var)
11447 		{
11448 			auto &flags = ir.meta[var->self].decoration.decoration_flags;
11449 			if (flags.get(DecorationNonReadable))
11450 			{
11451 				flags.clear(DecorationNonReadable);
11452 				force_recompile();
11453 			}
11454 		}
11455 
11456 		uint32_t result_type = ops[0];
11457 		uint32_t id = ops[1];
11458 
11459 		bool pure;
11460 		string imgexpr;
11461 		auto &type = expression_type(ops[2]);
11462 
11463 		if (var && var->remapped_variable) // Remapped input, just read as-is without any op-code
11464 		{
11465 			if (type.image.ms)
11466 				SPIRV_CROSS_THROW("Trying to remap multisampled image to variable, this is not possible.");
11467 
11468 			auto itr =
11469 			    find_if(begin(pls_inputs), end(pls_inputs), [var](const PlsRemap &pls) { return pls.id == var->self; });
11470 
11471 			if (itr == end(pls_inputs))
11472 			{
11473 				// For non-PLS inputs, we rely on subpass type remapping information to get it right
11474 				// since ImageRead always returns 4-component vectors and the backing type is opaque.
11475 				if (!var->remapped_components)
11476 					SPIRV_CROSS_THROW("subpassInput was remapped, but remap_components is not set correctly.");
11477 				imgexpr = remap_swizzle(get<SPIRType>(result_type), var->remapped_components, to_expression(ops[2]));
11478 			}
11479 			else
11480 			{
				// A PLS input could have a different number of components than what the SPIR-V expects,
				// so swizzle to the appropriate vector size.
11483 				uint32_t components = pls_format_to_components(itr->format);
11484 				imgexpr = remap_swizzle(get<SPIRType>(result_type), components, to_expression(ops[2]));
11485 			}
11486 			pure = true;
11487 		}
11488 		else if (type.image.dim == DimSubpassData)
11489 		{
11490 			if (var && subpass_input_is_framebuffer_fetch(var->self))
11491 			{
11492 				imgexpr = to_expression(var->self);
11493 			}
11494 			else if (options.vulkan_semantics)
11495 			{
11496 				// With Vulkan semantics, use the proper Vulkan GLSL construct.
11497 				if (type.image.ms)
11498 				{
11499 					uint32_t operands = ops[4];
11500 					if (operands != ImageOperandsSampleMask || length != 6)
11501 						SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected "
11502 						                  "operand mask was used.");
11503 
11504 					uint32_t samples = ops[5];
11505 					imgexpr = join("subpassLoad(", to_expression(ops[2]), ", ", to_expression(samples), ")");
11506 				}
11507 				else
11508 					imgexpr = join("subpassLoad(", to_expression(ops[2]), ")");
11509 			}
11510 			else
11511 			{
11512 				if (type.image.ms)
11513 				{
11514 					uint32_t operands = ops[4];
11515 					if (operands != ImageOperandsSampleMask || length != 6)
11516 						SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected "
11517 						                  "operand mask was used.");
11518 
11519 					uint32_t samples = ops[5];
11520 					imgexpr = join("texelFetch(", to_expression(ops[2]), ", ivec2(gl_FragCoord.xy), ",
11521 					               to_expression(samples), ")");
11522 				}
11523 				else
11524 				{
11525 					// Implement subpass loads via texture barrier style sampling.
11526 					imgexpr = join("texelFetch(", to_expression(ops[2]), ", ivec2(gl_FragCoord.xy), 0)");
11527 				}
11528 			}
11529 			imgexpr = remap_swizzle(get<SPIRType>(result_type), 4, imgexpr);
11530 			pure = true;
11531 		}
11532 		else
11533 		{
11534 			bool sparse = opcode == OpImageSparseRead;
11535 			uint32_t sparse_code_id = 0;
11536 			uint32_t sparse_texel_id = 0;
11537 			if (sparse)
11538 				emit_sparse_feedback_temporaries(ops[0], ops[1], sparse_code_id, sparse_texel_id);
11539 
11540 			// imageLoad only accepts int coords, not uint.
11541 			auto coord_expr = to_expression(ops[3]);
11542 			auto target_coord_type = expression_type(ops[3]);
11543 			target_coord_type.basetype = SPIRType::Int;
11544 			coord_expr = bitcast_expression(target_coord_type, expression_type(ops[3]).basetype, coord_expr);
11545 
11546 			// Plain image load/store.
11547 			if (sparse)
11548 			{
11549 				if (type.image.ms)
11550 				{
11551 					uint32_t operands = ops[4];
11552 					if (operands != ImageOperandsSampleMask || length != 6)
11553 						SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected "
11554 						                  "operand mask was used.");
11555 
11556 					uint32_t samples = ops[5];
11557 					statement(to_expression(sparse_code_id), " = sparseImageLoadARB(", to_expression(ops[2]), ", ",
11558 					          coord_expr, ", ", to_expression(samples), ", ", to_expression(sparse_texel_id), ");");
11559 				}
11560 				else
11561 				{
11562 					statement(to_expression(sparse_code_id), " = sparseImageLoadARB(", to_expression(ops[2]), ", ",
11563 					          coord_expr, ", ", to_expression(sparse_texel_id), ");");
11564 				}
11565 				imgexpr = join(type_to_glsl(get<SPIRType>(result_type)), "(", to_expression(sparse_code_id), ", ",
11566 				               to_expression(sparse_texel_id), ")");
11567 			}
11568 			else
11569 			{
11570 				if (type.image.ms)
11571 				{
11572 					uint32_t operands = ops[4];
11573 					if (operands != ImageOperandsSampleMask || length != 6)
11574 						SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected "
11575 						                  "operand mask was used.");
11576 
11577 					uint32_t samples = ops[5];
11578 					imgexpr =
11579 					    join("imageLoad(", to_expression(ops[2]), ", ", coord_expr, ", ", to_expression(samples), ")");
11580 				}
11581 				else
11582 					imgexpr = join("imageLoad(", to_expression(ops[2]), ", ", coord_expr, ")");
11583 			}
11584 
11585 			if (!sparse)
11586 				imgexpr = remap_swizzle(get<SPIRType>(result_type), 4, imgexpr);
11587 			pure = false;
11588 		}
11589 
11590 		if (var && var->forwardable)
11591 		{
11592 			bool forward = forced_temporaries.find(id) == end(forced_temporaries);
11593 			auto &e = emit_op(result_type, id, imgexpr, forward);
11594 
11595 			// We only need to track dependencies if we're reading from image load/store.
11596 			if (!pure)
11597 			{
11598 				e.loaded_from = var->self;
11599 				if (forward)
11600 					var->dependees.push_back(id);
11601 			}
11602 		}
11603 		else
11604 			emit_op(result_type, id, imgexpr, false);
11605 
11606 		inherit_expression_dependencies(id, ops[2]);
11607 		if (type.image.ms)
11608 			inherit_expression_dependencies(id, ops[5]);
11609 		break;
11610 	}
11611 
11612 	case OpImageTexelPointer:
11613 	{
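		// There is no pointer type to speak of in GLSL; build an "image, coord" argument
		// pair so that later imageAtomic*() calls on this pointer can splice it in directly.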
11614 		uint32_t result_type = ops[0];
11615 		uint32_t id = ops[1];
11616 
11617 		auto coord_expr = to_expression(ops[3]);
11618 		auto target_coord_type = expression_type(ops[3]);
11619 		target_coord_type.basetype = SPIRType::Int;
11620 		coord_expr = bitcast_expression(target_coord_type, expression_type(ops[3]).basetype, coord_expr);
11621 
11622 		auto expr = join(to_expression(ops[2]), ", ", coord_expr);
11623 		if (has_decoration(id, DecorationNonUniformEXT) || has_decoration(ops[2], DecorationNonUniformEXT))
11624 			convert_non_uniform_expression(expression_type(ops[2]), expr);
11625 
11626 		auto &e = set<SPIRExpression>(id, expr, result_type, true);
11627 
11628 		// When using the pointer, we need to know which variable it is actually loaded from.
11629 		auto *var = maybe_get_backing_variable(ops[2]);
11630 		e.loaded_from = var ? var->self : ID(0);
11631 		inherit_expression_dependencies(id, ops[3]);
11632 		break;
11633 	}
11634 
11635 	case OpImageWrite:
11636 	{
11637 		// We added Nonwritable speculatively to the OpImage variable due to glslangValidator
11638 		// not adding the proper qualifiers.
11639 		// If it turns out we need to write to the image after all, remove the qualifier and recompile.
11640 		auto *var = maybe_get_backing_variable(ops[0]);
11641 		if (var)
11642 		{
11643 			auto &flags = ir.meta[var->self].decoration.decoration_flags;
11644 			if (flags.get(DecorationNonWritable))
11645 			{
11646 				flags.clear(DecorationNonWritable);
11647 				force_recompile();
11648 			}
11649 		}
11650 
11651 		auto &type = expression_type(ops[0]);
11652 		auto &value_type = expression_type(ops[2]);
11653 		auto store_type = value_type;
11654 		store_type.vecsize = 4;
11655 
11656 		// imageStore only accepts int coords, not uint.
11657 		auto coord_expr = to_expression(ops[1]);
11658 		auto target_coord_type = expression_type(ops[1]);
11659 		target_coord_type.basetype = SPIRType::Int;
11660 		coord_expr = bitcast_expression(target_coord_type, expression_type(ops[1]).basetype, coord_expr);
11661 
11662 		if (type.image.ms)
11663 		{
11664 			uint32_t operands = ops[3];
11665 			if (operands != ImageOperandsSampleMask || length != 5)
11666 				SPIRV_CROSS_THROW("Multisampled image used in OpImageWrite, but unexpected operand mask was used.");
11667 			uint32_t samples = ops[4];
11668 			statement("imageStore(", to_expression(ops[0]), ", ", coord_expr, ", ", to_expression(samples), ", ",
11669 			          remap_swizzle(store_type, value_type.vecsize, to_expression(ops[2])), ");");
11670 		}
11671 		else
11672 			statement("imageStore(", to_expression(ops[0]), ", ", coord_expr, ", ",
11673 			          remap_swizzle(store_type, value_type.vecsize, to_expression(ops[2])), ");");
11674 
11675 		if (var && variable_storage_is_aliased(*var))
11676 			flush_all_aliased_variables();
11677 		break;
11678 	}
11679 
11680 	case OpImageQuerySize:
11681 	{
11682 		auto &type = expression_type(ops[2]);
11683 		uint32_t result_type = ops[0];
11684 		uint32_t id = ops[1];
11685 
11686 		if (type.basetype == SPIRType::Image)
11687 		{
11688 			string expr;
11689 			if (type.image.sampled == 2)
11690 			{
11691 				if (!options.es && options.version < 430)
11692 					require_extension_internal("GL_ARB_shader_image_size");
11693 				else if (options.es && options.version < 310)
11694 					SPIRV_CROSS_THROW("At least ESSL 3.10 required for imageSize.");
11695 
11696 				// The size of an image is always constant.
11697 				expr = join("imageSize(", to_expression(ops[2]), ")");
11698 			}
11699 			else
11700 			{
11701 				// This path is hit for samplerBuffers and multisampled images which do not have LOD.
11702 				std::string fname = "textureSize";
11703 				if (is_legacy())
11704 				{
11705 					auto &imgtype = get<SPIRType>(type.self);
11706 					fname = legacy_tex_op(fname, imgtype, ops[2]);
11707 				}
11708 				expr = join(fname, "(", convert_separate_image_to_expression(ops[2]), ")");
11709 			}
11710 
11711 			auto &restype = get<SPIRType>(ops[0]);
11712 			expr = bitcast_expression(restype, SPIRType::Int, expr);
11713 			emit_op(result_type, id, expr, true);
11714 		}
11715 		else
11716 			SPIRV_CROSS_THROW("Invalid type for OpImageQuerySize.");
11717 		break;
11718 	}
11719 
11720 	// Compute
11721 	case OpControlBarrier:
11722 	case OpMemoryBarrier:
11723 	{
11724 		uint32_t execution_scope = 0;
11725 		uint32_t memory;
11726 		uint32_t semantics;
11727 
11728 		if (opcode == OpMemoryBarrier)
11729 		{
11730 			memory = evaluate_constant_u32(ops[0]);
11731 			semantics = evaluate_constant_u32(ops[1]);
11732 		}
11733 		else
11734 		{
11735 			execution_scope = evaluate_constant_u32(ops[0]);
11736 			memory = evaluate_constant_u32(ops[1]);
11737 			semantics = evaluate_constant_u32(ops[2]);
11738 		}
11739 
11740 		if (execution_scope == ScopeSubgroup || memory == ScopeSubgroup)
11741 		{
11742 			// OpControlBarrier with ScopeSubgroup is subgroupBarrier()
11743 			if (opcode != OpControlBarrier)
11744 			{
11745 				request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMemBarrier);
11746 			}
11747 			else
11748 			{
11749 				request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBarrier);
11750 			}
11751 		}
11752 
11753 		if (execution_scope != ScopeSubgroup && get_entry_point().model == ExecutionModelTessellationControl)
11754 		{
			// Tessellation control shaders only have barrier(), and it implies memory barriers.
11756 			if (opcode == OpControlBarrier)
11757 				statement("barrier();");
11758 			break;
11759 		}
11760 
11761 		// We only care about these flags, acquire/release and friends are not relevant to GLSL.
11762 		semantics = mask_relevant_memory_semantics(semantics);
11763 
11764 		if (opcode == OpMemoryBarrier)
11765 		{
11766 			// If we are a memory barrier, and the next instruction is a control barrier, check if that memory barrier
11767 			// does what we need, so we avoid redundant barriers.
11768 			const Instruction *next = get_next_instruction_in_block(instruction);
11769 			if (next && next->op == OpControlBarrier)
11770 			{
11771 				auto *next_ops = stream(*next);
11772 				uint32_t next_memory = evaluate_constant_u32(next_ops[1]);
11773 				uint32_t next_semantics = evaluate_constant_u32(next_ops[2]);
11774 				next_semantics = mask_relevant_memory_semantics(next_semantics);
11775 
11776 				bool memory_scope_covered = false;
11777 				if (next_memory == memory)
11778 					memory_scope_covered = true;
11779 				else if (next_semantics == MemorySemanticsWorkgroupMemoryMask)
11780 				{
11781 					// If we only care about workgroup memory, either Device or Workgroup scope is fine,
11782 					// scope does not have to match.
11783 					if ((next_memory == ScopeDevice || next_memory == ScopeWorkgroup) &&
11784 					    (memory == ScopeDevice || memory == ScopeWorkgroup))
11785 					{
11786 						memory_scope_covered = true;
11787 					}
11788 				}
11789 				else if (memory == ScopeWorkgroup && next_memory == ScopeDevice)
11790 				{
11791 					// The control barrier has device scope, but the memory barrier just has workgroup scope.
11792 					memory_scope_covered = true;
11793 				}
11794 
11795 				// If we have the same memory scope, and all memory types are covered, we're good.
11796 				if (memory_scope_covered && (semantics & next_semantics) == semantics)
11797 					break;
11798 			}
11799 		}
11800 
11801 		// We are synchronizing some memory or syncing execution,
11802 		// so we cannot forward any loads beyond the memory barrier.
11803 		if (semantics || opcode == OpControlBarrier)
11804 		{
11805 			assert(current_emitting_block);
11806 			flush_control_dependent_expressions(current_emitting_block->self);
11807 			flush_all_active_variables();
11808 		}
11809 
11810 		if (memory == ScopeWorkgroup) // Only need to consider memory within a group
11811 		{
11812 			if (semantics == MemorySemanticsWorkgroupMemoryMask)
11813 			{
11814 				// OpControlBarrier implies a memory barrier for shared memory as well.
11815 				bool implies_shared_barrier = opcode == OpControlBarrier && execution_scope == ScopeWorkgroup;
11816 				if (!implies_shared_barrier)
11817 					statement("memoryBarrierShared();");
11818 			}
11819 			else if (semantics != 0)
11820 				statement("groupMemoryBarrier();");
11821 		}
11822 		else if (memory == ScopeSubgroup)
11823 		{
11824 			const uint32_t all_barriers =
11825 			    MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | MemorySemanticsImageMemoryMask;
11826 
11827 			if (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask))
11828 			{
				// These are not relevant for GLSL, but assume they mean subgroupMemoryBarrier().
				// subgroupMemoryBarrier() does everything, so no need to test anything else.
11831 				statement("subgroupMemoryBarrier();");
11832 			}
11833 			else if ((semantics & all_barriers) == all_barriers)
11834 			{
11835 				// Short-hand instead of emitting 3 barriers.
11836 				statement("subgroupMemoryBarrier();");
11837 			}
11838 			else
11839 			{
11840 				// Pick out individual barriers.
11841 				if (semantics & MemorySemanticsWorkgroupMemoryMask)
11842 					statement("subgroupMemoryBarrierShared();");
11843 				if (semantics & MemorySemanticsUniformMemoryMask)
11844 					statement("subgroupMemoryBarrierBuffer();");
11845 				if (semantics & MemorySemanticsImageMemoryMask)
11846 					statement("subgroupMemoryBarrierImage();");
11847 			}
11848 		}
11849 		else
11850 		{
11851 			const uint32_t all_barriers =
11852 			    MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | MemorySemanticsImageMemoryMask;
11853 
11854 			if (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask))
11855 			{
11856 				// These are not relevant for GLSL, but assume it means memoryBarrier().
11857 				// memoryBarrier() does everything, so no need to test anything else.
11858 				statement("memoryBarrier();");
11859 			}
11860 			else if ((semantics & all_barriers) == all_barriers)
11861 			{
				// Short-hand instead of emitting 3 barriers.
11863 				statement("memoryBarrier();");
11864 			}
11865 			else
11866 			{
11867 				// Pick out individual barriers.
11868 				if (semantics & MemorySemanticsWorkgroupMemoryMask)
11869 					statement("memoryBarrierShared();");
11870 				if (semantics & MemorySemanticsUniformMemoryMask)
11871 					statement("memoryBarrierBuffer();");
11872 				if (semantics & MemorySemanticsImageMemoryMask)
11873 					statement("memoryBarrierImage();");
11874 			}
11875 		}
11876 
11877 		if (opcode == OpControlBarrier)
11878 		{
11879 			if (execution_scope == ScopeSubgroup)
11880 				statement("subgroupBarrier();");
11881 			else
11882 				statement("barrier();");
11883 		}
11884 		break;
11885 	}
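	// Illustrative examples of the barrier lowering above (not exhaustive):
	//   OpControlBarrier (execution/memory = Workgroup, semantics = WorkgroupMemory) -> "barrier();"
	//     (barrier() already implies the shared-memory barrier, so memoryBarrierShared() is skipped).
	//   OpMemoryBarrier (memory = Device, semantics = UniformMemory) -> "memoryBarrierBuffer();"
	//   OpMemoryBarrier (memory = Subgroup, semantics = ImageMemory) -> "subgroupMemoryBarrierImage();"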
11886 
11887 	case OpExtInst:
11888 	{
11889 		uint32_t extension_set = ops[2];
11890 
11891 		if (get<SPIRExtension>(extension_set).ext == SPIRExtension::GLSL)
11892 		{
11893 			emit_glsl_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
11894 		}
11895 		else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_shader_ballot)
11896 		{
11897 			emit_spv_amd_shader_ballot_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
11898 		}
11899 		else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_shader_explicit_vertex_parameter)
11900 		{
11901 			emit_spv_amd_shader_explicit_vertex_parameter_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
11902 		}
11903 		else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_shader_trinary_minmax)
11904 		{
11905 			emit_spv_amd_shader_trinary_minmax_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
11906 		}
11907 		else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_gcn_shader)
11908 		{
11909 			emit_spv_amd_gcn_shader_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
11910 		}
11911 		else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_debug_info)
11912 		{
11913 			break; // Ignore SPIR-V debug information extended instructions.
11914 		}
11915 		else
11916 		{
11917 			statement("// unimplemented ext op ", instruction.op);
11918 			break;
11919 		}
11920 
11921 		break;
11922 	}
11923 
11924 	// Legacy sub-group stuff ...
11925 	case OpSubgroupBallotKHR:
11926 	{
11927 		uint32_t result_type = ops[0];
11928 		uint32_t id = ops[1];
11929 		string expr;
11930 		expr = join("uvec4(unpackUint2x32(ballotARB(" + to_expression(ops[2]) + ")), 0u, 0u)");
11931 		emit_op(result_type, id, expr, should_forward(ops[2]));
11932 
11933 		require_extension_internal("GL_ARB_shader_ballot");
11934 		inherit_expression_dependencies(id, ops[2]);
11935 		register_control_dependent_expression(ops[1]);
11936 		break;
11937 	}
11938 
11939 	case OpSubgroupFirstInvocationKHR:
11940 	{
11941 		uint32_t result_type = ops[0];
11942 		uint32_t id = ops[1];
11943 		emit_unary_func_op(result_type, id, ops[2], "readFirstInvocationARB");
11944 
11945 		require_extension_internal("GL_ARB_shader_ballot");
11946 		register_control_dependent_expression(ops[1]);
11947 		break;
11948 	}
11949 
11950 	case OpSubgroupReadInvocationKHR:
11951 	{
11952 		uint32_t result_type = ops[0];
11953 		uint32_t id = ops[1];
11954 		emit_binary_func_op(result_type, id, ops[2], ops[3], "readInvocationARB");
11955 
11956 		require_extension_internal("GL_ARB_shader_ballot");
11957 		register_control_dependent_expression(ops[1]);
11958 		break;
11959 	}
11960 
11961 	case OpSubgroupAllKHR:
11962 	{
11963 		uint32_t result_type = ops[0];
11964 		uint32_t id = ops[1];
11965 		emit_unary_func_op(result_type, id, ops[2], "allInvocationsARB");
11966 
11967 		require_extension_internal("GL_ARB_shader_group_vote");
11968 		register_control_dependent_expression(ops[1]);
11969 		break;
11970 	}
11971 
11972 	case OpSubgroupAnyKHR:
11973 	{
11974 		uint32_t result_type = ops[0];
11975 		uint32_t id = ops[1];
11976 		emit_unary_func_op(result_type, id, ops[2], "anyInvocationARB");
11977 
11978 		require_extension_internal("GL_ARB_shader_group_vote");
11979 		register_control_dependent_expression(ops[1]);
11980 		break;
11981 	}
11982 
11983 	case OpSubgroupAllEqualKHR:
11984 	{
11985 		uint32_t result_type = ops[0];
11986 		uint32_t id = ops[1];
11987 		emit_unary_func_op(result_type, id, ops[2], "allInvocationsEqualARB");
11988 
11989 		require_extension_internal("GL_ARB_shader_group_vote");
11990 		register_control_dependent_expression(ops[1]);
11991 		break;
11992 	}
11993 
11994 	case OpGroupIAddNonUniformAMD:
11995 	case OpGroupFAddNonUniformAMD:
11996 	{
11997 		uint32_t result_type = ops[0];
11998 		uint32_t id = ops[1];
11999 		emit_unary_func_op(result_type, id, ops[4], "addInvocationsNonUniformAMD");
12000 
12001 		require_extension_internal("GL_AMD_shader_ballot");
12002 		register_control_dependent_expression(ops[1]);
12003 		break;
12004 	}
12005 
12006 	case OpGroupFMinNonUniformAMD:
12007 	case OpGroupUMinNonUniformAMD:
12008 	case OpGroupSMinNonUniformAMD:
12009 	{
12010 		uint32_t result_type = ops[0];
12011 		uint32_t id = ops[1];
12012 		emit_unary_func_op(result_type, id, ops[4], "minInvocationsNonUniformAMD");
12013 
12014 		require_extension_internal("GL_AMD_shader_ballot");
12015 		register_control_dependent_expression(ops[1]);
12016 		break;
12017 	}
12018 
12019 	case OpGroupFMaxNonUniformAMD:
12020 	case OpGroupUMaxNonUniformAMD:
12021 	case OpGroupSMaxNonUniformAMD:
12022 	{
12023 		uint32_t result_type = ops[0];
12024 		uint32_t id = ops[1];
12025 		emit_unary_func_op(result_type, id, ops[4], "maxInvocationsNonUniformAMD");
12026 
12027 		require_extension_internal("GL_AMD_shader_ballot");
12028 		register_control_dependent_expression(ops[1]);
12029 		break;
12030 	}
12031 
12032 	case OpFragmentMaskFetchAMD:
12033 	{
12034 		auto &type = expression_type(ops[2]);
12035 		uint32_t result_type = ops[0];
12036 		uint32_t id = ops[1];
12037 
12038 		if (type.image.dim == spv::DimSubpassData)
12039 		{
12040 			emit_unary_func_op(result_type, id, ops[2], "fragmentMaskFetchAMD");
12041 		}
12042 		else
12043 		{
12044 			emit_binary_func_op(result_type, id, ops[2], ops[3], "fragmentMaskFetchAMD");
12045 		}
12046 
12047 		require_extension_internal("GL_AMD_shader_fragment_mask");
12048 		break;
12049 	}
12050 
12051 	case OpFragmentFetchAMD:
12052 	{
12053 		auto &type = expression_type(ops[2]);
12054 		uint32_t result_type = ops[0];
12055 		uint32_t id = ops[1];
12056 
12057 		if (type.image.dim == spv::DimSubpassData)
12058 		{
12059 			emit_binary_func_op(result_type, id, ops[2], ops[4], "fragmentFetchAMD");
12060 		}
12061 		else
12062 		{
12063 			emit_trinary_func_op(result_type, id, ops[2], ops[3], ops[4], "fragmentFetchAMD");
12064 		}
12065 
12066 		require_extension_internal("GL_AMD_shader_fragment_mask");
12067 		break;
12068 	}
12069 
12070 	// Vulkan 1.1 sub-group stuff ...
12071 	case OpGroupNonUniformElect:
12072 	case OpGroupNonUniformBroadcast:
12073 	case OpGroupNonUniformBroadcastFirst:
12074 	case OpGroupNonUniformBallot:
12075 	case OpGroupNonUniformInverseBallot:
12076 	case OpGroupNonUniformBallotBitExtract:
12077 	case OpGroupNonUniformBallotBitCount:
12078 	case OpGroupNonUniformBallotFindLSB:
12079 	case OpGroupNonUniformBallotFindMSB:
12080 	case OpGroupNonUniformShuffle:
12081 	case OpGroupNonUniformShuffleXor:
12082 	case OpGroupNonUniformShuffleUp:
12083 	case OpGroupNonUniformShuffleDown:
12084 	case OpGroupNonUniformAll:
12085 	case OpGroupNonUniformAny:
12086 	case OpGroupNonUniformAllEqual:
12087 	case OpGroupNonUniformFAdd:
12088 	case OpGroupNonUniformIAdd:
12089 	case OpGroupNonUniformFMul:
12090 	case OpGroupNonUniformIMul:
12091 	case OpGroupNonUniformFMin:
12092 	case OpGroupNonUniformFMax:
12093 	case OpGroupNonUniformSMin:
12094 	case OpGroupNonUniformSMax:
12095 	case OpGroupNonUniformUMin:
12096 	case OpGroupNonUniformUMax:
12097 	case OpGroupNonUniformBitwiseAnd:
12098 	case OpGroupNonUniformBitwiseOr:
12099 	case OpGroupNonUniformBitwiseXor:
12100 	case OpGroupNonUniformQuadSwap:
12101 	case OpGroupNonUniformQuadBroadcast:
12102 		emit_subgroup_op(instruction);
12103 		break;
12104 
12105 	case OpFUnordEqual:
12106 	case OpFUnordNotEqual:
12107 	case OpFUnordLessThan:
12108 	case OpFUnordGreaterThan:
12109 	case OpFUnordLessThanEqual:
12110 	case OpFUnordGreaterThanEqual:
12111 	{
12112 		// GLSL doesn't specify if floating point comparisons are ordered or unordered,
12113 		// but glslang always emits ordered floating point compares for GLSL.
12114 		// To get unordered compares, we can test the opposite thing and invert the result.
12115 		// This way, we force true when there is any NaN present.
12116 		uint32_t op0 = ops[2];
12117 		uint32_t op1 = ops[3];
12118 
12119 		string expr;
12120 		if (expression_type(op0).vecsize > 1)
12121 		{
12122 			const char *comp_op = nullptr;
12123 			switch (opcode)
12124 			{
12125 			case OpFUnordEqual:
12126 				comp_op = "notEqual";
12127 				break;
12128 
12129 			case OpFUnordNotEqual:
12130 				comp_op = "equal";
12131 				break;
12132 
12133 			case OpFUnordLessThan:
12134 				comp_op = "greaterThanEqual";
12135 				break;
12136 
12137 			case OpFUnordLessThanEqual:
12138 				comp_op = "greaterThan";
12139 				break;
12140 
12141 			case OpFUnordGreaterThan:
12142 				comp_op = "lessThanEqual";
12143 				break;
12144 
12145 			case OpFUnordGreaterThanEqual:
12146 				comp_op = "lessThan";
12147 				break;
12148 
12149 			default:
12150 				assert(0);
12151 				break;
12152 			}
12153 
12154 			expr = join("not(", comp_op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), "))");
12155 		}
12156 		else
12157 		{
12158 			const char *comp_op = nullptr;
12159 			switch (opcode)
12160 			{
12161 			case OpFUnordEqual:
12162 				comp_op = " != ";
12163 				break;
12164 
12165 			case OpFUnordNotEqual:
12166 				comp_op = " == ";
12167 				break;
12168 
12169 			case OpFUnordLessThan:
12170 				comp_op = " >= ";
12171 				break;
12172 
12173 			case OpFUnordLessThanEqual:
12174 				comp_op = " > ";
12175 				break;
12176 
12177 			case OpFUnordGreaterThan:
12178 				comp_op = " <= ";
12179 				break;
12180 
12181 			case OpFUnordGreaterThanEqual:
12182 				comp_op = " < ";
12183 				break;
12184 
12185 			default:
12186 				assert(0);
12187 				break;
12188 			}
12189 
12190 			expr = join("!(", to_enclosed_unpacked_expression(op0), comp_op, to_enclosed_unpacked_expression(op1), ")");
12191 		}
12192 
12193 		emit_op(ops[0], ops[1], expr, should_forward(op0) && should_forward(op1));
12194 		inherit_expression_dependencies(ops[1], op0);
12195 		inherit_expression_dependencies(ops[1], op1);
12196 		break;
12197 	}
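	// For example, a scalar OpFUnordLessThan a, b becomes "!(a >= b)", and the vector form becomes
	// "not(greaterThanEqual(a, b))"; both evaluate to true whenever either operand is NaN, which is
	// exactly the unordered-compare semantics.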
12198 
12199 	case OpReportIntersectionKHR:
12200 		// NV is same opcode.
12201 		forced_temporaries.insert(ops[1]);
12202 		if (ray_tracing_is_khr)
12203 			GLSL_BFOP(reportIntersectionEXT);
12204 		else
12205 			GLSL_BFOP(reportIntersectionNV);
12206 		flush_control_dependent_expressions(current_emitting_block->self);
12207 		break;
12208 	case OpIgnoreIntersectionNV:
12209 		// KHR variant is a terminator.
12210 		statement("ignoreIntersectionNV();");
12211 		flush_control_dependent_expressions(current_emitting_block->self);
12212 		break;
12213 	case OpTerminateRayNV:
12214 		// KHR variant is a terminator.
12215 		statement("terminateRayNV();");
12216 		flush_control_dependent_expressions(current_emitting_block->self);
12217 		break;
12218 	case OpTraceNV:
12219 		statement("traceNV(", to_expression(ops[0]), ", ", to_expression(ops[1]), ", ", to_expression(ops[2]), ", ",
12220 		          to_expression(ops[3]), ", ", to_expression(ops[4]), ", ", to_expression(ops[5]), ", ",
12221 		          to_expression(ops[6]), ", ", to_expression(ops[7]), ", ", to_expression(ops[8]), ", ",
12222 		          to_expression(ops[9]), ", ", to_expression(ops[10]), ");");
12223 		flush_control_dependent_expressions(current_emitting_block->self);
12224 		break;
12225 	case OpTraceRayKHR:
12226 		if (!has_decoration(ops[10], DecorationLocation))
12227 			SPIRV_CROSS_THROW("A memory declaration object must be used in TraceRayKHR.");
12228 		statement("traceRayEXT(", to_expression(ops[0]), ", ", to_expression(ops[1]), ", ", to_expression(ops[2]), ", ",
12229 		          to_expression(ops[3]), ", ", to_expression(ops[4]), ", ", to_expression(ops[5]), ", ",
12230 		          to_expression(ops[6]), ", ", to_expression(ops[7]), ", ", to_expression(ops[8]), ", ",
12231 		          to_expression(ops[9]), ", ", get_decoration(ops[10], DecorationLocation), ");");
12232 		flush_control_dependent_expressions(current_emitting_block->self);
12233 		break;
12234 	case OpExecuteCallableNV:
12235 		statement("executeCallableNV(", to_expression(ops[0]), ", ", to_expression(ops[1]), ");");
12236 		flush_control_dependent_expressions(current_emitting_block->self);
12237 		break;
12238 	case OpExecuteCallableKHR:
12239 		if (!has_decoration(ops[1], DecorationLocation))
12240 			SPIRV_CROSS_THROW("A memory declaration object must be used in ExecuteCallableKHR.");
12241 		statement("executeCallableEXT(", to_expression(ops[0]), ", ", get_decoration(ops[1], DecorationLocation), ");");
12242 		flush_control_dependent_expressions(current_emitting_block->self);
12243 		break;
12244 
12245 	case OpConvertUToAccelerationStructureKHR:
12246 		GLSL_UFOP(accelerationStructureEXT);
12247 		break;
12248 
12249 	case OpConvertUToPtr:
12250 	{
12251 		auto &type = get<SPIRType>(ops[0]);
12252 		if (type.storage != StorageClassPhysicalStorageBufferEXT)
12253 			SPIRV_CROSS_THROW("Only StorageClassPhysicalStorageBufferEXT is supported by OpConvertUToPtr.");
12254 
12255 		auto op = type_to_glsl(type);
12256 		emit_unary_func_op(ops[0], ops[1], ops[2], op.c_str());
12257 		break;
12258 	}
12259 
12260 	case OpConvertPtrToU:
12261 	{
12262 		auto &type = get<SPIRType>(ops[0]);
12263 		auto &ptr_type = expression_type(ops[2]);
12264 		if (ptr_type.storage != StorageClassPhysicalStorageBufferEXT)
12265 			SPIRV_CROSS_THROW("Only StorageClassPhysicalStorageBufferEXT is supported by OpConvertPtrToU.");
12266 
12267 		auto op = type_to_glsl(type);
12268 		emit_unary_func_op(ops[0], ops[1], ops[2], op.c_str());
12269 		break;
12270 	}
12271 
12272 	case OpUndef:
12273 		// Undefined value has been declared.
12274 		break;
12275 
12276 	case OpLine:
12277 	{
12278 		emit_line_directive(ops[0], ops[1]);
12279 		break;
12280 	}
12281 
12282 	case OpNoLine:
12283 		break;
12284 
12285 	case OpDemoteToHelperInvocationEXT:
12286 		if (!options.vulkan_semantics)
12287 			SPIRV_CROSS_THROW("GL_EXT_demote_to_helper_invocation is only supported in Vulkan GLSL.");
12288 		require_extension_internal("GL_EXT_demote_to_helper_invocation");
12289 		statement(backend.demote_literal, ";");
12290 		break;
12291 
12292 	case OpIsHelperInvocationEXT:
12293 		if (!options.vulkan_semantics)
12294 			SPIRV_CROSS_THROW("GL_EXT_demote_to_helper_invocation is only supported in Vulkan GLSL.");
12295 		require_extension_internal("GL_EXT_demote_to_helper_invocation");
12296 		emit_op(ops[0], ops[1], "helperInvocationEXT()", false);
12297 		break;
12298 
12299 	case OpBeginInvocationInterlockEXT:
12300 		// If the interlock is complex, we emit this elsewhere.
12301 		if (!interlocked_is_complex)
12302 		{
12303 			if (options.es)
12304 				statement("beginInvocationInterlockNV();");
12305 			else
12306 				statement("beginInvocationInterlockARB();");
12307 
12308 			flush_all_active_variables();
12309 			// Make sure forwarding doesn't propagate outside interlock region.
12310 		}
12311 		break;
12312 
12313 	case OpEndInvocationInterlockEXT:
12314 		// If the interlock is complex, we emit this elsewhere.
12315 		if (!interlocked_is_complex)
12316 		{
12317 			if (options.es)
12318 				statement("endInvocationInterlockNV();");
12319 			else
12320 				statement("endInvocationInterlockARB();");
12321 
12322 			flush_all_active_variables();
12323 			// Make sure forwarding doesn't propagate outside interlock region.
12324 		}
12325 		break;
12326 
12327 	default:
12328 		statement("// unimplemented op ", instruction.op);
12329 		break;
12330 	}
12331 }
12332 
12333 // Appends function arguments, mapped from global variables, beyond the specified arg index.
12334 // This is used when a function call uses fewer arguments than the function defines.
12335 // This situation may occur if the function signature has been dynamically modified to
12336 // extract global variables referenced from within the function, and convert them to
12337 // function arguments. This is necessary for shader languages that do not support global
12338 // access to shader input content from within a function (e.g. Metal). Each additional
12339 // function arg uses the name of the global variable. Function nesting will modify the
12340 // functions and function calls all the way up the nesting chain.
12341 void CompilerGLSL::append_global_func_args(const SPIRFunction &func, uint32_t index, SmallVector<string> &arglist)
12342 {
12343 	auto &args = func.arguments;
12344 	uint32_t arg_cnt = uint32_t(args.size());
12345 	for (uint32_t arg_idx = index; arg_idx < arg_cnt; arg_idx++)
12346 	{
12347 		auto &arg = args[arg_idx];
12348 		assert(arg.alias_global_variable);
12349 
12350 		// If the underlying variable needs to be declared
12351 		// (i.e. a local variable with deferred declaration), do so now.
12352 		uint32_t var_id = get<SPIRVariable>(arg.id).basevariable;
12353 		if (var_id)
12354 			flush_variable_declaration(var_id);
12355 
12356 		arglist.push_back(to_func_call_arg(arg, arg.id));
12357 	}
12358 }
12359 
12360 string CompilerGLSL::to_member_name(const SPIRType &type, uint32_t index)
12361 {
12362 	if (type.type_alias != TypeID(0) &&
12363 	    !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked))
12364 	{
12365 		return to_member_name(get<SPIRType>(type.type_alias), index);
12366 	}
12367 
12368 	auto &memb = ir.meta[type.self].members;
12369 	if (index < memb.size() && !memb[index].alias.empty())
12370 		return memb[index].alias;
12371 	else
12372 		return join("_m", index);
12373 }
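// For example, an unnamed third member (index 2) falls back to "_m2"; a named member keeps its OpMemberName alias.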
12374 
12375 string CompilerGLSL::to_member_reference(uint32_t, const SPIRType &type, uint32_t index, bool)
12376 {
12377 	return join(".", to_member_name(type, index));
12378 }
12379 
12380 string CompilerGLSL::to_multi_member_reference(const SPIRType &type, const SmallVector<uint32_t> &indices)
12381 {
12382 	string ret;
12383 	auto *member_type = &type;
12384 	for (auto &index : indices)
12385 	{
12386 		ret += join(".", to_member_name(*member_type, index));
12387 		member_type = &get<SPIRType>(member_type->member_types[index]);
12388 	}
12389 	return ret;
12390 }
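// Illustrative example: indices { 0, 2 } on a nested struct produce something like "._m0._m2"
// (or the corresponding member aliases when OpMemberName is present).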
12391 
12392 void CompilerGLSL::add_member_name(SPIRType &type, uint32_t index)
12393 {
12394 	auto &memb = ir.meta[type.self].members;
12395 	if (index < memb.size() && !memb[index].alias.empty())
12396 	{
12397 		auto &name = memb[index].alias;
12398 		if (name.empty())
12399 			return;
12400 
12401 		ParsedIR::sanitize_identifier(name, true, true);
12402 		update_name_cache(type.member_name_cache, name);
12403 	}
12404 }
12405 
12406 // Checks whether the ID is a row_major matrix that requires conversion before use
12407 bool CompilerGLSL::is_non_native_row_major_matrix(uint32_t id)
12408 {
12409 	// Natively supported row-major matrices do not need to be converted.
12410 	// Legacy targets do not support row major.
12411 	if (backend.native_row_major_matrix && !is_legacy())
12412 		return false;
12413 
12414 	auto *e = maybe_get<SPIRExpression>(id);
12415 	if (e)
12416 		return e->need_transpose;
12417 	else
12418 		return has_decoration(id, DecorationRowMajor);
12419 }
12420 
12421 // Checks whether the member is a row_major matrix that requires conversion before use
12422 bool CompilerGLSL::member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index)
12423 {
12424 	// Natively supported row-major matrices do not need to be converted.
12425 	if (backend.native_row_major_matrix && !is_legacy())
12426 		return false;
12427 
12428 	// Non-matrix or column-major matrix types do not need to be converted.
12429 	if (!has_member_decoration(type.self, index, DecorationRowMajor))
12430 		return false;
12431 
12432 	// Only square row-major matrices can be converted at this time.
12433 	// Converting non-square matrices will require defining a custom GLSL function that
12434 	// swaps matrix elements while retaining the original dimensional form of the matrix.
12435 	const auto mbr_type = get<SPIRType>(type.member_types[index]);
12436 	if (mbr_type.columns != mbr_type.vecsize)
12437 		SPIRV_CROSS_THROW("Row-major matrices must be square on this platform.");
12438 
12439 	return true;
12440 }
12441 
12442 // Checks if we need to remap physical type IDs when declaring the type in a buffer.
12443 bool CompilerGLSL::member_is_remapped_physical_type(const SPIRType &type, uint32_t index) const
12444 {
12445 	return has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPhysicalTypeID);
12446 }
12447 
12448 // Checks whether the member is in packed data type, that might need to be unpacked.
12449 bool CompilerGLSL::member_is_packed_physical_type(const SPIRType &type, uint32_t index) const
12450 {
12451 	return has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPhysicalTypePacked);
12452 }
12453 
12454 // Wraps the expression string in a function call that converts the
12455 // row_major matrix result of the expression to a column_major matrix.
12456 // Base implementation uses the standard library transpose() function.
12457 // Subclasses may override to use a different function.
12458 string CompilerGLSL::convert_row_major_matrix(string exp_str, const SPIRType &exp_type, uint32_t /* physical_type_id */,
12459                                               bool /*is_packed*/)
12460 {
12461 	strip_enclosed_expression(exp_str);
12462 	if (!is_matrix(exp_type))
12463 	{
12464 		auto column_index = exp_str.find_last_of('[');
12465 		if (column_index == string::npos)
12466 			return exp_str;
12467 
12468 		auto column_expr = exp_str.substr(column_index);
12469 		exp_str.resize(column_index);
12470 
12471 		auto transposed_expr = type_to_glsl_constructor(exp_type) + "(";
12472 
12473 		// Loading a column from a row-major matrix. Unroll the load.
12474 		for (uint32_t c = 0; c < exp_type.vecsize; c++)
12475 		{
12476 			transposed_expr += join(exp_str, '[', c, ']', column_expr);
12477 			if (c + 1 < exp_type.vecsize)
12478 				transposed_expr += ", ";
12479 		}
12480 
12481 		transposed_expr += ")";
12482 		return transposed_expr;
12483 	}
12484 	else if (options.version < 120)
12485 	{
12486 		// GLSL 110, ES 100 do not have transpose(), so emulate it.  Note that
12487 		// these GLSL versions do not support non-square matrices.
12488 		if (exp_type.vecsize == 2 && exp_type.columns == 2)
12489 		{
12490 			if (!requires_transpose_2x2)
12491 			{
12492 				requires_transpose_2x2 = true;
12493 				force_recompile();
12494 			}
12495 		}
12496 		else if (exp_type.vecsize == 3 && exp_type.columns == 3)
12497 		{
12498 			if (!requires_transpose_3x3)
12499 			{
12500 				requires_transpose_3x3 = true;
12501 				force_recompile();
12502 			}
12503 		}
12504 		else if (exp_type.vecsize == 4 && exp_type.columns == 4)
12505 		{
12506 			if (!requires_transpose_4x4)
12507 			{
12508 				requires_transpose_4x4 = true;
12509 				force_recompile();
12510 			}
12511 		}
12512 		else
12513 			SPIRV_CROSS_THROW("Non-square matrices are not supported in legacy GLSL, cannot transpose.");
12514 		return join("spvTranspose(", exp_str, ")");
12515 	}
12516 	else
12517 		return join("transpose(", exp_str, ")");
12518 }
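// Illustrative examples of the conversion above:
//   A whole row-major matrix M -> "transpose(M)", or "spvTranspose(M)" on pre-1.20 targets.
//   A column load "M[1]" from a 3x3 row-major matrix -> "vec3(M[0][1], M[1][1], M[2][1])" (the load is unrolled).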
12519 
12520 string CompilerGLSL::variable_decl(const SPIRType &type, const string &name, uint32_t id)
12521 {
12522 	string type_name = type_to_glsl(type, id);
12523 	remap_variable_type_name(type, name, type_name);
12524 	return join(type_name, " ", name, type_to_array_glsl(type));
12525 }
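// For example, a 4-element array of vec4 named "colors" comes out as "vec4 colors[4]".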
12526 
12527 // Emit a structure member. Subclasses may override to modify output,
12528 // or to dynamically add a padding member if needed.
12529 void CompilerGLSL::emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index,
12530                                       const string &qualifier, uint32_t)
12531 {
12532 	auto &membertype = get<SPIRType>(member_type_id);
12533 
12534 	Bitset memberflags;
12535 	auto &memb = ir.meta[type.self].members;
12536 	if (index < memb.size())
12537 		memberflags = memb[index].decoration_flags;
12538 
12539 	string qualifiers;
12540 	bool is_block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) ||
12541 	                ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
12542 
12543 	if (is_block)
12544 		qualifiers = to_interpolation_qualifiers(memberflags);
12545 
12546 	statement(layout_for_member(type, index), qualifiers, qualifier, flags_to_qualifiers_glsl(membertype, memberflags),
12547 	          variable_decl(membertype, to_member_name(type, index)), ";");
12548 }
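// A typical emitted member might look like (illustrative only):
//   "layout(offset = 16) mediump vec3 normal;"
// depending on the packing layout, interpolation qualifiers and precision in play.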
12549 
12550 void CompilerGLSL::emit_struct_padding_target(const SPIRType &)
12551 {
12552 }
12553 
12554 const char *CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, const Bitset &flags)
12555 {
12556 	// GL_EXT_buffer_reference variables can be marked as restrict.
12557 	if (flags.get(DecorationRestrictPointerEXT))
12558 		return "restrict ";
12559 
12560 	// Structs do not have precision qualifiers, neither do doubles (desktop only anyway, so no mediump/highp).
12561 	if (type.basetype != SPIRType::Float && type.basetype != SPIRType::Int && type.basetype != SPIRType::UInt &&
12562 	    type.basetype != SPIRType::Image && type.basetype != SPIRType::SampledImage &&
12563 	    type.basetype != SPIRType::Sampler)
12564 		return "";
12565 
12566 	if (options.es)
12567 	{
12568 		auto &execution = get_entry_point();
12569 
12570 		if (flags.get(DecorationRelaxedPrecision))
12571 		{
12572 			bool implied_fmediump = type.basetype == SPIRType::Float &&
12573 			                        options.fragment.default_float_precision == Options::Mediump &&
12574 			                        execution.model == ExecutionModelFragment;
12575 
12576 			bool implied_imediump = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) &&
12577 			                        options.fragment.default_int_precision == Options::Mediump &&
12578 			                        execution.model == ExecutionModelFragment;
12579 
12580 			return implied_fmediump || implied_imediump ? "" : "mediump ";
12581 		}
12582 		else
12583 		{
12584 			bool implied_fhighp =
12585 			    type.basetype == SPIRType::Float && ((options.fragment.default_float_precision == Options::Highp &&
12586 			                                          execution.model == ExecutionModelFragment) ||
12587 			                                         (execution.model != ExecutionModelFragment));
12588 
12589 			bool implied_ihighp = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) &&
12590 			                      ((options.fragment.default_int_precision == Options::Highp &&
12591 			                        execution.model == ExecutionModelFragment) ||
12592 			                       (execution.model != ExecutionModelFragment));
12593 
12594 			return implied_fhighp || implied_ihighp ? "" : "highp ";
12595 		}
12596 	}
12597 	else if (backend.allow_precision_qualifiers)
12598 	{
12599 		// Vulkan GLSL supports precision qualifiers, even in desktop profiles, which is convenient.
12600 		// The default is highp however, so only emit mediump in the rare case that a shader has these.
12601 		if (flags.get(DecorationRelaxedPrecision))
12602 			return "mediump ";
12603 		else
12604 			return "";
12605 	}
12606 	else
12607 		return "";
12608 }
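// For example, on ES targets: in a fragment shader whose default float precision is mediump,
// a RelaxedPrecision float returns "" (the default already covers it), while a full-precision
// float returns "highp ". On targets that merely allow precision qualifiers, only "mediump " is ever emitted here.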
12609 
12610 const char *CompilerGLSL::to_precision_qualifiers_glsl(uint32_t id)
12611 {
12612 	auto &type = expression_type(id);
12613 	bool use_precision_qualifiers = backend.allow_precision_qualifiers || options.es;
12614 	if (use_precision_qualifiers && (type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage))
12615 	{
12616 		// Force mediump for the sampler type. We cannot declare 16-bit or smaller image types.
12617 		auto &result_type = get<SPIRType>(type.image.type);
12618 		if (result_type.width < 32)
12619 			return "mediump ";
12620 	}
12621 	return flags_to_qualifiers_glsl(type, ir.meta[id].decoration.decoration_flags);
12622 }
12623 
12624 void CompilerGLSL::fixup_io_block_patch_qualifiers(const SPIRVariable &var)
12625 {
12626 	// Works around weird behavior in glslangValidator where
12627 	// a patch out block is translated to just block members getting the decoration.
12628 	// To make glslang not complain when we compile again, we have to transform this back to a case where
12629 	// the variable itself has Patch decoration, and not members.
12630 	auto &type = get<SPIRType>(var.basetype);
12631 	if (has_decoration(type.self, DecorationBlock))
12632 	{
12633 		uint32_t member_count = uint32_t(type.member_types.size());
12634 		for (uint32_t i = 0; i < member_count; i++)
12635 		{
12636 			if (has_member_decoration(type.self, i, DecorationPatch))
12637 			{
12638 				set_decoration(var.self, DecorationPatch);
12639 				break;
12640 			}
12641 		}
12642 
12643 		if (has_decoration(var.self, DecorationPatch))
12644 			for (uint32_t i = 0; i < member_count; i++)
12645 				unset_member_decoration(type.self, i, DecorationPatch);
12646 	}
12647 }
12648 
12649 string CompilerGLSL::to_qualifiers_glsl(uint32_t id)
12650 {
12651 	auto &flags = ir.meta[id].decoration.decoration_flags;
12652 	string res;
12653 
12654 	auto *var = maybe_get<SPIRVariable>(id);
12655 
12656 	if (var && var->storage == StorageClassWorkgroup && !backend.shared_is_implied)
12657 		res += "shared ";
12658 
12659 	res += to_interpolation_qualifiers(flags);
12660 	if (var)
12661 		res += to_storage_qualifiers_glsl(*var);
12662 
12663 	auto &type = expression_type(id);
12664 	if (type.image.dim != DimSubpassData && type.image.sampled == 2)
12665 	{
12666 		if (flags.get(DecorationCoherent))
12667 			res += "coherent ";
12668 		if (flags.get(DecorationRestrict))
12669 			res += "restrict ";
12670 		if (flags.get(DecorationNonWritable))
12671 			res += "readonly ";
12672 		if (flags.get(DecorationNonReadable))
12673 			res += "writeonly ";
12674 	}
12675 
12676 	res += to_precision_qualifiers_glsl(id);
12677 
12678 	return res;
12679 }
12680 
12681 string CompilerGLSL::argument_decl(const SPIRFunction::Parameter &arg)
12682 {
12683 	// glslangValidator seems to make all arguments pointers no matter what, which is rather bizarre ...
12684 	auto &type = expression_type(arg.id);
12685 	const char *direction = "";
12686 
12687 	if (type.pointer)
12688 	{
12689 		if (arg.write_count && arg.read_count)
12690 			direction = "inout ";
12691 		else if (arg.write_count)
12692 			direction = "out ";
12693 	}
12694 
12695 	return join(direction, to_qualifiers_glsl(arg.id), variable_decl(type, to_name(arg.id), arg.id));
12696 }
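// For example, a pointer argument that is both read and written might be declared as
// "inout highp vec4 v" (the precision qualifier appears only on targets that use them).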
12697 
12698 string CompilerGLSL::to_initializer_expression(const SPIRVariable &var)
12699 {
12700 	return to_expression(var.initializer);
12701 }
12702 
12703 string CompilerGLSL::to_zero_initialized_expression(uint32_t type_id)
12704 {
12705 #ifndef NDEBUG
12706 	auto &type = get<SPIRType>(type_id);
12707 	assert(type.storage == StorageClassPrivate || type.storage == StorageClassFunction ||
12708 	       type.storage == StorageClassGeneric);
12709 #endif
12710 	uint32_t id = ir.increase_bound_by(1);
12711 	ir.make_constant_null(id, type_id, false);
12712 	return constant_expression(get<SPIRConstant>(id));
12713 }
12714 
12715 bool CompilerGLSL::type_can_zero_initialize(const SPIRType &type) const
12716 {
12717 	if (type.pointer)
12718 		return false;
12719 
12720 	if (!type.array.empty() && options.flatten_multidimensional_arrays)
12721 		return false;
12722 
12723 	for (auto &literal : type.array_size_literal)
12724 		if (!literal)
12725 			return false;
12726 
12727 	for (auto &memb : type.member_types)
12728 		if (!type_can_zero_initialize(get<SPIRType>(memb)))
12729 			return false;
12730 
12731 	return true;
12732 }
12733 
12734 string CompilerGLSL::variable_decl(const SPIRVariable &variable)
12735 {
12736 	// Ignore the pointer type since GLSL doesn't have pointers.
12737 	auto &type = get_variable_data_type(variable);
12738 
12739 	if (type.pointer_depth > 1)
12740 		SPIRV_CROSS_THROW("Cannot declare pointer-to-pointer types.");
12741 
12742 	auto res = join(to_qualifiers_glsl(variable.self), variable_decl(type, to_name(variable.self), variable.self));
12743 
12744 	if (variable.loop_variable && variable.static_expression)
12745 	{
12746 		uint32_t expr = variable.static_expression;
12747 		if (ir.ids[expr].get_type() != TypeUndef)
12748 			res += join(" = ", to_expression(variable.static_expression));
12749 		else if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
12750 			res += join(" = ", to_zero_initialized_expression(get_variable_data_type_id(variable)));
12751 	}
12752 	else if (variable.initializer)
12753 	{
12754 		uint32_t expr = variable.initializer;
12755 		if (ir.ids[expr].get_type() != TypeUndef)
12756 			res += join(" = ", to_initializer_expression(variable));
12757 		else if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
12758 			res += join(" = ", to_zero_initialized_expression(get_variable_data_type_id(variable)));
12759 	}
12760 
12761 	return res;
12762 }
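// Illustrative results: a loop variable with a static expression becomes e.g. "int i = 0", and with
// force_zero_initialized_variables an uninitialized local becomes e.g. "vec4 v = vec4(0.0)".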
12763 
12764 const char *CompilerGLSL::to_pls_qualifiers_glsl(const SPIRVariable &variable)
12765 {
12766 	auto &flags = ir.meta[variable.self].decoration.decoration_flags;
12767 	if (flags.get(DecorationRelaxedPrecision))
12768 		return "mediump ";
12769 	else
12770 		return "highp ";
12771 }
12772 
12773 string CompilerGLSL::pls_decl(const PlsRemap &var)
12774 {
12775 	auto &variable = get<SPIRVariable>(var.id);
12776 
12777 	SPIRType type;
12778 	type.vecsize = pls_format_to_components(var.format);
12779 	type.basetype = pls_format_to_basetype(var.format);
12780 
12781 	return join(to_pls_layout(var.format), to_pls_qualifiers_glsl(variable), type_to_glsl(type), " ",
12782 	            to_name(variable.self));
12783 }
12784 
12785 uint32_t CompilerGLSL::to_array_size_literal(const SPIRType &type) const
12786 {
12787 	return to_array_size_literal(type, uint32_t(type.array.size() - 1));
12788 }
12789 
12790 uint32_t CompilerGLSL::to_array_size_literal(const SPIRType &type, uint32_t index) const
12791 {
12792 	assert(type.array.size() == type.array_size_literal.size());
12793 
12794 	if (type.array_size_literal[index])
12795 	{
12796 		return type.array[index];
12797 	}
12798 	else
12799 	{
12800 		// Use the default spec constant value.
12801 		// This is the best we can do.
12802 		return evaluate_constant_u32(type.array[index]);
12803 	}
12804 }
12805 
12806 string CompilerGLSL::to_array_size(const SPIRType &type, uint32_t index)
12807 {
12808 	assert(type.array.size() == type.array_size_literal.size());
12809 
12810 	auto &size = type.array[index];
12811 	if (!type.array_size_literal[index])
12812 		return to_expression(size);
12813 	else if (size)
12814 		return convert_to_string(size);
12815 	else if (!backend.unsized_array_supported)
12816 	{
12817 		// For runtime-sized arrays, we can work around
12818 		// lack of standard support for this by simply having
12819 		// a single element array.
12820 		//
12821 		// Runtime length arrays must always be the last element
12822 		// in an interface block.
12823 		return "1";
12824 	}
12825 	else
12826 		return "";
12827 }
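// For example, a runtime-sized array (the last SSBO member) yields "" on targets that support
// unsized arrays, or "1" as a workaround where they are not supported; a specialization-constant
// size is emitted as that constant's expression instead of a literal.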
12828 
12829 string CompilerGLSL::type_to_array_glsl(const SPIRType &type)
12830 {
12831 	if (type.pointer && type.storage == StorageClassPhysicalStorageBufferEXT && type.basetype != SPIRType::Struct)
12832 	{
12833 		// We are using a wrapped pointer type, and we should not emit any array declarations here.
12834 		return "";
12835 	}
12836 
12837 	if (type.array.empty())
12838 		return "";
12839 
12840 	if (options.flatten_multidimensional_arrays)
12841 	{
12842 		string res;
12843 		res += "[";
12844 		for (auto i = uint32_t(type.array.size()); i; i--)
12845 		{
12846 			res += enclose_expression(to_array_size(type, i - 1));
12847 			if (i > 1)
12848 				res += " * ";
12849 		}
12850 		res += "]";
12851 		return res;
12852 	}
12853 	else
12854 	{
12855 		if (type.array.size() > 1)
12856 		{
12857 			if (!options.es && options.version < 430)
12858 				require_extension_internal("GL_ARB_arrays_of_arrays");
12859 			else if (options.es && options.version < 310)
12860 				SPIRV_CROSS_THROW("Arrays of arrays not supported before ESSL version 310. "
12861 				                  "Try using --flatten-multidimensional-arrays or set "
12862 				                  "options.flatten_multidimensional_arrays to true.");
12863 		}
12864 
12865 		string res;
12866 		for (auto i = uint32_t(type.array.size()); i; i--)
12867 		{
12868 			res += "[";
12869 			res += to_array_size(type, i - 1);
12870 			res += "]";
12871 		}
12872 		return res;
12873 	}
12874 }
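// Illustrative output: a two-dimensional array sized A x B normally yields "[A][B]", while
// flatten_multidimensional_arrays collapses it into a single "[A * B]" subscript.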
12875 
12876 string CompilerGLSL::image_type_glsl(const SPIRType &type, uint32_t id)
12877 {
12878 	auto &imagetype = get<SPIRType>(type.image.type);
12879 	string res;
12880 
12881 	switch (imagetype.basetype)
12882 	{
12883 	case SPIRType::Int:
12884 	case SPIRType::Short:
12885 	case SPIRType::SByte:
12886 		res = "i";
12887 		break;
12888 	case SPIRType::UInt:
12889 	case SPIRType::UShort:
12890 	case SPIRType::UByte:
12891 		res = "u";
12892 		break;
12893 	default:
12894 		break;
12895 	}
12896 
12897 	// For half image types, we will force mediump for the sampler, and cast to f16 after any sampling operation.
12898 	// We cannot express a true half texture type in GLSL. Neither for short integer formats for that matter.
12899 
12900 	if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData && options.vulkan_semantics)
12901 		return res + "subpassInput" + (type.image.ms ? "MS" : "");
12902 	else if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData &&
12903 	         subpass_input_is_framebuffer_fetch(id))
12904 	{
12905 		SPIRType sampled_type = get<SPIRType>(type.image.type);
12906 		sampled_type.vecsize = 4;
12907 		return type_to_glsl(sampled_type);
12908 	}
12909 
12910 	// If we're emulating subpassInput with samplers, force sampler2D
12911 	// so we don't have to specify format.
12912 	if (type.basetype == SPIRType::Image && type.image.dim != DimSubpassData)
12913 	{
12914 		// Sampler buffers are always declared as samplerBuffer even though they might be separate images in the SPIR-V.
12915 		if (type.image.dim == DimBuffer && type.image.sampled == 1)
12916 			res += "sampler";
12917 		else
12918 			res += type.image.sampled == 2 ? "image" : "texture";
12919 	}
12920 	else
12921 		res += "sampler";
12922 
12923 	switch (type.image.dim)
12924 	{
12925 	case Dim1D:
12926 		res += "1D";
12927 		break;
12928 	case Dim2D:
12929 		res += "2D";
12930 		break;
12931 	case Dim3D:
12932 		res += "3D";
12933 		break;
12934 	case DimCube:
12935 		res += "Cube";
12936 		break;
12937 	case DimRect:
12938 		if (options.es)
12939 			SPIRV_CROSS_THROW("Rectangle textures are not supported on OpenGL ES.");
12940 
12941 		if (is_legacy_desktop())
12942 			require_extension_internal("GL_ARB_texture_rectangle");
12943 
12944 		res += "2DRect";
12945 		break;
12946 
12947 	case DimBuffer:
12948 		if (options.es && options.version < 320)
12949 			require_extension_internal("GL_OES_texture_buffer");
12950 		else if (!options.es && options.version < 300)
12951 			require_extension_internal("GL_EXT_texture_buffer_object");
12952 		res += "Buffer";
12953 		break;
12954 
12955 	case DimSubpassData:
12956 		res += "2D";
12957 		break;
12958 	default:
12959 		SPIRV_CROSS_THROW("Only 1D, 2D, 2DRect, 3D, Buffer, InputTarget and Cube textures supported.");
12960 	}
12961 
12962 	if (type.image.ms)
12963 		res += "MS";
12964 	if (type.image.arrayed)
12965 	{
12966 		if (is_legacy_desktop())
12967 			require_extension_internal("GL_EXT_texture_array");
12968 		res += "Array";
12969 	}
12970 
12971 	// "Shadow" state in GLSL only exists for samplers and combined image samplers.
12972 	if (((type.basetype == SPIRType::SampledImage) || (type.basetype == SPIRType::Sampler)) &&
12973 	    image_is_comparison(type, id))
12974 	{
12975 		res += "Shadow";
12976 	}
12977 
12978 	return res;
12979 }
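// Illustrative results: a uint-sampled 2D storage image becomes "uimage2D", a depth-comparison
// combined cube sampler becomes "samplerCubeShadow", and a Vulkan subpass input becomes
// "subpassInput" (with an "MS" suffix when multisampled).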
12980 
12981 string CompilerGLSL::type_to_glsl_constructor(const SPIRType &type)
12982 {
12983 	if (backend.use_array_constructor && type.array.size() > 1)
12984 	{
12985 		if (options.flatten_multidimensional_arrays)
12986 			SPIRV_CROSS_THROW("Cannot flatten constructors of multidimensional array constructors, "
12987 			                  "e.g. float[][]().");
12988 		else if (!options.es && options.version < 430)
12989 			require_extension_internal("GL_ARB_arrays_of_arrays");
12990 		else if (options.es && options.version < 310)
12991 			SPIRV_CROSS_THROW("Arrays of arrays not supported before ESSL version 310.");
12992 	}
12993 
12994 	auto e = type_to_glsl(type);
12995 	if (backend.use_array_constructor)
12996 	{
12997 		for (uint32_t i = 0; i < type.array.size(); i++)
12998 			e += "[]";
12999 	}
13000 	return e;
13001 }
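// For example, constructing a one-dimensional float array uses "float[](...)", while an array of
// arrays becomes "float[][](...)" (only emitted when the target supports arrays of arrays).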
13002 
13003 // The optional id parameter indicates the object whose type we are trying
13004 // to find the description for. Most type descriptions do not
13005 // depend on a specific object's use of that type.
13006 string CompilerGLSL::type_to_glsl(const SPIRType &type, uint32_t id)
13007 {
13008 	if (type.pointer && type.storage == StorageClassPhysicalStorageBufferEXT && type.basetype != SPIRType::Struct)
13009 	{
13010 		// Need to create a magic type name which compacts the entire type information.
13011 		string name = type_to_glsl(get_pointee_type(type));
13012 		for (size_t i = 0; i < type.array.size(); i++)
13013 		{
13014 			if (type.array_size_literal[i])
13015 				name += join(type.array[i], "_");
13016 			else
13017 				name += join("id", type.array[i], "_");
13018 		}
13019 		name += "Pointer";
13020 		return name;
13021 	}
13022 
13023 	switch (type.basetype)
13024 	{
13025 	case SPIRType::Struct:
13026 		// Need OpName lookup here to get a "sensible" name for a struct.
13027 		if (backend.explicit_struct_type)
13028 			return join("struct ", to_name(type.self));
13029 		else
13030 			return to_name(type.self);
13031 
13032 	case SPIRType::Image:
13033 	case SPIRType::SampledImage:
13034 		return image_type_glsl(type, id);
13035 
13036 	case SPIRType::Sampler:
13037 		// The depth field is set by calling code based on the variable ID of the sampler, effectively reintroducing
13038 		// this distinction into the type system.
13039 		return comparison_ids.count(id) ? "samplerShadow" : "sampler";
13040 
13041 	case SPIRType::AccelerationStructure:
13042 		return ray_tracing_is_khr ? "accelerationStructureEXT" : "accelerationStructureNV";
13043 
13044 	case SPIRType::Void:
13045 		return "void";
13046 
13047 	default:
13048 		break;
13049 	}
13050 
13051 	if (type.basetype == SPIRType::UInt && is_legacy())
13052 		SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy targets.");
13053 
13054 	if (type.vecsize == 1 && type.columns == 1) // Scalar builtin
13055 	{
13056 		switch (type.basetype)
13057 		{
13058 		case SPIRType::Boolean:
13059 			return "bool";
13060 		case SPIRType::SByte:
13061 			return backend.basic_int8_type;
13062 		case SPIRType::UByte:
13063 			return backend.basic_uint8_type;
13064 		case SPIRType::Short:
13065 			return backend.basic_int16_type;
13066 		case SPIRType::UShort:
13067 			return backend.basic_uint16_type;
13068 		case SPIRType::Int:
13069 			return backend.basic_int_type;
13070 		case SPIRType::UInt:
13071 			return backend.basic_uint_type;
13072 		case SPIRType::AtomicCounter:
13073 			return "atomic_uint";
13074 		case SPIRType::Half:
13075 			return "float16_t";
13076 		case SPIRType::Float:
13077 			return "float";
13078 		case SPIRType::Double:
13079 			return "double";
13080 		case SPIRType::Int64:
13081 			return "int64_t";
13082 		case SPIRType::UInt64:
13083 			return "uint64_t";
13084 		default:
13085 			return "???";
13086 		}
13087 	}
13088 	else if (type.vecsize > 1 && type.columns == 1) // Vector builtin
13089 	{
13090 		switch (type.basetype)
13091 		{
13092 		case SPIRType::Boolean:
13093 			return join("bvec", type.vecsize);
13094 		case SPIRType::SByte:
13095 			return join("i8vec", type.vecsize);
13096 		case SPIRType::UByte:
13097 			return join("u8vec", type.vecsize);
13098 		case SPIRType::Short:
13099 			return join("i16vec", type.vecsize);
13100 		case SPIRType::UShort:
13101 			return join("u16vec", type.vecsize);
13102 		case SPIRType::Int:
13103 			return join("ivec", type.vecsize);
13104 		case SPIRType::UInt:
13105 			return join("uvec", type.vecsize);
13106 		case SPIRType::Half:
13107 			return join("f16vec", type.vecsize);
13108 		case SPIRType::Float:
13109 			return join("vec", type.vecsize);
13110 		case SPIRType::Double:
13111 			return join("dvec", type.vecsize);
13112 		case SPIRType::Int64:
13113 			return join("i64vec", type.vecsize);
13114 		case SPIRType::UInt64:
13115 			return join("u64vec", type.vecsize);
13116 		default:
13117 			return "???";
13118 		}
13119 	}
13120 	else if (type.vecsize == type.columns) // Simple Matrix builtin
13121 	{
13122 		switch (type.basetype)
13123 		{
13124 		case SPIRType::Boolean:
13125 			return join("bmat", type.vecsize);
13126 		case SPIRType::Int:
13127 			return join("imat", type.vecsize);
13128 		case SPIRType::UInt:
13129 			return join("umat", type.vecsize);
13130 		case SPIRType::Half:
13131 			return join("f16mat", type.vecsize);
13132 		case SPIRType::Float:
13133 			return join("mat", type.vecsize);
13134 		case SPIRType::Double:
13135 			return join("dmat", type.vecsize);
13136 		// Matrix types not supported for int64/uint64.
13137 		default:
13138 			return "???";
13139 		}
13140 	}
13141 	else
13142 	{
13143 		switch (type.basetype)
13144 		{
13145 		case SPIRType::Boolean:
13146 			return join("bmat", type.columns, "x", type.vecsize);
13147 		case SPIRType::Int:
13148 			return join("imat", type.columns, "x", type.vecsize);
13149 		case SPIRType::UInt:
13150 			return join("umat", type.columns, "x", type.vecsize);
13151 		case SPIRType::Half:
13152 			return join("f16mat", type.columns, "x", type.vecsize);
13153 		case SPIRType::Float:
13154 			return join("mat", type.columns, "x", type.vecsize);
13155 		case SPIRType::Double:
13156 			return join("dmat", type.columns, "x", type.vecsize);
13157 		// Matrix types not supported for int64/uint64.
13158 		default:
13159 			return "???";
13160 		}
13161 	}
13162 }
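// Illustrative mappings: a 3-component unsigned vector -> "uvec3", a 2-column/4-row float
// matrix -> "mat2x4", a 16-bit float scalar -> "float16_t", and a physical storage buffer
// pointer gets a synthesized name ending in "Pointer".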
13163 
13164 void CompilerGLSL::add_variable(unordered_set<string> &variables_primary,
13165                                 const unordered_set<string> &variables_secondary, string &name)
13166 {
13167 	if (name.empty())
13168 		return;
13169 
13170 	ParsedIR::sanitize_underscores(name);
13171 	if (ParsedIR::is_globally_reserved_identifier(name, true))
13172 	{
13173 		name.clear();
13174 		return;
13175 	}
13176 
13177 	update_name_cache(variables_primary, variables_secondary, name);
13178 }
13179 
13180 void CompilerGLSL::add_local_variable_name(uint32_t id)
13181 {
13182 	add_variable(local_variable_names, block_names, ir.meta[id].decoration.alias);
13183 }
13184 
13185 void CompilerGLSL::add_resource_name(uint32_t id)
13186 {
13187 	add_variable(resource_names, block_names, ir.meta[id].decoration.alias);
13188 }
13189 
13190 void CompilerGLSL::add_header_line(const std::string &line)
13191 {
13192 	header_lines.push_back(line);
13193 }
13194 
13195 bool CompilerGLSL::has_extension(const std::string &ext) const
13196 {
13197 	auto itr = find(begin(forced_extensions), end(forced_extensions), ext);
13198 	return itr != end(forced_extensions);
13199 }
13200 
13201 void CompilerGLSL::require_extension(const std::string &ext)
13202 {
13203 	if (!has_extension(ext))
13204 		forced_extensions.push_back(ext);
13205 }
13206 
13207 void CompilerGLSL::require_extension_internal(const string &ext)
13208 {
13209 	if (backend.supports_extensions && !has_extension(ext))
13210 	{
13211 		forced_extensions.push_back(ext);
13212 		force_recompile();
13213 	}
13214 }
13215 
13216 void CompilerGLSL::flatten_buffer_block(VariableID id)
13217 {
13218 	auto &var = get<SPIRVariable>(id);
13219 	auto &type = get<SPIRType>(var.basetype);
13220 	auto name = to_name(type.self, false);
13221 	auto &flags = ir.meta[type.self].decoration.decoration_flags;
13222 
13223 	if (!type.array.empty())
13224 		SPIRV_CROSS_THROW(name + " is an array of UBOs.");
13225 	if (type.basetype != SPIRType::Struct)
13226 		SPIRV_CROSS_THROW(name + " is not a struct.");
13227 	if (!flags.get(DecorationBlock))
13228 		SPIRV_CROSS_THROW(name + " is not a block.");
13229 	if (type.member_types.empty())
13230 		SPIRV_CROSS_THROW(name + " is an empty struct.");
13231 
13232 	flattened_buffer_blocks.insert(id);
13233 }
13234 
13235 bool CompilerGLSL::builtin_translates_to_nonarray(spv::BuiltIn /*builtin*/) const
13236 {
13237 	return false; // GLSL itself does not need to translate array builtin types to non-array builtin types
13238 }
13239 
13240 bool CompilerGLSL::check_atomic_image(uint32_t id)
13241 {
13242 	auto &type = expression_type(id);
13243 	if (type.storage == StorageClassImage)
13244 	{
13245 		if (options.es && options.version < 320)
13246 			require_extension_internal("GL_OES_shader_image_atomic");
13247 
13248 		auto *var = maybe_get_backing_variable(id);
13249 		if (var)
13250 		{
13251 			auto &flags = ir.meta[var->self].decoration.decoration_flags;
13252 			if (flags.get(DecorationNonWritable) || flags.get(DecorationNonReadable))
13253 			{
13254 				flags.clear(DecorationNonWritable);
13255 				flags.clear(DecorationNonReadable);
13256 				force_recompile();
13257 			}
13258 		}
13259 		return true;
13260 	}
13261 	else
13262 		return false;
13263 }
13264 
13265 void CompilerGLSL::add_function_overload(const SPIRFunction &func)
13266 {
13267 	Hasher hasher;
13268 	for (auto &arg : func.arguments)
13269 	{
13270 		// Parameters can vary with pointer type or not,
13271 		// but that will not change the signature in GLSL/HLSL,
13272 		// so strip the pointer type before hashing.
13273 		uint32_t type_id = get_pointee_type_id(arg.type);
13274 		auto &type = get<SPIRType>(type_id);
13275 
13276 		if (!combined_image_samplers.empty())
13277 		{
13278 			// If we have combined image samplers, we cannot really trust the image and sampler arguments
13279 			// we pass down to callees, because they may be shuffled around.
13280 			// Ignore these arguments, to make sure that functions need to differ in some other way
13281 			// to be considered different overloads.
13282 			if (type.basetype == SPIRType::SampledImage ||
13283 			    (type.basetype == SPIRType::Image && type.image.sampled == 1) || type.basetype == SPIRType::Sampler)
13284 			{
13285 				continue;
13286 			}
13287 		}
13288 
13289 		hasher.u32(type_id);
13290 	}
13291 	uint64_t types_hash = hasher.get();
13292 
13293 	auto function_name = to_name(func.self);
13294 	auto itr = function_overloads.find(function_name);
13295 	if (itr != end(function_overloads))
13296 	{
13297 		// There exists a function with this name already.
13298 		auto &overloads = itr->second;
13299 		if (overloads.count(types_hash) != 0)
13300 		{
13301 			// Overload conflict, assign a new name.
13302 			add_resource_name(func.self);
13303 			function_overloads[to_name(func.self)].insert(types_hash);
13304 		}
13305 		else
13306 		{
13307 			// Can reuse the name.
13308 			overloads.insert(types_hash);
13309 		}
13310 	}
13311 	else
13312 	{
13313 		// First time we see this function name.
13314 		add_resource_name(func.self);
13315 		function_overloads[to_name(func.self)].insert(types_hash);
13316 	}
13317 }
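// For example (illustrative): if the module contains two distinct functions that both carry
// OpName "foo" with identically-typed parameters, the second one is pushed through
// add_resource_name() and ends up with a uniquified fallback name in the generated GLSL.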
13318 
13319 void CompilerGLSL::emit_function_prototype(SPIRFunction &func, const Bitset &return_flags)
13320 {
13321 	if (func.self != ir.default_entry_point)
13322 		add_function_overload(func);
13323 
13324 	// Avoid shadow declarations.
13325 	local_variable_names = resource_names;
13326 
13327 	string decl;
13328 
13329 	auto &type = get<SPIRType>(func.return_type);
13330 	decl += flags_to_qualifiers_glsl(type, return_flags);
13331 	decl += type_to_glsl(type);
13332 	decl += type_to_array_glsl(type);
13333 	decl += " ";
13334 
13335 	if (func.self == ir.default_entry_point)
13336 	{
13337 		// If we need complex fallback in GLSL, we just wrap main() in a function
13338 		// and interlock the entire shader ...
13339 		if (interlocked_is_complex)
13340 			decl += "spvMainInterlockedBody";
13341 		else
13342 			decl += "main";
13343 
13344 		processing_entry_point = true;
13345 	}
13346 	else
13347 		decl += to_name(func.self);
13348 
13349 	decl += "(";
13350 	SmallVector<string> arglist;
13351 	for (auto &arg : func.arguments)
13352 	{
13353 		// Do not pass in separate images or samplers if we're remapping
13354 		// to combined image samplers.
13355 		if (skip_argument(arg.id))
13356 			continue;
13357 
13358 		// Might change the variable name if it already exists in this function.
13359 		// SPIR-V OpName doesn't have any semantic effect, so it's valid for an implementation
13360 		// to use the same name for variables.
13361 		// Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates.
13362 		add_local_variable_name(arg.id);
13363 
13364 		arglist.push_back(argument_decl(arg));
13365 
13366 		// Hold a pointer to the parameter so we can invalidate the readonly field if needed.
13367 		auto *var = maybe_get<SPIRVariable>(arg.id);
13368 		if (var)
13369 			var->parameter = &arg;
13370 	}
13371 
13372 	for (auto &arg : func.shadow_arguments)
13373 	{
13374 		// Might change the variable name if it already exists in this function.
13375 		// SPIR-V OpName doesn't have any semantic effect, so it's valid for an implementation
13376 		// to use the same name for variables.
13377 		// Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates.
13378 		add_local_variable_name(arg.id);
13379 
13380 		arglist.push_back(argument_decl(arg));
13381 
13382 		// Hold a pointer to the parameter so we can invalidate the readonly field if needed.
13383 		auto *var = maybe_get<SPIRVariable>(arg.id);
13384 		if (var)
13385 			var->parameter = &arg;
13386 	}
13387 
13388 	decl += merge(arglist);
13389 	decl += ")";
13390 	statement(decl);
13391 }
13392 
13393 void CompilerGLSL::emit_function(SPIRFunction &func, const Bitset &return_flags)
13394 {
13395 	// Avoid potential cycles.
13396 	if (func.active)
13397 		return;
13398 	func.active = true;
13399 
13400 	// If we depend on a function, emit that function before we emit our own function.
13401 	for (auto block : func.blocks)
13402 	{
13403 		auto &b = get<SPIRBlock>(block);
13404 		for (auto &i : b.ops)
13405 		{
13406 			auto ops = stream(i);
13407 			auto op = static_cast<Op>(i.op);
13408 
13409 			if (op == OpFunctionCall)
13410 			{
13411 				// Recursively emit functions which are called.
13412 				uint32_t id = ops[2];
13413 				emit_function(get<SPIRFunction>(id), ir.meta[ops[1]].decoration.decoration_flags);
13414 			}
13415 		}
13416 	}
13417 
13418 	if (func.entry_line.file_id != 0)
13419 		emit_line_directive(func.entry_line.file_id, func.entry_line.line_literal);
13420 	emit_function_prototype(func, return_flags);
13421 	begin_scope();
13422 
13423 	if (func.self == ir.default_entry_point)
13424 		emit_entry_point_declarations();
13425 
13426 	current_function = &func;
13427 	auto &entry_block = get<SPIRBlock>(func.entry_block);
13428 
13429 	sort(begin(func.constant_arrays_needed_on_stack), end(func.constant_arrays_needed_on_stack));
13430 	for (auto &array : func.constant_arrays_needed_on_stack)
13431 	{
13432 		auto &c = get<SPIRConstant>(array);
13433 		auto &type = get<SPIRType>(c.constant_type);
13434 		statement(variable_decl(type, join("_", array, "_array_copy")), " = ", constant_expression(c), ";");
13435 	}
13436 
13437 	for (auto &v : func.local_variables)
13438 	{
13439 		auto &var = get<SPIRVariable>(v);
13440 		var.deferred_declaration = false;
13441 
13442 		if (var.storage == StorageClassWorkgroup)
13443 		{
13444 			// Special variable type which cannot have an initializer;
13445 			// it needs to be declared as a standalone variable.
13446 			// Comes from MSL which can push global variables as local variables in main function.
13447 			add_local_variable_name(var.self);
13448 			statement(variable_decl(var), ";");
13449 			var.deferred_declaration = false;
13450 		}
13451 		else if (var.storage == StorageClassPrivate)
13452 		{
13453 			// These variables will not have had their CFG usage analyzed, so move them to the entry block.
13454 			// Comes from MSL which can push global variables as local variables in main function.
13455 			// We could just declare them right now, but we would miss out on an important initialization case which is
13456 			// LUT declaration in MSL.
13457 			// If we don't declare the variable when it is assigned we're forced to go through a helper function
13458 			// which copies elements one by one.
13459 			add_local_variable_name(var.self);
13460 
13461 			if (var.initializer)
13462 			{
13463 				statement(variable_decl(var), ";");
13464 				var.deferred_declaration = false;
13465 			}
13466 			else
13467 			{
13468 				auto &dominated = entry_block.dominated_variables;
13469 				if (find(begin(dominated), end(dominated), var.self) == end(dominated))
13470 					entry_block.dominated_variables.push_back(var.self);
13471 				var.deferred_declaration = true;
13472 			}
13473 		}
13474 		else if (var.storage == StorageClassFunction && var.remapped_variable && var.static_expression)
13475 		{
13476 			// No need to declare this variable, it has a static expression.
13477 			var.deferred_declaration = false;
13478 		}
13479 		else if (expression_is_lvalue(v))
13480 		{
13481 			add_local_variable_name(var.self);
13482 
13483 			// Loop variables should never be declared early, they are explicitly emitted in a loop.
13484 			if (var.initializer && !var.loop_variable)
13485 				statement(variable_decl_function_local(var), ";");
13486 			else
13487 			{
13488 				// Don't declare the variable until first use; this declutters the GLSL output quite a lot.
13489 				// If we don't touch the variable before the first branch,
13490 				// declare it there, since the declaration needs to live in the top scope.
13491 				var.deferred_declaration = true;
13492 			}
13493 		}
13494 		else
13495 		{
13496 			// HACK: SPIR-V in older glslang output likes to use samplers and images as local variables, but GLSL does not allow this.
13497 			// For these types (non-lvalue), we enforce forwarding through a shadowed variable.
13498 			// This means that when we OpStore to these variables, we just write in the expression ID directly.
13499 			// This breaks any kind of branching, since the variable must be statically assigned.
13500 			// Branching on samplers and images would be pretty much impossible to fake in GLSL.
13501 			var.statically_assigned = true;
13502 		}
13503 
13504 		var.loop_variable_enable = false;
13505 
13506 		// Loop variables are never declared outside their for-loop, so block any implicit declaration.
13507 		if (var.loop_variable)
13508 			var.deferred_declaration = false;
13509 	}
13510 
13511 	// Enforce declaration order for regression testing purposes.
13512 	for (auto &block_id : func.blocks)
13513 	{
13514 		auto &block = get<SPIRBlock>(block_id);
13515 		sort(begin(block.dominated_variables), end(block.dominated_variables));
13516 	}
13517 
13518 	for (auto &line : current_function->fixup_hooks_in)
13519 		line();
13520 
13521 	emit_block_chain(entry_block);
13522 
13523 	end_scope();
13524 	processing_entry_point = false;
13525 	statement("");
13526 
13527 	// Make sure deferred declaration state for local variables is cleared when we are done with function.
13528 	// We risk declaring Private/Workgroup variables in places we are not supposed to otherwise.
13529 	for (auto &v : func.local_variables)
13530 	{
13531 		auto &var = get<SPIRVariable>(v);
13532 		var.deferred_declaration = false;
13533 	}
13534 }
13535 
13536 void CompilerGLSL::emit_fixup()
13537 {
13538 	if (is_vertex_like_shader())
13539 	{
13540 		if (options.vertex.fixup_clipspace)
13541 		{
13542 			const char *suffix = backend.float_literal_suffix ? "f" : "";
13543 			statement("gl_Position.z = 2.0", suffix, " * gl_Position.z - gl_Position.w;");
13544 		}
13545 
13546 		if (options.vertex.flip_vert_y)
13547 			statement("gl_Position.y = -gl_Position.y;");
13548 	}
13549 }
13550 
13551 void CompilerGLSL::flush_phi(BlockID from, BlockID to)
13552 {
13553 	auto &child = get<SPIRBlock>(to);
13554 	if (child.ignore_phi_from_block == from)
13555 		return;
13556 
13557 	unordered_set<uint32_t> temporary_phi_variables;
13558 
13559 	for (auto itr = begin(child.phi_variables); itr != end(child.phi_variables); ++itr)
13560 	{
13561 		auto &phi = *itr;
13562 
13563 		if (phi.parent == from)
13564 		{
13565 			auto &var = get<SPIRVariable>(phi.function_variable);
13566 
13567 			// A Phi variable might be a loop variable, so flush to static expression.
13568 			if (var.loop_variable && !var.loop_variable_enable)
13569 				var.static_expression = phi.local_variable;
13570 			else
13571 			{
13572 				flush_variable_declaration(phi.function_variable);
13573 
13574 				// Check if we are going to write to a Phi variable that another statement will read from
13575 				// as part of another Phi node in our target block.
13576 				// For this case, we will need to copy phi.function_variable to a temporary, and use that for future reads.
13577 				// This is judged to be extremely rare, so deal with it here using a simple, but suboptimal algorithm.
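				// For illustration only (hypothetical IDs): if one phi write clobbers a variable
				// that a later phi in the same target block still reads, the flush looks roughly like:
				//   _20_copy = _20;   // save the old value first
				//   _20 = a;
				//   _21 = _20_copy;   // the later phi reads the saved copy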
13578 				bool need_saved_temporary =
13579 				    find_if(itr + 1, end(child.phi_variables), [&](const SPIRBlock::Phi &future_phi) -> bool {
13580 					    return future_phi.local_variable == ID(phi.function_variable) && future_phi.parent == from;
13581 				    }) != end(child.phi_variables);
13582 
13583 				if (need_saved_temporary)
13584 				{
13585 					// Need to make sure we declare the phi variable with a copy at the right scope.
13586 					// We cannot safely declare a temporary here since we might be inside a continue block.
13587 					if (!var.allocate_temporary_copy)
13588 					{
13589 						var.allocate_temporary_copy = true;
13590 						force_recompile();
13591 					}
13592 					statement("_", phi.function_variable, "_copy", " = ", to_name(phi.function_variable), ";");
13593 					temporary_phi_variables.insert(phi.function_variable);
13594 				}
13595 
13596 				// This might be called in a continue block, so make sure we
13597 				// use this to emit ESSL 1.0-compliant increments/decrements.
13598 				auto lhs = to_expression(phi.function_variable);
13599 
13600 				string rhs;
13601 				if (temporary_phi_variables.count(phi.local_variable))
13602 					rhs = join("_", phi.local_variable, "_copy");
13603 				else
13604 					rhs = to_pointer_expression(phi.local_variable);
13605 
13606 				if (!optimize_read_modify_write(get<SPIRType>(var.basetype), lhs, rhs))
13607 					statement(lhs, " = ", rhs, ";");
13608 			}
13609 
13610 			register_write(phi.function_variable);
13611 		}
13612 	}
13613 }
13614 
13615 void CompilerGLSL::branch_to_continue(BlockID from, BlockID to)
13616 {
13617 	auto &to_block = get<SPIRBlock>(to);
13618 	if (from == to)
13619 		return;
13620 
13621 	assert(is_continue(to));
13622 	if (to_block.complex_continue)
13623 	{
13624 		// Just emit the whole block chain as is.
13625 		auto usage_counts = expression_usage_counts;
13626 
13627 		emit_block_chain(to_block);
13628 
13629 		// Expression usage counts are moot after returning from the continue block.
13630 		expression_usage_counts = usage_counts;
13631 	}
13632 	else
13633 	{
13634 		auto &from_block = get<SPIRBlock>(from);
13635 		bool outside_control_flow = false;
13636 		uint32_t loop_dominator = 0;
13637 
13638 		// FIXME: Refactor this to not use the old loop_dominator tracking.
13639 		if (from_block.merge_block)
13640 		{
13641 			// If we are a loop header, we don't set the loop dominator,
13642 			// so just use "self" here.
13643 			loop_dominator = from;
13644 		}
13645 		else if (from_block.loop_dominator != BlockID(SPIRBlock::NoDominator))
13646 		{
13647 			loop_dominator = from_block.loop_dominator;
13648 		}
13649 
13650 		if (loop_dominator != 0)
13651 		{
13652 			auto &cfg = get_cfg_for_current_function();
13653 
13654 			// For non-complex continue blocks, we implicitly branch to the continue block
13655 			// by having the continue block be part of the loop header in for (; ; continue-block).
13656 			outside_control_flow = cfg.node_terminates_control_flow_in_sub_graph(loop_dominator, from);
13657 		}
13658 
13659 		// Some simplification for for-loops: we would otherwise always end up with a useless continue;
13660 		// statement since we branch to a loop block.
13661 		// Walk the CFG: if the block calling continue executes unconditionally within the loop body,
13662 		// we can avoid writing out an explicit continue statement.
13663 		// This is similar to the optimization applied to return statements when we know we're outside control flow.
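		// For illustration only (hypothetical): in "for (int i = 0; i < n; i++) { body(); }",
		// the branch back to the continue block at the end of the body would otherwise emit a
		// redundant trailing "continue;", which this check elides.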
13664 		if (!outside_control_flow)
13665 			statement("continue;");
13666 	}
13667 }
13668 
13669 void CompilerGLSL::branch(BlockID from, BlockID to)
13670 {
13671 	flush_phi(from, to);
13672 	flush_control_dependent_expressions(from);
13673 
13674 	bool to_is_continue = is_continue(to);
13675 
13676 	// This is only a continue if we branch to our loop dominator.
13677 	if ((ir.block_meta[to] & ParsedIR::BLOCK_META_LOOP_HEADER_BIT) != 0 && get<SPIRBlock>(from).loop_dominator == to)
13678 	{
13679 		// This can happen if we had a complex continue block which was emitted.
13680 		// Once the continue block tries to branch to the loop header, just emit continue;
13681 		// and end the chain here.
13682 		statement("continue;");
13683 	}
13684 	else if (from != to && is_break(to))
13685 	{
13686 		// We cannot break to ourselves, so check explicitly for from != to.
13687 		// This case can trigger if a single block is all three of these things at once:
13688 		// - Continue block
13689 		// - Loop header
13690 		// - Break merge target
13691 
13692 		// Very dirty workaround.
13693 		// Switch constructs are able to break, but they cannot break out of a loop at the same time.
13694 		// The only sensible solution is to make a ladder variable, which we declare at the top of the switch block,
13695 		// write to the ladder here, and defer the break.
13696 		// The loop we're breaking out of must dominate the switch block, or there is no ladder breaking case.
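		// For illustration only (hypothetical IDs), the emitted ladder pattern reads roughly like:
		//   bool _20_ladder_break = false;
		//   switch (sel) { case 0: { _20_ladder_break = true; break; } ... }
		//   if (_20_ladder_break) { break; }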
13697 		if (current_emitting_switch && is_loop_break(to) &&
13698 		    current_emitting_switch->loop_dominator != BlockID(SPIRBlock::NoDominator) &&
13699 		    get<SPIRBlock>(current_emitting_switch->loop_dominator).merge_block == to)
13700 		{
13701 			if (!current_emitting_switch->need_ladder_break)
13702 			{
13703 				force_recompile();
13704 				current_emitting_switch->need_ladder_break = true;
13705 			}
13706 
13707 			statement("_", current_emitting_switch->self, "_ladder_break = true;");
13708 		}
13709 		statement("break;");
13710 	}
13711 	else if (to_is_continue || from == to)
13712 	{
13713 		// The from == to case can happen for a do-while loop which branches into itself.
13714 		// We don't mark these cases as continue blocks, but the only possible way to branch into
13715 		// ourselves is through a continue block.
13716 
13717 		// If we are merging to a continue block, there is no need to emit the block chain for continue here.
13718 		// We can branch to the continue block after we merge execution.
13719 
13720 		// Here we make use of structured control flow rules from spec:
13721 		// 2.11: - the merge block declared by a header block cannot be a merge block declared by any other header block
13722 		//       - each header block must strictly dominate its merge block, unless the merge block is unreachable in the CFG
13723 		// If we are branching to a merge block, we must be inside a construct which dominates the merge block.
13724 		auto &block_meta = ir.block_meta[to];
13725 		bool branching_to_merge =
13726 		    (block_meta & (ParsedIR::BLOCK_META_SELECTION_MERGE_BIT | ParsedIR::BLOCK_META_MULTISELECT_MERGE_BIT |
13727 		                   ParsedIR::BLOCK_META_LOOP_MERGE_BIT)) != 0;
13728 		if (!to_is_continue || !branching_to_merge)
13729 			branch_to_continue(from, to);
13730 	}
13731 	else if (!is_conditional(to))
13732 		emit_block_chain(get<SPIRBlock>(to));
13733 
13734 	// It is important that we check for break before continue.
13735 	// A block might serve two purposes, a break block for the inner scope, and
13736 	// a continue block in the outer scope.
13737 	// Inner scope always takes precedence.
13738 }
13739 
13740 void CompilerGLSL::branch(BlockID from, uint32_t cond, BlockID true_block, BlockID false_block)
13741 {
13742 	auto &from_block = get<SPIRBlock>(from);
13743 	BlockID merge_block = from_block.merge == SPIRBlock::MergeSelection ? from_block.next_block : BlockID(0);
13744 
13745 	// If we branch directly to our selection merge target, we don't need a code path.
13746 	bool true_block_needs_code = true_block != merge_block || flush_phi_required(from, true_block);
13747 	bool false_block_needs_code = false_block != merge_block || flush_phi_required(from, false_block);
13748 
13749 	if (!true_block_needs_code && !false_block_needs_code)
13750 		return;
13751 
13752 	emit_block_hints(get<SPIRBlock>(from));
13753 
13754 	if (true_block_needs_code)
13755 	{
13756 		statement("if (", to_expression(cond), ")");
13757 		begin_scope();
13758 		branch(from, true_block);
13759 		end_scope();
13760 
13761 		if (false_block_needs_code)
13762 		{
13763 			statement("else");
13764 			begin_scope();
13765 			branch(from, false_block);
13766 			end_scope();
13767 		}
13768 	}
13769 	else if (false_block_needs_code)
13770 	{
13771 		// Only need false path, use negative conditional.
13772 		statement("if (!", to_enclosed_expression(cond), ")");
13773 		begin_scope();
13774 		branch(from, false_block);
13775 		end_scope();
13776 	}
13777 }
13778 
13779 // FIXME: This currently cannot handle complex continue blocks
13780 // as in do-while.
13781 // This should be seen as a "trivial" continue block.
13782 string CompilerGLSL::emit_continue_block(uint32_t continue_block, bool follow_true_block, bool follow_false_block)
13783 {
13784 	auto *block = &get<SPIRBlock>(continue_block);
13785 
13786 	// While emitting the continue block, declare_temporary will check this
13787 	// if we have to emit temporaries.
13788 	current_continue_block = block;
13789 
13790 	SmallVector<string> statements;
13791 
13792 	// Capture all statements into our list.
13793 	auto *old = redirect_statement;
13794 	redirect_statement = &statements;
13795 
13796 	// Stamp out all blocks one after each other.
13797 	while ((ir.block_meta[block->self] & ParsedIR::BLOCK_META_LOOP_HEADER_BIT) == 0)
13798 	{
13799 		// Write out all instructions we have in this block.
13800 		emit_block_instructions(*block);
13801 
13802 		// For plain branchless for/while continue blocks.
13803 		if (block->next_block)
13804 		{
13805 			flush_phi(continue_block, block->next_block);
13806 			block = &get<SPIRBlock>(block->next_block);
13807 		}
13808 		// For do-while blocks, the last block will be a select block.
13809 		else if (block->true_block && follow_true_block)
13810 		{
13811 			flush_phi(continue_block, block->true_block);
13812 			block = &get<SPIRBlock>(block->true_block);
13813 		}
13814 		else if (block->false_block && follow_false_block)
13815 		{
13816 			flush_phi(continue_block, block->false_block);
13817 			block = &get<SPIRBlock>(block->false_block);
13818 		}
13819 		else
13820 		{
13821 			SPIRV_CROSS_THROW("Invalid continue block detected!");
13822 		}
13823 	}
13824 
13825 	// Restore old pointer.
13826 	redirect_statement = old;
13827 
13828 	// Somewhat ugly, strip off the last ';' since we use ',' instead.
13829 	// Ideally, we should select this behavior in statement().
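	// For illustration only (hypothetical): continue-block statements such as "i++;" and "j += 2;"
	// end up merged as "i++, j += 2" inside the for-loop header.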
13830 	for (auto &s : statements)
13831 	{
13832 		if (!s.empty() && s.back() == ';')
13833 			s.erase(s.size() - 1, 1);
13834 	}
13835 
13836 	current_continue_block = nullptr;
13837 	return merge(statements);
13838 }
13839 
13840 void CompilerGLSL::emit_while_loop_initializers(const SPIRBlock &block)
13841 {
13842 	// While loops do not take initializers, so declare all of them outside.
13843 	for (auto &loop_var : block.loop_variables)
13844 	{
13845 		auto &var = get<SPIRVariable>(loop_var);
13846 		statement(variable_decl(var), ";");
13847 	}
13848 }
13849 
13850 string CompilerGLSL::emit_for_loop_initializers(const SPIRBlock &block)
13851 {
13852 	if (block.loop_variables.empty())
13853 		return "";
13854 
13855 	bool same_types = for_loop_initializers_are_same_type(block);
13856 	// We can only declare for-loop initializers if all variables are of the same type.
13857 	// If we cannot do this, declare individual variables before the loop header.
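	// For illustration only (hypothetical): "int i = 0, j = 10" can be folded into the for header,
	// whereas mixing "int i" and "float f" forces separate declarations before the loop.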
13858 
13859 	// We might have a loop variable candidate which was not assigned to for some reason.
13860 	uint32_t missing_initializers = 0;
13861 	for (auto &variable : block.loop_variables)
13862 	{
13863 		uint32_t expr = get<SPIRVariable>(variable).static_expression;
13864 
13865 		// Sometimes loop variables are initialized with OpUndef, but we can just declare
13866 		// a plain variable without an initializer in this case.
13867 		if (expr == 0 || ir.ids[expr].get_type() == TypeUndef)
13868 			missing_initializers++;
13869 	}
13870 
13871 	if (block.loop_variables.size() == 1 && missing_initializers == 0)
13872 	{
13873 		return variable_decl(get<SPIRVariable>(block.loop_variables.front()));
13874 	}
13875 	else if (!same_types || missing_initializers == uint32_t(block.loop_variables.size()))
13876 	{
13877 		for (auto &loop_var : block.loop_variables)
13878 			statement(variable_decl(get<SPIRVariable>(loop_var)), ";");
13879 		return "";
13880 	}
13881 	else
13882 	{
13883 		// We have a mix of loop variables, either ones with a clear initializer, or ones without.
13884 		// Separate the two streams.
13885 		string expr;
13886 
13887 		for (auto &loop_var : block.loop_variables)
13888 		{
13889 			uint32_t static_expr = get<SPIRVariable>(loop_var).static_expression;
13890 			if (static_expr == 0 || ir.ids[static_expr].get_type() == TypeUndef)
13891 			{
13892 				statement(variable_decl(get<SPIRVariable>(loop_var)), ";");
13893 			}
13894 			else
13895 			{
13896 				auto &var = get<SPIRVariable>(loop_var);
13897 				auto &type = get_variable_data_type(var);
13898 				if (expr.empty())
13899 				{
13900 					// For-loop initializers are of the form: <type> id = value, id = value, id = value, etc.
13901 					expr = join(to_qualifiers_glsl(var.self), type_to_glsl(type), " ");
13902 				}
13903 				else
13904 				{
13905 					expr += ", ";
13906 					// In MSL, being based on C++, the asterisk marking a pointer
13907 					// binds to the identifier, not the type.
13908 					if (type.pointer)
13909 						expr += "* ";
13910 				}
13911 
13912 				expr += join(to_name(loop_var), " = ", to_pointer_expression(var.static_expression));
13913 			}
13914 		}
13915 		return expr;
13916 	}
13917 }
13918 
13919 bool CompilerGLSL::for_loop_initializers_are_same_type(const SPIRBlock &block)
13920 {
13921 	if (block.loop_variables.size() <= 1)
13922 		return true;
13923 
13924 	uint32_t expected = 0;
13925 	Bitset expected_flags;
13926 	for (auto &var : block.loop_variables)
13927 	{
13928 		// Don't care about uninitialized variables as they will not be part of the initializers.
13929 		uint32_t expr = get<SPIRVariable>(var).static_expression;
13930 		if (expr == 0 || ir.ids[expr].get_type() == TypeUndef)
13931 			continue;
13932 
13933 		if (expected == 0)
13934 		{
13935 			expected = get<SPIRVariable>(var).basetype;
13936 			expected_flags = get_decoration_bitset(var);
13937 		}
13938 		else if (expected != get<SPIRVariable>(var).basetype)
13939 			return false;
13940 
13941 		// Precision flags and things like that must also match.
13942 		if (expected_flags != get_decoration_bitset(var))
13943 			return false;
13944 	}
13945 
13946 	return true;
13947 }
13948 
13949 bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method method)
13950 {
13951 	SPIRBlock::ContinueBlockType continue_type = continue_block_type(get<SPIRBlock>(block.continue_block));
13952 
13953 	if (method == SPIRBlock::MergeToSelectForLoop || method == SPIRBlock::MergeToSelectContinueForLoop)
13954 	{
13955 		uint32_t current_count = statement_count;
13956 		// If we're trying to create a true for loop,
13957 		// we need to make sure that all opcodes before branch statement do not actually emit any code.
13958 		// We can then take the condition expression and create a for (; cond ; ) { body; } structure instead.
13959 		emit_block_instructions(block);
13960 
13961 		bool condition_is_temporary = forced_temporaries.find(block.condition) == end(forced_temporaries);
13962 
13963 		// This can work! We only did trivial things which could be forwarded in block body!
13964 		if (current_count == statement_count && condition_is_temporary)
13965 		{
13966 			switch (continue_type)
13967 			{
13968 			case SPIRBlock::ForLoop:
13969 			{
13970 				// This block may be a dominating block, so make sure we flush undeclared variables before building the for loop header.
13971 				flush_undeclared_variables(block);
13972 
13973 				// Important that we do this in this order because
13974 				// emitting the continue block can invalidate the condition expression.
13975 				auto initializer = emit_for_loop_initializers(block);
13976 				auto condition = to_expression(block.condition);
13977 
13978 				// Condition might have to be inverted.
13979 				if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block)))
13980 					condition = join("!", enclose_expression(condition));
13981 
13982 				emit_block_hints(block);
13983 				if (method != SPIRBlock::MergeToSelectContinueForLoop)
13984 				{
13985 					auto continue_block = emit_continue_block(block.continue_block, false, false);
13986 					statement("for (", initializer, "; ", condition, "; ", continue_block, ")");
13987 				}
13988 				else
13989 					statement("for (", initializer, "; ", condition, "; )");
13990 				break;
13991 			}
13992 
13993 			case SPIRBlock::WhileLoop:
13994 			{
13995 				// This block may be a dominating block, so make sure we flush undeclared variables before building the while loop header.
13996 				flush_undeclared_variables(block);
13997 				emit_while_loop_initializers(block);
13998 				emit_block_hints(block);
13999 
14000 				auto condition = to_expression(block.condition);
14001 				// Condition might have to be inverted.
14002 				if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block)))
14003 					condition = join("!", enclose_expression(condition));
14004 
14005 				statement("while (", condition, ")");
14006 				break;
14007 			}
14008 
14009 			default:
14010 				block.disable_block_optimization = true;
14011 				force_recompile();
14012 				begin_scope(); // We'll see an end_scope() later.
14013 				return false;
14014 			}
14015 
14016 			begin_scope();
14017 			return true;
14018 		}
14019 		else
14020 		{
14021 			block.disable_block_optimization = true;
14022 			force_recompile();
14023 			begin_scope(); // We'll see an end_scope() later.
14024 			return false;
14025 		}
14026 	}
14027 	else if (method == SPIRBlock::MergeToDirectForLoop)
14028 	{
14029 		auto &child = get<SPIRBlock>(block.next_block);
14030 
14031 		// This block may be a dominating block, so make sure we flush undeclared variables before building the for loop header.
14032 		flush_undeclared_variables(child);
14033 
14034 		uint32_t current_count = statement_count;
14035 
14036 		// If we're trying to create a true for loop,
14037 		// we need to make sure that all opcodes before branch statement do not actually emit any code.
14038 		// We can then take the condition expression and create a for (; cond ; ) { body; } structure instead.
14039 		emit_block_instructions(child);
14040 
14041 		bool condition_is_temporary = forced_temporaries.find(child.condition) == end(forced_temporaries);
14042 
14043 		if (current_count == statement_count && condition_is_temporary)
14044 		{
14045 			uint32_t target_block = child.true_block;
14046 
14047 			switch (continue_type)
14048 			{
14049 			case SPIRBlock::ForLoop:
14050 			{
14051 				// Important that we do this in this order because
14052 				// emitting the continue block can invalidate the condition expression.
14053 				auto initializer = emit_for_loop_initializers(block);
14054 				auto condition = to_expression(child.condition);
14055 
14056 				// Condition might have to be inverted.
14057 				if (execution_is_noop(get<SPIRBlock>(child.true_block), get<SPIRBlock>(block.merge_block)))
14058 				{
14059 					condition = join("!", enclose_expression(condition));
14060 					target_block = child.false_block;
14061 				}
14062 
14063 				auto continue_block = emit_continue_block(block.continue_block, false, false);
14064 				emit_block_hints(block);
14065 				statement("for (", initializer, "; ", condition, "; ", continue_block, ")");
14066 				break;
14067 			}
14068 
14069 			case SPIRBlock::WhileLoop:
14070 			{
14071 				emit_while_loop_initializers(block);
14072 				emit_block_hints(block);
14073 
14074 				auto condition = to_expression(child.condition);
14075 				// Condition might have to be inverted.
14076 				if (execution_is_noop(get<SPIRBlock>(child.true_block), get<SPIRBlock>(block.merge_block)))
14077 				{
14078 					condition = join("!", enclose_expression(condition));
14079 					target_block = child.false_block;
14080 				}
14081 
14082 				statement("while (", condition, ")");
14083 				break;
14084 			}
14085 
14086 			default:
14087 				block.disable_block_optimization = true;
14088 				force_recompile();
14089 				begin_scope(); // We'll see an end_scope() later.
14090 				return false;
14091 			}
14092 
14093 			begin_scope();
14094 			branch(child.self, target_block);
14095 			return true;
14096 		}
14097 		else
14098 		{
14099 			block.disable_block_optimization = true;
14100 			force_recompile();
14101 			begin_scope(); // We'll see an end_scope() later.
14102 			return false;
14103 		}
14104 	}
14105 	else
14106 		return false;
14107 }
14108 
14109 void CompilerGLSL::flush_undeclared_variables(SPIRBlock &block)
14110 {
14111 	for (auto &v : block.dominated_variables)
14112 		flush_variable_declaration(v);
14113 }
14114 
14115 void CompilerGLSL::emit_hoisted_temporaries(SmallVector<pair<TypeID, ID>> &temporaries)
14116 {
14117 	// If we need to force temporaries for certain IDs due to continue blocks, do it before starting loop header.
14118 	// Need to sort these to ensure that reference output is stable.
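	// For illustration only (hypothetical ID): a temporary created inside a loop but consumed after it
	// gets declared up front, e.g.:
	//   float _42;
	//   for (;;) { _42 = ...; break; }
	//   ... = _42;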
14119 	sort(begin(temporaries), end(temporaries),
14120 	     [](const pair<TypeID, ID> &a, const pair<TypeID, ID> &b) { return a.second < b.second; });
14121 
14122 	for (auto &tmp : temporaries)
14123 	{
14124 		add_local_variable_name(tmp.second);
14125 		auto &flags = ir.meta[tmp.second].decoration.decoration_flags;
14126 		auto &type = get<SPIRType>(tmp.first);
14127 
14128 		// Not all targets support pointer literals, so don't bother with that case.
14129 		string initializer;
14130 		if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
14131 			initializer = join(" = ", to_zero_initialized_expression(tmp.first));
14132 
14133 		statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(tmp.second)), initializer, ";");
14134 
14135 		hoisted_temporaries.insert(tmp.second);
14136 		forced_temporaries.insert(tmp.second);
14137 
14138 		// The temporary might be read from before it's assigned, set up the expression now.
14139 		set<SPIRExpression>(tmp.second, to_name(tmp.second), tmp.first, true);
14140 	}
14141 }
14142 
14143 void CompilerGLSL::emit_block_chain(SPIRBlock &block)
14144 {
14145 	bool select_branch_to_true_block = false;
14146 	bool select_branch_to_false_block = false;
14147 	bool skip_direct_branch = false;
14148 	bool emitted_loop_header_variables = false;
14149 	bool force_complex_continue_block = false;
14150 	ValueSaver<uint32_t> loop_level_saver(current_loop_level);
14151 
14152 	if (block.merge == SPIRBlock::MergeLoop)
14153 		add_loop_level();
14154 
14155 	emit_hoisted_temporaries(block.declare_temporary);
14156 
14157 	SPIRBlock::ContinueBlockType continue_type = SPIRBlock::ContinueNone;
14158 	if (block.continue_block)
14159 	{
14160 		continue_type = continue_block_type(get<SPIRBlock>(block.continue_block));
14161 		// If we know we cannot emit a loop, mark the block early as a complex loop so we don't force unnecessary recompiles.
14162 		if (continue_type == SPIRBlock::ComplexLoop)
14163 			block.complex_continue = true;
14164 	}
14165 
14166 	// If we have loop variables, stop masking out access to the variable now.
14167 	for (auto var_id : block.loop_variables)
14168 	{
14169 		auto &var = get<SPIRVariable>(var_id);
14170 		var.loop_variable_enable = true;
14171 		// We're not going to declare the variable directly, so emit a copy here.
14172 		emit_variable_temporary_copies(var);
14173 	}
14174 
14175 	// Remember deferred declaration state. We will restore it before returning.
14176 	SmallVector<bool, 64> rearm_dominated_variables(block.dominated_variables.size());
14177 	for (size_t i = 0; i < block.dominated_variables.size(); i++)
14178 	{
14179 		uint32_t var_id = block.dominated_variables[i];
14180 		auto &var = get<SPIRVariable>(var_id);
14181 		rearm_dominated_variables[i] = var.deferred_declaration;
14182 	}
14183 
14184 	// This is the method often used by spirv-opt to implement loops.
14185 	// The loop header goes straight into the continue block.
14186 	// However, don't attempt this on ESSL 1.0, because if a loop variable is used in a continue block,
14187 	// it *MUST* be used in the continue block. This loop method will not work.
14188 	if (!is_legacy_es() && block_is_loop_candidate(block, SPIRBlock::MergeToSelectContinueForLoop))
14189 	{
14190 		flush_undeclared_variables(block);
14191 		if (attempt_emit_loop_header(block, SPIRBlock::MergeToSelectContinueForLoop))
14192 		{
14193 			if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block)))
14194 				select_branch_to_false_block = true;
14195 			else
14196 				select_branch_to_true_block = true;
14197 
14198 			emitted_loop_header_variables = true;
14199 			force_complex_continue_block = true;
14200 		}
14201 	}
14202 	// This is the older loop behavior in glslang which branches to loop body directly from the loop header.
14203 	else if (block_is_loop_candidate(block, SPIRBlock::MergeToSelectForLoop))
14204 	{
14205 		flush_undeclared_variables(block);
14206 		if (attempt_emit_loop_header(block, SPIRBlock::MergeToSelectForLoop))
14207 		{
14208 			// The body of the while loop is actually just the true (or false) block, so always branch there unconditionally.
14209 			if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block)))
14210 				select_branch_to_false_block = true;
14211 			else
14212 				select_branch_to_true_block = true;
14213 
14214 			emitted_loop_header_variables = true;
14215 		}
14216 	}
14217 	// This is the newer loop behavior in glslang which branches from Loop header directly to
14218 	// a new block, which in turn has an OpBranchConditional without a selection merge.
14219 	else if (block_is_loop_candidate(block, SPIRBlock::MergeToDirectForLoop))
14220 	{
14221 		flush_undeclared_variables(block);
14222 		if (attempt_emit_loop_header(block, SPIRBlock::MergeToDirectForLoop))
14223 		{
14224 			skip_direct_branch = true;
14225 			emitted_loop_header_variables = true;
14226 		}
14227 	}
14228 	else if (continue_type == SPIRBlock::DoWhileLoop)
14229 	{
14230 		flush_undeclared_variables(block);
14231 		emit_while_loop_initializers(block);
14232 		emitted_loop_header_variables = true;
14233 		// We have some temporaries where the loop header is the dominator.
14234 		// We risk a case where we have code like:
14235 		// for (;;) { create-temporary; break; } consume-temporary;
14236 		// so force-declare temporaries here.
14237 		emit_hoisted_temporaries(block.potential_declare_temporary);
14238 		statement("do");
14239 		begin_scope();
14240 
14241 		emit_block_instructions(block);
14242 	}
14243 	else if (block.merge == SPIRBlock::MergeLoop)
14244 	{
14245 		flush_undeclared_variables(block);
14246 		emit_while_loop_initializers(block);
14247 		emitted_loop_header_variables = true;
14248 
14249 		// We have a generic loop without any distinguishable pattern like for, while or do while.
14250 		get<SPIRBlock>(block.continue_block).complex_continue = true;
14251 		continue_type = SPIRBlock::ComplexLoop;
14252 
14253 		// We have some temporaries where the loop header is the dominator.
14254 		// We risk a case where we have code like:
14255 		// for (;;) { create-temporary; break; } consume-temporary;
14256 		// so force-declare temporaries here.
14257 		emit_hoisted_temporaries(block.potential_declare_temporary);
14258 		statement("for (;;)");
14259 		begin_scope();
14260 
14261 		emit_block_instructions(block);
14262 	}
14263 	else
14264 	{
14265 		emit_block_instructions(block);
14266 	}
14267 
14268 	// If we didn't successfully emit a loop header and we had loop variable candidates, we have a problem
14269 	// as writes to said loop variables might have been masked out, we need a recompile.
14270 	if (!emitted_loop_header_variables && !block.loop_variables.empty())
14271 	{
14272 		force_recompile();
14273 		for (auto var : block.loop_variables)
14274 			get<SPIRVariable>(var).loop_variable = false;
14275 		block.loop_variables.clear();
14276 	}
14277 
14278 	flush_undeclared_variables(block);
14279 	bool emit_next_block = true;
14280 
14281 	// Handle end of block.
14282 	switch (block.terminator)
14283 	{
14284 	case SPIRBlock::Direct:
14285 		// True when emitting complex continue block.
14286 		if (block.loop_dominator == block.next_block)
14287 		{
14288 			branch(block.self, block.next_block);
14289 			emit_next_block = false;
14290 		}
14291 		// True if MergeToDirectForLoop succeeded.
14292 		else if (skip_direct_branch)
14293 			emit_next_block = false;
14294 		else if (is_continue(block.next_block) || is_break(block.next_block) || is_conditional(block.next_block))
14295 		{
14296 			branch(block.self, block.next_block);
14297 			emit_next_block = false;
14298 		}
14299 		break;
14300 
14301 	case SPIRBlock::Select:
14302 		// True if MergeToSelectForLoop or MergeToSelectContinueForLoop succeeded.
14303 		if (select_branch_to_true_block)
14304 		{
14305 			if (force_complex_continue_block)
14306 			{
14307 				assert(block.true_block == block.continue_block);
14308 
14309 				// We're going to emit a continue block directly here, so make sure it's marked as complex.
14310 				auto &complex_continue = get<SPIRBlock>(block.continue_block).complex_continue;
14311 				bool old_complex = complex_continue;
14312 				complex_continue = true;
14313 				branch(block.self, block.true_block);
14314 				complex_continue = old_complex;
14315 			}
14316 			else
14317 				branch(block.self, block.true_block);
14318 		}
14319 		else if (select_branch_to_false_block)
14320 		{
14321 			if (force_complex_continue_block)
14322 			{
14323 				assert(block.false_block == block.continue_block);
14324 
14325 				// We're going to emit a continue block directly here, so make sure it's marked as complex.
14326 				auto &complex_continue = get<SPIRBlock>(block.continue_block).complex_continue;
14327 				bool old_complex = complex_continue;
14328 				complex_continue = true;
14329 				branch(block.self, block.false_block);
14330 				complex_continue = old_complex;
14331 			}
14332 			else
14333 				branch(block.self, block.false_block);
14334 		}
14335 		else
14336 			branch(block.self, block.condition, block.true_block, block.false_block);
14337 		break;
14338 
14339 	case SPIRBlock::MultiSelect:
14340 	{
14341 		auto &type = expression_type(block.condition);
14342 		bool unsigned_case =
14343 		    type.basetype == SPIRType::UInt || type.basetype == SPIRType::UShort || type.basetype == SPIRType::UByte;
14344 
14345 		if (block.merge == SPIRBlock::MergeNone)
14346 			SPIRV_CROSS_THROW("Switch statement is not structured");
14347 
14348 		if (type.basetype == SPIRType::UInt64 || type.basetype == SPIRType::Int64)
14349 		{
14350 			// The SPIR-V spec suggests this is allowed, but we cannot support it in higher-level languages.
14351 			SPIRV_CROSS_THROW("Cannot use 64-bit switch selectors.");
14352 		}
14353 
14354 		const char *label_suffix = "";
14355 		if (type.basetype == SPIRType::UInt && backend.uint32_t_literal_suffix)
14356 			label_suffix = "u";
14357 		else if (type.basetype == SPIRType::UShort)
14358 			label_suffix = backend.uint16_t_literal_suffix;
14359 		else if (type.basetype == SPIRType::Short)
14360 			label_suffix = backend.int16_t_literal_suffix;
14361 
14362 		SPIRBlock *old_emitting_switch = current_emitting_switch;
14363 		current_emitting_switch = &block;
14364 
14365 		if (block.need_ladder_break)
14366 			statement("bool _", block.self, "_ladder_break = false;");
14367 
14368 		// Find all unique case constructs.
14369 		unordered_map<uint32_t, SmallVector<uint32_t>> case_constructs;
14370 		SmallVector<uint32_t> block_declaration_order;
14371 		SmallVector<uint32_t> literals_to_merge;
14372 
14373 		// If a switch case branches to the default block for some reason, we can just remove that literal from consideration
14374 		// and let the default: block handle it.
14375 		// 2.11 in SPIR-V spec states that for fall-through cases, there is a very strict declaration order which we can take advantage of here.
14376 		// We only need to consider possible fallthrough if order[i] branches to order[i + 1].
14377 		for (auto &c : block.cases)
14378 		{
14379 			if (c.block != block.next_block && c.block != block.default_block)
14380 			{
14381 				if (!case_constructs.count(c.block))
14382 					block_declaration_order.push_back(c.block);
14383 				case_constructs[c.block].push_back(c.value);
14384 			}
14385 			else if (c.block == block.next_block && block.default_block != block.next_block)
14386 			{
14387 				// We might have to flush phi inside specific case labels.
14388 				// If we can piggyback on default:, do so instead.
14389 				literals_to_merge.push_back(c.value);
14390 			}
14391 		}
14392 
14393 		// Empty literal array -> default.
14394 		if (block.default_block != block.next_block)
14395 		{
14396 			auto &default_block = get<SPIRBlock>(block.default_block);
14397 
14398 			// We need to slide in the default block somewhere in this chain
14399 			// if there are fall-through scenarios since the default is declared separately in OpSwitch.
14400 			// Only consider trivial fall-through cases here.
14401 			size_t num_blocks = block_declaration_order.size();
14402 			bool injected_block = false;
14403 
14404 			for (size_t i = 0; i < num_blocks; i++)
14405 			{
14406 				auto &case_block = get<SPIRBlock>(block_declaration_order[i]);
14407 				if (execution_is_direct_branch(case_block, default_block))
14408 				{
14409 					// Fallthrough to default block, we must inject the default block here.
14410 					block_declaration_order.insert(begin(block_declaration_order) + i + 1, block.default_block);
14411 					injected_block = true;
14412 					break;
14413 				}
14414 				else if (execution_is_direct_branch(default_block, case_block))
14415 				{
14416 					// Default case is falling through to another case label, we must inject the default block here.
14417 					block_declaration_order.insert(begin(block_declaration_order) + i, block.default_block);
14418 					injected_block = true;
14419 					break;
14420 				}
14421 			}
14422 
14423 			// Order does not matter.
14424 			if (!injected_block)
14425 				block_declaration_order.push_back(block.default_block);
14426 			else if (is_legacy_es())
14427 				SPIRV_CROSS_THROW("Default case label fallthrough to other case label is not supported in ESSL 1.0.");
14428 
14429 			case_constructs[block.default_block] = {};
14430 		}
14431 
14432 		size_t num_blocks = block_declaration_order.size();
14433 
14434 		const auto to_case_label = [](uint32_t literal, bool is_unsigned_case) -> string {
14435 			return is_unsigned_case ? convert_to_string(literal) : convert_to_string(int32_t(literal));
14436 		};
14437 
14438 		const auto to_legacy_case_label = [&](uint32_t condition, const SmallVector<uint32_t> &labels,
14439 		                                      const char *suffix) -> string {
14440 			string ret;
14441 			size_t count = labels.size();
14442 			for (size_t i = 0; i < count; i++)
14443 			{
14444 				if (i)
14445 					ret += " || ";
14446 				ret += join(count > 1 ? "(" : "", to_enclosed_expression(condition), " == ", labels[i], suffix,
14447 				            count > 1 ? ")" : "");
14448 			}
14449 			return ret;
14450 		};
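		// For illustration only (hypothetical): on ESSL 1.0 a "case 1: case 2:" construct becomes
		//   if ((sel == 1) || (sel == 2)) { ... }
		// with subsequent case blocks chained using "else if".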
14451 
14452 		// We need to deal with a complex scenario for OpPhi. If we have case-fallthrough and Phi in the picture,
14453 		// we need to flush phi nodes outside the switch block in a branch,
14454 		// and skip any Phi handling inside the case label to make fall-through work as expected.
14455 		// This kind of code-gen is super awkward and it's a last resort. Normally we would want to handle this
14456 		// inside the case label if at all possible.
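		// For illustration only (hypothetical): before the switch itself we may emit
		//   if (sel == 3 || sel == 4) { phi_var = value_from_header; }
		// and then mark the case label so the phi is not flushed a second time inside it.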
14457 		for (size_t i = 1; backend.support_case_fallthrough && i < num_blocks; i++)
14458 		{
14459 			if (flush_phi_required(block.self, block_declaration_order[i]) &&
14460 			    flush_phi_required(block_declaration_order[i - 1], block_declaration_order[i]))
14461 			{
14462 				uint32_t target_block = block_declaration_order[i];
14463 
14464 				// Make sure we flush Phi, it might have been marked to be ignored earlier.
14465 				get<SPIRBlock>(target_block).ignore_phi_from_block = 0;
14466 
14467 				auto &literals = case_constructs[target_block];
14468 
14469 				if (literals.empty())
14470 				{
14471 					// Oh boy, gotta make a complete negative test instead! o.o
14472 					// Find all possible literals that would *not* make us enter the default block.
14473 					// If none of those literals match, we flush Phi ...
14474 					SmallVector<string> conditions;
14475 					for (size_t j = 0; j < num_blocks; j++)
14476 					{
14477 						auto &negative_literals = case_constructs[block_declaration_order[j]];
14478 						for (auto &case_label : negative_literals)
14479 							conditions.push_back(join(to_enclosed_expression(block.condition),
14480 							                          " != ", to_case_label(case_label, unsigned_case)));
14481 					}
14482 
14483 					statement("if (", merge(conditions, " && "), ")");
14484 					begin_scope();
14485 					flush_phi(block.self, target_block);
14486 					end_scope();
14487 				}
14488 				else
14489 				{
14490 					SmallVector<string> conditions;
14491 					conditions.reserve(literals.size());
14492 					for (auto &case_label : literals)
14493 						conditions.push_back(join(to_enclosed_expression(block.condition),
14494 						                          " == ", to_case_label(case_label, unsigned_case)));
14495 					statement("if (", merge(conditions, " || "), ")");
14496 					begin_scope();
14497 					flush_phi(block.self, target_block);
14498 					end_scope();
14499 				}
14500 
14501 				// Mark the block so that we don't flush Phi from header to case label.
14502 				get<SPIRBlock>(target_block).ignore_phi_from_block = block.self;
14503 			}
14504 		}
14505 
14506 		// If there is only one default block, and no cases, this is a case where SPIRV-opt decided to emulate
14507 		// non-structured exits with the help of a switch block.
14508 		// This is buggy on FXC, so just emit the logical equivalent of a do { } while(false), which is more idiomatic.
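		// For illustration only (hypothetical): such a degenerate switch is emitted as
		//   do { ...default block... } while(false);
		// or, on ESSL 1.0 where do/while support is not guaranteed, as a single-iteration for loop.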
14509 		bool degenerate_switch = block.default_block != block.merge_block && block.cases.empty();
14510 
14511 		if (degenerate_switch || is_legacy_es())
14512 		{
14513 			// ESSL 1.0 is not guaranteed to support do/while.
14514 			if (is_legacy_es())
14515 			{
14516 				uint32_t counter = statement_count;
14517 				statement("for (int spvDummy", counter, " = 0; spvDummy", counter,
14518 				          " < 1; spvDummy", counter, "++)");
14519 			}
14520 			else
14521 				statement("do");
14522 		}
14523 		else
14524 		{
14525 			emit_block_hints(block);
14526 			statement("switch (", to_expression(block.condition), ")");
14527 		}
14528 		begin_scope();
14529 
14530 		for (size_t i = 0; i < num_blocks; i++)
14531 		{
14532 			uint32_t target_block = block_declaration_order[i];
14533 			auto &literals = case_constructs[target_block];
14534 
14535 			if (literals.empty())
14536 			{
14537 				// Default case.
14538 				if (!degenerate_switch)
14539 				{
14540 					if (is_legacy_es())
14541 						statement("else");
14542 					else
14543 						statement("default:");
14544 				}
14545 			}
14546 			else
14547 			{
14548 				if (is_legacy_es())
14549 				{
14550 					statement((i ? "else " : ""), "if (", to_legacy_case_label(block.condition, literals, label_suffix),
14551 					          ")");
14552 				}
14553 				else
14554 				{
14555 					for (auto &case_literal : literals)
14556 					{
14557 						// The case label value must be sign-extended properly in SPIR-V, so we can assume 32-bit values here.
14558 						statement("case ", to_case_label(case_literal, unsigned_case), label_suffix, ":");
14559 					}
14560 				}
14561 			}
14562 
14563 			auto &case_block = get<SPIRBlock>(target_block);
14564 			if (backend.support_case_fallthrough && i + 1 < num_blocks &&
14565 			    execution_is_direct_branch(case_block, get<SPIRBlock>(block_declaration_order[i + 1])))
14566 			{
14567 				// We will fall through here, so just terminate the block chain early.
14568 				// We still need to deal with Phi potentially.
14569 				// No need for a stack-like thing here since we only do fall-through when there is a
14570 				// single trivial branch to the fall-through target.
14571 				current_emitting_switch_fallthrough = true;
14572 			}
14573 			else
14574 				current_emitting_switch_fallthrough = false;
14575 
14576 			if (!degenerate_switch)
14577 				begin_scope();
14578 			branch(block.self, target_block);
14579 			if (!degenerate_switch)
14580 				end_scope();
14581 
14582 			current_emitting_switch_fallthrough = false;
14583 		}
14584 
14585 		// Might still have to flush phi variables if we branch from loop header directly to merge target.
14586 		if (flush_phi_required(block.self, block.next_block))
14587 		{
14588 			if (block.default_block == block.next_block || !literals_to_merge.empty())
14589 			{
14590 				for (auto &case_literal : literals_to_merge)
14591 					statement("case ", to_case_label(case_literal, unsigned_case), label_suffix, ":");
14592 
14593 				if (block.default_block == block.next_block)
14594 				{
14595 					if (is_legacy_es())
14596 						statement("else");
14597 					else
14598 						statement("default:");
14599 				}
14600 
14601 				begin_scope();
14602 				flush_phi(block.self, block.next_block);
14603 				statement("break;");
14604 				end_scope();
14605 			}
14606 		}
14607 
14608 		if (degenerate_switch && !is_legacy_es())
14609 			end_scope_decl("while(false)");
14610 		else
14611 			end_scope();
14612 
14613 		if (block.need_ladder_break)
14614 		{
14615 			statement("if (_", block.self, "_ladder_break)");
14616 			begin_scope();
14617 			statement("break;");
14618 			end_scope();
14619 		}
14620 
14621 		current_emitting_switch = old_emitting_switch;
14622 		break;
14623 	}
14624 
14625 	case SPIRBlock::Return:
14626 	{
14627 		for (auto &line : current_function->fixup_hooks_out)
14628 			line();
14629 
14630 		if (processing_entry_point)
14631 			emit_fixup();
14632 
14633 		auto &cfg = get_cfg_for_current_function();
14634 
14635 		if (block.return_value)
14636 		{
14637 			auto &type = expression_type(block.return_value);
14638 			if (!type.array.empty() && !backend.can_return_array)
14639 			{
14640 				// If we cannot return arrays, we will have a special out argument we can write to instead.
14641 				// The backend is responsible for setting this up, and redirecting the return values as appropriate.
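				// For illustration only (hypothetical signature): a function returning "float foo[4]"
				// may instead be emitted as "void foo(out float spvReturnValue[4])", with the copy
				// below writing the result into that out parameter.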
14642 				if (ir.ids[block.return_value].get_type() != TypeUndef)
14643 				{
14644 					emit_array_copy("spvReturnValue", block.return_value, StorageClassFunction,
14645 					                get_expression_effective_storage_class(block.return_value));
14646 				}
14647 
14648 				if (!cfg.node_terminates_control_flow_in_sub_graph(current_function->entry_block, block.self) ||
14649 				    block.loop_dominator != BlockID(SPIRBlock::NoDominator))
14650 				{
14651 					statement("return;");
14652 				}
14653 			}
14654 			else
14655 			{
14656 				// OpReturnValue can return Undef, so don't emit anything for this case.
14657 				if (ir.ids[block.return_value].get_type() != TypeUndef)
14658 					statement("return ", to_expression(block.return_value), ";");
14659 			}
14660 		}
14661 		else if (!cfg.node_terminates_control_flow_in_sub_graph(current_function->entry_block, block.self) ||
14662 		         block.loop_dominator != BlockID(SPIRBlock::NoDominator))
14663 		{
14664 			// If this block is the very final block and not called from control flow,
14665 			// we do not need an explicit return which looks out of place. Just end the function here.
14666 			// In the very weird case of for(;;) { return; }, the return executes unconditionally,
14667 			// but we actually need a return here ...
14668 			statement("return;");
14669 		}
14670 		break;
14671 	}
14672 
14673 	case SPIRBlock::Kill:
14674 		statement(backend.discard_literal, ";");
14675 		break;
14676 
14677 	case SPIRBlock::Unreachable:
14678 		emit_next_block = false;
14679 		break;
14680 
14681 	case SPIRBlock::IgnoreIntersection:
14682 		statement("ignoreIntersectionEXT;");
14683 		break;
14684 
14685 	case SPIRBlock::TerminateRay:
14686 		statement("terminateRayEXT;");
14687 		break;
14688 
14689 	default:
14690 		SPIRV_CROSS_THROW("Unimplemented block terminator.");
14691 	}
14692 
14693 	if (block.next_block && emit_next_block)
14694 	{
14695 		// If we hit this case, we're dealing with an unconditional branch, which means we will output
14696 		// that block after this. If we had selection merge, we already flushed phi variables.
14697 		if (block.merge != SPIRBlock::MergeSelection)
14698 		{
14699 			flush_phi(block.self, block.next_block);
14700 			// For a direct branch, need to remember to invalidate expressions in the next linear block instead.
14701 			get<SPIRBlock>(block.next_block).invalidate_expressions = block.invalidate_expressions;
14702 		}
14703 
14704 		// For switch fallthrough cases, we terminate the chain here, but we still need to handle Phi.
14705 		if (!current_emitting_switch_fallthrough)
14706 		{
14707 			// For merge selects we might have ignored the fact that a merge target
14708 			// could have been a break; or continue;
14709 			// We will need to deal with it here.
14710 			if (is_loop_break(block.next_block))
14711 			{
14712 				// Cannot check for just break, because switch statements will also use break.
14713 				assert(block.merge == SPIRBlock::MergeSelection);
14714 				statement("break;");
14715 			}
14716 			else if (is_continue(block.next_block))
14717 			{
14718 				assert(block.merge == SPIRBlock::MergeSelection);
14719 				branch_to_continue(block.self, block.next_block);
14720 			}
14721 			else if (BlockID(block.self) != block.next_block)
14722 				emit_block_chain(get<SPIRBlock>(block.next_block));
14723 		}
14724 	}
14725 
14726 	if (block.merge == SPIRBlock::MergeLoop)
14727 	{
14728 		if (continue_type == SPIRBlock::DoWhileLoop)
14729 		{
14730 			// Make sure that we run the continue block to get the expressions set, but this
14731 			// should become an empty string.
14732 			// We have no fallbacks if we cannot forward everything to temporaries ...
14733 			const auto &continue_block = get<SPIRBlock>(block.continue_block);
14734 			bool positive_test = execution_is_noop(get<SPIRBlock>(continue_block.true_block),
14735 			                                       get<SPIRBlock>(continue_block.loop_dominator));
14736 
14737 			uint32_t current_count = statement_count;
14738 			auto statements = emit_continue_block(block.continue_block, positive_test, !positive_test);
14739 			if (statement_count != current_count)
14740 			{
14741 				// The DoWhile block has side effects, force ComplexLoop pattern next pass.
14742 				get<SPIRBlock>(block.continue_block).complex_continue = true;
14743 				force_recompile();
14744 			}
14745 
14746 			// Might have to invert the do-while test here.
14747 			auto condition = to_expression(continue_block.condition);
14748 			if (!positive_test)
14749 				condition = join("!", enclose_expression(condition));
14750 
14751 			end_scope_decl(join("while (", condition, ")"));
14752 		}
14753 		else
14754 			end_scope();
14755 
14756 		loop_level_saver.release();
14757 
14758 		// We cannot break out of two loops at once, so don't check for break; here.
14759 		// Using block.self as the "from" block isn't quite right, but it has the same scope
14760 		// and dominance structure, so it's fine.
14761 		if (is_continue(block.merge_block))
14762 			branch_to_continue(block.self, block.merge_block);
14763 		else
14764 			emit_block_chain(get<SPIRBlock>(block.merge_block));
14765 	}
14766 
14767 	// Forget about control dependent expressions now.
14768 	block.invalidate_expressions.clear();
14769 
14770 	// After we return, we must be out of scope, so if we somehow have to re-emit this function,
14771 	// re-declare variables if necessary.
14772 	assert(rearm_dominated_variables.size() == block.dominated_variables.size());
14773 	for (size_t i = 0; i < block.dominated_variables.size(); i++)
14774 	{
14775 		uint32_t var = block.dominated_variables[i];
14776 		get<SPIRVariable>(var).deferred_declaration = rearm_dominated_variables[i];
14777 	}
14778 
14779 	// Just like for deferred declaration, we need to forget about the loop variable enable state
14780 	// if our block chain is reinstantiated later.
14781 	for (auto &var_id : block.loop_variables)
14782 		get<SPIRVariable>(var_id).loop_variable_enable = false;
14783 }
14784 
begin_scope()14785 void CompilerGLSL::begin_scope()
14786 {
14787 	statement("{");
14788 	indent++;
14789 }
14790 
end_scope()14791 void CompilerGLSL::end_scope()
14792 {
14793 	if (!indent)
14794 		SPIRV_CROSS_THROW("Popping empty indent stack.");
14795 	indent--;
14796 	statement("}");
14797 }
14798 
end_scope(const string & trailer)14799 void CompilerGLSL::end_scope(const string &trailer)
14800 {
14801 	if (!indent)
14802 		SPIRV_CROSS_THROW("Popping empty indent stack.");
14803 	indent--;
14804 	statement("}", trailer);
14805 }
14806 
end_scope_decl()14807 void CompilerGLSL::end_scope_decl()
14808 {
14809 	if (!indent)
14810 		SPIRV_CROSS_THROW("Popping empty indent stack.");
14811 	indent--;
14812 	statement("};");
14813 }
14814 
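// Closes the current scope and appends a trailing declaration on the same line,
// e.g. "} while (condition);" for do-while loops, or "} instance_name;" for block declarations.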
end_scope_decl(const string & decl)14815 void CompilerGLSL::end_scope_decl(const string &decl)
14816 {
14817 	if (!indent)
14818 		SPIRV_CROSS_THROW("Popping empty indent stack.");
14819 	indent--;
14820 	statement("} ", decl, ";");
14821 }
14822 
check_function_call_constraints(const uint32_t * args,uint32_t length)14823 void CompilerGLSL::check_function_call_constraints(const uint32_t *args, uint32_t length)
14824 {
14825 	// If our variable is remapped, and we rely on type-remapping information as
14826 	// well, then we cannot pass the variable as a function parameter.
14827 	// Fixing this is non-trivial without stamping out variants of the same function,
14828 	// so for now we throw an error and suggest workarounds instead.
14829 	for (uint32_t i = 0; i < length; i++)
14830 	{
14831 		auto *var = maybe_get<SPIRVariable>(args[i]);
14832 		if (!var || !var->remapped_variable)
14833 			continue;
14834 
14835 		auto &type = get<SPIRType>(var->basetype);
14836 		if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData)
14837 		{
14838 			SPIRV_CROSS_THROW("Tried passing a remapped subpassInput variable to a function. "
14839 			                  "This will not work correctly because type-remapping information is lost. "
14840 			                  "To workaround, please consider not passing the subpass input as a function parameter, "
14841 			                  "or use in/out variables instead which do not need type remapping information.");
14842 		}
14843 	}
14844 }
14845 
get_next_instruction_in_block(const Instruction & instr)14846 const Instruction *CompilerGLSL::get_next_instruction_in_block(const Instruction &instr)
14847 {
14848 	// FIXME: This is kind of hacky. There should be a cleaner way.
14849 	auto offset = uint32_t(&instr - current_emitting_block->ops.data());
14850 	if ((offset + 1) < current_emitting_block->ops.size())
14851 		return &current_emitting_block->ops[offset + 1];
14852 	else
14853 		return nullptr;
14854 }
14855 
mask_relevant_memory_semantics(uint32_t semantics)14856 uint32_t CompilerGLSL::mask_relevant_memory_semantics(uint32_t semantics)
14857 {
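	// Strip everything except the memory-class bits (atomic counter, image, workgroup, uniform,
	// cross-workgroup and subgroup memory); these decide which memory barriers need to be emitted.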
14858 	return semantics & (MemorySemanticsAtomicCounterMemoryMask | MemorySemanticsImageMemoryMask |
14859 	                    MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask |
14860 	                    MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask);
14861 }
14862 
emit_array_copy(const string & lhs,uint32_t rhs_id,StorageClass,StorageClass)14863 void CompilerGLSL::emit_array_copy(const string &lhs, uint32_t rhs_id, StorageClass, StorageClass)
14864 {
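	// Base implementation: GLSL can assign whole arrays directly.
	// Backends with different array copy semantics (e.g. MSL) override this hook.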
14865 	statement(lhs, " = ", to_expression(rhs_id), ";");
14866 }
14867 
unroll_array_from_complex_load(uint32_t target_id,uint32_t source_id,std::string & expr)14868 void CompilerGLSL::unroll_array_from_complex_load(uint32_t target_id, uint32_t source_id, std::string &expr)
14869 {
14870 	if (!backend.force_gl_in_out_block)
14871 		return;
14872 	// This path is only relevant for GL backends.
14873 
14874 	auto *var = maybe_get<SPIRVariable>(source_id);
14875 	if (!var)
14876 		return;
14877 
14878 	if (var->storage != StorageClassInput)
14879 		return;
14880 
14881 	auto &type = get_variable_data_type(*var);
14882 	if (type.array.empty())
14883 		return;
14884 
14885 	auto builtin = BuiltIn(get_decoration(var->self, DecorationBuiltIn));
14886 	bool is_builtin = is_builtin_variable(*var) && (builtin == BuiltInPointSize || builtin == BuiltInPosition);
14887 	bool is_tess = is_tessellation_shader();
14888 	bool is_patch = has_decoration(var->self, DecorationPatch);
14889 
14890 	// Tessellation input arrays are special in that they are unsized, so we cannot directly copy from it.
14891 	// We must unroll the array load.
14892 	// For builtins, we couldn't catch this case normally,
14893 	// because this is resolved in the OpAccessChain in most cases.
14894 	// If we load the entire array, we have no choice but to unroll here.
14895 	if (!is_patch && (is_builtin || is_tess))
14896 	{
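		// Illustrative example of what this emits for a gl_Position load in a tessellation stage
		// (identifier and array size are made up):
		//   vec4 _42_unrolled[32];
		//   for (int i = 0; i < int(32); i++)
		//       _42_unrolled[i] = gl_in[i].gl_Position;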
14897 		auto new_expr = join("_", target_id, "_unrolled");
14898 		statement(variable_decl(type, new_expr, target_id), ";");
14899 		string array_expr;
14900 		if (type.array_size_literal.back())
14901 		{
14902 			array_expr = convert_to_string(type.array.back());
14903 			if (type.array.back() == 0)
14904 				SPIRV_CROSS_THROW("Cannot unroll an array copy from unsized array.");
14905 		}
14906 		else
14907 			array_expr = to_expression(type.array.back());
14908 
14909 		// The array size might be a specialization constant, so use a for-loop instead.
14910 		statement("for (int i = 0; i < int(", array_expr, "); i++)");
14911 		begin_scope();
14912 		if (is_builtin)
14913 			statement(new_expr, "[i] = gl_in[i].", expr, ";");
14914 		else
14915 			statement(new_expr, "[i] = ", expr, "[i];");
14916 		end_scope();
14917 
14918 		expr = move(new_expr);
14919 	}
14920 }
14921 
cast_from_builtin_load(uint32_t source_id,std::string & expr,const SPIRType & expr_type)14922 void CompilerGLSL::cast_from_builtin_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type)
14923 {
14924 	auto *var = maybe_get_backing_variable(source_id);
14925 	if (var)
14926 		source_id = var->self;
14927 
14928 	// Only interested in standalone builtin variables.
14929 	if (!has_decoration(source_id, DecorationBuiltIn))
14930 		return;
14931 
14932 	auto builtin = static_cast<BuiltIn>(get_decoration(source_id, DecorationBuiltIn));
14933 	auto expected_type = expr_type.basetype;
14934 
14935 	// TODO: Fill in for more builtins.
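	// The first group below covers builtins whose canonical GLSL type is int, the second group uint.
	// If the SPIR-V expression expects the other signedness, the loaded value is bitcast below.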
14936 	switch (builtin)
14937 	{
14938 	case BuiltInLayer:
14939 	case BuiltInPrimitiveId:
14940 	case BuiltInViewportIndex:
14941 	case BuiltInInstanceId:
14942 	case BuiltInInstanceIndex:
14943 	case BuiltInVertexId:
14944 	case BuiltInVertexIndex:
14945 	case BuiltInSampleId:
14946 	case BuiltInBaseVertex:
14947 	case BuiltInBaseInstance:
14948 	case BuiltInDrawIndex:
14949 	case BuiltInFragStencilRefEXT:
14950 	case BuiltInInstanceCustomIndexNV:
14951 		expected_type = SPIRType::Int;
14952 		break;
14953 
14954 	case BuiltInGlobalInvocationId:
14955 	case BuiltInLocalInvocationId:
14956 	case BuiltInWorkgroupId:
14957 	case BuiltInLocalInvocationIndex:
14958 	case BuiltInWorkgroupSize:
14959 	case BuiltInNumWorkgroups:
14960 	case BuiltInIncomingRayFlagsNV:
14961 	case BuiltInLaunchIdNV:
14962 	case BuiltInLaunchSizeNV:
14963 		expected_type = SPIRType::UInt;
14964 		break;
14965 
14966 	default:
14967 		break;
14968 	}
14969 
14970 	if (expected_type != expr_type.basetype)
14971 		expr = bitcast_expression(expr_type, expected_type, expr);
14972 }
14973 
cast_to_builtin_store(uint32_t target_id,std::string & expr,const SPIRType & expr_type)14974 void CompilerGLSL::cast_to_builtin_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type)
14975 {
14976 	// Only interested in standalone builtin variables.
14977 	if (!has_decoration(target_id, DecorationBuiltIn))
14978 		return;
14979 
14980 	auto builtin = static_cast<BuiltIn>(get_decoration(target_id, DecorationBuiltIn));
14981 	auto expected_type = expr_type.basetype;
14982 
14983 	// TODO: Fill in for more builtins.
14984 	switch (builtin)
14985 	{
14986 	case BuiltInLayer:
14987 	case BuiltInPrimitiveId:
14988 	case BuiltInViewportIndex:
14989 	case BuiltInFragStencilRefEXT:
14990 		expected_type = SPIRType::Int;
14991 		break;
14992 
14993 	default:
14994 		break;
14995 	}
14996 
14997 	if (expected_type != expr_type.basetype)
14998 	{
14999 		auto type = expr_type;
15000 		type.basetype = expected_type;
15001 		expr = bitcast_expression(type, expr_type.basetype, expr);
15002 	}
15003 }
15004 
convert_non_uniform_expression(const SPIRType & type,std::string & expr)15005 void CompilerGLSL::convert_non_uniform_expression(const SPIRType &type, std::string &expr)
15006 {
15007 	if (*backend.nonuniform_qualifier == '\0')
15008 		return;
15009 
15010 	// Handle SPV_EXT_descriptor_indexing.
15011 	if (type.basetype == SPIRType::Sampler || type.basetype == SPIRType::SampledImage ||
15012 	    type.basetype == SPIRType::Image)
15013 	{
15014 		// The image/sampler ID must be declared as non-uniform.
15015 		// However, it is not legal GLSL to have
15016 		// nonuniformEXT(samplers[index]), so we must move the nonuniform qualifier
15017 		// to the array indexing, like
15018 		// samplers[nonuniformEXT(index)].
15019 		// While the access chain will generally be nonuniformEXT, it is not necessarily so,
15020 		// so we might have to fix up the OpLoad-ed expression later.
15021 
15022 		auto start_array_index = expr.find_first_of('[');
15023 
15024 		if (start_array_index == string::npos)
15025 			return;
15026 
15027 		// Check for the edge case that a non-arrayed resource was marked to be nonuniform,
15028 		// and the bracket we found is actually part of non-resource related data.
15029 		if (expr.find_first_of(',') < start_array_index)
15030 			return;
15031 
15032 		// We've opened a bracket, track expressions until we can close the bracket.
15033 		// This must be our image index.
15034 		size_t end_array_index = string::npos;
15035 		unsigned bracket_count = 1;
15036 		for (size_t index = start_array_index + 1; index < expr.size(); index++)
15037 		{
15038 			if (expr[index] == ']')
15039 			{
15040 				if (--bracket_count == 0)
15041 				{
15042 					end_array_index = index;
15043 					break;
15044 				}
15045 			}
15046 			else if (expr[index] == '[')
15047 				bracket_count++;
15048 		}
15049 
15050 		assert(bracket_count == 0);
15051 
15052 		// Doesn't really make sense to declare a non-arrayed image with nonuniformEXT, but there's
15053 		// nothing we can do here to express that.
15054 		if (start_array_index == string::npos || end_array_index == string::npos || end_array_index < start_array_index)
15055 			return;
15056 
15057 		start_array_index++;
15058 
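		// Illustrative rewrite: "samplers[index]" becomes "samplers[nonuniformEXT(index)]"
		// (assuming the default GLSL nonuniform qualifier; backends may substitute their own).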
15059 		expr = join(expr.substr(0, start_array_index), backend.nonuniform_qualifier, "(",
15060 		            expr.substr(start_array_index, end_array_index - start_array_index), ")",
15061 		            expr.substr(end_array_index, string::npos));
15062 	}
15063 }
15064 
emit_block_hints(const SPIRBlock &)15065 void CompilerGLSL::emit_block_hints(const SPIRBlock &)
15066 {
15067 }
15068 
preserve_alias_on_reset(uint32_t id)15069 void CompilerGLSL::preserve_alias_on_reset(uint32_t id)
15070 {
15071 	preserved_aliases[id] = get_name(id);
15072 }
15073 
reset_name_caches()15074 void CompilerGLSL::reset_name_caches()
15075 {
15076 	for (auto &preserved : preserved_aliases)
15077 		set_name(preserved.first, preserved.second);
15078 
15079 	preserved_aliases.clear();
15080 	resource_names.clear();
15081 	block_input_names.clear();
15082 	block_output_names.clear();
15083 	block_ubo_names.clear();
15084 	block_ssbo_names.clear();
15085 	block_names.clear();
15086 	function_overloads.clear();
15087 }
15088 
fixup_type_alias()15089 void CompilerGLSL::fixup_type_alias()
15090 {
15091 	// Due to how some backends work, the "master" type of type_alias must be a block-like type if it exists.
15092 	ir.for_each_typed_id<SPIRType>([&](uint32_t self, SPIRType &type) {
15093 		if (!type.type_alias)
15094 			return;
15095 
15096 		if (has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock))
15097 		{
15098 			// Top-level block types should never alias anything else.
15099 			type.type_alias = 0;
15100 		}
15101 		else if (type_is_block_like(type) && type.self == ID(self))
15102 		{
15103 			// A block-like type is any type which contains an Offset decoration but is not a top-level block,
15104 			// i.e. a struct which is placed inside a buffer.
15105 			// Make this type the master.
15106 			ir.for_each_typed_id<SPIRType>([&](uint32_t other_id, SPIRType &other_type) {
15107 				if (other_id == self)
15108 					return;
15109 
15110 				if (other_type.type_alias == type.type_alias)
15111 					other_type.type_alias = self;
15112 			});
15113 
15114 			this->get<SPIRType>(type.type_alias).type_alias = self;
15115 			type.type_alias = 0;
15116 		}
15117 	});
15118 }
15119 
reorder_type_alias()15120 void CompilerGLSL::reorder_type_alias()
15121 {
15122 	// Reorder declaration of types so that the master of the type alias is always emitted first.
15123 	// We need this in case a type B depends on type A (A must come before B in the vector), but A is an alias of a type ABuffer,
15124 	// which means the declaration of A doesn't happen (yet), and the order would be B, ABuffer and not ABuffer, B. Fix this up here.
15125 	auto loop_lock = ir.create_loop_hard_lock();
15126 
15127 	auto &type_ids = ir.ids_for_type[TypeType];
15128 	for (auto alias_itr = begin(type_ids); alias_itr != end(type_ids); ++alias_itr)
15129 	{
15130 		auto &type = get<SPIRType>(*alias_itr);
15131 		if (type.type_alias != TypeID(0) &&
15132 		    !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked))
15133 		{
15134 			// We will skip declaring this type, so make sure the type_alias type comes before.
15135 			auto master_itr = find(begin(type_ids), end(type_ids), ID(type.type_alias));
15136 			assert(master_itr != end(type_ids));
15137 
15138 			if (alias_itr < master_itr)
15139 			{
15140 				// Must also swap the type order for the constant-type joined array.
15141 				auto &joined_types = ir.ids_for_constant_or_type;
15142 				auto alt_alias_itr = find(begin(joined_types), end(joined_types), *alias_itr);
15143 				auto alt_master_itr = find(begin(joined_types), end(joined_types), *master_itr);
15144 				assert(alt_alias_itr != end(joined_types));
15145 				assert(alt_master_itr != end(joined_types));
15146 
15147 				swap(*alias_itr, *master_itr);
15148 				swap(*alt_alias_itr, *alt_master_itr);
15149 			}
15150 		}
15151 	}
15152 }
15153 
emit_line_directive(uint32_t file_id,uint32_t line_literal)15154 void CompilerGLSL::emit_line_directive(uint32_t file_id, uint32_t line_literal)
15155 {
15156 	// If we are redirecting statements, ignore the line directive.
15157 	// Common case here is continue blocks.
15158 	if (redirect_statement)
15159 		return;
15160 
15161 	if (options.emit_line_directives)
15162 	{
15163 		require_extension_internal("GL_GOOGLE_cpp_style_line_directive");
15164 		statement_no_indent("#line ", line_literal, " \"", get<SPIRString>(file_id).str, "\"");
15165 	}
15166 }
15167 
propagate_nonuniform_qualifier(uint32_t id)15168 void CompilerGLSL::propagate_nonuniform_qualifier(uint32_t id)
15169 {
15170 	// SPIR-V might only tag the very last ID with NonUniformEXT, but for codegen,
15171 	// we need to know NonUniformEXT a little earlier, when the resource is actually loaded.
15172 	// Back-propagate the qualifier based on the expression dependency chain.
15173 
15174 	if (!has_decoration(id, DecorationNonUniformEXT))
15175 	{
15176 		set_decoration(id, DecorationNonUniformEXT);
15177 		force_recompile();
15178 	}
15179 
15180 	auto *e = maybe_get<SPIRExpression>(id);
15181 	auto *combined = maybe_get<SPIRCombinedImageSampler>(id);
15182 	auto *chain = maybe_get<SPIRAccessChain>(id);
15183 	if (e)
15184 	{
15185 		for (auto &expr : e->expression_dependencies)
15186 			propagate_nonuniform_qualifier(expr);
15187 		for (auto &expr : e->implied_read_expressions)
15188 			propagate_nonuniform_qualifier(expr);
15189 	}
15190 	else if (combined)
15191 	{
15192 		propagate_nonuniform_qualifier(combined->image);
15193 		propagate_nonuniform_qualifier(combined->sampler);
15194 	}
15195 	else if (chain)
15196 	{
15197 		for (auto &expr : chain->implied_read_expressions)
15198 			propagate_nonuniform_qualifier(expr);
15199 	}
15200 }
15201 
emit_copy_logical_type(uint32_t lhs_id,uint32_t lhs_type_id,uint32_t rhs_id,uint32_t rhs_type_id,SmallVector<uint32_t> chain)15202 void CompilerGLSL::emit_copy_logical_type(uint32_t lhs_id, uint32_t lhs_type_id, uint32_t rhs_id, uint32_t rhs_type_id,
15203                                           SmallVector<uint32_t> chain)
15204 {
15205 	// Fully unroll all member/array indices one by one.
15206 
15207 	auto &lhs_type = get<SPIRType>(lhs_type_id);
15208 	auto &rhs_type = get<SPIRType>(rhs_type_id);
15209 
15210 	if (!lhs_type.array.empty())
15211 	{
15212 		// Could use a loop here to support specialization constants, but that gets rather complicated with nested array types,
15213 		// and this is a rather obscure opcode anyway; keep it simple unless we are forced to.
15214 		uint32_t array_size = to_array_size_literal(lhs_type);
15215 		chain.push_back(0);
15216 
15217 		for (uint32_t i = 0; i < array_size; i++)
15218 		{
15219 			chain.back() = i;
15220 			emit_copy_logical_type(lhs_id, lhs_type.parent_type, rhs_id, rhs_type.parent_type, chain);
15221 		}
15222 	}
15223 	else if (lhs_type.basetype == SPIRType::Struct)
15224 	{
15225 		chain.push_back(0);
15226 		uint32_t member_count = uint32_t(lhs_type.member_types.size());
15227 		for (uint32_t i = 0; i < member_count; i++)
15228 		{
15229 			chain.back() = i;
15230 			emit_copy_logical_type(lhs_id, lhs_type.member_types[i], rhs_id, rhs_type.member_types[i], chain);
15231 		}
15232 	}
15233 	else
15234 	{
15235 		// Need to handle unpack/packing fixups since this can differ wildly between the logical types,
15236 		// particularly in MSL.
15237 		// To deal with this, we emit access chains and go through emit_store_statement
15238 		// to deal with all the special cases we can encounter.
15239 
15240 		AccessChainMeta lhs_meta, rhs_meta;
15241 		auto lhs = access_chain_internal(lhs_id, chain.data(), uint32_t(chain.size()),
15242 		                                 ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &lhs_meta);
15243 		auto rhs = access_chain_internal(rhs_id, chain.data(), uint32_t(chain.size()),
15244 		                                 ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &rhs_meta);
15245 
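		// Reserve two fresh IDs so the LHS and RHS access chains can be wrapped in temporary
		// SPIRExpressions and stored through the regular emit_store_statement path.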
15246 		uint32_t id = ir.increase_bound_by(2);
15247 		lhs_id = id;
15248 		rhs_id = id + 1;
15249 
15250 		{
15251 			auto &lhs_expr = set<SPIRExpression>(lhs_id, move(lhs), lhs_type_id, true);
15252 			lhs_expr.need_transpose = lhs_meta.need_transpose;
15253 
15254 			if (lhs_meta.storage_is_packed)
15255 				set_extended_decoration(lhs_id, SPIRVCrossDecorationPhysicalTypePacked);
15256 			if (lhs_meta.storage_physical_type != 0)
15257 				set_extended_decoration(lhs_id, SPIRVCrossDecorationPhysicalTypeID, lhs_meta.storage_physical_type);
15258 
15259 			forwarded_temporaries.insert(lhs_id);
15260 			suppressed_usage_tracking.insert(lhs_id);
15261 		}
15262 
15263 		{
15264 			auto &rhs_expr = set<SPIRExpression>(rhs_id, move(rhs), rhs_type_id, true);
15265 			rhs_expr.need_transpose = rhs_meta.need_transpose;
15266 
15267 			if (rhs_meta.storage_is_packed)
15268 				set_extended_decoration(rhs_id, SPIRVCrossDecorationPhysicalTypePacked);
15269 			if (rhs_meta.storage_physical_type != 0)
15270 				set_extended_decoration(rhs_id, SPIRVCrossDecorationPhysicalTypeID, rhs_meta.storage_physical_type);
15271 
15272 			forwarded_temporaries.insert(rhs_id);
15273 			suppressed_usage_tracking.insert(rhs_id);
15274 		}
15275 
15276 		emit_store_statement(lhs_id, rhs_id);
15277 	}
15278 }
15279 
subpass_input_is_framebuffer_fetch(uint32_t id) const15280 bool CompilerGLSL::subpass_input_is_framebuffer_fetch(uint32_t id) const
15281 {
15282 	if (!has_decoration(id, DecorationInputAttachmentIndex))
15283 		return false;
15284 
15285 	uint32_t input_attachment_index = get_decoration(id, DecorationInputAttachmentIndex);
15286 	for (auto &remap : subpass_to_framebuffer_fetch_attachment)
15287 		if (remap.first == input_attachment_index)
15288 			return true;
15289 
15290 	return false;
15291 }
15292 
find_subpass_input_by_attachment_index(uint32_t index) const15293 const SPIRVariable *CompilerGLSL::find_subpass_input_by_attachment_index(uint32_t index) const
15294 {
15295 	const SPIRVariable *ret = nullptr;
15296 	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
15297 		if (has_decoration(var.self, DecorationInputAttachmentIndex) &&
15298 		    get_decoration(var.self, DecorationInputAttachmentIndex) == index)
15299 		{
15300 			ret = &var;
15301 		}
15302 	});
15303 	return ret;
15304 }
15305 
find_color_output_by_location(uint32_t location) const15306 const SPIRVariable *CompilerGLSL::find_color_output_by_location(uint32_t location) const
15307 {
15308 	const SPIRVariable *ret = nullptr;
15309 	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
15310 		if (var.storage == StorageClassOutput && get_decoration(var.self, DecorationLocation) == location)
15311 			ret = &var;
15312 	});
15313 	return ret;
15314 }
15315 
emit_inout_fragment_outputs_copy_to_subpass_inputs()15316 void CompilerGLSL::emit_inout_fragment_outputs_copy_to_subpass_inputs()
15317 {
15318 	for (auto &remap : subpass_to_framebuffer_fetch_attachment)
15319 	{
15320 		auto *subpass_var = find_subpass_input_by_attachment_index(remap.first);
15321 		auto *output_var = find_color_output_by_location(remap.second);
15322 		if (!subpass_var)
15323 			continue;
15324 		if (!output_var)
15325 			SPIRV_CROSS_THROW("Need to declare the corresponding fragment output variable to be able "
15326 			                  "to read from it.");
15327 		if (is_array(get<SPIRType>(output_var->basetype)))
15328 			SPIRV_CROSS_THROW("Cannot use GL_EXT_shader_framebuffer_fetch with arrays of color outputs.");
15329 
15330 		auto &func = get<SPIRFunction>(get_entry_point().self);
15331 		func.fixup_hooks_in.push_back([=]() {
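			// Legacy targets read the current framebuffer color through gl_LastFragData[],
			// while newer targets read the corresponding inout color output directly.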
15332 			if (is_legacy())
15333 			{
15334 				statement(to_expression(subpass_var->self), " = ", "gl_LastFragData[",
15335 				          get_decoration(output_var->self, DecorationLocation), "];");
15336 			}
15337 			else
15338 			{
15339 				uint32_t num_rt_components = this->get<SPIRType>(output_var->basetype).vecsize;
15340 				statement(to_expression(subpass_var->self), vector_swizzle(num_rt_components, 0), " = ",
15341 				          to_expression(output_var->self), ";");
15342 			}
15343 		});
15344 	}
15345 }
15346 
variable_is_depth_or_compare(VariableID id) const15347 bool CompilerGLSL::variable_is_depth_or_compare(VariableID id) const
15348 {
15349 	return image_is_comparison(get<SPIRType>(get<SPIRVariable>(id).basetype), id);
15350 }
15351 
get_extension_name(Candidate c)15352 const char *CompilerGLSL::ShaderSubgroupSupportHelper::get_extension_name(Candidate c)
15353 {
15354 	static const char *const retval[CandidateCount] = { "GL_KHR_shader_subgroup_ballot",
15355 		                                                "GL_KHR_shader_subgroup_basic",
15356 		                                                "GL_KHR_shader_subgroup_vote",
15357 		                                                "GL_NV_gpu_shader_5",
15358 		                                                "GL_NV_shader_thread_group",
15359 		                                                "GL_NV_shader_thread_shuffle",
15360 		                                                "GL_ARB_shader_ballot",
15361 		                                                "GL_ARB_shader_group_vote",
15362 		                                                "GL_AMD_gcn_shader" };
15363 	return retval[c];
15364 }
15365 
get_extra_required_extension_names(Candidate c)15366 SmallVector<std::string> CompilerGLSL::ShaderSubgroupSupportHelper::get_extra_required_extension_names(Candidate c)
15367 {
15368 	switch (c)
15369 	{
15370 	case ARB_shader_ballot:
15371 		return { "GL_ARB_shader_int64" };
15372 	case AMD_gcn_shader:
15373 		return { "GL_AMD_gpu_shader_int64", "GL_NV_gpu_shader5" };
15374 	default:
15375 		return {};
15376 	}
15377 }
15378 
get_extra_required_extension_predicate(Candidate c)15379 const char *CompilerGLSL::ShaderSubgroupSupportHelper::get_extra_required_extension_predicate(Candidate c)
15380 {
15381 	switch (c)
15382 	{
15383 	case ARB_shader_ballot:
15384 		return "defined(GL_ARB_shader_int64)";
15385 	case AMD_gcn_shader:
15386 		return "(defined(GL_AMD_gpu_shader_int64) || defined(GL_NV_gpu_shader5))";
15387 	default:
15388 		return "";
15389 	}
15390 }
15391 
15392 CompilerGLSL::ShaderSubgroupSupportHelper::FeatureVector CompilerGLSL::ShaderSubgroupSupportHelper::
get_feature_dependencies(Feature feature)15393     get_feature_dependencies(Feature feature)
15394 {
15395 	switch (feature)
15396 	{
15397 	case SubgroupAllEqualT:
15398 		return { SubgroupBroadcast_First, SubgroupAll_Any_AllEqualBool };
15399 	case SubgroupElect:
15400 		return { SubgroupBallotFindLSB_MSB, SubgroupBallot, SubgroupInvocationID };
15401 	case SubgroupInverseBallot_InclBitCount_ExclBitCout:
15402 		return { SubgroupMask };
15403 	case SubgroupBallotBitCount:
15404 		return { SubgroupBallot };
15405 	default:
15406 		return {};
15407 	}
15408 }
15409 
15410 CompilerGLSL::ShaderSubgroupSupportHelper::FeatureMask CompilerGLSL::ShaderSubgroupSupportHelper::
get_feature_dependency_mask(Feature feature)15411     get_feature_dependency_mask(Feature feature)
15412 {
15413 	return build_mask(get_feature_dependencies(feature));
15414 }
15415 
can_feature_be_implemented_without_extensions(Feature feature)15416 bool CompilerGLSL::ShaderSubgroupSupportHelper::can_feature_be_implemented_without_extensions(Feature feature)
15417 {
15418 	static const bool retval[FeatureCount] = { false, false, false, false, false, false,
15419 		                                       true, // SubgroupBallotFindLSB_MSB
15420 		                                       false, false, false, false,
15421 		                                       true, // SubgroupMemBarrier - replaced with workgroup memory barriers
15422 		                                       false, false, true,  false };
15423 
15424 	return retval[feature];
15425 }
15426 
15427 CompilerGLSL::ShaderSubgroupSupportHelper::Candidate CompilerGLSL::ShaderSubgroupSupportHelper::
get_KHR_extension_for_feature(Feature feature)15428     get_KHR_extension_for_feature(Feature feature)
15429 {
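	// Indexed by the Feature enum; maps each subgroup feature to the KHR_shader_subgroup_*
	// extension which provides it.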
15430 	static const Candidate extensions[FeatureCount] = {
15431 		KHR_shader_subgroup_ballot, KHR_shader_subgroup_basic,  KHR_shader_subgroup_basic,  KHR_shader_subgroup_basic,
15432 		KHR_shader_subgroup_basic,  KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_vote,
15433 		KHR_shader_subgroup_vote,   KHR_shader_subgroup_basic,  KHR_shader_subgroup_basic, KHR_shader_subgroup_basic,
15434 		KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot
15435 	};
15436 
15437 	return extensions[feature];
15438 }
15439 
request_feature(Feature feature)15440 void CompilerGLSL::ShaderSubgroupSupportHelper::request_feature(Feature feature)
15441 {
15442 	feature_mask |= (FeatureMask(1) << feature) | get_feature_dependency_mask(feature);
15443 }
15444 
is_feature_requested(Feature feature) const15445 bool CompilerGLSL::ShaderSubgroupSupportHelper::is_feature_requested(Feature feature) const
15446 {
15447 	return (feature_mask & (1u << feature)) != 0;
15448 }
15449 
resolve() const15450 CompilerGLSL::ShaderSubgroupSupportHelper::Result CompilerGLSL::ShaderSubgroupSupportHelper::resolve() const
15451 {
15452 	Result res;
15453 
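	// Each requested feature (and each of its dependencies) votes for every candidate extension
	// that could implement it; candidates with higher weights are preferred when
	// get_candidates_for_feature() later sorts them.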
15454 	for (uint32_t i = 0u; i < FeatureCount; ++i)
15455 	{
15456 		if (feature_mask & (1u << i))
15457 		{
15458 			auto feature = static_cast<Feature>(i);
15459 			std::unordered_set<uint32_t> unique_candidates;
15460 
15461 			auto candidates = get_candidates_for_feature(feature);
15462 			unique_candidates.insert(candidates.begin(), candidates.end());
15463 
15464 			auto deps = get_feature_dependencies(feature);
15465 			for (Feature d : deps)
15466 			{
15467 				candidates = get_candidates_for_feature(d);
15468 				if (!candidates.empty())
15469 					unique_candidates.insert(candidates.begin(), candidates.end());
15470 			}
15471 
15472 			for (uint32_t c : unique_candidates)
15473 				++res.weights[static_cast<Candidate>(c)];
15474 		}
15475 	}
15476 
15477 	return res;
15478 }
15479 
15480 CompilerGLSL::ShaderSubgroupSupportHelper::CandidateVector CompilerGLSL::ShaderSubgroupSupportHelper::
get_candidates_for_feature(Feature ft,const Result & r)15481     get_candidates_for_feature(Feature ft, const Result &r)
15482 {
15483 	auto c = get_candidates_for_feature(ft);
15484 	auto cmp = [&r](Candidate a, Candidate b) {
15485 		if (r.weights[a] == r.weights[b])
15486 			return a < b; // Prefer candidates with lower enum value
15487 		return r.weights[a] > r.weights[b];
15488 	};
15489 	std::sort(c.begin(), c.end(), cmp);
15490 	return c;
15491 }
15492 
15493 CompilerGLSL::ShaderSubgroupSupportHelper::CandidateVector CompilerGLSL::ShaderSubgroupSupportHelper::
get_candidates_for_feature(Feature feature)15494     get_candidates_for_feature(Feature feature)
15495 {
15496 	switch (feature)
15497 	{
15498 	case SubgroupMask:
15499 		return { KHR_shader_subgroup_ballot, NV_shader_thread_group, ARB_shader_ballot };
15500 	case SubgroupSize:
15501 		return { KHR_shader_subgroup_basic, NV_shader_thread_group, AMD_gcn_shader, ARB_shader_ballot };
15502 	case SubgroupInvocationID:
15503 		return { KHR_shader_subgroup_basic, NV_shader_thread_group, ARB_shader_ballot };
15504 	case SubgroupID:
15505 		return { KHR_shader_subgroup_basic, NV_shader_thread_group };
15506 	case NumSubgroups:
15507 		return { KHR_shader_subgroup_basic, NV_shader_thread_group };
15508 	case SubgroupBroadcast_First:
15509 		return { KHR_shader_subgroup_ballot, NV_shader_thread_shuffle, ARB_shader_ballot };
15510 	case SubgroupBallotFindLSB_MSB:
15511 		return { KHR_shader_subgroup_ballot, NV_shader_thread_group };
15512 	case SubgroupAll_Any_AllEqualBool:
15513 		return { KHR_shader_subgroup_vote, NV_gpu_shader_5, ARB_shader_group_vote, AMD_gcn_shader };
15514 	case SubgroupAllEqualT:
15515 		return {}; // depends on other features only
15516 	case SubgroupElect:
15517 		return {}; // depends on other features only
15518 	case SubgroupBallot:
15519 		return { KHR_shader_subgroup_ballot, NV_shader_thread_group, ARB_shader_ballot };
15520 	case SubgroupBarrier:
15521 		return { KHR_shader_subgroup_basic, NV_shader_thread_group, ARB_shader_ballot, AMD_gcn_shader };
15522 	case SubgroupMemBarrier:
15523 		return { KHR_shader_subgroup_basic };
15524 	case SubgroupInverseBallot_InclBitCount_ExclBitCout:
15525 		return {};
15526 	case SubgroupBallotBitExtract:
15527 		return { NV_shader_thread_group };
15528 	case SubgroupBallotBitCount:
15529 		return {};
15530 	default:
15531 		return {};
15532 	}
15533 }
15534 
build_mask(const SmallVector<Feature> & features)15535 CompilerGLSL::ShaderSubgroupSupportHelper::FeatureMask CompilerGLSL::ShaderSubgroupSupportHelper::build_mask(
15536     const SmallVector<Feature> &features)
15537 {
15538 	FeatureMask mask = 0;
15539 	for (Feature f : features)
15540 		mask |= FeatureMask(1) << f;
15541 	return mask;
15542 }
15543 
Result()15544 CompilerGLSL::ShaderSubgroupSupportHelper::Result::Result()
15545 {
15546 	for (auto &weight : weights)
15547 		weight = 0;
15548 
15549 	// Make sure KHR_shader_subgroup extensions are always preferred.
15550 	const uint32_t big_num = FeatureCount;
15551 	weights[KHR_shader_subgroup_ballot] = big_num;
15552 	weights[KHR_shader_subgroup_basic] = big_num;
15553 	weights[KHR_shader_subgroup_vote] = big_num;
15554 }
15555 
request_workaround_wrapper_overload(TypeID id)15556 void CompilerGLSL::request_workaround_wrapper_overload(TypeID id)
15557 {
15558 	// Must be ordered to maintain deterministic output, so vector is appropriate.
15559 	if (find(begin(workaround_ubo_load_overload_types), end(workaround_ubo_load_overload_types), id) ==
15560 	    end(workaround_ubo_load_overload_types))
15561 	{
15562 		force_recompile();
15563 		workaround_ubo_load_overload_types.push_back(id);
15564 	}
15565 }
15566 
rewrite_load_for_wrapped_row_major(std::string & expr,TypeID loaded_type,ID ptr)15567 void CompilerGLSL::rewrite_load_for_wrapped_row_major(std::string &expr, TypeID loaded_type, ID ptr)
15568 {
15569 	// Loading row-major matrices from UBOs on older AMD Windows OpenGL drivers is problematic.
15570 	// To load these types correctly, we must first wrap them in a dummy function whose only purpose is to
15571 	// ensure the row_major decoration is actually respected.
15572 	auto *var = maybe_get_backing_variable(ptr);
15573 	if (!var)
15574 		return;
15575 
15576 	auto &backing_type = get<SPIRType>(var->basetype);
15577 	bool is_ubo = backing_type.basetype == SPIRType::Struct && backing_type.storage == StorageClassUniform &&
15578 	              has_decoration(backing_type.self, DecorationBlock);
15579 	if (!is_ubo)
15580 		return;
15581 
15582 	auto *type = &get<SPIRType>(loaded_type);
15583 	bool rewrite = false;
15584 
15585 	if (is_matrix(*type))
15586 	{
15587 		// To avoid adding a lot of unnecessary meta tracking to forward the row_major state,
15588 		// we will simply look at the base struct itself. It is exceptionally rare to mix and match row-major/col-major state.
15589 		// If there is any row-major action going on, we apply the workaround.
15590 		// It is harmless to apply the workaround to column-major matrices, so this is still a valid solution.
15591 		// If an access chain occurred, the workaround is not required, so loads of vectors or scalars do not need the workaround.
15592 		type = &backing_type;
15593 	}
15594 
15595 	if (type->basetype == SPIRType::Struct)
15596 	{
15597 		// If we're loading a struct where any member is a row-major matrix, apply the workaround.
15598 		for (uint32_t i = 0; i < uint32_t(type->member_types.size()); i++)
15599 		{
15600 			if (combined_decoration_for_member(*type, i).get(DecorationRowMajor))
15601 			{
15602 				rewrite = true;
15603 				break;
15604 			}
15605 		}
15606 	}
15607 
15608 	if (rewrite)
15609 	{
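		// Illustrative result: a load such as "ubo.row_major_mat" is emitted as
		// "spvWorkaroundRowMajor(ubo.row_major_mat)" so that the wrapper overload is used.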
15610 		request_workaround_wrapper_overload(loaded_type);
15611 		expr = join("spvWorkaroundRowMajor(", expr, ")");
15612 	}
15613 }
15614