1 /*
2  * Copyright 2015-2019 Arm Limited
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "spirv_glsl.hpp"
18 #include "GLSL.std.450.h"
19 #include "spirv_common.hpp"
20 #include <algorithm>
21 #include <assert.h>
22 #include <cmath>
23 #include <limits>
24 #include <locale.h>
25 #include <utility>
26 
27 #ifndef _WIN32
28 #include <langinfo.h>
29 #endif
31 
32 using namespace spv;
33 using namespace SPIRV_CROSS_NAMESPACE;
34 using namespace std;
35 
static bool is_unsigned_opcode(Op op)
37 {
38 	// Don't have to be exhaustive, only relevant for legacy target checking ...
39 	switch (op)
40 	{
41 	case OpShiftRightLogical:
42 	case OpUGreaterThan:
43 	case OpUGreaterThanEqual:
44 	case OpULessThan:
45 	case OpULessThanEqual:
46 	case OpUConvert:
47 	case OpUDiv:
48 	case OpUMod:
49 	case OpUMulExtended:
50 	case OpConvertUToF:
51 	case OpConvertFToU:
52 		return true;
53 
54 	default:
55 		return false;
56 	}
57 }
58 
static bool is_unsigned_glsl_opcode(GLSLstd450 op)
60 {
61 	// Don't have to be exhaustive, only relevant for legacy target checking ...
62 	switch (op)
63 	{
64 	case GLSLstd450UClamp:
65 	case GLSLstd450UMin:
66 	case GLSLstd450UMax:
67 	case GLSLstd450FindUMsb:
68 		return true;
69 
70 	default:
71 		return false;
72 	}
73 }
74 
static bool packing_is_vec4_padded(BufferPackingStandard packing)
76 {
77 	switch (packing)
78 	{
79 	case BufferPackingHLSLCbuffer:
80 	case BufferPackingHLSLCbufferPackOffset:
81 	case BufferPackingStd140:
82 	case BufferPackingStd140EnhancedLayout:
83 		return true;
84 
85 	default:
86 		return false;
87 	}
88 }
89 
static bool packing_is_hlsl(BufferPackingStandard packing)
91 {
92 	switch (packing)
93 	{
94 	case BufferPackingHLSLCbuffer:
95 	case BufferPackingHLSLCbufferPackOffset:
96 		return true;
97 
98 	default:
99 		return false;
100 	}
101 }
102 
static bool packing_has_flexible_offset(BufferPackingStandard packing)
104 {
105 	switch (packing)
106 	{
107 	case BufferPackingStd140:
108 	case BufferPackingStd430:
109 	case BufferPackingScalar:
110 	case BufferPackingHLSLCbuffer:
111 		return false;
112 
113 	default:
114 		return true;
115 	}
116 }
117 
static bool packing_is_scalar(BufferPackingStandard packing)
119 {
120 	switch (packing)
121 	{
122 	case BufferPackingScalar:
123 	case BufferPackingScalarEnhancedLayout:
124 		return true;
125 
126 	default:
127 		return false;
128 	}
129 }
130 
static BufferPackingStandard packing_to_substruct_packing(BufferPackingStandard packing)
132 {
133 	switch (packing)
134 	{
135 	case BufferPackingStd140EnhancedLayout:
136 		return BufferPackingStd140;
137 	case BufferPackingStd430EnhancedLayout:
138 		return BufferPackingStd430;
139 	case BufferPackingHLSLCbufferPackOffset:
140 		return BufferPackingHLSLCbuffer;
141 	case BufferPackingScalarEnhancedLayout:
142 		return BufferPackingScalar;
143 	default:
144 		return packing;
145 	}
146 }
147 
148 // Sanitizes underscores for GLSL where multiple underscores in a row are not allowed.
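// For example, "foo__bar___baz" collapses to "foo_bar_baz"; isolated underscores are kept as-is.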
string CompilerGLSL::sanitize_underscores(const string &str)
150 {
151 	string res;
152 	res.reserve(str.size());
153 
154 	bool last_underscore = false;
155 	for (auto c : str)
156 	{
157 		if (c == '_')
158 		{
159 			if (last_underscore)
160 				continue;
161 
162 			res += c;
163 			last_underscore = true;
164 		}
165 		else
166 		{
167 			res += c;
168 			last_underscore = false;
169 		}
170 	}
171 	return res;
172 }
173 
void CompilerGLSL::init()
175 {
176 	if (ir.source.known)
177 	{
178 		options.es = ir.source.es;
179 		options.version = ir.source.version;
180 	}
181 
182 	// Query the locale to see what the decimal point is.
183 	// We'll rely on fixing it up ourselves in the rare case we have a comma-as-decimal locale
	// rather than setting locales ourselves. Setting locales in a safe and isolated way is rather
185 	// tricky.
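	// For example, a de_DE locale typically reports ',' as the decimal point; the float-to-string
	// code later swaps it back to '.' so emitted literals stay valid GLSL.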
186 #ifdef _WIN32
187 	// On Windows, localeconv uses thread-local storage, so it should be fine.
188 	const struct lconv *conv = localeconv();
189 	if (conv && conv->decimal_point)
190 		current_locale_radix_character = *conv->decimal_point;
191 #elif defined(__ANDROID__) && __ANDROID_API__ < 26
192 	// nl_langinfo is not supported on this platform, fall back to the worse alternative.
193 	const struct lconv *conv = localeconv();
194 	if (conv && conv->decimal_point)
195 		current_locale_radix_character = *conv->decimal_point;
196 #else
	// localeconv, the portable function, is not MT safe ...
198 	const char *decimal_point = nl_langinfo(RADIXCHAR);
199 	if (decimal_point && *decimal_point != '\0')
200 		current_locale_radix_character = *decimal_point;
201 #endif
202 }
203 
static const char *to_pls_layout(PlsFormat format)
205 {
206 	switch (format)
207 	{
208 	case PlsR11FG11FB10F:
209 		return "layout(r11f_g11f_b10f) ";
210 	case PlsR32F:
211 		return "layout(r32f) ";
212 	case PlsRG16F:
213 		return "layout(rg16f) ";
214 	case PlsRGB10A2:
215 		return "layout(rgb10_a2) ";
216 	case PlsRGBA8:
217 		return "layout(rgba8) ";
218 	case PlsRG16:
219 		return "layout(rg16) ";
220 	case PlsRGBA8I:
		return "layout(rgba8i) ";
222 	case PlsRG16I:
223 		return "layout(rg16i) ";
224 	case PlsRGB10A2UI:
225 		return "layout(rgb10_a2ui) ";
226 	case PlsRGBA8UI:
227 		return "layout(rgba8ui) ";
228 	case PlsRG16UI:
229 		return "layout(rg16ui) ";
230 	case PlsR32UI:
231 		return "layout(r32ui) ";
232 	default:
233 		return "";
234 	}
235 }
236 
static SPIRType::BaseType pls_format_to_basetype(PlsFormat format)
238 {
239 	switch (format)
240 	{
241 	default:
242 	case PlsR11FG11FB10F:
243 	case PlsR32F:
244 	case PlsRG16F:
245 	case PlsRGB10A2:
246 	case PlsRGBA8:
247 	case PlsRG16:
248 		return SPIRType::Float;
249 
250 	case PlsRGBA8I:
251 	case PlsRG16I:
252 		return SPIRType::Int;
253 
254 	case PlsRGB10A2UI:
255 	case PlsRGBA8UI:
256 	case PlsRG16UI:
257 	case PlsR32UI:
258 		return SPIRType::UInt;
259 	}
260 }
261 
static uint32_t pls_format_to_components(PlsFormat format)
263 {
264 	switch (format)
265 	{
266 	default:
267 	case PlsR32F:
268 	case PlsR32UI:
269 		return 1;
270 
271 	case PlsRG16F:
272 	case PlsRG16:
273 	case PlsRG16UI:
274 	case PlsRG16I:
275 		return 2;
276 
277 	case PlsR11FG11FB10F:
278 		return 3;
279 
280 	case PlsRGB10A2:
281 	case PlsRGBA8:
282 	case PlsRGBA8I:
283 	case PlsRGB10A2UI:
284 	case PlsRGBA8UI:
285 		return 4;
286 	}
287 }
288 
static const char *vector_swizzle(int vecsize, int index)
290 {
291 	static const char *const swizzle[4][4] = {
292 		{ ".x", ".y", ".z", ".w" },
293 		{ ".xy", ".yz", ".zw", nullptr },
294 		{ ".xyz", ".yzw", nullptr, nullptr },
295 #if defined(__GNUC__) && (__GNUC__ == 9)
296 		// This works around a GCC 9 bug, see details in https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90947.
297 		// This array ends up being compiled as all nullptrs, tripping the assertions below.
298 		{ "", nullptr, nullptr, "$" },
299 #else
300 		{ "", nullptr, nullptr, nullptr },
301 #endif
302 	};
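	// Indexed as swizzle[vecsize - 1][first component]; e.g. vector_swizzle(3, 1) yields ".yzw",
	// the three contiguous components starting at .y.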
303 
304 	assert(vecsize >= 1 && vecsize <= 4);
305 	assert(index >= 0 && index < 4);
306 	assert(swizzle[vecsize - 1][index]);
307 
308 	return swizzle[vecsize - 1][index];
309 }
310 
void CompilerGLSL::reset()
312 {
313 	// We do some speculative optimizations which should pretty much always work out,
314 	// but just in case the SPIR-V is rather weird, recompile until it's happy.
315 	// This typically only means one extra pass.
316 	clear_force_recompile();
317 
318 	// Clear invalid expression tracking.
319 	invalid_expressions.clear();
320 	current_function = nullptr;
321 
322 	// Clear temporary usage tracking.
323 	expression_usage_counts.clear();
324 	forwarded_temporaries.clear();
325 	suppressed_usage_tracking.clear();
326 
327 	// Ensure that we declare phi-variable copies even if the original declaration isn't deferred
328 	flushed_phi_variables.clear();
329 
330 	reset_name_caches();
331 
332 	ir.for_each_typed_id<SPIRFunction>([&](uint32_t, SPIRFunction &func) {
333 		func.active = false;
334 		func.flush_undeclared = true;
335 	});
336 
337 	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { var.dependees.clear(); });
338 
339 	ir.reset_all_of_type<SPIRExpression>();
340 	ir.reset_all_of_type<SPIRAccessChain>();
341 
342 	statement_count = 0;
343 	indent = 0;
344 }
345 
void CompilerGLSL::remap_pls_variables()
347 {
348 	for (auto &input : pls_inputs)
349 	{
350 		auto &var = get<SPIRVariable>(input.id);
351 
352 		bool input_is_target = false;
353 		if (var.storage == StorageClassUniformConstant)
354 		{
355 			auto &type = get<SPIRType>(var.basetype);
356 			input_is_target = type.image.dim == DimSubpassData;
357 		}
358 
359 		if (var.storage != StorageClassInput && !input_is_target)
360 			SPIRV_CROSS_THROW("Can only use in and target variables for PLS inputs.");
361 		var.remapped_variable = true;
362 	}
363 
364 	for (auto &output : pls_outputs)
365 	{
366 		auto &var = get<SPIRVariable>(output.id);
367 		if (var.storage != StorageClassOutput)
368 			SPIRV_CROSS_THROW("Can only use out variables for PLS outputs.");
369 		var.remapped_variable = true;
370 	}
371 }
372 
void CompilerGLSL::find_static_extensions()
374 {
375 	ir.for_each_typed_id<SPIRType>([&](uint32_t, const SPIRType &type) {
376 		if (type.basetype == SPIRType::Double)
377 		{
378 			if (options.es)
379 				SPIRV_CROSS_THROW("FP64 not supported in ES profile.");
380 			if (!options.es && options.version < 400)
381 				require_extension_internal("GL_ARB_gpu_shader_fp64");
382 		}
383 		else if (type.basetype == SPIRType::Int64 || type.basetype == SPIRType::UInt64)
384 		{
385 			if (options.es)
386 				SPIRV_CROSS_THROW("64-bit integers not supported in ES profile.");
387 			if (!options.es)
388 				require_extension_internal("GL_ARB_gpu_shader_int64");
389 		}
390 		else if (type.basetype == SPIRType::Half)
391 		{
392 			require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_float16");
393 			if (options.vulkan_semantics)
394 				require_extension_internal("GL_EXT_shader_16bit_storage");
395 		}
396 		else if (type.basetype == SPIRType::SByte || type.basetype == SPIRType::UByte)
397 		{
398 			require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_int8");
399 			if (options.vulkan_semantics)
400 				require_extension_internal("GL_EXT_shader_8bit_storage");
401 		}
402 		else if (type.basetype == SPIRType::Short || type.basetype == SPIRType::UShort)
403 		{
404 			require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_int16");
405 			if (options.vulkan_semantics)
406 				require_extension_internal("GL_EXT_shader_16bit_storage");
407 		}
408 	});
409 
410 	auto &execution = get_entry_point();
411 	switch (execution.model)
412 	{
413 	case ExecutionModelGLCompute:
414 		if (!options.es && options.version < 430)
415 			require_extension_internal("GL_ARB_compute_shader");
416 		if (options.es && options.version < 310)
417 			SPIRV_CROSS_THROW("At least ESSL 3.10 required for compute shaders.");
418 		break;
419 
420 	case ExecutionModelGeometry:
421 		if (options.es && options.version < 320)
422 			require_extension_internal("GL_EXT_geometry_shader");
423 		if (!options.es && options.version < 150)
424 			require_extension_internal("GL_ARB_geometry_shader4");
425 
426 		if (execution.flags.get(ExecutionModeInvocations) && execution.invocations != 1)
427 		{
428 			// Instanced GS is part of 400 core or this extension.
429 			if (!options.es && options.version < 400)
430 				require_extension_internal("GL_ARB_gpu_shader5");
431 		}
432 		break;
433 
434 	case ExecutionModelTessellationEvaluation:
435 	case ExecutionModelTessellationControl:
436 		if (options.es && options.version < 320)
437 			require_extension_internal("GL_EXT_tessellation_shader");
438 		if (!options.es && options.version < 400)
439 			require_extension_internal("GL_ARB_tessellation_shader");
440 		break;
441 
442 	case ExecutionModelRayGenerationNV:
443 	case ExecutionModelIntersectionNV:
444 	case ExecutionModelAnyHitNV:
445 	case ExecutionModelClosestHitNV:
446 	case ExecutionModelMissNV:
447 	case ExecutionModelCallableNV:
448 		if (options.es || options.version < 460)
449 			SPIRV_CROSS_THROW("Ray tracing shaders require non-es profile with version 460 or above.");
450 		require_extension_internal("GL_NV_ray_tracing");
451 		break;
452 
453 	default:
454 		break;
455 	}
456 
457 	if (!pls_inputs.empty() || !pls_outputs.empty())
458 		require_extension_internal("GL_EXT_shader_pixel_local_storage");
459 
460 	if (options.separate_shader_objects && !options.es && options.version < 410)
461 		require_extension_internal("GL_ARB_separate_shader_objects");
462 
463 	if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
464 	{
465 		if (!options.vulkan_semantics)
466 			SPIRV_CROSS_THROW("GL_EXT_buffer_reference is only supported in Vulkan GLSL.");
467 		if (options.es && options.version < 320)
468 			SPIRV_CROSS_THROW("GL_EXT_buffer_reference requires ESSL 320.");
469 		else if (!options.es && options.version < 450)
470 			SPIRV_CROSS_THROW("GL_EXT_buffer_reference requires GLSL 450.");
471 		require_extension_internal("GL_EXT_buffer_reference");
472 	}
473 	else if (ir.addressing_model != AddressingModelLogical)
474 	{
475 		SPIRV_CROSS_THROW("Only Logical and PhysicalStorageBuffer64EXT addressing models are supported.");
476 	}
477 
478 	// Check for nonuniform qualifier.
479 	// Instead of looping over all decorations to find this, just look at capabilities.
480 	for (auto &cap : ir.declared_capabilities)
481 	{
482 		bool nonuniform_indexing = false;
483 		switch (cap)
484 		{
485 		case CapabilityShaderNonUniformEXT:
486 		case CapabilityRuntimeDescriptorArrayEXT:
487 			if (!options.vulkan_semantics)
488 				SPIRV_CROSS_THROW("GL_EXT_nonuniform_qualifier is only supported in Vulkan GLSL.");
489 			require_extension_internal("GL_EXT_nonuniform_qualifier");
490 			nonuniform_indexing = true;
491 			break;
492 
493 		default:
494 			break;
495 		}
496 
497 		if (nonuniform_indexing)
498 			break;
499 	}
500 }
501 
string CompilerGLSL::compile()
503 {
504 	if (options.vulkan_semantics)
505 		backend.allow_precision_qualifiers = true;
506 	backend.force_gl_in_out_block = true;
507 	backend.supports_extensions = true;
508 	backend.use_array_constructor = true;
509 
510 	// Scan the SPIR-V to find trivial uses of extensions.
511 	fixup_type_alias();
512 	reorder_type_alias();
513 	build_function_control_flow_graphs_and_analyze();
514 	find_static_extensions();
515 	fixup_image_load_store_access();
516 	update_active_builtins();
517 	analyze_image_and_sampler_usage();
518 	analyze_interlocked_resource_usage();
519 
520 	// Shaders might cast unrelated data to pointers of non-block types.
521 	// Find all such instances and make sure we can cast the pointers to a synthesized block type.
522 	if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
523 		analyze_non_block_pointer_types();
524 
525 	uint32_t pass_count = 0;
526 	do
527 	{
528 		if (pass_count >= 3)
529 			SPIRV_CROSS_THROW("Over 3 compilation loops detected. Must be a bug!");
530 
531 		reset();
532 
533 		buffer.reset();
534 
535 		emit_header();
536 		emit_resources();
537 
538 		emit_function(get<SPIRFunction>(ir.default_entry_point), Bitset());
539 
540 		pass_count++;
541 	} while (is_forcing_recompilation());
542 
543 	// Implement the interlocked wrapper function at the end.
544 	// The body was implemented in lieu of main().
545 	if (interlocked_is_complex)
546 	{
547 		statement("void main()");
548 		begin_scope();
549 		statement("// Interlocks were used in a way not compatible with GLSL, this is very slow.");
550 		if (options.es)
551 			statement("beginInvocationInterlockNV();");
552 		else
553 			statement("beginInvocationInterlockARB();");
554 		statement("spvMainInterlockedBody();");
555 		if (options.es)
556 			statement("endInvocationInterlockNV();");
557 		else
558 			statement("endInvocationInterlockARB();");
559 		end_scope();
560 	}
561 
562 	// Entry point in GLSL is always main().
563 	get_entry_point().name = "main";
564 
565 	return buffer.str();
566 }
567 
std::string CompilerGLSL::get_partial_source()
569 {
570 	return buffer.str();
571 }
572 
void CompilerGLSL::build_workgroup_size(SmallVector<string> &arguments, const SpecializationConstant &wg_x,
                                        const SpecializationConstant &wg_y, const SpecializationConstant &wg_z)
575 {
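	// Builds the layout qualifier arguments, e.g. "local_size_x = 64" for a literal size,
	// or "local_size_x_id = 3" when a spec constant drives that dimension in Vulkan GLSL.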
576 	auto &execution = get_entry_point();
577 
578 	if (wg_x.id)
579 	{
580 		if (options.vulkan_semantics)
581 			arguments.push_back(join("local_size_x_id = ", wg_x.constant_id));
582 		else
583 			arguments.push_back(join("local_size_x = ", get<SPIRConstant>(wg_x.id).specialization_constant_macro_name));
584 	}
585 	else
586 		arguments.push_back(join("local_size_x = ", execution.workgroup_size.x));
587 
588 	if (wg_y.id)
589 	{
590 		if (options.vulkan_semantics)
591 			arguments.push_back(join("local_size_y_id = ", wg_y.constant_id));
592 		else
593 			arguments.push_back(join("local_size_y = ", get<SPIRConstant>(wg_y.id).specialization_constant_macro_name));
594 	}
595 	else
596 		arguments.push_back(join("local_size_y = ", execution.workgroup_size.y));
597 
598 	if (wg_z.id)
599 	{
600 		if (options.vulkan_semantics)
601 			arguments.push_back(join("local_size_z_id = ", wg_z.constant_id));
602 		else
603 			arguments.push_back(join("local_size_z = ", get<SPIRConstant>(wg_z.id).specialization_constant_macro_name));
604 	}
605 	else
606 		arguments.push_back(join("local_size_z = ", execution.workgroup_size.z));
607 }
608 
void CompilerGLSL::emit_header()
610 {
611 	auto &execution = get_entry_point();
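	// Emits e.g. "#version 450" or "#version 310 es".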
612 	statement("#version ", options.version, options.es && options.version > 100 ? " es" : "");
613 
614 	if (!options.es && options.version < 420)
615 	{
616 		// Needed for binding = # on UBOs, etc.
617 		if (options.enable_420pack_extension)
618 		{
619 			statement("#ifdef GL_ARB_shading_language_420pack");
620 			statement("#extension GL_ARB_shading_language_420pack : require");
621 			statement("#endif");
622 		}
623 		// Needed for: layout(early_fragment_tests) in;
624 		if (execution.flags.get(ExecutionModeEarlyFragmentTests))
625 			require_extension_internal("GL_ARB_shader_image_load_store");
626 	}
627 
628 	// Needed for: layout(post_depth_coverage) in;
629 	if (execution.flags.get(ExecutionModePostDepthCoverage))
630 		require_extension_internal("GL_ARB_post_depth_coverage");
631 
632 	// Needed for: layout({pixel,sample}_interlock_[un]ordered) in;
633 	if (execution.flags.get(ExecutionModePixelInterlockOrderedEXT) ||
634 	    execution.flags.get(ExecutionModePixelInterlockUnorderedEXT) ||
635 	    execution.flags.get(ExecutionModeSampleInterlockOrderedEXT) ||
636 	    execution.flags.get(ExecutionModeSampleInterlockUnorderedEXT))
637 	{
638 		if (options.es)
639 		{
640 			if (options.version < 310)
641 				SPIRV_CROSS_THROW("At least ESSL 3.10 required for fragment shader interlock.");
642 			require_extension_internal("GL_NV_fragment_shader_interlock");
643 		}
644 		else
645 		{
646 			if (options.version < 420)
647 				require_extension_internal("GL_ARB_shader_image_load_store");
648 			require_extension_internal("GL_ARB_fragment_shader_interlock");
649 		}
650 	}
651 
652 	for (auto &ext : forced_extensions)
653 	{
654 		if (ext == "GL_EXT_shader_explicit_arithmetic_types_float16")
655 		{
656 			// Special case, this extension has a potential fallback to another vendor extension in normal GLSL.
657 			// GL_AMD_gpu_shader_half_float is a superset, so try that first.
658 			statement("#if defined(GL_AMD_gpu_shader_half_float)");
659 			statement("#extension GL_AMD_gpu_shader_half_float : require");
660 			if (!options.vulkan_semantics)
661 			{
662 				statement("#elif defined(GL_NV_gpu_shader5)");
663 				statement("#extension GL_NV_gpu_shader5 : require");
664 			}
665 			else
666 			{
667 				statement("#elif defined(GL_EXT_shader_explicit_arithmetic_types_float16)");
668 				statement("#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require");
669 			}
670 			statement("#else");
671 			statement("#error No extension available for FP16.");
672 			statement("#endif");
673 		}
674 		else if (ext == "GL_EXT_shader_explicit_arithmetic_types_int16")
675 		{
676 			if (options.vulkan_semantics)
677 				statement("#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require");
678 			else
679 			{
680 				statement("#if defined(GL_AMD_gpu_shader_int16)");
681 				statement("#extension GL_AMD_gpu_shader_int16 : require");
682 				statement("#else");
683 				statement("#error No extension available for Int16.");
684 				statement("#endif");
685 			}
686 		}
687 		else if (ext == "GL_ARB_post_depth_coverage")
688 		{
689 			if (options.es)
690 				statement("#extension GL_EXT_post_depth_coverage : require");
691 			else
692 			{
				statement("#if defined(GL_ARB_post_depth_coverage)");
694 				statement("#extension GL_ARB_post_depth_coverage : require");
695 				statement("#else");
696 				statement("#extension GL_EXT_post_depth_coverage : require");
697 				statement("#endif");
698 			}
699 		}
700 		else
701 			statement("#extension ", ext, " : require");
702 	}
703 
704 	for (auto &header : header_lines)
705 		statement(header);
706 
707 	SmallVector<string> inputs;
708 	SmallVector<string> outputs;
709 
710 	switch (execution.model)
711 	{
712 	case ExecutionModelGeometry:
713 		outputs.push_back(join("max_vertices = ", execution.output_vertices));
714 		if ((execution.flags.get(ExecutionModeInvocations)) && execution.invocations != 1)
715 			inputs.push_back(join("invocations = ", execution.invocations));
716 		if (execution.flags.get(ExecutionModeInputPoints))
717 			inputs.push_back("points");
718 		if (execution.flags.get(ExecutionModeInputLines))
719 			inputs.push_back("lines");
720 		if (execution.flags.get(ExecutionModeInputLinesAdjacency))
721 			inputs.push_back("lines_adjacency");
722 		if (execution.flags.get(ExecutionModeTriangles))
723 			inputs.push_back("triangles");
724 		if (execution.flags.get(ExecutionModeInputTrianglesAdjacency))
725 			inputs.push_back("triangles_adjacency");
726 		if (execution.flags.get(ExecutionModeOutputTriangleStrip))
727 			outputs.push_back("triangle_strip");
728 		if (execution.flags.get(ExecutionModeOutputPoints))
729 			outputs.push_back("points");
730 		if (execution.flags.get(ExecutionModeOutputLineStrip))
731 			outputs.push_back("line_strip");
732 		break;
733 
734 	case ExecutionModelTessellationControl:
735 		if (execution.flags.get(ExecutionModeOutputVertices))
736 			outputs.push_back(join("vertices = ", execution.output_vertices));
737 		break;
738 
739 	case ExecutionModelTessellationEvaluation:
740 		if (execution.flags.get(ExecutionModeQuads))
741 			inputs.push_back("quads");
742 		if (execution.flags.get(ExecutionModeTriangles))
743 			inputs.push_back("triangles");
744 		if (execution.flags.get(ExecutionModeIsolines))
745 			inputs.push_back("isolines");
746 		if (execution.flags.get(ExecutionModePointMode))
747 			inputs.push_back("point_mode");
748 
749 		if (!execution.flags.get(ExecutionModeIsolines))
750 		{
751 			if (execution.flags.get(ExecutionModeVertexOrderCw))
752 				inputs.push_back("cw");
753 			if (execution.flags.get(ExecutionModeVertexOrderCcw))
754 				inputs.push_back("ccw");
755 		}
756 
757 		if (execution.flags.get(ExecutionModeSpacingFractionalEven))
758 			inputs.push_back("fractional_even_spacing");
759 		if (execution.flags.get(ExecutionModeSpacingFractionalOdd))
760 			inputs.push_back("fractional_odd_spacing");
761 		if (execution.flags.get(ExecutionModeSpacingEqual))
762 			inputs.push_back("equal_spacing");
763 		break;
764 
765 	case ExecutionModelGLCompute:
766 	{
767 		if (execution.workgroup_size.constant != 0)
768 		{
769 			SpecializationConstant wg_x, wg_y, wg_z;
770 			get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);
771 
772 			// If there are any spec constants on legacy GLSL, defer declaration, we need to set up macro
773 			// declarations before we can emit the work group size.
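			// (In that case the size is emitted later, once the #defines exist, roughly as
			// "layout(local_size_x = SOME_MACRO) in;" where SOME_MACRO stands for the generated macro name.)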
774 			if (options.vulkan_semantics ||
775 			    ((wg_x.id == ConstantID(0)) && (wg_y.id == ConstantID(0)) && (wg_z.id == ConstantID(0))))
776 				build_workgroup_size(inputs, wg_x, wg_y, wg_z);
777 		}
778 		else
779 		{
780 			inputs.push_back(join("local_size_x = ", execution.workgroup_size.x));
781 			inputs.push_back(join("local_size_y = ", execution.workgroup_size.y));
782 			inputs.push_back(join("local_size_z = ", execution.workgroup_size.z));
783 		}
784 		break;
785 	}
786 
787 	case ExecutionModelFragment:
788 		if (options.es)
789 		{
790 			switch (options.fragment.default_float_precision)
791 			{
792 			case Options::Lowp:
793 				statement("precision lowp float;");
794 				break;
795 
796 			case Options::Mediump:
797 				statement("precision mediump float;");
798 				break;
799 
800 			case Options::Highp:
801 				statement("precision highp float;");
802 				break;
803 
804 			default:
805 				break;
806 			}
807 
808 			switch (options.fragment.default_int_precision)
809 			{
810 			case Options::Lowp:
811 				statement("precision lowp int;");
812 				break;
813 
814 			case Options::Mediump:
815 				statement("precision mediump int;");
816 				break;
817 
818 			case Options::Highp:
819 				statement("precision highp int;");
820 				break;
821 
822 			default:
823 				break;
824 			}
825 		}
826 
827 		if (execution.flags.get(ExecutionModeEarlyFragmentTests))
828 			inputs.push_back("early_fragment_tests");
829 		if (execution.flags.get(ExecutionModePostDepthCoverage))
830 			inputs.push_back("post_depth_coverage");
831 
832 		if (execution.flags.get(ExecutionModePixelInterlockOrderedEXT))
833 			inputs.push_back("pixel_interlock_ordered");
834 		else if (execution.flags.get(ExecutionModePixelInterlockUnorderedEXT))
835 			inputs.push_back("pixel_interlock_unordered");
836 		else if (execution.flags.get(ExecutionModeSampleInterlockOrderedEXT))
837 			inputs.push_back("sample_interlock_ordered");
838 		else if (execution.flags.get(ExecutionModeSampleInterlockUnorderedEXT))
839 			inputs.push_back("sample_interlock_unordered");
840 
841 		if (!options.es && execution.flags.get(ExecutionModeDepthGreater))
842 			statement("layout(depth_greater) out float gl_FragDepth;");
843 		else if (!options.es && execution.flags.get(ExecutionModeDepthLess))
844 			statement("layout(depth_less) out float gl_FragDepth;");
845 
846 		break;
847 
848 	default:
849 		break;
850 	}
851 
852 	if (!inputs.empty())
853 		statement("layout(", merge(inputs), ") in;");
854 	if (!outputs.empty())
855 		statement("layout(", merge(outputs), ") out;");
856 
857 	statement("");
858 }
859 
bool CompilerGLSL::type_is_empty(const SPIRType &type)
861 {
862 	return type.basetype == SPIRType::Struct && type.member_types.empty();
863 }
864 
void CompilerGLSL::emit_struct(SPIRType &type)
866 {
867 	// Struct types can be stamped out multiple times
868 	// with just different offsets, matrix layouts, etc ...
869 	// Type-punning with these types is legal, which complicates things
870 	// when we are storing struct and array types in an SSBO for example.
871 	// If the type master is packed however, we can no longer assume that the struct declaration will be redundant.
872 	if (type.type_alias != TypeID(0) &&
873 	    !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked))
874 		return;
875 
876 	add_resource_name(type.self);
877 	auto name = type_to_glsl(type);
878 
879 	statement(!backend.explicit_struct_type ? "struct " : "", name);
880 	begin_scope();
881 
882 	type.member_name_cache.clear();
883 
884 	uint32_t i = 0;
885 	bool emitted = false;
886 	for (auto &member : type.member_types)
887 	{
888 		add_member_name(type, i);
889 		emit_struct_member(type, member, i);
890 		i++;
891 		emitted = true;
892 	}
893 
894 	// Don't declare empty structs in GLSL, this is not allowed.
895 	if (type_is_empty(type) && !backend.supports_empty_struct)
896 	{
897 		statement("int empty_struct_member;");
898 		emitted = true;
899 	}
900 
901 	if (has_extended_decoration(type.self, SPIRVCrossDecorationPaddingTarget))
902 		emit_struct_padding_target(type);
903 
904 	end_scope_decl();
905 
906 	if (emitted)
907 		statement("");
908 }
909 
string CompilerGLSL::to_interpolation_qualifiers(const Bitset &flags)
911 {
912 	string res;
913 	//if (flags & (1ull << DecorationSmooth))
914 	//    res += "smooth ";
915 	if (flags.get(DecorationFlat))
916 		res += "flat ";
917 	if (flags.get(DecorationNoPerspective))
918 		res += "noperspective ";
919 	if (flags.get(DecorationCentroid))
920 		res += "centroid ";
921 	if (flags.get(DecorationPatch))
922 		res += "patch ";
923 	if (flags.get(DecorationSample))
924 		res += "sample ";
925 	if (flags.get(DecorationInvariant))
926 		res += "invariant ";
927 	if (flags.get(DecorationExplicitInterpAMD))
928 		res += "__explicitInterpAMD ";
929 
930 	return res;
931 }
932 
string CompilerGLSL::layout_for_member(const SPIRType &type, uint32_t index)
934 {
935 	if (is_legacy())
936 		return "";
937 
938 	bool is_block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) ||
939 	                ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
940 	if (!is_block)
941 		return "";
942 
943 	auto &memb = ir.meta[type.self].members;
944 	if (index >= memb.size())
945 		return "";
946 	auto &dec = memb[index];
947 
948 	SmallVector<string> attr;
949 
950 	// We can only apply layouts on members in block interfaces.
951 	// This is a bit problematic because in SPIR-V decorations are applied on the struct types directly.
952 	// This is not supported on GLSL, so we have to make the assumption that if a struct within our buffer block struct
953 	// has a decoration, it was originally caused by a top-level layout() qualifier in GLSL.
954 	//
955 	// We would like to go from (SPIR-V style):
956 	//
957 	// struct Foo { layout(row_major) mat4 matrix; };
958 	// buffer UBO { Foo foo; };
959 	//
960 	// to
961 	//
962 	// struct Foo { mat4 matrix; }; // GLSL doesn't support any layout shenanigans in raw struct declarations.
963 	// buffer UBO { layout(row_major) Foo foo; }; // Apply the layout on top-level.
964 	auto flags = combined_decoration_for_member(type, index);
965 
966 	if (flags.get(DecorationRowMajor))
967 		attr.push_back("row_major");
968 	// We don't emit any global layouts, so column_major is default.
969 	//if (flags & (1ull << DecorationColMajor))
970 	//    attr.push_back("column_major");
971 
972 	if (dec.decoration_flags.get(DecorationLocation) && can_use_io_location(type.storage, true))
973 		attr.push_back(join("location = ", dec.location));
974 
975 	// Can only declare component if we can declare location.
976 	if (dec.decoration_flags.get(DecorationComponent) && can_use_io_location(type.storage, true))
977 	{
978 		if (!options.es)
979 		{
980 			if (options.version < 440 && options.version >= 140)
981 				require_extension_internal("GL_ARB_enhanced_layouts");
982 			else if (options.version < 140)
983 				SPIRV_CROSS_THROW("Component decoration is not supported in targets below GLSL 1.40.");
984 			attr.push_back(join("component = ", dec.component));
985 		}
986 		else
987 			SPIRV_CROSS_THROW("Component decoration is not supported in ES targets.");
988 	}
989 
990 	// SPIRVCrossDecorationPacked is set by layout_for_variable earlier to mark that we need to emit offset qualifiers.
991 	// This is only done selectively in GLSL as needed.
992 	if (has_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset) &&
993 	    dec.decoration_flags.get(DecorationOffset))
994 		attr.push_back(join("offset = ", dec.offset));
995 
996 	if (attr.empty())
997 		return "";
998 
999 	string res = "layout(";
1000 	res += merge(attr);
1001 	res += ") ";
1002 	return res;
1003 }
1004 
const char *CompilerGLSL::format_to_glsl(spv::ImageFormat format)
1006 {
1007 	if (options.es && is_desktop_only_format(format))
1008 		SPIRV_CROSS_THROW("Attempting to use image format not supported in ES profile.");
1009 
1010 	switch (format)
1011 	{
1012 	case ImageFormatRgba32f:
1013 		return "rgba32f";
1014 	case ImageFormatRgba16f:
1015 		return "rgba16f";
1016 	case ImageFormatR32f:
1017 		return "r32f";
1018 	case ImageFormatRgba8:
1019 		return "rgba8";
1020 	case ImageFormatRgba8Snorm:
1021 		return "rgba8_snorm";
1022 	case ImageFormatRg32f:
1023 		return "rg32f";
1024 	case ImageFormatRg16f:
1025 		return "rg16f";
1026 	case ImageFormatRgba32i:
1027 		return "rgba32i";
1028 	case ImageFormatRgba16i:
1029 		return "rgba16i";
1030 	case ImageFormatR32i:
1031 		return "r32i";
1032 	case ImageFormatRgba8i:
1033 		return "rgba8i";
1034 	case ImageFormatRg32i:
1035 		return "rg32i";
1036 	case ImageFormatRg16i:
1037 		return "rg16i";
1038 	case ImageFormatRgba32ui:
1039 		return "rgba32ui";
1040 	case ImageFormatRgba16ui:
1041 		return "rgba16ui";
1042 	case ImageFormatR32ui:
1043 		return "r32ui";
1044 	case ImageFormatRgba8ui:
1045 		return "rgba8ui";
1046 	case ImageFormatRg32ui:
1047 		return "rg32ui";
1048 	case ImageFormatRg16ui:
1049 		return "rg16ui";
1050 	case ImageFormatR11fG11fB10f:
1051 		return "r11f_g11f_b10f";
1052 	case ImageFormatR16f:
1053 		return "r16f";
1054 	case ImageFormatRgb10A2:
1055 		return "rgb10_a2";
1056 	case ImageFormatR8:
1057 		return "r8";
1058 	case ImageFormatRg8:
1059 		return "rg8";
1060 	case ImageFormatR16:
1061 		return "r16";
1062 	case ImageFormatRg16:
1063 		return "rg16";
1064 	case ImageFormatRgba16:
1065 		return "rgba16";
1066 	case ImageFormatR16Snorm:
1067 		return "r16_snorm";
1068 	case ImageFormatRg16Snorm:
1069 		return "rg16_snorm";
1070 	case ImageFormatRgba16Snorm:
1071 		return "rgba16_snorm";
1072 	case ImageFormatR8Snorm:
1073 		return "r8_snorm";
1074 	case ImageFormatRg8Snorm:
1075 		return "rg8_snorm";
1076 	case ImageFormatR8ui:
1077 		return "r8ui";
1078 	case ImageFormatRg8ui:
1079 		return "rg8ui";
1080 	case ImageFormatR16ui:
1081 		return "r16ui";
1082 	case ImageFormatRgb10a2ui:
1083 		return "rgb10_a2ui";
1084 	case ImageFormatR8i:
1085 		return "r8i";
1086 	case ImageFormatRg8i:
1087 		return "rg8i";
1088 	case ImageFormatR16i:
1089 		return "r16i";
1090 	default:
1091 	case ImageFormatUnknown:
1092 		return nullptr;
1093 	}
1094 }
1095 
uint32_t CompilerGLSL::type_to_packed_base_size(const SPIRType &type, BufferPackingStandard)
1097 {
1098 	switch (type.basetype)
1099 	{
1100 	case SPIRType::Double:
1101 	case SPIRType::Int64:
1102 	case SPIRType::UInt64:
1103 		return 8;
1104 	case SPIRType::Float:
1105 	case SPIRType::Int:
1106 	case SPIRType::UInt:
1107 		return 4;
1108 	case SPIRType::Half:
1109 	case SPIRType::Short:
1110 	case SPIRType::UShort:
1111 		return 2;
1112 	case SPIRType::SByte:
1113 	case SPIRType::UByte:
1114 		return 1;
1115 
1116 	default:
1117 		SPIRV_CROSS_THROW("Unrecognized type in type_to_packed_base_size.");
1118 	}
1119 }
1120 
uint32_t CompilerGLSL::type_to_packed_alignment(const SPIRType &type, const Bitset &flags,
                                                BufferPackingStandard packing)
1123 {
1124 	// If using PhysicalStorageBufferEXT storage class, this is a pointer,
1125 	// and is 64-bit.
1126 	if (type.storage == StorageClassPhysicalStorageBufferEXT)
1127 	{
1128 		if (!type.pointer)
1129 			SPIRV_CROSS_THROW("Types in PhysicalStorageBufferEXT must be pointers.");
1130 
1131 		if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
1132 		{
1133 			if (packing_is_vec4_padded(packing) && type_is_array_of_pointers(type))
1134 				return 16;
1135 			else
1136 				return 8;
1137 		}
1138 		else
1139 			SPIRV_CROSS_THROW("AddressingModelPhysicalStorageBuffer64EXT must be used for PhysicalStorageBufferEXT.");
1140 	}
1141 
1142 	if (!type.array.empty())
1143 	{
1144 		uint32_t minimum_alignment = 1;
1145 		if (packing_is_vec4_padded(packing))
1146 			minimum_alignment = 16;
1147 
1148 		auto *tmp = &get<SPIRType>(type.parent_type);
1149 		while (!tmp->array.empty())
1150 			tmp = &get<SPIRType>(tmp->parent_type);
1151 
1152 		// Get the alignment of the base type, then maybe round up.
1153 		return max(minimum_alignment, type_to_packed_alignment(*tmp, flags, packing));
1154 	}
1155 
1156 	if (type.basetype == SPIRType::Struct)
1157 	{
		// Rule 9. Struct alignment is the maximum alignment of its members.
1159 		uint32_t alignment = 1;
1160 		for (uint32_t i = 0; i < type.member_types.size(); i++)
1161 		{
1162 			auto member_flags = ir.meta[type.self].members[i].decoration_flags;
1163 			alignment =
1164 			    max(alignment, type_to_packed_alignment(get<SPIRType>(type.member_types[i]), member_flags, packing));
1165 		}
1166 
1167 		// In std140, struct alignment is rounded up to 16.
1168 		if (packing_is_vec4_padded(packing))
1169 			alignment = max(alignment, 16u);
1170 
1171 		return alignment;
1172 	}
1173 	else
1174 	{
1175 		const uint32_t base_alignment = type_to_packed_base_size(type, packing);
1176 
1177 		// Alignment requirement for scalar block layout is always the alignment for the most basic component.
1178 		if (packing_is_scalar(packing))
1179 			return base_alignment;
1180 
1181 		// Vectors are *not* aligned in HLSL, but there's an extra rule where vectors cannot straddle
1182 		// a vec4, this is handled outside since that part knows our current offset.
1183 		if (type.columns == 1 && packing_is_hlsl(packing))
1184 			return base_alignment;
1185 
1186 		// From 7.6.2.2 in GL 4.5 core spec.
1187 		// Rule 1
1188 		if (type.vecsize == 1 && type.columns == 1)
1189 			return base_alignment;
1190 
1191 		// Rule 2
1192 		if ((type.vecsize == 2 || type.vecsize == 4) && type.columns == 1)
1193 			return type.vecsize * base_alignment;
1194 
1195 		// Rule 3
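		// (A vec3 therefore aligns like a vec4; e.g. a vec3 of 32-bit floats gets 16-byte alignment.)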
1196 		if (type.vecsize == 3 && type.columns == 1)
1197 			return 4 * base_alignment;
1198 
1199 		// Rule 4 implied. Alignment does not change in std430.
1200 
1201 		// Rule 5. Column-major matrices are stored as arrays of
1202 		// vectors.
1203 		if (flags.get(DecorationColMajor) && type.columns > 1)
1204 		{
1205 			if (packing_is_vec4_padded(packing))
1206 				return 4 * base_alignment;
1207 			else if (type.vecsize == 3)
1208 				return 4 * base_alignment;
1209 			else
1210 				return type.vecsize * base_alignment;
1211 		}
1212 
1213 		// Rule 6 implied.
1214 
1215 		// Rule 7.
1216 		if (flags.get(DecorationRowMajor) && type.vecsize > 1)
1217 		{
1218 			if (packing_is_vec4_padded(packing))
1219 				return 4 * base_alignment;
1220 			else if (type.columns == 3)
1221 				return 4 * base_alignment;
1222 			else
1223 				return type.columns * base_alignment;
1224 		}
1225 
1226 		// Rule 8 implied.
1227 	}
1228 
1229 	SPIRV_CROSS_THROW("Did not find suitable rule for type. Bogus decorations?");
1230 }
1231 
uint32_t CompilerGLSL::type_to_packed_array_stride(const SPIRType &type, const Bitset &flags,
                                                   BufferPackingStandard packing)
1234 {
1235 	// Array stride is equal to aligned size of the underlying type.
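	// For example, "float data[4]" has a 16-byte stride under std140 (rounded up to vec4 alignment),
	// but only a 4-byte stride under std430 or scalar packing.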
1236 	uint32_t parent = type.parent_type;
1237 	assert(parent);
1238 
1239 	auto &tmp = get<SPIRType>(parent);
1240 
1241 	uint32_t size = type_to_packed_size(tmp, flags, packing);
1242 	if (tmp.array.empty())
1243 	{
1244 		uint32_t alignment = type_to_packed_alignment(type, flags, packing);
1245 		return (size + alignment - 1) & ~(alignment - 1);
1246 	}
1247 	else
1248 	{
1249 		// For multidimensional arrays, array stride always matches size of subtype.
1250 		// The alignment cannot change because multidimensional arrays are basically N * M array elements.
1251 		return size;
1252 	}
1253 }
1254 
uint32_t CompilerGLSL::type_to_packed_size(const SPIRType &type, const Bitset &flags, BufferPackingStandard packing)
1256 {
1257 	if (!type.array.empty())
1258 	{
1259 		return to_array_size_literal(type) * type_to_packed_array_stride(type, flags, packing);
1260 	}
1261 
1262 	// If using PhysicalStorageBufferEXT storage class, this is a pointer,
1263 	// and is 64-bit.
1264 	if (type.storage == StorageClassPhysicalStorageBufferEXT)
1265 	{
1266 		if (!type.pointer)
1267 			SPIRV_CROSS_THROW("Types in PhysicalStorageBufferEXT must be pointers.");
1268 
1269 		if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
1270 			return 8;
1271 		else
1272 			SPIRV_CROSS_THROW("AddressingModelPhysicalStorageBuffer64EXT must be used for PhysicalStorageBufferEXT.");
1273 	}
1274 
1275 	uint32_t size = 0;
1276 
1277 	if (type.basetype == SPIRType::Struct)
1278 	{
1279 		uint32_t pad_alignment = 1;
1280 
1281 		for (uint32_t i = 0; i < type.member_types.size(); i++)
1282 		{
1283 			auto member_flags = ir.meta[type.self].members[i].decoration_flags;
1284 			auto &member_type = get<SPIRType>(type.member_types[i]);
1285 
1286 			uint32_t packed_alignment = type_to_packed_alignment(member_type, member_flags, packing);
1287 			uint32_t alignment = max(packed_alignment, pad_alignment);
1288 
1289 			// The next member following a struct member is aligned to the base alignment of the struct that came before.
1290 			// GL 4.5 spec, 7.6.2.2.
1291 			if (member_type.basetype == SPIRType::Struct)
1292 				pad_alignment = packed_alignment;
1293 			else
1294 				pad_alignment = 1;
1295 
1296 			size = (size + alignment - 1) & ~(alignment - 1);
1297 			size += type_to_packed_size(member_type, member_flags, packing);
1298 		}
1299 	}
1300 	else
1301 	{
1302 		const uint32_t base_alignment = type_to_packed_base_size(type, packing);
1303 
1304 		if (packing_is_scalar(packing))
1305 		{
1306 			size = type.vecsize * type.columns * base_alignment;
1307 		}
1308 		else
1309 		{
1310 			if (type.columns == 1)
1311 				size = type.vecsize * base_alignment;
1312 
1313 			if (flags.get(DecorationColMajor) && type.columns > 1)
1314 			{
1315 				if (packing_is_vec4_padded(packing))
1316 					size = type.columns * 4 * base_alignment;
1317 				else if (type.vecsize == 3)
1318 					size = type.columns * 4 * base_alignment;
1319 				else
1320 					size = type.columns * type.vecsize * base_alignment;
1321 			}
1322 
1323 			if (flags.get(DecorationRowMajor) && type.vecsize > 1)
1324 			{
1325 				if (packing_is_vec4_padded(packing))
1326 					size = type.vecsize * 4 * base_alignment;
1327 				else if (type.columns == 3)
1328 					size = type.vecsize * 4 * base_alignment;
1329 				else
1330 					size = type.vecsize * type.columns * base_alignment;
1331 			}
1332 		}
1333 	}
1334 
1335 	return size;
1336 }
1337 
bool CompilerGLSL::buffer_is_packing_standard(const SPIRType &type, BufferPackingStandard packing,
                                              uint32_t start_offset, uint32_t end_offset)
1340 {
1341 	// This is very tricky and error prone, but try to be exhaustive and correct here.
1342 	// SPIR-V doesn't directly say if we're using std430 or std140.
1343 	// SPIR-V communicates this using Offset and ArrayStride decorations (which is what really matters),
1344 	// so we have to try to infer whether or not the original GLSL source was std140 or std430 based on this information.
1345 	// We do not have to consider shared or packed since these layouts are not allowed in Vulkan SPIR-V (they are useless anyways, and custom offsets would do the same thing).
1346 	//
1347 	// It is almost certain that we're using std430, but it gets tricky with arrays in particular.
1348 	// We will assume std430, but infer std140 if we can prove the struct is not compliant with std430.
1349 	//
1350 	// The only two differences between std140 and std430 are related to padding alignment/array stride
1351 	// in arrays and structs. In std140 they take minimum vec4 alignment.
1352 	// std430 only removes the vec4 requirement.
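	// As a concrete example, an array of floats declared with ArrayStride 16 is consistent with std140,
	// while ArrayStride 4 rules std140 out and points at std430 (or scalar) instead.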
1353 
1354 	uint32_t offset = 0;
1355 	uint32_t pad_alignment = 1;
1356 
1357 	bool is_top_level_block =
1358 	    has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock);
1359 
1360 	for (uint32_t i = 0; i < type.member_types.size(); i++)
1361 	{
1362 		auto &memb_type = get<SPIRType>(type.member_types[i]);
1363 		auto member_flags = ir.meta[type.self].members[i].decoration_flags;
1364 
1365 		// Verify alignment rules.
1366 		uint32_t packed_alignment = type_to_packed_alignment(memb_type, member_flags, packing);
1367 
1368 		// This is a rather dirty workaround to deal with some cases of OpSpecConstantOp used as array size, e.g:
1369 		// layout(constant_id = 0) const int s = 10;
1370 		// const int S = s + 5; // SpecConstantOp
1371 		// buffer Foo { int data[S]; }; // <-- Very hard for us to deduce a fixed value here,
1372 		// we would need full implementation of compile-time constant folding. :(
1373 		// If we are the last member of a struct, there might be cases where the actual size of that member is irrelevant
1374 		// for our analysis (e.g. unsized arrays).
1375 		// This lets us simply ignore that there are spec constant op sized arrays in our buffers.
1376 		// Querying size of this member will fail, so just don't call it unless we have to.
1377 		//
1378 		// This is likely "best effort" we can support without going into unacceptably complicated workarounds.
1379 		bool member_can_be_unsized =
1380 		    is_top_level_block && size_t(i + 1) == type.member_types.size() && !memb_type.array.empty();
1381 
1382 		uint32_t packed_size = 0;
1383 		if (!member_can_be_unsized)
1384 			packed_size = type_to_packed_size(memb_type, member_flags, packing);
1385 
1386 		// We only need to care about this if we have non-array types which can straddle the vec4 boundary.
1387 		if (packing_is_hlsl(packing))
1388 		{
1389 			// If a member straddles across a vec4 boundary, alignment is actually vec4.
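			// E.g. a float3 at offset 8 would occupy bytes 8..19 and cross into the next 16-byte
			// slot, so it is bumped to a 16-byte aligned offset instead.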
1390 			uint32_t begin_word = offset / 16;
1391 			uint32_t end_word = (offset + packed_size - 1) / 16;
1392 			if (begin_word != end_word)
1393 				packed_alignment = max(packed_alignment, 16u);
1394 		}
1395 
1396 		uint32_t alignment = max(packed_alignment, pad_alignment);
1397 		offset = (offset + alignment - 1) & ~(alignment - 1);
1398 
1399 		// Field is not in the specified range anymore and we can ignore any further fields.
1400 		if (offset >= end_offset)
1401 			break;
1402 
1403 		// The next member following a struct member is aligned to the base alignment of the struct that came before.
1404 		// GL 4.5 spec, 7.6.2.2.
1405 		if (memb_type.basetype == SPIRType::Struct && !memb_type.pointer)
1406 			pad_alignment = packed_alignment;
1407 		else
1408 			pad_alignment = 1;
1409 
1410 		// Only care about packing if we are in the given range
1411 		if (offset >= start_offset)
1412 		{
1413 			uint32_t actual_offset = type_struct_member_offset(type, i);
1414 
1415 			// We only care about offsets in std140, std430, etc ...
1416 			// For EnhancedLayout variants, we have the flexibility to choose our own offsets.
1417 			if (!packing_has_flexible_offset(packing))
1418 			{
1419 				if (actual_offset != offset) // This cannot be the packing we're looking for.
1420 					return false;
1421 			}
1422 			else if ((actual_offset & (alignment - 1)) != 0)
1423 			{
1424 				// We still need to verify that alignment rules are observed, even if we have explicit offset.
1425 				return false;
1426 			}
1427 
1428 			// Verify array stride rules.
1429 			if (!memb_type.array.empty() && type_to_packed_array_stride(memb_type, member_flags, packing) !=
1430 			                                    type_struct_member_array_stride(type, i))
1431 				return false;
1432 
1433 			// Verify that sub-structs also follow packing rules.
1434 			// We cannot use enhanced layouts on substructs, so they better be up to spec.
1435 			auto substruct_packing = packing_to_substruct_packing(packing);
1436 
1437 			if (!memb_type.pointer && !memb_type.member_types.empty() &&
1438 			    !buffer_is_packing_standard(memb_type, substruct_packing))
1439 			{
1440 				return false;
1441 			}
1442 		}
1443 
1444 		// Bump size.
1445 		offset += packed_size;
1446 	}
1447 
1448 	return true;
1449 }
1450 
bool CompilerGLSL::can_use_io_location(StorageClass storage, bool block)
1452 {
	// Location specifiers are a must-have in SPIR-V, but they aren't really supported in earlier versions of GLSL.
1454 	// Be very explicit here about how to solve the issue.
1455 	if ((get_execution_model() != ExecutionModelVertex && storage == StorageClassInput) ||
1456 	    (get_execution_model() != ExecutionModelFragment && storage == StorageClassOutput))
1457 	{
1458 		uint32_t minimum_desktop_version = block ? 440 : 410;
1459 		// ARB_enhanced_layouts vs ARB_separate_shader_objects ...
1460 
1461 		if (!options.es && options.version < minimum_desktop_version && !options.separate_shader_objects)
1462 			return false;
1463 		else if (options.es && options.version < 310)
1464 			return false;
1465 	}
1466 
1467 	if ((get_execution_model() == ExecutionModelVertex && storage == StorageClassInput) ||
1468 	    (get_execution_model() == ExecutionModelFragment && storage == StorageClassOutput))
1469 	{
1470 		if (options.es && options.version < 300)
1471 			return false;
1472 		else if (!options.es && options.version < 330)
1473 			return false;
1474 	}
1475 
1476 	if (storage == StorageClassUniform || storage == StorageClassUniformConstant || storage == StorageClassPushConstant)
1477 	{
1478 		if (options.es && options.version < 310)
1479 			return false;
1480 		else if (!options.es && options.version < 430)
1481 			return false;
1482 	}
1483 
1484 	return true;
1485 }
1486 
string CompilerGLSL::layout_for_variable(const SPIRVariable &var)
1488 {
1489 	// FIXME: Come up with a better solution for when to disable layouts.
1490 	// Having layouts depend on extensions as well as which types
1491 	// of layouts are used. For now, the simple solution is to just disable
1492 	// layouts for legacy versions.
1493 	if (is_legacy())
1494 		return "";
1495 
1496 	SmallVector<string> attr;
1497 
1498 	auto &dec = ir.meta[var.self].decoration;
1499 	auto &type = get<SPIRType>(var.basetype);
1500 	auto &flags = dec.decoration_flags;
1501 	auto typeflags = ir.meta[type.self].decoration.decoration_flags;
1502 
1503 	if (options.vulkan_semantics && var.storage == StorageClassPushConstant)
1504 		attr.push_back("push_constant");
1505 	else if (var.storage == StorageClassShaderRecordBufferNV)
1506 		attr.push_back("shaderRecordNV");
1507 
1508 	if (flags.get(DecorationRowMajor))
1509 		attr.push_back("row_major");
1510 	if (flags.get(DecorationColMajor))
1511 		attr.push_back("column_major");
1512 
1513 	if (options.vulkan_semantics)
1514 	{
1515 		if (flags.get(DecorationInputAttachmentIndex))
1516 			attr.push_back(join("input_attachment_index = ", dec.input_attachment));
1517 	}
1518 
1519 	bool is_block = has_decoration(type.self, DecorationBlock);
1520 	if (flags.get(DecorationLocation) && can_use_io_location(var.storage, is_block))
1521 	{
1522 		Bitset combined_decoration;
1523 		for (uint32_t i = 0; i < ir.meta[type.self].members.size(); i++)
1524 			combined_decoration.merge_or(combined_decoration_for_member(type, i));
1525 
1526 		// If our members have location decorations, we don't need to
1527 		// emit location decorations at the top as well (looks weird).
1528 		if (!combined_decoration.get(DecorationLocation))
1529 			attr.push_back(join("location = ", dec.location));
1530 	}
1531 
1532 	// Can only declare Component if we can declare location.
1533 	if (flags.get(DecorationComponent) && can_use_io_location(var.storage, is_block))
1534 	{
1535 		if (!options.es)
1536 		{
1537 			if (options.version < 440 && options.version >= 140)
1538 				require_extension_internal("GL_ARB_enhanced_layouts");
1539 			else if (options.version < 140)
1540 				SPIRV_CROSS_THROW("Component decoration is not supported in targets below GLSL 1.40.");
1541 			attr.push_back(join("component = ", dec.component));
1542 		}
1543 		else
1544 			SPIRV_CROSS_THROW("Component decoration is not supported in ES targets.");
1545 	}
1546 
1547 	if (flags.get(DecorationIndex))
1548 		attr.push_back(join("index = ", dec.index));
1549 
1550 	// Do not emit set = decoration in regular GLSL output, but
1551 	// we need to preserve it in Vulkan GLSL mode.
1552 	if (var.storage != StorageClassPushConstant && var.storage != StorageClassShaderRecordBufferNV)
1553 	{
1554 		if (flags.get(DecorationDescriptorSet) && options.vulkan_semantics)
1555 			attr.push_back(join("set = ", dec.set));
1556 	}
1557 
1558 	bool push_constant_block = options.vulkan_semantics && var.storage == StorageClassPushConstant;
1559 	bool ssbo_block = var.storage == StorageClassStorageBuffer || var.storage == StorageClassShaderRecordBufferNV ||
1560 	                  (var.storage == StorageClassUniform && typeflags.get(DecorationBufferBlock));
1561 	bool emulated_ubo = var.storage == StorageClassPushConstant && options.emit_push_constant_as_uniform_buffer;
1562 	bool ubo_block = var.storage == StorageClassUniform && typeflags.get(DecorationBlock);
1563 
1564 	// GL 3.0/GLSL 1.30 is not considered legacy, but it doesn't have UBOs ...
1565 	bool can_use_buffer_blocks = (options.es && options.version >= 300) || (!options.es && options.version >= 140);
1566 
1567 	// pretend no UBOs when options say so
1568 	if (ubo_block && options.emit_uniform_buffer_as_plain_uniforms)
1569 		can_use_buffer_blocks = false;
1570 
1571 	bool can_use_binding;
1572 	if (options.es)
1573 		can_use_binding = options.version >= 310;
1574 	else
1575 		can_use_binding = options.enable_420pack_extension || (options.version >= 420);
1576 
1577 	// Make sure we don't emit binding layout for a classic uniform on GLSL 1.30.
1578 	if (!can_use_buffer_blocks && var.storage == StorageClassUniform)
1579 		can_use_binding = false;
1580 
1581 	if (var.storage == StorageClassShaderRecordBufferNV)
1582 		can_use_binding = false;
1583 
1584 	if (can_use_binding && flags.get(DecorationBinding))
1585 		attr.push_back(join("binding = ", dec.binding));
1586 
1587 	if (flags.get(DecorationOffset))
1588 		attr.push_back(join("offset = ", dec.offset));
1589 
1590 	// Instead of adding explicit offsets for every element here, just assume we're using std140 or std430.
1591 	// If SPIR-V does not comply with either layout, we cannot really work around it.
1592 	if (can_use_buffer_blocks && (ubo_block || emulated_ubo))
1593 	{
1594 		attr.push_back(buffer_to_packing_standard(type, false));
1595 	}
1596 	else if (can_use_buffer_blocks && (push_constant_block || ssbo_block))
1597 	{
1598 		attr.push_back(buffer_to_packing_standard(type, true));
1599 	}
1600 
	// For images, the type itself adds a layout qualifier.
1602 	// Only emit the format for storage images.
1603 	if (type.basetype == SPIRType::Image && type.image.sampled == 2)
1604 	{
1605 		const char *fmt = format_to_glsl(type.image.format);
1606 		if (fmt)
1607 			attr.push_back(fmt);
1608 	}
1609 
1610 	if (attr.empty())
1611 		return "";
1612 
1613 	string res = "layout(";
1614 	res += merge(attr);
1615 	res += ") ";
1616 	return res;
1617 }
1618 
1619 string CompilerGLSL::buffer_to_packing_standard(const SPIRType &type, bool support_std430_without_scalar_layout)
1620 {
1621 	if (support_std430_without_scalar_layout && buffer_is_packing_standard(type, BufferPackingStd430))
1622 		return "std430";
1623 	else if (buffer_is_packing_standard(type, BufferPackingStd140))
1624 		return "std140";
1625 	else if (options.vulkan_semantics && buffer_is_packing_standard(type, BufferPackingScalar))
1626 	{
1627 		require_extension_internal("GL_EXT_scalar_block_layout");
1628 		return "scalar";
1629 	}
1630 	else if (support_std430_without_scalar_layout &&
1631 	         buffer_is_packing_standard(type, BufferPackingStd430EnhancedLayout))
1632 	{
1633 		if (options.es && !options.vulkan_semantics)
1634 			SPIRV_CROSS_THROW("Push constant block cannot be expressed as either std430 or std140. ES targets do "
1635 			                  "not support GL_ARB_enhanced_layouts.");
1636 		if (!options.es && !options.vulkan_semantics && options.version < 440)
1637 			require_extension_internal("GL_ARB_enhanced_layouts");
1638 
1639 		set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
1640 		return "std430";
1641 	}
1642 	else if (buffer_is_packing_standard(type, BufferPackingStd140EnhancedLayout))
1643 	{
1644 		// Fallback time. We might be able to use GL_ARB_enhanced_layouts to deal with this difference;
1645 		// however, we can only use layout(offset) on the block itself, not on any substructs, so the substructs must already use an appropriate layout.
1646 		// Enhanced layouts seem to always work in Vulkan GLSL, so no need for extensions there.
1647 		if (options.es && !options.vulkan_semantics)
1648 			SPIRV_CROSS_THROW("Push constant block cannot be expressed as either std430 or std140. ES targets do "
1649 			                  "not support GL_ARB_enhanced_layouts.");
1650 		if (!options.es && !options.vulkan_semantics && options.version < 440)
1651 			require_extension_internal("GL_ARB_enhanced_layouts");
1652 
1653 		set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
1654 		return "std140";
1655 	}
1656 	else if (options.vulkan_semantics && buffer_is_packing_standard(type, BufferPackingScalarEnhancedLayout))
1657 	{
1658 		set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
1659 		require_extension_internal("GL_EXT_scalar_block_layout");
1660 		return "scalar";
1661 	}
1662 	else if (!support_std430_without_scalar_layout && options.vulkan_semantics &&
1663 	         buffer_is_packing_standard(type, BufferPackingStd430))
1664 	{
1665 		// UBOs can support std430 with GL_EXT_scalar_block_layout.
1666 		require_extension_internal("GL_EXT_scalar_block_layout");
1667 		return "std430";
1668 	}
1669 	else if (!support_std430_without_scalar_layout && options.vulkan_semantics &&
1670 	         buffer_is_packing_standard(type, BufferPackingStd430EnhancedLayout))
1671 	{
1672 		// UBOs can support std430 with GL_EXT_scalar_block_layout.
1673 		set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
1674 		require_extension_internal("GL_EXT_scalar_block_layout");
1675 		return "std430";
1676 	}
1677 	else
1678 	{
1679 		SPIRV_CROSS_THROW("Buffer block cannot be expressed as any of std430, std140 or scalar, even with enhanced "
1680 		                  "layouts. You can try flattening this block to support a more flexible layout.");
1681 	}
1682 }
1683 
1684 void CompilerGLSL::emit_push_constant_block(const SPIRVariable &var)
1685 {
1686 	if (flattened_buffer_blocks.count(var.self))
1687 		emit_buffer_block_flattened(var);
1688 	else if (options.vulkan_semantics)
1689 		emit_push_constant_block_vulkan(var);
1690 	else if (options.emit_push_constant_as_uniform_buffer)
1691 		emit_buffer_block_native(var);
1692 	else
1693 		emit_push_constant_block_glsl(var);
1694 }
1695 
1696 void CompilerGLSL::emit_push_constant_block_vulkan(const SPIRVariable &var)
1697 {
1698 	emit_buffer_block(var);
1699 }
1700 
1701 void CompilerGLSL::emit_push_constant_block_glsl(const SPIRVariable &var)
1702 {
1703 	// OpenGL has no concept of push constant blocks, implement it as a uniform struct.
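	// For illustration only (names are hypothetical): a push constant block such as
	//
	//     layout(push_constant) uniform Push { mat4 mvp; } registers;
	//
	// is emitted in plain GLSL roughly as a struct plus an ordinary uniform:
	//
	//     struct Push { mat4 mvp; };
	//     uniform Push registers;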
1704 	auto &type = get<SPIRType>(var.basetype);
1705 
1706 	auto &flags = ir.meta[var.self].decoration.decoration_flags;
1707 	flags.clear(DecorationBinding);
1708 	flags.clear(DecorationDescriptorSet);
1709 
1710 #if 0
1711     if (flags & ((1ull << DecorationBinding) | (1ull << DecorationDescriptorSet)))
1712         SPIRV_CROSS_THROW("Push constant blocks cannot be compiled to GLSL with Binding or Set syntax. "
1713                             "Remap to location with reflection API first or disable these decorations.");
1714 #endif
1715 
1716 	// We're emitting the push constant block as a regular struct, so disable the block qualifier temporarily.
1717 	// Otherwise, we will end up emitting layout() qualifiers on naked structs which is not allowed.
1718 	auto &block_flags = ir.meta[type.self].decoration.decoration_flags;
1719 	bool block_flag = block_flags.get(DecorationBlock);
1720 	block_flags.clear(DecorationBlock);
1721 
1722 	emit_struct(type);
1723 
1724 	if (block_flag)
1725 		block_flags.set(DecorationBlock);
1726 
1727 	emit_uniform(var);
1728 	statement("");
1729 }
1730 
1731 void CompilerGLSL::emit_buffer_block(const SPIRVariable &var)
1732 {
1733 	auto &type = get<SPIRType>(var.basetype);
1734 	bool ubo_block = var.storage == StorageClassUniform && has_decoration(type.self, DecorationBlock);
1735 
1736 	if (flattened_buffer_blocks.count(var.self))
1737 		emit_buffer_block_flattened(var);
1738 	else if (is_legacy() || (!options.es && options.version == 130) ||
1739 	         (ubo_block && options.emit_uniform_buffer_as_plain_uniforms))
1740 		emit_buffer_block_legacy(var);
1741 	else
1742 		emit_buffer_block_native(var);
1743 }
1744 
1745 void CompilerGLSL::emit_buffer_block_legacy(const SPIRVariable &var)
1746 {
1747 	auto &type = get<SPIRType>(var.basetype);
1748 	bool ssbo = var.storage == StorageClassStorageBuffer ||
1749 	            ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
1750 	if (ssbo)
1751 		SPIRV_CROSS_THROW("SSBOs not supported in legacy targets.");
1752 
1753 	// We're emitting the buffer block as a regular struct, so disable the block qualifier temporarily.
1754 	// Otherwise, we will end up emitting layout() qualifiers on naked structs which is not allowed.
1755 	auto &block_flags = ir.meta[type.self].decoration.decoration_flags;
1756 	bool block_flag = block_flags.get(DecorationBlock);
1757 	block_flags.clear(DecorationBlock);
1758 	emit_struct(type);
1759 	if (block_flag)
1760 		block_flags.set(DecorationBlock);
1761 	emit_uniform(var);
1762 	statement("");
1763 }
1764 
1765 void CompilerGLSL::emit_buffer_reference_block(SPIRType &type, bool forward_declaration)
1766 {
1767 	string buffer_name;
1768 
1769 	if (forward_declaration)
1770 	{
1771 		// Block names should never alias, but from HLSL input they kind of can because block types are reused for UAVs ...
1772 		// Allow an aliased name here since we might be declaring the block twice:
1773 		// once as a forward-declared buffer reference and once with the proper declaration. The names must match up.
1774 		buffer_name = to_name(type.self, false);
1775 
1776 		// Shaders never use the block by interface name, so we don't
1777 		// have to track this other than updating name caches.
1778 		// If we have a collision for any reason, just fall back immediately.
1779 		if (ir.meta[type.self].decoration.alias.empty() ||
1780 		    block_ssbo_names.find(buffer_name) != end(block_ssbo_names) ||
1781 		    resource_names.find(buffer_name) != end(resource_names))
1782 		{
1783 			buffer_name = join("_", type.self);
1784 		}
1785 
1786 		// Make sure we get something unique for both global name scope and block name scope.
1787 		// See GLSL 4.5 spec: section 4.3.9 for details.
1788 		add_variable(block_ssbo_names, resource_names, buffer_name);
1789 
1790 		// If for some reason buffer_name is an illegal name, make a final fallback to a workaround name.
1791 		// This cannot conflict with anything else, so we're safe now.
1792 		// We cannot reuse this fallback name in either global scope (blocked by block_names) or block name scope.
1793 		if (buffer_name.empty())
1794 			buffer_name = join("_", type.self);
1795 
1796 		block_names.insert(buffer_name);
1797 		block_ssbo_names.insert(buffer_name);
1798 	}
1799 	else if (type.basetype != SPIRType::Struct)
1800 		buffer_name = type_to_glsl(type);
1801 	else
1802 		buffer_name = to_name(type.self, false);
1803 
1804 	if (!forward_declaration)
1805 	{
1806 		if (type.basetype == SPIRType::Struct)
1807 			statement("layout(buffer_reference, ", buffer_to_packing_standard(type, true), ") buffer ", buffer_name);
1808 		else
1809 			statement("layout(buffer_reference) buffer ", buffer_name);
1810 
1811 		begin_scope();
1812 
1813 		if (type.basetype == SPIRType::Struct)
1814 		{
1815 			type.member_name_cache.clear();
1816 
1817 			uint32_t i = 0;
1818 			for (auto &member : type.member_types)
1819 			{
1820 				add_member_name(type, i);
1821 				emit_struct_member(type, member, i);
1822 				i++;
1823 			}
1824 		}
1825 		else
1826 		{
1827 			auto &pointee_type = get_pointee_type(type);
1828 			statement(type_to_glsl(pointee_type), " value", type_to_array_glsl(pointee_type), ";");
1829 		}
1830 
1831 		end_scope_decl();
1832 		statement("");
1833 	}
1834 	else
1835 	{
1836 		statement("layout(buffer_reference) buffer ", buffer_name, ";");
1837 	}
1838 }
1839 
1840 void CompilerGLSL::emit_buffer_block_native(const SPIRVariable &var)
1841 {
1842 	auto &type = get<SPIRType>(var.basetype);
1843 
1844 	Bitset flags = ir.get_buffer_block_flags(var);
1845 	bool ssbo = var.storage == StorageClassStorageBuffer || var.storage == StorageClassShaderRecordBufferNV ||
1846 	            ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
1847 	bool is_restrict = ssbo && flags.get(DecorationRestrict);
1848 	bool is_writeonly = ssbo && flags.get(DecorationNonReadable);
1849 	bool is_readonly = ssbo && flags.get(DecorationNonWritable);
1850 	bool is_coherent = ssbo && flags.get(DecorationCoherent);
1851 
1852 	// Block names should never alias, but from HLSL input they kind of can because block types are reused for UAVs ...
1853 	auto buffer_name = to_name(type.self, false);
1854 
1855 	auto &block_namespace = ssbo ? block_ssbo_names : block_ubo_names;
1856 
1857 	// Shaders never use the block by interface name, so we don't
1858 	// have to track this other than updating name caches.
1859 	// If we have a collision for any reason, just fall back immediately.
1860 	if (ir.meta[type.self].decoration.alias.empty() || block_namespace.find(buffer_name) != end(block_namespace) ||
1861 	    resource_names.find(buffer_name) != end(resource_names))
1862 	{
1863 		buffer_name = get_block_fallback_name(var.self);
1864 	}
1865 
1866 	// Make sure we get something unique for both global name scope and block name scope.
1867 	// See GLSL 4.5 spec: section 4.3.9 for details.
1868 	add_variable(block_namespace, resource_names, buffer_name);
1869 
1870 	// If for some reason buffer_name is an illegal name, make a final fallback to a workaround name.
1871 	// This cannot conflict with anything else, so we're safe now.
1872 	// We cannot reuse this fallback name in either global scope (blocked by block_names) or block name scope.
1873 	if (buffer_name.empty())
1874 		buffer_name = join("_", get<SPIRType>(var.basetype).self, "_", var.self);
1875 
1876 	block_names.insert(buffer_name);
1877 	block_namespace.insert(buffer_name);
1878 
1879 	// Save for post-reflection later.
1880 	declared_block_names[var.self] = buffer_name;
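	// For illustration only (names and qualifiers are hypothetical): the statements below
	// typically produce something along the lines of
	//
	//     layout(std430, binding = 0) restrict buffer SSBO
	//     {
	//         vec4 data[];
	//     } ssbo;
	//
	// with packing, bindings and memory qualifiers derived from the decorations gathered above.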
1881 
1882 	statement(layout_for_variable(var), is_coherent ? "coherent " : "", is_restrict ? "restrict " : "",
1883 	          is_writeonly ? "writeonly " : "", is_readonly ? "readonly " : "", ssbo ? "buffer " : "uniform ",
1884 	          buffer_name);
1885 
1886 	begin_scope();
1887 
1888 	type.member_name_cache.clear();
1889 
1890 	uint32_t i = 0;
1891 	for (auto &member : type.member_types)
1892 	{
1893 		add_member_name(type, i);
1894 		emit_struct_member(type, member, i);
1895 		i++;
1896 	}
1897 
1898 	// var.self can be used as a backup name for the block name,
1899 	// so we need to make sure we don't disturb the name here on a recompile.
1900 	// It will need to be reset if we have to recompile.
1901 	preserve_alias_on_reset(var.self);
1902 	add_resource_name(var.self);
1903 	end_scope_decl(to_name(var.self) + type_to_array_glsl(type));
1904 	statement("");
1905 }
1906 
1907 void CompilerGLSL::emit_buffer_block_flattened(const SPIRVariable &var)
1908 {
1909 	auto &type = get<SPIRType>(var.basetype);
1910 
1911 	// Block names should never alias.
1912 	auto buffer_name = to_name(type.self, false);
1913 	size_t buffer_size = (get_declared_struct_size(type) + 15) / 16;
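	// The flattened block is emitted as an array of 16-byte (vec4-sized) elements, so the
	// declared size is rounded up to the nearest multiple of 16. For example, an 80-byte UBO
	// would be emitted roughly as "uniform vec4 UBO[5];", assuming the common basic type
	// resolved below is float.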
1914 
1915 	SPIRType::BaseType basic_type;
1916 	if (get_common_basic_type(type, basic_type))
1917 	{
1918 		SPIRType tmp;
1919 		tmp.basetype = basic_type;
1920 		tmp.vecsize = 4;
1921 		if (basic_type != SPIRType::Float && basic_type != SPIRType::Int && basic_type != SPIRType::UInt)
1922 			SPIRV_CROSS_THROW("Basic types in a flattened UBO must be float, int or uint.");
1923 
1924 		auto flags = ir.get_buffer_block_flags(var);
1925 		statement("uniform ", flags_to_qualifiers_glsl(tmp, flags), type_to_glsl(tmp), " ", buffer_name, "[",
1926 		          buffer_size, "];");
1927 	}
1928 	else
1929 		SPIRV_CROSS_THROW("All basic types in a flattened block must be the same.");
1930 }
1931 
1932 const char *CompilerGLSL::to_storage_qualifiers_glsl(const SPIRVariable &var)
1933 {
1934 	auto &execution = get_entry_point();
1935 
1936 	if (var.storage == StorageClassInput || var.storage == StorageClassOutput)
1937 	{
1938 		if (is_legacy() && execution.model == ExecutionModelVertex)
1939 			return var.storage == StorageClassInput ? "attribute " : "varying ";
1940 		else if (is_legacy() && execution.model == ExecutionModelFragment)
1941 			return "varying "; // Fragment outputs are renamed so they never hit this case.
1942 		else
1943 			return var.storage == StorageClassInput ? "in " : "out ";
1944 	}
1945 	else if (var.storage == StorageClassUniformConstant || var.storage == StorageClassUniform ||
1946 	         var.storage == StorageClassPushConstant)
1947 	{
1948 		return "uniform ";
1949 	}
1950 	else if (var.storage == StorageClassRayPayloadNV)
1951 	{
1952 		return "rayPayloadNV ";
1953 	}
1954 	else if (var.storage == StorageClassIncomingRayPayloadNV)
1955 	{
1956 		return "rayPayloadInNV ";
1957 	}
1958 	else if (var.storage == StorageClassHitAttributeNV)
1959 	{
1960 		return "hitAttributeNV ";
1961 	}
1962 	else if (var.storage == StorageClassCallableDataNV)
1963 	{
1964 		return "callableDataNV ";
1965 	}
1966 	else if (var.storage == StorageClassIncomingCallableDataNV)
1967 	{
1968 		return "callableDataInNV ";
1969 	}
1970 
1971 	return "";
1972 }
1973 
1974 void CompilerGLSL::emit_flattened_io_block(const SPIRVariable &var, const char *qual)
1975 {
1976 	auto &type = get<SPIRType>(var.basetype);
1977 	if (!type.array.empty())
1978 		SPIRV_CROSS_THROW("Array of varying structs cannot be flattened to legacy-compatible varyings.");
1979 
1980 	auto old_flags = ir.meta[type.self].decoration.decoration_flags;
1981 	// Emit the members as if they are part of a block to get all qualifiers.
1982 	ir.meta[type.self].decoration.decoration_flags.set(DecorationBlock);
1983 
1984 	type.member_name_cache.clear();
1985 
1986 	uint32_t i = 0;
1987 	for (auto &member : type.member_types)
1988 	{
1989 		add_member_name(type, i);
1990 		auto &membertype = get<SPIRType>(member);
1991 
1992 		if (membertype.basetype == SPIRType::Struct)
1993 			SPIRV_CROSS_THROW("Cannot flatten struct inside structs in I/O variables.");
1994 
1995 		// Pass in the varying qualifier here so it will appear in the correct declaration order.
1996 		// Replace member name while emitting it so it encodes both struct name and member name.
1997 		// Sanitize underscores because joining the two identifiers might create more than 1 underscore in a row,
1998 		// which is not allowed.
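		// For illustration only (names are hypothetical): a member "color" of a block
		// instance named "vout" is emitted as a standalone varying roughly named
		// "vout_color" after underscore sanitization.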
1999 		auto backup_name = get_member_name(type.self, i);
2000 		auto member_name = to_member_name(type, i);
2001 		set_member_name(type.self, i, sanitize_underscores(join(to_name(var.self), "_", member_name)));
2002 		emit_struct_member(type, member, i, qual);
2003 		// Restore member name.
2004 		set_member_name(type.self, i, member_name);
2005 		i++;
2006 	}
2007 
2008 	ir.meta[type.self].decoration.decoration_flags = old_flags;
2009 
2010 	// Treat this variable as flattened from now on.
2011 	flattened_structs.insert(var.self);
2012 }
2013 
2014 void CompilerGLSL::emit_interface_block(const SPIRVariable &var)
2015 {
2016 	auto &type = get<SPIRType>(var.basetype);
2017 
2018 	// Either make it plain in/out or in/out blocks depending on what shader is doing ...
2019 	bool block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock);
2020 	const char *qual = to_storage_qualifiers_glsl(var);
2021 
2022 	if (block)
2023 	{
2024 		// ESSL earlier than 310 and GLSL earlier than 150 did not support
2025 		// I/O variables which are struct types.
2026 		// To support this, flatten the struct into separate varyings instead.
2027 		if ((options.es && options.version < 310) || (!options.es && options.version < 150))
2028 		{
2029 			// I/O blocks on ES require version 310 with Android Extension Pack extensions, or core version 320.
2030 			// On desktop, I/O blocks were introduced with geometry shaders in GL 3.2 (GLSL 150).
2031 			emit_flattened_io_block(var, qual);
2032 		}
2033 		else
2034 		{
2035 			if (options.es && options.version < 320)
2036 			{
2037 				// Geometry and tessellation extensions imply this extension.
2038 				if (!has_extension("GL_EXT_geometry_shader") && !has_extension("GL_EXT_tessellation_shader"))
2039 					require_extension_internal("GL_EXT_shader_io_blocks");
2040 			}
2041 
2042 			// Block names should never alias.
2043 			auto block_name = to_name(type.self, false);
2044 
2045 			// The namespace for I/O blocks is separate from other variables in GLSL.
2046 			auto &block_namespace = type.storage == StorageClassInput ? block_input_names : block_output_names;
2047 
2048 			// Shaders never use the block by interface name, so we don't
2049 			// have to track this other than updating name caches.
2050 			if (block_name.empty() || block_namespace.find(block_name) != end(block_namespace))
2051 				block_name = get_fallback_name(type.self);
2052 			else
2053 				block_namespace.insert(block_name);
2054 
2055 			// If for some reason block_name is an illegal name, make a final fallback to a workaround name.
2056 			// This cannot conflict with anything else, so we're safe now.
2057 			if (block_name.empty())
2058 				block_name = join("_", get<SPIRType>(var.basetype).self, "_", var.self);
2059 
2060 			// Instance names cannot alias block names.
2061 			resource_names.insert(block_name);
2062 
2063 			statement(layout_for_variable(var), qual, block_name);
2064 			begin_scope();
2065 
2066 			type.member_name_cache.clear();
2067 
2068 			uint32_t i = 0;
2069 			for (auto &member : type.member_types)
2070 			{
2071 				add_member_name(type, i);
2072 				emit_struct_member(type, member, i);
2073 				i++;
2074 			}
2075 
2076 			add_resource_name(var.self);
2077 			end_scope_decl(join(to_name(var.self), type_to_array_glsl(type)));
2078 			statement("");
2079 		}
2080 	}
2081 	else
2082 	{
2083 		// ESSL earlier than 310 and GLSL earlier than 150 did not support
2084 		// I/O variables which are struct types.
2085 		// To support this, flatten the struct into separate varyings instead.
2086 		if (type.basetype == SPIRType::Struct &&
2087 		    ((options.es && options.version < 310) || (!options.es && options.version < 150)))
2088 		{
2089 			emit_flattened_io_block(var, qual);
2090 		}
2091 		else
2092 		{
2093 			add_resource_name(var.self);
2094 			statement(layout_for_variable(var), to_qualifiers_glsl(var.self),
2095 			          variable_decl(type, to_name(var.self), var.self), ";");
2096 
2097 			// If a StorageClassOutput variable has an initializer, we need to initialize it in main().
2098 			if (var.storage == StorageClassOutput && var.initializer)
2099 			{
2100 				auto &entry_func = this->get<SPIRFunction>(ir.default_entry_point);
2101 				entry_func.fixup_hooks_in.push_back(
2102 				    [&]() { statement(to_name(var.self), " = ", to_expression(var.initializer), ";"); });
2103 			}
2104 		}
2105 	}
2106 }
2107 
2108 void CompilerGLSL::emit_uniform(const SPIRVariable &var)
2109 {
2110 	auto &type = get<SPIRType>(var.basetype);
2111 	if (type.basetype == SPIRType::Image && type.image.sampled == 2)
2112 	{
2113 		if (!options.es && options.version < 420)
2114 			require_extension_internal("GL_ARB_shader_image_load_store");
2115 		else if (options.es && options.version < 310)
2116 			SPIRV_CROSS_THROW("At least ESSL 3.10 required for shader image load store.");
2117 	}
2118 
2119 	add_resource_name(var.self);
2120 	statement(layout_for_variable(var), variable_decl(var), ";");
2121 }
2122 
2123 string CompilerGLSL::constant_value_macro_name(uint32_t id)
2124 {
2125 	return join("SPIRV_CROSS_CONSTANT_ID_", id);
2126 }
2127 
2128 void CompilerGLSL::emit_specialization_constant_op(const SPIRConstantOp &constant)
2129 {
2130 	auto &type = get<SPIRType>(constant.basetype);
2131 	auto name = to_name(constant.self);
2132 	statement("const ", variable_decl(type, name), " = ", constant_op_expression(constant), ";");
2133 }
2134 
2135 void CompilerGLSL::emit_constant(const SPIRConstant &constant)
2136 {
2137 	auto &type = get<SPIRType>(constant.constant_type);
2138 	auto name = to_name(constant.self);
2139 
2140 	SpecializationConstant wg_x, wg_y, wg_z;
2141 	ID workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);
2142 
2143 	// This specialization constant is implicitly declared by emitting layout() in;
2144 	if (constant.self == workgroup_size_id)
2145 		return;
2146 
2147 	// These specialization constants are implicitly declared by emitting layout() in;
2148 	// In legacy GLSL, we will still need to emit macros for these, so a layout() in; declaration
2149 	// later can use macro overrides for work group size.
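	// For illustration only (the constant name and type are hypothetical): in non-Vulkan GLSL
	// a specialization constant with SpecId 0 is typically emitted as an overridable macro
	// plus a constant, roughly:
	//
	//     #ifndef SPIRV_CROSS_CONSTANT_ID_0
	//     #define SPIRV_CROSS_CONSTANT_ID_0 1.0
	//     #endif
	//     const float scale = SPIRV_CROSS_CONSTANT_ID_0;
	//
	// For workgroup size constants, only the macro part is emitted (see below).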
2150 	bool is_workgroup_size_constant = ConstantID(constant.self) == wg_x.id || ConstantID(constant.self) == wg_y.id ||
2151 	                                  ConstantID(constant.self) == wg_z.id;
2152 
2153 	if (options.vulkan_semantics && is_workgroup_size_constant)
2154 	{
2155 		// Vulkan GLSL does not need to declare workgroup spec constants explicitly, it is handled in layout().
2156 		return;
2157 	}
2158 	else if (!options.vulkan_semantics && is_workgroup_size_constant &&
2159 	         !has_decoration(constant.self, DecorationSpecId))
2160 	{
2161 		// Only bother declaring a workgroup size if it is actually a specialization constant, because we need macros.
2162 		return;
2163 	}
2164 
2165 	// Only scalars have constant IDs.
2166 	if (has_decoration(constant.self, DecorationSpecId))
2167 	{
2168 		if (options.vulkan_semantics)
2169 		{
2170 			statement("layout(constant_id = ", get_decoration(constant.self, DecorationSpecId), ") const ",
2171 			          variable_decl(type, name), " = ", constant_expression(constant), ";");
2172 		}
2173 		else
2174 		{
2175 			const string &macro_name = constant.specialization_constant_macro_name;
2176 			statement("#ifndef ", macro_name);
2177 			statement("#define ", macro_name, " ", constant_expression(constant));
2178 			statement("#endif");
2179 
2180 			// For workgroup size constants, only emit the macros.
2181 			if (!is_workgroup_size_constant)
2182 				statement("const ", variable_decl(type, name), " = ", macro_name, ";");
2183 		}
2184 	}
2185 	else
2186 	{
2187 		statement("const ", variable_decl(type, name), " = ", constant_expression(constant), ";");
2188 	}
2189 }
2190 
2191 void CompilerGLSL::emit_entry_point_declarations()
2192 {
2193 }
2194 
2195 void CompilerGLSL::replace_illegal_names()
2196 {
2197 	// clang-format off
2198 	static const unordered_set<string> keywords = {
2199 		"abs", "acos", "acosh", "all", "any", "asin", "asinh", "atan", "atanh",
2200 		"atomicAdd", "atomicCompSwap", "atomicCounter", "atomicCounterDecrement", "atomicCounterIncrement",
2201 		"atomicExchange", "atomicMax", "atomicMin", "atomicOr", "atomicXor",
2202 		"bitCount", "bitfieldExtract", "bitfieldInsert", "bitfieldReverse",
2203 		"ceil", "cos", "cosh", "cross", "degrees",
2204 		"dFdx", "dFdxCoarse", "dFdxFine",
2205 		"dFdy", "dFdyCoarse", "dFdyFine",
2206 		"distance", "dot", "EmitStreamVertex", "EmitVertex", "EndPrimitive", "EndStreamPrimitive", "equal", "exp", "exp2",
2207 		"faceforward", "findLSB", "findMSB", "float16BitsToInt16", "float16BitsToUint16", "floatBitsToInt", "floatBitsToUint", "floor", "fma", "fract",
2208 		"frexp", "fwidth", "fwidthCoarse", "fwidthFine",
2209 		"greaterThan", "greaterThanEqual", "groupMemoryBarrier",
2210 		"imageAtomicAdd", "imageAtomicAnd", "imageAtomicCompSwap", "imageAtomicExchange", "imageAtomicMax", "imageAtomicMin", "imageAtomicOr", "imageAtomicXor",
2211 		"imageLoad", "imageSamples", "imageSize", "imageStore", "imulExtended", "int16BitsToFloat16", "intBitsToFloat", "interpolateAtOffset", "interpolateAtCentroid", "interpolateAtSample",
2212 		"inverse", "inversesqrt", "isinf", "isnan", "ldexp", "length", "lessThan", "lessThanEqual", "log", "log2",
2213 		"matrixCompMult", "max", "memoryBarrier", "memoryBarrierAtomicCounter", "memoryBarrierBuffer", "memoryBarrierImage", "memoryBarrierShared",
2214 		"min", "mix", "mod", "modf", "noise", "noise1", "noise2", "noise3", "noise4", "normalize", "not", "notEqual",
2215 		"outerProduct", "packDouble2x32", "packHalf2x16", "packInt2x16", "packInt4x16", "packSnorm2x16", "packSnorm4x8",
2216 		"packUint2x16", "packUint4x16", "packUnorm2x16", "packUnorm4x8", "pow",
2217 		"radians", "reflect", "refract", "round", "roundEven", "sign", "sin", "sinh", "smoothstep", "sqrt", "step",
2218 		"tan", "tanh", "texelFetch", "texelFetchOffset", "texture", "textureGather", "textureGatherOffset", "textureGatherOffsets",
2219 		"textureGrad", "textureGradOffset", "textureLod", "textureLodOffset", "textureOffset", "textureProj", "textureProjGrad",
2220 		"textureProjGradOffset", "textureProjLod", "textureProjLodOffset", "textureProjOffset", "textureQueryLevels", "textureQueryLod", "textureSamples", "textureSize",
2221 		"transpose", "trunc", "uaddCarry", "uint16BitsToFloat16", "uintBitsToFloat", "umulExtended", "unpackDouble2x32", "unpackHalf2x16", "unpackInt2x16", "unpackInt4x16",
2222 		"unpackSnorm2x16", "unpackSnorm4x8", "unpackUint2x16", "unpackUint4x16", "unpackUnorm2x16", "unpackUnorm4x8", "usubBorrow",
2223 
2224 		"active", "asm", "atomic_uint", "attribute", "bool", "break", "buffer",
2225 		"bvec2", "bvec3", "bvec4", "case", "cast", "centroid", "class", "coherent", "common", "const", "continue", "default", "discard",
2226 		"dmat2", "dmat2x2", "dmat2x3", "dmat2x4", "dmat3", "dmat3x2", "dmat3x3", "dmat3x4", "dmat4", "dmat4x2", "dmat4x3", "dmat4x4",
2227 		"do", "double", "dvec2", "dvec3", "dvec4", "else", "enum", "extern", "external", "false", "filter", "fixed", "flat", "float",
2228 		"for", "fvec2", "fvec3", "fvec4", "goto", "half", "highp", "hvec2", "hvec3", "hvec4", "if", "iimage1D", "iimage1DArray",
2229 		"iimage2D", "iimage2DArray", "iimage2DMS", "iimage2DMSArray", "iimage2DRect", "iimage3D", "iimageBuffer", "iimageCube",
2230 		"iimageCubeArray", "image1D", "image1DArray", "image2D", "image2DArray", "image2DMS", "image2DMSArray", "image2DRect",
2231 		"image3D", "imageBuffer", "imageCube", "imageCubeArray", "in", "inline", "inout", "input", "int", "interface", "invariant",
2232 		"isampler1D", "isampler1DArray", "isampler2D", "isampler2DArray", "isampler2DMS", "isampler2DMSArray", "isampler2DRect",
2233 		"isampler3D", "isamplerBuffer", "isamplerCube", "isamplerCubeArray", "ivec2", "ivec3", "ivec4", "layout", "long", "lowp",
2234 		"mat2", "mat2x2", "mat2x3", "mat2x4", "mat3", "mat3x2", "mat3x3", "mat3x4", "mat4", "mat4x2", "mat4x3", "mat4x4", "mediump",
2235 		"namespace", "noinline", "noperspective", "out", "output", "packed", "partition", "patch", "precise", "precision", "public", "readonly",
2236 		"resource", "restrict", "return", "sample", "sampler1D", "sampler1DArray", "sampler1DArrayShadow",
2237 		"sampler1DShadow", "sampler2D", "sampler2DArray", "sampler2DArrayShadow", "sampler2DMS", "sampler2DMSArray",
2238 		"sampler2DRect", "sampler2DRectShadow", "sampler2DShadow", "sampler3D", "sampler3DRect", "samplerBuffer",
2239 		"samplerCube", "samplerCubeArray", "samplerCubeArrayShadow", "samplerCubeShadow", "shared", "short", "sizeof", "smooth", "static",
2240 		"struct", "subroutine", "superp", "switch", "template", "this", "true", "typedef", "uimage1D", "uimage1DArray", "uimage2D",
2241 		"uimage2DArray", "uimage2DMS", "uimage2DMSArray", "uimage2DRect", "uimage3D", "uimageBuffer", "uimageCube",
2242 		"uimageCubeArray", "uint", "uniform", "union", "unsigned", "usampler1D", "usampler1DArray", "usampler2D", "usampler2DArray",
2243 		"usampler2DMS", "usampler2DMSArray", "usampler2DRect", "usampler3D", "usamplerBuffer", "usamplerCube",
2244 		"usamplerCubeArray", "using", "uvec2", "uvec3", "uvec4", "varying", "vec2", "vec3", "vec4", "void", "volatile",
2245 		"while", "writeonly",
2246 	};
2247 	// clang-format on
2248 
2249 	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
2250 		if (!is_hidden_variable(var))
2251 		{
2252 			auto &m = ir.meta[var.self].decoration;
2253 			if (m.alias.compare(0, 3, "gl_") == 0 || keywords.find(m.alias) != end(keywords))
2254 				m.alias = join("_", m.alias);
2255 		}
2256 	});
2257 }
2258 
2259 void CompilerGLSL::replace_fragment_output(SPIRVariable &var)
2260 {
2261 	auto &m = ir.meta[var.self].decoration;
2262 	uint32_t location = 0;
2263 	if (m.decoration_flags.get(DecorationLocation))
2264 		location = m.location;
2265 
2266 	// If our variable is arrayed, we must not emit the array part of this as the SPIR-V will
2267 	// do the access chain part of this for us.
2268 	auto &type = get<SPIRType>(var.basetype);
2269 
2270 	if (type.array.empty())
2271 	{
2272 		// Redirect the write to a specific render target in legacy GLSL.
2273 		m.alias = join("gl_FragData[", location, "]");
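		// For illustration only (the variable name is hypothetical): a fragment output
		// declared with location = 1 is renamed so that "FragColor = ..." ends up being
		// emitted as "gl_FragData[1] = ..." in legacy GLSL.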
2274 
2275 		if (is_legacy_es() && location != 0)
2276 			require_extension_internal("GL_EXT_draw_buffers");
2277 	}
2278 	else if (type.array.size() == 1)
2279 	{
2280 		// If location is non-zero, we probably have to add an offset.
2281 		// This gets really tricky since we'd have to inject an offset in the access chain.
2282 		// FIXME: This seems like an extremely odd-ball case, so it's probably fine to leave it like this for now.
2283 		m.alias = "gl_FragData";
2284 		if (location != 0)
2285 			SPIRV_CROSS_THROW("Arrayed output variable used, but location is not 0. "
2286 			                  "This is unimplemented in SPIRV-Cross.");
2287 
2288 		if (is_legacy_es())
2289 			require_extension_internal("GL_EXT_draw_buffers");
2290 	}
2291 	else
2292 		SPIRV_CROSS_THROW("Array-of-array output variable used. This cannot be implemented in legacy GLSL.");
2293 
2294 	var.compat_builtin = true; // We don't want to declare this variable, but use the name as-is.
2295 }
2296 
2297 void CompilerGLSL::replace_fragment_outputs()
2298 {
2299 	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
2300 		auto &type = this->get<SPIRType>(var.basetype);
2301 
2302 		if (!is_builtin_variable(var) && !var.remapped_variable && type.pointer && var.storage == StorageClassOutput)
2303 			replace_fragment_output(var);
2304 	});
2305 }
2306 
2307 string CompilerGLSL::remap_swizzle(const SPIRType &out_type, uint32_t input_components, const string &expr)
2308 {
2309 	if (out_type.vecsize == input_components)
2310 		return expr;
2311 	else if (input_components == 1 && !backend.can_swizzle_scalar)
2312 		return join(type_to_glsl(out_type), "(", expr, ")");
2313 	else
2314 	{
2315 		// FIXME: This will not work with packed expressions.
2316 		auto e = enclose_expression(expr) + ".";
2317 		// Just clamp the swizzle index if we have more outputs than inputs.
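		// For illustration: remapping a 2-component input to a 4-component output yields a
		// swizzle such as "v.xyyy", repeating the last input component for the extra lanes.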
2318 		for (uint32_t c = 0; c < out_type.vecsize; c++)
2319 			e += index_to_swizzle(min(c, input_components - 1));
2320 		if (backend.swizzle_is_function && out_type.vecsize > 1)
2321 			e += "()";
2322 
2323 		remove_duplicate_swizzle(e);
2324 		return e;
2325 	}
2326 }
2327 
2328 void CompilerGLSL::emit_pls()
2329 {
2330 	auto &execution = get_entry_point();
2331 	if (execution.model != ExecutionModelFragment)
2332 		SPIRV_CROSS_THROW("Pixel local storage only supported in fragment shaders.");
2333 
2334 	if (!options.es)
2335 		SPIRV_CROSS_THROW("Pixel local storage only supported in OpenGL ES.");
2336 
2337 	if (options.version < 300)
2338 		SPIRV_CROSS_THROW("Pixel local storage only supported in ESSL 3.0 and above.");
2339 
2340 	if (!pls_inputs.empty())
2341 	{
2342 		statement("__pixel_local_inEXT _PLSIn");
2343 		begin_scope();
2344 		for (auto &input : pls_inputs)
2345 			statement(pls_decl(input), ";");
2346 		end_scope_decl();
2347 		statement("");
2348 	}
2349 
2350 	if (!pls_outputs.empty())
2351 	{
2352 		statement("__pixel_local_outEXT _PLSOut");
2353 		begin_scope();
2354 		for (auto &output : pls_outputs)
2355 			statement(pls_decl(output), ";");
2356 		end_scope_decl();
2357 		statement("");
2358 	}
2359 }
2360 
2361 void CompilerGLSL::fixup_image_load_store_access()
2362 {
2363 	ir.for_each_typed_id<SPIRVariable>([&](uint32_t var, const SPIRVariable &) {
2364 		auto &vartype = expression_type(var);
2365 		if (vartype.basetype == SPIRType::Image)
2366 		{
2367 			// Older glslangValidator does not emit required qualifiers here.
2368 			// Solve this by making the image access as restricted as possible and loosen up if we need to.
2369 			// If any no-read/no-write flags are actually set, assume that the compiler knows what it's doing.
2370 
2371 			auto &flags = ir.meta[var].decoration.decoration_flags;
2372 			if (!flags.get(DecorationNonWritable) && !flags.get(DecorationNonReadable))
2373 			{
2374 				flags.set(DecorationNonWritable);
2375 				flags.set(DecorationNonReadable);
2376 			}
2377 		}
2378 	});
2379 }
2380 
2381 void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionModel model)
2382 {
2383 	Bitset emitted_builtins;
2384 	Bitset global_builtins;
2385 	const SPIRVariable *block_var = nullptr;
2386 	bool emitted_block = false;
2387 	bool builtin_array = false;
2388 
2389 	// Need to use declared size in the type.
2390 	// These variables might have been declared, but not statically used, so we haven't deduced their size yet.
2391 	uint32_t cull_distance_size = 0;
2392 	uint32_t clip_distance_size = 0;
2393 
2394 	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
2395 		auto &type = this->get<SPIRType>(var.basetype);
2396 		bool block = has_decoration(type.self, DecorationBlock);
2397 		Bitset builtins;
2398 
2399 		if (var.storage == storage && block && is_builtin_variable(var))
2400 		{
2401 			uint32_t index = 0;
2402 			for (auto &m : ir.meta[type.self].members)
2403 			{
2404 				if (m.builtin)
2405 				{
2406 					builtins.set(m.builtin_type);
2407 					if (m.builtin_type == BuiltInCullDistance)
2408 						cull_distance_size = this->get<SPIRType>(type.member_types[index]).array.front();
2409 					else if (m.builtin_type == BuiltInClipDistance)
2410 						clip_distance_size = this->get<SPIRType>(type.member_types[index]).array.front();
2411 				}
2412 				index++;
2413 			}
2414 		}
2415 		else if (var.storage == storage && !block && is_builtin_variable(var))
2416 		{
2417 			// While we're at it, collect all declared global builtins (HLSL mostly ...).
2418 			auto &m = ir.meta[var.self].decoration;
2419 			if (m.builtin)
2420 			{
2421 				global_builtins.set(m.builtin_type);
2422 				if (m.builtin_type == BuiltInCullDistance)
2423 					cull_distance_size = type.array.front();
2424 				else if (m.builtin_type == BuiltInClipDistance)
2425 					clip_distance_size = type.array.front();
2426 			}
2427 		}
2428 
2429 		if (builtins.empty())
2430 			return;
2431 
2432 		if (emitted_block)
2433 			SPIRV_CROSS_THROW("Cannot use more than one builtin I/O block.");
2434 
2435 		emitted_builtins = builtins;
2436 		emitted_block = true;
2437 		builtin_array = !type.array.empty();
2438 		block_var = &var;
2439 	});
2440 
2441 	global_builtins =
2442 	    Bitset(global_builtins.get_lower() & ((1ull << BuiltInPosition) | (1ull << BuiltInPointSize) |
2443 	                                          (1ull << BuiltInClipDistance) | (1ull << BuiltInCullDistance)));
2444 
2445 	// Try to collect all other declared builtins.
2446 	if (!emitted_block)
2447 		emitted_builtins = global_builtins;
2448 
2449 	// Can't declare an empty interface block.
2450 	if (emitted_builtins.empty())
2451 		return;
2452 
2453 	if (storage == StorageClassOutput)
2454 		statement("out gl_PerVertex");
2455 	else
2456 		statement("in gl_PerVertex");
2457 
2458 	begin_scope();
2459 	if (emitted_builtins.get(BuiltInPosition))
2460 		statement("vec4 gl_Position;");
2461 	if (emitted_builtins.get(BuiltInPointSize))
2462 		statement("float gl_PointSize;");
2463 	if (emitted_builtins.get(BuiltInClipDistance))
2464 		statement("float gl_ClipDistance[", clip_distance_size, "];");
2465 	if (emitted_builtins.get(BuiltInCullDistance))
2466 		statement("float gl_CullDistance[", cull_distance_size, "];");
2467 
2468 	bool tessellation = model == ExecutionModelTessellationEvaluation || model == ExecutionModelTessellationControl;
2469 	if (builtin_array)
2470 	{
2471 		// Make sure the array has a supported name in the code.
2472 		if (storage == StorageClassOutput)
2473 			set_name(block_var->self, "gl_out");
2474 		else if (storage == StorageClassInput)
2475 			set_name(block_var->self, "gl_in");
2476 
2477 		if (model == ExecutionModelTessellationControl && storage == StorageClassOutput)
2478 			end_scope_decl(join(to_name(block_var->self), "[", get_entry_point().output_vertices, "]"));
2479 		else
2480 			end_scope_decl(join(to_name(block_var->self), tessellation ? "[gl_MaxPatchVertices]" : "[]"));
2481 	}
2482 	else
2483 		end_scope_decl();
2484 	statement("");
2485 }
2486 
2487 void CompilerGLSL::declare_undefined_values()
2488 {
2489 	bool emitted = false;
2490 	ir.for_each_typed_id<SPIRUndef>([&](uint32_t, const SPIRUndef &undef) {
2491 		statement(variable_decl(this->get<SPIRType>(undef.basetype), to_name(undef.self), undef.self), ";");
2492 		emitted = true;
2493 	});
2494 
2495 	if (emitted)
2496 		statement("");
2497 }
2498 
2499 bool CompilerGLSL::variable_is_lut(const SPIRVariable &var) const
2500 {
2501 	bool statically_assigned = var.statically_assigned && var.static_expression != ID(0) && var.remapped_variable;
2502 
2503 	if (statically_assigned)
2504 	{
2505 		auto *constant = maybe_get<SPIRConstant>(var.static_expression);
2506 		if (constant && constant->is_used_as_lut)
2507 			return true;
2508 	}
2509 
2510 	return false;
2511 }
2512 
2513 void CompilerGLSL::emit_resources()
2514 {
2515 	auto &execution = get_entry_point();
2516 
2517 	replace_illegal_names();
2518 
2519 	// Legacy GL uses gl_FragData[], redeclare all fragment outputs
2520 	// with builtins.
2521 	if (execution.model == ExecutionModelFragment && is_legacy())
2522 		replace_fragment_outputs();
2523 
2524 	// Emit PLS blocks if we have such variables.
2525 	if (!pls_inputs.empty() || !pls_outputs.empty())
2526 		emit_pls();
2527 
2528 	// Emit custom gl_PerVertex for SSO compatibility.
2529 	if (options.separate_shader_objects && !options.es && execution.model != ExecutionModelFragment)
2530 	{
2531 		switch (execution.model)
2532 		{
2533 		case ExecutionModelGeometry:
2534 		case ExecutionModelTessellationControl:
2535 		case ExecutionModelTessellationEvaluation:
2536 			emit_declared_builtin_block(StorageClassInput, execution.model);
2537 			emit_declared_builtin_block(StorageClassOutput, execution.model);
2538 			break;
2539 
2540 		case ExecutionModelVertex:
2541 			emit_declared_builtin_block(StorageClassOutput, execution.model);
2542 			break;
2543 
2544 		default:
2545 			break;
2546 		}
2547 	}
2548 	else
2549 	{
2550 		// Need to redeclare clip/cull distance with explicit size to use them.
2551 		// SPIR-V mandates these builtins have a size declared.
2552 		const char *storage = execution.model == ExecutionModelFragment ? "in" : "out";
2553 		if (clip_distance_count != 0)
2554 			statement(storage, " float gl_ClipDistance[", clip_distance_count, "];");
2555 		if (cull_distance_count != 0)
2556 			statement(storage, " float gl_CullDistance[", cull_distance_count, "];");
2557 		if (clip_distance_count != 0 || cull_distance_count != 0)
2558 			statement("");
2559 	}
2560 
2561 	if (position_invariant)
2562 	{
2563 		statement("invariant gl_Position;");
2564 		statement("");
2565 	}
2566 
2567 	bool emitted = false;
2568 
2569 	// If emitting Vulkan GLSL,
2570 	// emit specialization constants as actual specialization constants;
2571 	// spec op expressions will redirect to the constant name.
2572 	//
2573 	{
2574 		auto loop_lock = ir.create_loop_hard_lock();
2575 		for (auto &id_ : ir.ids_for_constant_or_type)
2576 		{
2577 			auto &id = ir.ids[id_];
2578 
2579 			if (id.get_type() == TypeConstant)
2580 			{
2581 				auto &c = id.get<SPIRConstant>();
2582 
2583 				bool needs_declaration = c.specialization || c.is_used_as_lut;
2584 
2585 				if (needs_declaration)
2586 				{
2587 					if (!options.vulkan_semantics && c.specialization)
2588 					{
2589 						c.specialization_constant_macro_name =
2590 						    constant_value_macro_name(get_decoration(c.self, DecorationSpecId));
2591 					}
2592 					emit_constant(c);
2593 					emitted = true;
2594 				}
2595 			}
2596 			else if (id.get_type() == TypeConstantOp)
2597 			{
2598 				emit_specialization_constant_op(id.get<SPIRConstantOp>());
2599 				emitted = true;
2600 			}
2601 			else if (id.get_type() == TypeType)
2602 			{
2603 				auto &type = id.get<SPIRType>();
2604 				if (type.basetype == SPIRType::Struct && type.array.empty() && !type.pointer &&
2605 				    (!ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) &&
2606 				     !ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock)))
2607 				{
2608 					if (emitted)
2609 						statement("");
2610 					emitted = false;
2611 
2612 					emit_struct(type);
2613 				}
2614 			}
2615 		}
2616 	}
2617 
2618 	if (emitted)
2619 		statement("");
2620 
2621 	// If we needed to declare work group size late, check here.
2622 	// If the work group size depends on a specialization constant, we need to declare the layout() block
2623 	// after constants (and their macros) have been declared.
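	// For illustration only: this typically results in something along the lines of
	//
	//     layout(local_size_x = SPIRV_CROSS_CONSTANT_ID_0, local_size_y = 1, local_size_z = 1) in;
	//
	// where the macros were declared alongside the other constants above.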
2624 	if (execution.model == ExecutionModelGLCompute && !options.vulkan_semantics &&
2625 	    execution.workgroup_size.constant != 0)
2626 	{
2627 		SpecializationConstant wg_x, wg_y, wg_z;
2628 		get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);
2629 
2630 		if ((wg_x.id != ConstantID(0)) || (wg_y.id != ConstantID(0)) || (wg_z.id != ConstantID(0)))
2631 		{
2632 			SmallVector<string> inputs;
2633 			build_workgroup_size(inputs, wg_x, wg_y, wg_z);
2634 			statement("layout(", merge(inputs), ") in;");
2635 			statement("");
2636 		}
2637 	}
2638 
2639 	emitted = false;
2640 
2641 	if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
2642 	{
2643 		for (auto type : physical_storage_non_block_pointer_types)
2644 		{
2645 			emit_buffer_reference_block(get<SPIRType>(type), false);
2646 		}
2647 
2648 		// Output buffer reference blocks.
2649 		// Do this in two stages, one with forward declaration,
2650 		// and one without. Buffer reference blocks can reference themselves
2651 		// to support things like linked lists.
2652 		ir.for_each_typed_id<SPIRType>([&](uint32_t, SPIRType &type) {
2653 			bool has_block_flags = has_decoration(type.self, DecorationBlock);
2654 			if (has_block_flags && type.pointer && type.pointer_depth == 1 && !type_is_array_of_pointers(type) &&
2655 			    type.storage == StorageClassPhysicalStorageBufferEXT)
2656 			{
2657 				emit_buffer_reference_block(type, true);
2658 			}
2659 		});
2660 
2661 		ir.for_each_typed_id<SPIRType>([&](uint32_t, SPIRType &type) {
2662 			bool has_block_flags = has_decoration(type.self, DecorationBlock);
2663 			if (has_block_flags && type.pointer && type.pointer_depth == 1 && !type_is_array_of_pointers(type) &&
2664 			    type.storage == StorageClassPhysicalStorageBufferEXT)
2665 			{
2666 				emit_buffer_reference_block(type, false);
2667 			}
2668 		});
2669 	}
2670 
2671 	// Output UBOs and SSBOs
2672 	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
2673 		auto &type = this->get<SPIRType>(var.basetype);
2674 
2675 		bool is_block_storage = type.storage == StorageClassStorageBuffer || type.storage == StorageClassUniform ||
2676 		                        type.storage == StorageClassShaderRecordBufferNV;
2677 		bool has_block_flags = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) ||
2678 		                       ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
2679 
2680 		if (var.storage != StorageClassFunction && type.pointer && is_block_storage && !is_hidden_variable(var) &&
2681 		    has_block_flags)
2682 		{
2683 			emit_buffer_block(var);
2684 		}
2685 	});
2686 
2687 	// Output push constant blocks
2688 	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
2689 		auto &type = this->get<SPIRType>(var.basetype);
2690 		if (var.storage != StorageClassFunction && type.pointer && type.storage == StorageClassPushConstant &&
2691 		    !is_hidden_variable(var))
2692 		{
2693 			emit_push_constant_block(var);
2694 		}
2695 	});
2696 
2697 	bool skip_separate_image_sampler = !combined_image_samplers.empty() || !options.vulkan_semantics;
2698 
2699 	// Output Uniform Constants (values, samplers, images, etc).
2700 	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
2701 		auto &type = this->get<SPIRType>(var.basetype);
2702 
2703 		// If we're remapping separate samplers and images, only emit the combined samplers.
2704 		if (skip_separate_image_sampler)
2705 		{
2706 			// Sampler buffers are always used without a sampler, and they will also work in regular GL.
2707 			bool sampler_buffer = type.basetype == SPIRType::Image && type.image.dim == DimBuffer;
2708 			bool separate_image = type.basetype == SPIRType::Image && type.image.sampled == 1;
2709 			bool separate_sampler = type.basetype == SPIRType::Sampler;
2710 			if (!sampler_buffer && (separate_image || separate_sampler))
2711 				return;
2712 		}
2713 
2714 		if (var.storage != StorageClassFunction && type.pointer &&
2715 		    (type.storage == StorageClassUniformConstant || type.storage == StorageClassAtomicCounter ||
2716 		     type.storage == StorageClassRayPayloadNV || type.storage == StorageClassIncomingRayPayloadNV ||
2717 		     type.storage == StorageClassCallableDataNV || type.storage == StorageClassIncomingCallableDataNV ||
2718 		     type.storage == StorageClassHitAttributeNV) &&
2719 		    !is_hidden_variable(var))
2720 		{
2721 			emit_uniform(var);
2722 			emitted = true;
2723 		}
2724 	});
2725 
2726 	if (emitted)
2727 		statement("");
2728 	emitted = false;
2729 
2730 	// Output in/out interfaces.
2731 	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
2732 		auto &type = this->get<SPIRType>(var.basetype);
2733 
2734 		if (var.storage != StorageClassFunction && type.pointer &&
2735 		    (var.storage == StorageClassInput || var.storage == StorageClassOutput) &&
2736 		    interface_variable_exists_in_entry_point(var.self) && !is_hidden_variable(var))
2737 		{
2738 			emit_interface_block(var);
2739 			emitted = true;
2740 		}
2741 		else if (is_builtin_variable(var))
2742 		{
2743 			// For gl_InstanceIndex emulation on GLES, the API user needs to
2744 			// supply this uniform.
2745 			if (options.vertex.support_nonzero_base_instance &&
2746 			    ir.meta[var.self].decoration.builtin_type == BuiltInInstanceIndex && !options.vulkan_semantics)
2747 			{
2748 				statement("uniform int SPIRV_Cross_BaseInstance;");
2749 				emitted = true;
2750 			}
2751 		}
2752 	});
2753 
2754 	// Global variables.
2755 	for (auto global : global_variables)
2756 	{
2757 		auto &var = get<SPIRVariable>(global);
2758 		if (var.storage != StorageClassOutput)
2759 		{
2760 			if (!variable_is_lut(var))
2761 			{
2762 				add_resource_name(var.self);
2763 				statement(variable_decl(var), ";");
2764 				emitted = true;
2765 			}
2766 		}
2767 	}
2768 
2769 	if (emitted)
2770 		statement("");
2771 
2772 	declare_undefined_values();
2773 }
2774 
2775 // Returns a string representation of the ID, usable as a function arg.
2776 // Default is to simply return the expression representation of the arg ID.
2777 // Subclasses may override to modify the return value.
2778 string CompilerGLSL::to_func_call_arg(const SPIRFunction::Parameter &, uint32_t id)
2779 {
2780 	// Make sure that we use the name of the original variable, and not the parameter alias.
2781 	uint32_t name_id = id;
2782 	auto *var = maybe_get<SPIRVariable>(id);
2783 	if (var && var->basevariable)
2784 		name_id = var->basevariable;
2785 	return to_expression(name_id);
2786 }
2787 
2788 void CompilerGLSL::handle_invalid_expression(uint32_t id)
2789 {
2790 	// We tried to read an invalidated expression.
2791 	// This means we need another pass at compilation, but next time, force temporary variables so that they cannot be invalidated.
2792 	forced_temporaries.insert(id);
2793 	force_recompile();
2794 }
2795 
2796 // Converts the format of the current expression from packed to unpacked,
2797 // by wrapping the expression in a constructor of the appropriate type.
2798 // GLSL does not support packed formats, so simply return the expression.
2799 // Subclasses that support packed formats will override this.
2800 string CompilerGLSL::unpack_expression_type(string expr_str, const SPIRType &, uint32_t, bool, bool)
2801 {
2802 	return expr_str;
2803 }
2804 
2805 // Sometimes we proactively enclose an expression even though it turns out we did not need it after all.
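// For illustration: "(a + b)" is stripped to "a + b", while "(a + b) * (c + d)" is left
// untouched because its first parenthesis does not enclose the whole expression.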
2806 void CompilerGLSL::strip_enclosed_expression(string &expr)
2807 {
2808 	if (expr.size() < 2 || expr.front() != '(' || expr.back() != ')')
2809 		return;
2810 
2811 	// Have to make sure that our first and last parens actually enclose everything inside it.
2812 	uint32_t paren_count = 0;
2813 	for (auto &c : expr)
2814 	{
2815 		if (c == '(')
2816 			paren_count++;
2817 		else if (c == ')')
2818 		{
2819 			paren_count--;
2820 
2821 			// If we hit 0 and this is not the final char, our first and final parens actually don't
2822 			// enclose the expression, and we cannot strip, e.g.: (a + b) * (c + d).
2823 			if (paren_count == 0 && &c != &expr.back())
2824 				return;
2825 		}
2826 	}
2827 	expr.erase(expr.size() - 1, 1);
2828 	expr.erase(begin(expr));
2829 }
2830 
2831 string CompilerGLSL::enclose_expression(const string &expr)
2832 {
2833 	bool need_parens = false;
2834 
2835 	// If the expression starts with a unary we need to enclose to deal with cases where we have back-to-back
2836 	// unary expressions.
2837 	if (!expr.empty())
2838 	{
2839 		auto c = expr.front();
2840 		if (c == '-' || c == '+' || c == '!' || c == '~' || c == '&' || c == '*')
2841 			need_parens = true;
2842 	}
2843 
2844 	if (!need_parens)
2845 	{
2846 		uint32_t paren_count = 0;
2847 		for (auto c : expr)
2848 		{
2849 			if (c == '(' || c == '[')
2850 				paren_count++;
2851 			else if (c == ')' || c == ']')
2852 			{
2853 				assert(paren_count);
2854 				paren_count--;
2855 			}
2856 			else if (c == ' ' && paren_count == 0)
2857 			{
2858 				need_parens = true;
2859 				break;
2860 			}
2861 		}
2862 		assert(paren_count == 0);
2863 	}
2864 
2865 	// If this expression contains any spaces which are not enclosed by parentheses,
2866 	// we need to enclose it so we can treat the whole string as an expression.
2867 	// This happens when two expressions have been part of a binary op earlier.
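	// For illustration: "a + b" becomes "(a + b)", while "texture(s, uv)" is returned
	// unchanged because its only spaces sit inside parentheses.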
2868 	if (need_parens)
2869 		return join('(', expr, ')');
2870 	else
2871 		return expr;
2872 }
2873 
2874 string CompilerGLSL::dereference_expression(const SPIRType &expr_type, const std::string &expr)
2875 {
2876 	// If this expression starts with an address-of operator ('&'), then
2877 	// just return the part after the operator.
2878 	// TODO: Strip parens if unnecessary?
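	// For illustration: "&foo" dereferences to "foo"; with native pointer support the result
	// is "*expr"; and a non-struct physical storage buffer pointer is accessed through the
	// generated ".value" member (see emit_buffer_reference_block above).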
2879 	if (expr.front() == '&')
2880 		return expr.substr(1);
2881 	else if (backend.native_pointers)
2882 		return join('*', expr);
2883 	else if (expr_type.storage == StorageClassPhysicalStorageBufferEXT && expr_type.basetype != SPIRType::Struct &&
2884 	         expr_type.pointer_depth == 1)
2885 	{
2886 		return join(enclose_expression(expr), ".value");
2887 	}
2888 	else
2889 		return expr;
2890 }
2891 
2892 string CompilerGLSL::address_of_expression(const std::string &expr)
2893 {
2894 	if (expr.size() > 3 && expr[0] == '(' && expr[1] == '*' && expr.back() == ')')
2895 	{
2896 		// If we have an expression which looks like (*foo), taking the address of it is the same as stripping
2897 		// the first two and last characters. We might have to enclose the expression.
2898 		// This doesn't work for cases like (*foo + 10),
2899 		// but this is an r-value expression which we cannot take the address of anyway.
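		// For illustration: "(*foo)" becomes "foo" here, "*foo" becomes "foo" in the branch
		// below, and anything else becomes "&expr" (enclosed if necessary).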
2900 		return enclose_expression(expr.substr(2, expr.size() - 3));
2901 	}
2902 	else if (expr.front() == '*')
2903 	{
2904 		// If this expression starts with a dereference operator ('*'), then
2905 		// just return the part after the operator.
2906 		return expr.substr(1);
2907 	}
2908 	else
2909 		return join('&', enclose_expression(expr));
2910 }
2911 
2912 // Just like to_expression except that we enclose the expression inside parentheses if needed.
2913 string CompilerGLSL::to_enclosed_expression(uint32_t id, bool register_expression_read)
2914 {
2915 	return enclose_expression(to_expression(id, register_expression_read));
2916 }
2917 
2918 // Used explicitly when we want to read a row-major expression, but without any transpose shenanigans.
2919 // need_transpose must be forced to false.
2920 string CompilerGLSL::to_unpacked_row_major_matrix_expression(uint32_t id)
2921 {
2922 	return unpack_expression_type(to_expression(id), expression_type(id),
2923 	                              get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID),
2924 	                              has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked), true);
2925 }
2926 
2927 string CompilerGLSL::to_unpacked_expression(uint32_t id, bool register_expression_read)
2928 {
2929 	// If we need to transpose, it will also take care of unpacking rules.
2930 	auto *e = maybe_get<SPIRExpression>(id);
2931 	bool need_transpose = e && e->need_transpose;
2932 	bool is_remapped = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID);
2933 	bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
2934 
2935 	if (!need_transpose && (is_remapped || is_packed))
2936 	{
2937 		return unpack_expression_type(to_expression(id, register_expression_read),
2938 		                              get_pointee_type(expression_type_id(id)),
2939 		                              get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID),
2940 		                              has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked), false);
2941 	}
2942 	else
2943 		return to_expression(id, register_expression_read);
2944 }
2945 
2946 string CompilerGLSL::to_enclosed_unpacked_expression(uint32_t id, bool register_expression_read)
2947 {
2948 	// If we need to transpose, it will also take care of unpacking rules.
2949 	auto *e = maybe_get<SPIRExpression>(id);
2950 	bool need_transpose = e && e->need_transpose;
2951 	bool is_remapped = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID);
2952 	bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
2953 	if (!need_transpose && (is_remapped || is_packed))
2954 	{
2955 		return unpack_expression_type(to_expression(id, register_expression_read), expression_type(id),
2956 		                              get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID),
2957 		                              has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked), false);
2958 	}
2959 	else
2960 		return to_enclosed_expression(id, register_expression_read);
2961 }
2962 
2963 string CompilerGLSL::to_dereferenced_expression(uint32_t id, bool register_expression_read)
2964 {
2965 	auto &type = expression_type(id);
2966 	if (type.pointer && should_dereference(id))
2967 		return dereference_expression(type, to_enclosed_expression(id, register_expression_read));
2968 	else
2969 		return to_expression(id, register_expression_read);
2970 }
2971 
2972 string CompilerGLSL::to_pointer_expression(uint32_t id, bool register_expression_read)
2973 {
2974 	auto &type = expression_type(id);
2975 	if (type.pointer && expression_is_lvalue(id) && !should_dereference(id))
2976 		return address_of_expression(to_enclosed_expression(id, register_expression_read));
2977 	else
2978 		return to_unpacked_expression(id, register_expression_read);
2979 }
2980 
2981 string CompilerGLSL::to_enclosed_pointer_expression(uint32_t id, bool register_expression_read)
2982 {
2983 	auto &type = expression_type(id);
2984 	if (type.pointer && expression_is_lvalue(id) && !should_dereference(id))
2985 		return address_of_expression(to_enclosed_expression(id, register_expression_read));
2986 	else
2987 		return to_enclosed_unpacked_expression(id, register_expression_read);
2988 }
2989 
2990 string CompilerGLSL::to_extract_component_expression(uint32_t id, uint32_t index)
2991 {
2992 	auto expr = to_enclosed_expression(id);
2993 	if (has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked))
2994 		return join(expr, "[", index, "]");
2995 	else
2996 		return join(expr, ".", index_to_swizzle(index));
2997 }
2998 
2999 string CompilerGLSL::to_rerolled_array_expression(const string &base_expr, const SPIRType &type)
3000 {
3001 	uint32_t size = to_array_size_literal(type);
3002 	auto &parent = get<SPIRType>(type.parent_type);
3003 	string expr = "{ ";
3004 
3005 	for (uint32_t i = 0; i < size; i++)
3006 	{
3007 		auto subexpr = join(base_expr, "[", convert_to_string(i), "]");
3008 		if (parent.array.empty())
3009 			expr += subexpr;
3010 		else
3011 			expr += to_rerolled_array_expression(subexpr, parent);
3012 
3013 		if (i + 1 < size)
3014 			expr += ", ";
3015 	}
3016 
3017 	expr += " }";
3018 	return expr;
3019 }
3020 
3021 string CompilerGLSL::to_composite_constructor_expression(uint32_t id)
3022 {
3023 	auto &type = expression_type(id);
3024 	if (!backend.array_is_value_type && !type.array.empty())
3025 	{
3026 		// For this case, we need to "re-roll" an array initializer from a temporary.
3027 		// We cannot simply pass the array directly, since it decays to a pointer and it cannot
3028 		// participate in a struct initializer. E.g.
3029 		// float arr[2] = { 1.0, 2.0 };
3030 		// Foo foo = { arr }; must be transformed to
3031 		// Foo foo = { { arr[0], arr[1] } };
3032 		// The array sizes cannot be deduced from specialization constants since we cannot use any loops.
3033 
3034 		// We're only triggering one read of the array expression, but this is fine since arrays have to be declared
3035 		// as temporaries anyway.
3036 		return to_rerolled_array_expression(to_enclosed_expression(id), type);
3037 	}
3038 	else
3039 		return to_unpacked_expression(id);
3040 }
3041 
3042 string CompilerGLSL::to_expression(uint32_t id, bool register_expression_read)
3043 {
3044 	auto itr = invalid_expressions.find(id);
3045 	if (itr != end(invalid_expressions))
3046 		handle_invalid_expression(id);
3047 
3048 	if (ir.ids[id].get_type() == TypeExpression)
3049 	{
3050 		// We might have a more complex chain of dependencies.
3051 		// A possible scenario is that we
3052 		//
3053 		// %1 = OpLoad
3054 		// %2 = OpDoSomething %1 %1. Here %2 will have a dependency on %1.
3055 		// %3 = OpDoSomethingAgain %2 %2. Here %3 will lose the link to %1 since we don't propagate the dependencies like that.
3056 		// OpStore %1 %foo // Here we can invalidate %1, and hence all expressions which depend on %1. Only %2 will know since it's part of invalid_expressions.
3057 		// %4 = OpDoSomethingAnotherTime %3 %3 // If we forward all expressions we will see %1 expression after store, not before.
3058 		//
3059 		// However, we can propagate up a list of depended expressions when we used %2, so we can check if %2 is invalid when reading %3 after the store,
3060 		// and see that we should not forward reads of the original variable.
3061 		auto &expr = get<SPIRExpression>(id);
3062 		for (uint32_t dep : expr.expression_dependencies)
3063 			if (invalid_expressions.find(dep) != end(invalid_expressions))
3064 				handle_invalid_expression(dep);
3065 	}
3066 
3067 	if (register_expression_read)
3068 		track_expression_read(id);
3069 
3070 	switch (ir.ids[id].get_type())
3071 	{
3072 	case TypeExpression:
3073 	{
3074 		auto &e = get<SPIRExpression>(id);
3075 		if (e.base_expression)
3076 			return to_enclosed_expression(e.base_expression) + e.expression;
3077 		else if (e.need_transpose)
3078 		{
3079 			// This should not be reached for access chains, since we always deal explicitly with transpose state
3080 			// when consuming an access chain expression.
3081 			uint32_t physical_type_id = get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID);
3082 			bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
3083 			return convert_row_major_matrix(e.expression, get<SPIRType>(e.expression_type), physical_type_id,
3084 			                                is_packed);
3085 		}
3086 		else
3087 		{
3088 			if (is_forcing_recompilation())
3089 			{
3090 				// During the first compilation phase, certain expression patterns can trigger exponential growth of memory.
3091 				// Avoid this by returning dummy expressions during this phase.
3092 				// Do not use empty expressions here, because those are sentinels for other cases.
3093 				return "_";
3094 			}
3095 			else
3096 				return e.expression;
3097 		}
3098 	}
3099 
3100 	case TypeConstant:
3101 	{
3102 		auto &c = get<SPIRConstant>(id);
3103 		auto &type = get<SPIRType>(c.constant_type);
3104 
3105 		// WorkGroupSize may be a constant.
3106 		auto &dec = ir.meta[c.self].decoration;
3107 		if (dec.builtin)
3108 			return builtin_to_glsl(dec.builtin_type, StorageClassGeneric);
3109 		else if (c.specialization)
3110 			return to_name(id);
3111 		else if (c.is_used_as_lut)
3112 			return to_name(id);
3113 		else if (type.basetype == SPIRType::Struct && !backend.can_declare_struct_inline)
3114 			return to_name(id);
3115 		else if (!type.array.empty() && !backend.can_declare_arrays_inline)
3116 			return to_name(id);
3117 		else
3118 			return constant_expression(c);
3119 	}
3120 
3121 	case TypeConstantOp:
3122 		return to_name(id);
3123 
3124 	case TypeVariable:
3125 	{
3126 		auto &var = get<SPIRVariable>(id);
3127 		// If we try to use a loop variable before the loop header, we have to redirect it to the static expression,
3128 		// because the variable has not been declared yet.
3129 		if (var.statically_assigned || (var.loop_variable && !var.loop_variable_enable))
3130 			return to_expression(var.static_expression);
3131 		else if (var.deferred_declaration)
3132 		{
3133 			var.deferred_declaration = false;
3134 			return variable_decl(var);
3135 		}
3136 		else if (flattened_structs.count(id))
3137 		{
3138 			return load_flattened_struct(var);
3139 		}
3140 		else
3141 		{
3142 			auto &dec = ir.meta[var.self].decoration;
3143 			if (dec.builtin)
3144 				return builtin_to_glsl(dec.builtin_type, var.storage);
3145 			else
3146 				return to_name(id);
3147 		}
3148 	}
3149 
3150 	case TypeCombinedImageSampler:
3151 		// This type should never have its expression taken directly.
3152 		// The intention is that texture sampling functions will extract the image and samplers
3153 		// separately and take their expressions as needed.
3154 		// GLSL does not use this type because OpSampledImage immediately creates a combined image sampler
3155 		// expression a la sampler2D(texture, sampler).
3156 		SPIRV_CROSS_THROW("Combined image samplers have no default expression representation.");
3157 
3158 	case TypeAccessChain:
3159 		// We cannot express this type. They only have meaning in other OpAccessChains, OpStore or OpLoad.
3160 		SPIRV_CROSS_THROW("Access chains have no default expression representation.");
3161 
3162 	default:
3163 		return to_name(id);
3164 	}
3165 }
3166 
3167 string CompilerGLSL::constant_op_expression(const SPIRConstantOp &cop)
3168 {
3169 	auto &type = get<SPIRType>(cop.basetype);
3170 	bool binary = false;
3171 	bool unary = false;
3172 	string op;
3173 
3174 	if (is_legacy() && is_unsigned_opcode(cop.opcode))
3175 		SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy targets.");
3176 
3177 	// TODO: Find a clean way to reuse emit_instruction.
3178 	switch (cop.opcode)
3179 	{
3180 	case OpSConvert:
3181 	case OpUConvert:
3182 	case OpFConvert:
3183 		op = type_to_glsl_constructor(type);
3184 		break;
3185 
3186 #define GLSL_BOP(opname, x) \
3187 	case Op##opname:        \
3188 		binary = true;      \
3189 		op = x;             \
3190 		break
3191 
3192 #define GLSL_UOP(opname, x) \
3193 	case Op##opname:        \
3194 		unary = true;       \
3195 		op = x;             \
3196 		break
3197 
3198 		GLSL_UOP(SNegate, "-");
3199 		GLSL_UOP(Not, "~");
3200 		GLSL_BOP(IAdd, "+");
3201 		GLSL_BOP(ISub, "-");
3202 		GLSL_BOP(IMul, "*");
3203 		GLSL_BOP(SDiv, "/");
3204 		GLSL_BOP(UDiv, "/");
3205 		GLSL_BOP(UMod, "%");
3206 		GLSL_BOP(SMod, "%");
3207 		GLSL_BOP(ShiftRightLogical, ">>");
3208 		GLSL_BOP(ShiftRightArithmetic, ">>");
3209 		GLSL_BOP(ShiftLeftLogical, "<<");
3210 		GLSL_BOP(BitwiseOr, "|");
3211 		GLSL_BOP(BitwiseXor, "^");
3212 		GLSL_BOP(BitwiseAnd, "&");
3213 		GLSL_BOP(LogicalOr, "||");
3214 		GLSL_BOP(LogicalAnd, "&&");
3215 		GLSL_UOP(LogicalNot, "!");
3216 		GLSL_BOP(LogicalEqual, "==");
3217 		GLSL_BOP(LogicalNotEqual, "!=");
3218 		GLSL_BOP(IEqual, "==");
3219 		GLSL_BOP(INotEqual, "!=");
3220 		GLSL_BOP(ULessThan, "<");
3221 		GLSL_BOP(SLessThan, "<");
3222 		GLSL_BOP(ULessThanEqual, "<=");
3223 		GLSL_BOP(SLessThanEqual, "<=");
3224 		GLSL_BOP(UGreaterThan, ">");
3225 		GLSL_BOP(SGreaterThan, ">");
3226 		GLSL_BOP(UGreaterThanEqual, ">=");
3227 		GLSL_BOP(SGreaterThanEqual, ">=");
3228 
3229 	case OpSelect:
3230 	{
3231 		if (cop.arguments.size() < 3)
3232 			SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
3233 
3234 		// This one is pretty annoying. It's triggered by
3235 		// uint(bool) and int(bool) casts of spec constants.
3236 		// In order to preserve its compile-time constness in Vulkan GLSL,
3237 		// we need to reduce the OpSelect expression back to this simplified model.
3238 		// If we cannot, fail.
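		// For example (illustrative): OpSelect %cond %int_1 %int_0 can be emitted as int(cond),
		// which stays a compile-time constant expression, whereas arbitrary operands fall back to cond ? a : b.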
3239 		if (to_trivial_mix_op(type, op, cop.arguments[2], cop.arguments[1], cop.arguments[0]))
3240 		{
3241 			// Implement as a simple cast down below.
3242 		}
3243 		else
3244 		{
3245 			// Implement a ternary and pray the compiler understands it :)
3246 			return to_ternary_expression(type, cop.arguments[0], cop.arguments[1], cop.arguments[2]);
3247 		}
3248 		break;
3249 	}
3250 
3251 	case OpVectorShuffle:
3252 	{
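		// Emits a constructor that selects components from the two inputs by swizzle, e.g. (illustrative)
		// shuffling two vec2 inputs with indices 0, 1, 2, 3 yields vec4(a.x, a.y, b.x, b.y).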
3253 		string expr = type_to_glsl_constructor(type);
3254 		expr += "(";
3255 
3256 		uint32_t left_components = expression_type(cop.arguments[0]).vecsize;
3257 		string left_arg = to_enclosed_expression(cop.arguments[0]);
3258 		string right_arg = to_enclosed_expression(cop.arguments[1]);
3259 
3260 		for (uint32_t i = 2; i < uint32_t(cop.arguments.size()); i++)
3261 		{
3262 			uint32_t index = cop.arguments[i];
3263 			if (index >= left_components)
3264 				expr += right_arg + "." + "xyzw"[index - left_components];
3265 			else
3266 				expr += left_arg + "." + "xyzw"[index];
3267 
3268 			if (i + 1 < uint32_t(cop.arguments.size()))
3269 				expr += ", ";
3270 		}
3271 
3272 		expr += ")";
3273 		return expr;
3274 	}
3275 
3276 	case OpCompositeExtract:
3277 	{
3278 		auto expr = access_chain_internal(cop.arguments[0], &cop.arguments[1], uint32_t(cop.arguments.size() - 1),
3279 		                                  ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr);
3280 		return expr;
3281 	}
3282 
3283 	case OpCompositeInsert:
3284 		SPIRV_CROSS_THROW("OpCompositeInsert spec constant op is not supported.");
3285 
3286 	default:
3287 		// Some opcodes are unimplemented here, these are currently not possible to test from glslang.
3288 		SPIRV_CROSS_THROW("Unimplemented spec constant op.");
3289 	}
3290 
3291 	uint32_t bit_width = 0;
3292 	if (unary || binary || cop.opcode == OpSConvert || cop.opcode == OpUConvert)
3293 		bit_width = expression_type(cop.arguments[0]).width;
3294 
3295 	SPIRType::BaseType input_type;
3296 	bool skip_cast_if_equal_type = opcode_is_sign_invariant(cop.opcode);
3297 
3298 	switch (cop.opcode)
3299 	{
3300 	case OpIEqual:
3301 	case OpINotEqual:
3302 		input_type = to_signed_basetype(bit_width);
3303 		break;
3304 
3305 	case OpSLessThan:
3306 	case OpSLessThanEqual:
3307 	case OpSGreaterThan:
3308 	case OpSGreaterThanEqual:
3309 	case OpSMod:
3310 	case OpSDiv:
3311 	case OpShiftRightArithmetic:
3312 	case OpSConvert:
3313 	case OpSNegate:
3314 		input_type = to_signed_basetype(bit_width);
3315 		break;
3316 
3317 	case OpULessThan:
3318 	case OpULessThanEqual:
3319 	case OpUGreaterThan:
3320 	case OpUGreaterThanEqual:
3321 	case OpUMod:
3322 	case OpUDiv:
3323 	case OpShiftRightLogical:
3324 	case OpUConvert:
3325 		input_type = to_unsigned_basetype(bit_width);
3326 		break;
3327 
3328 	default:
3329 		input_type = type.basetype;
3330 		break;
3331 	}
3332 
3333 #undef GLSL_BOP
3334 #undef GLSL_UOP
3335 	if (binary)
3336 	{
3337 		if (cop.arguments.size() < 2)
3338 			SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
3339 
3340 		string cast_op0;
3341 		string cast_op1;
3342 		auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, cop.arguments[0],
3343 		                                              cop.arguments[1], skip_cast_if_equal_type);
3344 
3345 		if (type.basetype != input_type && type.basetype != SPIRType::Boolean)
3346 		{
3347 			expected_type.basetype = input_type;
3348 			auto expr = bitcast_glsl_op(type, expected_type);
3349 			expr += '(';
3350 			expr += join(cast_op0, " ", op, " ", cast_op1);
3351 			expr += ')';
3352 			return expr;
3353 		}
3354 		else
3355 			return join("(", cast_op0, " ", op, " ", cast_op1, ")");
3356 	}
3357 	else if (unary)
3358 	{
3359 		if (cop.arguments.size() < 1)
3360 			SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
3361 
3362 		// Auto-bitcast to result type as needed.
3363 		// Works around various casting scenarios in glslang as there is no OpBitcast for specialization constants.
3364 		return join("(", op, bitcast_glsl(type, cop.arguments[0]), ")");
3365 	}
3366 	else if (cop.opcode == OpSConvert || cop.opcode == OpUConvert)
3367 	{
3368 		if (cop.arguments.size() < 1)
3369 			SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
3370 
3371 		auto &arg_type = expression_type(cop.arguments[0]);
3372 		if (arg_type.width < type.width && input_type != arg_type.basetype)
3373 		{
3374 			auto expected = arg_type;
3375 			expected.basetype = input_type;
3376 			return join(op, "(", bitcast_glsl(expected, cop.arguments[0]), ")");
3377 		}
3378 		else
3379 			return join(op, "(", to_expression(cop.arguments[0]), ")");
3380 	}
3381 	else
3382 	{
3383 		if (cop.arguments.size() < 1)
3384 			SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
3385 		return join(op, "(", to_expression(cop.arguments[0]), ")");
3386 	}
3387 }
3388 
3389 string CompilerGLSL::constant_expression(const SPIRConstant &c)
3390 {
3391 	auto &type = get<SPIRType>(c.constant_type);
3392 
3393 	if (type.pointer)
3394 	{
3395 		return backend.null_pointer_literal;
3396 	}
3397 	else if (!c.subconstants.empty())
3398 	{
3399 		// Handles Arrays and structures.
3400 		string res;
3401 
3402 		// Allow Metal to use the array<T> template to make arrays a value type
3403 		bool needs_trailing_bracket = false;
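		// Depending on the backend, this opens the aggregate as, e.g. (illustrative, assuming a struct type named Foo):
		// "Foo{ " (typed struct initializer), "Foo({ " (typed array initializer, closed with an extra ')'),
		// "{ " (plain initializer list) or "Foo(" (constructor syntax).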
3404 		if (backend.use_initializer_list && backend.use_typed_initializer_list && type.basetype == SPIRType::Struct &&
3405 		    type.array.empty())
3406 		{
3407 			res = type_to_glsl_constructor(type) + "{ ";
3408 		}
3409 		else if (backend.use_initializer_list && backend.use_typed_initializer_list && !type.array.empty())
3410 		{
3411 			res = type_to_glsl_constructor(type) + "({ ";
3412 			needs_trailing_bracket = true;
3413 		}
3414 		else if (backend.use_initializer_list)
3415 		{
3416 			res = "{ ";
3417 		}
3418 		else
3419 		{
3420 			res = type_to_glsl_constructor(type) + "(";
3421 		}
3422 
3423 		for (auto &elem : c.subconstants)
3424 		{
3425 			auto &subc = get<SPIRConstant>(elem);
3426 			if (subc.specialization)
3427 				res += to_name(elem);
3428 			else
3429 				res += constant_expression(subc);
3430 
3431 			if (&elem != &c.subconstants.back())
3432 				res += ", ";
3433 		}
3434 
3435 		res += backend.use_initializer_list ? " }" : ")";
3436 		if (needs_trailing_bracket)
3437 			res += ")";
3438 
3439 		return res;
3440 	}
3441 	else if (type.basetype == SPIRType::Struct && type.member_types.size() == 0)
3442 	{
3443 		// Metal tessellation likes empty structs which are then constant expressions.
3444 		if (backend.supports_empty_struct)
3445 			return "{ }";
3446 		else if (backend.use_typed_initializer_list)
3447 			return join(type_to_glsl(get<SPIRType>(c.constant_type)), "{ 0 }");
3448 		else if (backend.use_initializer_list)
3449 			return "{ 0 }";
3450 		else
3451 			return join(type_to_glsl(get<SPIRType>(c.constant_type)), "(0)");
3452 	}
3453 	else if (c.columns() == 1)
3454 	{
3455 		return constant_expression_vector(c, 0);
3456 	}
3457 	else
3458 	{
3459 		string res = type_to_glsl(get<SPIRType>(c.constant_type)) + "(";
3460 		for (uint32_t col = 0; col < c.columns(); col++)
3461 		{
3462 			if (c.specialization_constant_id(col) != 0)
3463 				res += to_name(c.specialization_constant_id(col));
3464 			else
3465 				res += constant_expression_vector(c, col);
3466 
3467 			if (col + 1 < c.columns())
3468 				res += ", ";
3469 		}
3470 		res += ")";
3471 		return res;
3472 	}
3473 }
3474 
3475 #ifdef _MSC_VER
3476 // sprintf warning.
3477 // We cannot rely on snprintf existing because, ..., MSVC.
3478 #pragma warning(push)
3479 #pragma warning(disable : 4996)
3480 #endif
3481 
3482 string CompilerGLSL::convert_half_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
3483 {
3484 	string res;
3485 	float float_value = c.scalar_f16(col, row);
3486 
3487 	// There is no literal "hf" in GL_NV_gpu_shader5, so to avoid lots
3488 	// of complicated workarounds, just value-cast to the half type always.
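	// For example (illustrative), a finite value becomes something like float16_t(1.5),
	// while NaN and +inf become float16_t(0.0 / 0.0) and float16_t(1.0 / 0.0) respectively.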
3489 	if (std::isnan(float_value) || std::isinf(float_value))
3490 	{
3491 		SPIRType type;
3492 		type.basetype = SPIRType::Half;
3493 		type.vecsize = 1;
3494 		type.columns = 1;
3495 
3496 		if (float_value == numeric_limits<float>::infinity())
3497 			res = join(type_to_glsl(type), "(1.0 / 0.0)");
3498 		else if (float_value == -numeric_limits<float>::infinity())
3499 			res = join(type_to_glsl(type), "(-1.0 / 0.0)");
3500 		else if (std::isnan(float_value))
3501 			res = join(type_to_glsl(type), "(0.0 / 0.0)");
3502 		else
3503 			SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant.");
3504 	}
3505 	else
3506 	{
3507 		SPIRType type;
3508 		type.basetype = SPIRType::Half;
3509 		type.vecsize = 1;
3510 		type.columns = 1;
3511 		res = join(type_to_glsl(type), "(", convert_to_string(float_value, current_locale_radix_character), ")");
3512 	}
3513 
3514 	return res;
3515 }
3516 
3517 string CompilerGLSL::convert_float_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
3518 {
3519 	string res;
3520 	float float_value = c.scalar_f32(col, row);
3521 
3522 	if (std::isnan(float_value) || std::isinf(float_value))
3523 	{
3524 		// Use special representation.
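		// On non-legacy targets this is emitted as a bitcast of the raw bits, e.g. (illustrative)
		// uintBitsToFloat(0x7f800000u) for +inf; legacy targets fall back to expressions like (1.0 / 0.0).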
3525 		if (!is_legacy())
3526 		{
3527 			SPIRType out_type;
3528 			SPIRType in_type;
3529 			out_type.basetype = SPIRType::Float;
3530 			in_type.basetype = SPIRType::UInt;
3531 			out_type.vecsize = 1;
3532 			in_type.vecsize = 1;
3533 			out_type.width = 32;
3534 			in_type.width = 32;
3535 
3536 			char print_buffer[32];
3537 			sprintf(print_buffer, "0x%xu", c.scalar(col, row));
3538 			res = join(bitcast_glsl_op(out_type, in_type), "(", print_buffer, ")");
3539 		}
3540 		else
3541 		{
3542 			if (float_value == numeric_limits<float>::infinity())
3543 			{
3544 				if (backend.float_literal_suffix)
3545 					res = "(1.0f / 0.0f)";
3546 				else
3547 					res = "(1.0 / 0.0)";
3548 			}
3549 			else if (float_value == -numeric_limits<float>::infinity())
3550 			{
3551 				if (backend.float_literal_suffix)
3552 					res = "(-1.0f / 0.0f)";
3553 				else
3554 					res = "(-1.0 / 0.0)";
3555 			}
3556 			else if (std::isnan(float_value))
3557 			{
3558 				if (backend.float_literal_suffix)
3559 					res = "(0.0f / 0.0f)";
3560 				else
3561 					res = "(0.0 / 0.0)";
3562 			}
3563 			else
3564 				SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant.");
3565 		}
3566 	}
3567 	else
3568 	{
3569 		res = convert_to_string(float_value, current_locale_radix_character);
3570 		if (backend.float_literal_suffix)
3571 			res += "f";
3572 	}
3573 
3574 	return res;
3575 }
3576 
3577 std::string CompilerGLSL::convert_double_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
3578 {
3579 	string res;
3580 	double double_value = c.scalar_f64(col, row);
3581 
3582 	if (std::isnan(double_value) || std::isinf(double_value))
3583 	{
3584 		// Use special representation.
3585 		if (!is_legacy())
3586 		{
3587 			SPIRType out_type;
3588 			SPIRType in_type;
3589 			out_type.basetype = SPIRType::Double;
3590 			in_type.basetype = SPIRType::UInt64;
3591 			out_type.vecsize = 1;
3592 			in_type.vecsize = 1;
3593 			out_type.width = 64;
3594 			in_type.width = 64;
3595 
3596 			uint64_t u64_value = c.scalar_u64(col, row);
3597 
3598 			if (options.es)
3599 				SPIRV_CROSS_THROW("64-bit integers/float not supported in ES profile.");
3600 			require_extension_internal("GL_ARB_gpu_shader_int64");
3601 
3602 			char print_buffer[64];
3603 			sprintf(print_buffer, "0x%llx%s", static_cast<unsigned long long>(u64_value),
3604 			        backend.long_long_literal_suffix ? "ull" : "ul");
3605 			res = join(bitcast_glsl_op(out_type, in_type), "(", print_buffer, ")");
3606 		}
3607 		else
3608 		{
3609 			if (options.es)
3610 				SPIRV_CROSS_THROW("FP64 not supported in ES profile.");
3611 			if (options.version < 400)
3612 				require_extension_internal("GL_ARB_gpu_shader_fp64");
3613 
3614 			if (double_value == numeric_limits<double>::infinity())
3615 			{
3616 				if (backend.double_literal_suffix)
3617 					res = "(1.0lf / 0.0lf)";
3618 				else
3619 					res = "(1.0 / 0.0)";
3620 			}
3621 			else if (double_value == -numeric_limits<double>::infinity())
3622 			{
3623 				if (backend.double_literal_suffix)
3624 					res = "(-1.0lf / 0.0lf)";
3625 				else
3626 					res = "(-1.0 / 0.0)";
3627 			}
3628 			else if (std::isnan(double_value))
3629 			{
3630 				if (backend.double_literal_suffix)
3631 					res = "(0.0lf / 0.0lf)";
3632 				else
3633 					res = "(0.0 / 0.0)";
3634 			}
3635 			else
3636 				SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant.");
3637 		}
3638 	}
3639 	else
3640 	{
3641 		res = convert_to_string(double_value, current_locale_radix_character);
3642 		if (backend.double_literal_suffix)
3643 			res += "lf";
3644 	}
3645 
3646 	return res;
3647 }
3648 
3649 #ifdef _MSC_VER
3650 #pragma warning(pop)
3651 #endif
3652 
3653 string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t vector)
3654 {
3655 	auto type = get<SPIRType>(c.constant_type);
3656 	type.columns = 1;
3657 
3658 	auto scalar_type = type;
3659 	scalar_type.vecsize = 1;
3660 
3661 	string res;
3662 	bool splat = backend.use_constructor_splatting && c.vector_size() > 1;
3663 	bool swizzle_splat = backend.can_swizzle_scalar && c.vector_size() > 1;
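	// Splatting collapses a vector of identical scalars into a single argument, e.g. (illustrative)
	// vec4(1.0) instead of vec4(1.0, 1.0, 1.0, 1.0); swizzle splat instead swizzles a scalar value
	// on backends that can do so.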
3664 
3665 	if (!type_is_floating_point(type))
3666 	{
3667 		// Cannot swizzle literal integers as a special case.
3668 		swizzle_splat = false;
3669 	}
3670 
3671 	if (splat || swizzle_splat)
3672 	{
3673 		// Cannot use constant splatting if we have specialization constants somewhere in the vector.
3674 		for (uint32_t i = 0; i < c.vector_size(); i++)
3675 		{
3676 			if (c.specialization_constant_id(vector, i) != 0)
3677 			{
3678 				splat = false;
3679 				swizzle_splat = false;
3680 				break;
3681 			}
3682 		}
3683 	}
3684 
3685 	if (splat || swizzle_splat)
3686 	{
3687 		if (type.width == 64)
3688 		{
3689 			uint64_t ident = c.scalar_u64(vector, 0);
3690 			for (uint32_t i = 1; i < c.vector_size(); i++)
3691 			{
3692 				if (ident != c.scalar_u64(vector, i))
3693 				{
3694 					splat = false;
3695 					swizzle_splat = false;
3696 					break;
3697 				}
3698 			}
3699 		}
3700 		else
3701 		{
3702 			uint32_t ident = c.scalar(vector, 0);
3703 			for (uint32_t i = 1; i < c.vector_size(); i++)
3704 			{
3705 				if (ident != c.scalar(vector, i))
3706 				{
3707 					splat = false;
3708 					swizzle_splat = false;
3709 				}
3710 			}
3711 		}
3712 	}
3713 
3714 	if (c.vector_size() > 1 && !swizzle_splat)
3715 		res += type_to_glsl(type) + "(";
3716 
3717 	switch (type.basetype)
3718 	{
3719 	case SPIRType::Half:
3720 		if (splat || swizzle_splat)
3721 		{
3722 			res += convert_half_to_string(c, vector, 0);
3723 			if (swizzle_splat)
3724 				res = remap_swizzle(get<SPIRType>(c.constant_type), 1, res);
3725 		}
3726 		else
3727 		{
3728 			for (uint32_t i = 0; i < c.vector_size(); i++)
3729 			{
3730 				if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
3731 					res += to_name(c.specialization_constant_id(vector, i));
3732 				else
3733 					res += convert_half_to_string(c, vector, i);
3734 
3735 				if (i + 1 < c.vector_size())
3736 					res += ", ";
3737 			}
3738 		}
3739 		break;
3740 
3741 	case SPIRType::Float:
3742 		if (splat || swizzle_splat)
3743 		{
3744 			res += convert_float_to_string(c, vector, 0);
3745 			if (swizzle_splat)
3746 				res = remap_swizzle(get<SPIRType>(c.constant_type), 1, res);
3747 		}
3748 		else
3749 		{
3750 			for (uint32_t i = 0; i < c.vector_size(); i++)
3751 			{
3752 				if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
3753 					res += to_name(c.specialization_constant_id(vector, i));
3754 				else
3755 					res += convert_float_to_string(c, vector, i);
3756 
3757 				if (i + 1 < c.vector_size())
3758 					res += ", ";
3759 			}
3760 		}
3761 		break;
3762 
3763 	case SPIRType::Double:
3764 		if (splat || swizzle_splat)
3765 		{
3766 			res += convert_double_to_string(c, vector, 0);
3767 			if (swizzle_splat)
3768 				res = remap_swizzle(get<SPIRType>(c.constant_type), 1, res);
3769 		}
3770 		else
3771 		{
3772 			for (uint32_t i = 0; i < c.vector_size(); i++)
3773 			{
3774 				if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
3775 					res += to_name(c.specialization_constant_id(vector, i));
3776 				else
3777 					res += convert_double_to_string(c, vector, i);
3778 
3779 				if (i + 1 < c.vector_size())
3780 					res += ", ";
3781 			}
3782 		}
3783 		break;
3784 
3785 	case SPIRType::Int64:
3786 		if (splat)
3787 		{
3788 			res += convert_to_string(c.scalar_i64(vector, 0));
3789 			if (backend.long_long_literal_suffix)
3790 				res += "ll";
3791 			else
3792 				res += "l";
3793 		}
3794 		else
3795 		{
3796 			for (uint32_t i = 0; i < c.vector_size(); i++)
3797 			{
3798 				if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
3799 					res += to_name(c.specialization_constant_id(vector, i));
3800 				else
3801 				{
3802 					res += convert_to_string(c.scalar_i64(vector, i));
3803 					if (backend.long_long_literal_suffix)
3804 						res += "ll";
3805 					else
3806 						res += "l";
3807 				}
3808 
3809 				if (i + 1 < c.vector_size())
3810 					res += ", ";
3811 			}
3812 		}
3813 		break;
3814 
3815 	case SPIRType::UInt64:
3816 		if (splat)
3817 		{
3818 			res += convert_to_string(c.scalar_u64(vector, 0));
3819 			if (backend.long_long_literal_suffix)
3820 				res += "ull";
3821 			else
3822 				res += "ul";
3823 		}
3824 		else
3825 		{
3826 			for (uint32_t i = 0; i < c.vector_size(); i++)
3827 			{
3828 				if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
3829 					res += to_name(c.specialization_constant_id(vector, i));
3830 				else
3831 				{
3832 					res += convert_to_string(c.scalar_u64(vector, i));
3833 					if (backend.long_long_literal_suffix)
3834 						res += "ull";
3835 					else
3836 						res += "ul";
3837 				}
3838 
3839 				if (i + 1 < c.vector_size())
3840 					res += ", ";
3841 			}
3842 		}
3843 		break;
3844 
3845 	case SPIRType::UInt:
3846 		if (splat)
3847 		{
3848 			res += convert_to_string(c.scalar(vector, 0));
3849 			if (is_legacy())
3850 			{
3851 				// Fake unsigned constant literals with signed ones if possible.
3852 				// Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed.
3853 				if (c.scalar_i32(vector, 0) < 0)
3854 					SPIRV_CROSS_THROW("Tried to convert uint literal into int, but this made the literal negative.");
3855 			}
3856 			else if (backend.uint32_t_literal_suffix)
3857 				res += "u";
3858 		}
3859 		else
3860 		{
3861 			for (uint32_t i = 0; i < c.vector_size(); i++)
3862 			{
3863 				if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
3864 					res += to_name(c.specialization_constant_id(vector, i));
3865 				else
3866 				{
3867 					res += convert_to_string(c.scalar(vector, i));
3868 					if (is_legacy())
3869 					{
3870 						// Fake unsigned constant literals with signed ones if possible.
3871 						// Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed.
3872 						if (c.scalar_i32(vector, i) < 0)
3873 							SPIRV_CROSS_THROW(
3874 							    "Tried to convert uint literal into int, but this made the literal negative.");
3875 					}
3876 					else if (backend.uint32_t_literal_suffix)
3877 						res += "u";
3878 				}
3879 
3880 				if (i + 1 < c.vector_size())
3881 					res += ", ";
3882 			}
3883 		}
3884 		break;
3885 
3886 	case SPIRType::Int:
3887 		if (splat)
3888 			res += convert_to_string(c.scalar_i32(vector, 0));
3889 		else
3890 		{
3891 			for (uint32_t i = 0; i < c.vector_size(); i++)
3892 			{
3893 				if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
3894 					res += to_name(c.specialization_constant_id(vector, i));
3895 				else
3896 					res += convert_to_string(c.scalar_i32(vector, i));
3897 				if (i + 1 < c.vector_size())
3898 					res += ", ";
3899 			}
3900 		}
3901 		break;
3902 
3903 	case SPIRType::UShort:
3904 		if (splat)
3905 		{
3906 			res += convert_to_string(c.scalar(vector, 0));
3907 		}
3908 		else
3909 		{
3910 			for (uint32_t i = 0; i < c.vector_size(); i++)
3911 			{
3912 				if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
3913 					res += to_name(c.specialization_constant_id(vector, i));
3914 				else
3915 				{
3916 					if (*backend.uint16_t_literal_suffix)
3917 					{
3918 						res += convert_to_string(c.scalar_u16(vector, i));
3919 						res += backend.uint16_t_literal_suffix;
3920 					}
3921 					else
3922 					{
3923 						// If backend doesn't have a literal suffix, we need to value cast.
3924 						res += type_to_glsl(scalar_type);
3925 						res += "(";
3926 						res += convert_to_string(c.scalar_u16(vector, i));
3927 						res += ")";
3928 					}
3929 				}
3930 
3931 				if (i + 1 < c.vector_size())
3932 					res += ", ";
3933 			}
3934 		}
3935 		break;
3936 
3937 	case SPIRType::Short:
3938 		if (splat)
3939 		{
3940 			res += convert_to_string(c.scalar_i16(vector, 0));
3941 		}
3942 		else
3943 		{
3944 			for (uint32_t i = 0; i < c.vector_size(); i++)
3945 			{
3946 				if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
3947 					res += to_name(c.specialization_constant_id(vector, i));
3948 				else
3949 				{
3950 					if (*backend.int16_t_literal_suffix)
3951 					{
3952 						res += convert_to_string(c.scalar_i16(vector, i));
3953 						res += backend.int16_t_literal_suffix;
3954 					}
3955 					else
3956 					{
3957 						// If backend doesn't have a literal suffix, we need to value cast.
3958 						res += type_to_glsl(scalar_type);
3959 						res += "(";
3960 						res += convert_to_string(c.scalar_i16(vector, i));
3961 						res += ")";
3962 					}
3963 				}
3964 
3965 				if (i + 1 < c.vector_size())
3966 					res += ", ";
3967 			}
3968 		}
3969 		break;
3970 
3971 	case SPIRType::UByte:
3972 		if (splat)
3973 		{
3974 			res += convert_to_string(c.scalar_u8(vector, 0));
3975 		}
3976 		else
3977 		{
3978 			for (uint32_t i = 0; i < c.vector_size(); i++)
3979 			{
3980 				if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
3981 					res += to_name(c.specialization_constant_id(vector, i));
3982 				else
3983 				{
3984 					res += type_to_glsl(scalar_type);
3985 					res += "(";
3986 					res += convert_to_string(c.scalar_u8(vector, i));
3987 					res += ")";
3988 				}
3989 
3990 				if (i + 1 < c.vector_size())
3991 					res += ", ";
3992 			}
3993 		}
3994 		break;
3995 
3996 	case SPIRType::SByte:
3997 		if (splat)
3998 		{
3999 			res += convert_to_string(c.scalar_i8(vector, 0));
4000 		}
4001 		else
4002 		{
4003 			for (uint32_t i = 0; i < c.vector_size(); i++)
4004 			{
4005 				if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
4006 					res += to_name(c.specialization_constant_id(vector, i));
4007 				else
4008 				{
4009 					res += type_to_glsl(scalar_type);
4010 					res += "(";
4011 					res += convert_to_string(c.scalar_i8(vector, i));
4012 					res += ")";
4013 				}
4014 
4015 				if (i + 1 < c.vector_size())
4016 					res += ", ";
4017 			}
4018 		}
4019 		break;
4020 
4021 	case SPIRType::Boolean:
4022 		if (splat)
4023 			res += c.scalar(vector, 0) ? "true" : "false";
4024 		else
4025 		{
4026 			for (uint32_t i = 0; i < c.vector_size(); i++)
4027 			{
4028 				if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
4029 					res += to_name(c.specialization_constant_id(vector, i));
4030 				else
4031 					res += c.scalar(vector, i) ? "true" : "false";
4032 
4033 				if (i + 1 < c.vector_size())
4034 					res += ", ";
4035 			}
4036 		}
4037 		break;
4038 
4039 	default:
4040 		SPIRV_CROSS_THROW("Invalid constant expression basetype.");
4041 	}
4042 
4043 	if (c.vector_size() > 1 && !swizzle_splat)
4044 		res += ")";
4045 
4046 	return res;
4047 }
4048 
4049 SPIRExpression &CompilerGLSL::emit_uninitialized_temporary_expression(uint32_t type, uint32_t id)
4050 {
4051 	forced_temporaries.insert(id);
4052 	emit_uninitialized_temporary(type, id);
4053 	return set<SPIRExpression>(id, to_name(id), type, true);
4054 }
4055 
4056 void CompilerGLSL::emit_uninitialized_temporary(uint32_t result_type, uint32_t result_id)
4057 {
4058 	// If we're declaring temporaries inside continue blocks,
4059 	// we must declare the temporary in the loop header so that the continue block can avoid declaring new variables.
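	// For example (illustrative), a continue block emitted as the "increment" part of a for-loop cannot
	// contain declarations, so the temporary is hoisted and declared before the loop instead.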
4060 	if (current_continue_block && !hoisted_temporaries.count(result_id))
4061 	{
4062 		auto &header = get<SPIRBlock>(current_continue_block->loop_dominator);
4063 		if (find_if(begin(header.declare_temporary), end(header.declare_temporary),
4064 		            [result_type, result_id](const pair<uint32_t, uint32_t> &tmp) {
4065 			            return tmp.first == result_type && tmp.second == result_id;
4066 		            }) == end(header.declare_temporary))
4067 		{
4068 			header.declare_temporary.emplace_back(result_type, result_id);
4069 			hoisted_temporaries.insert(result_id);
4070 			force_recompile();
4071 		}
4072 	}
4073 	else if (hoisted_temporaries.count(result_id) == 0)
4074 	{
4075 		auto &type = get<SPIRType>(result_type);
4076 		auto &flags = ir.meta[result_id].decoration.decoration_flags;
4077 
4078 		// The result_id has not been made into an expression yet, so use flags interface.
4079 		add_local_variable_name(result_id);
4080 		statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(result_id)), ";");
4081 	}
4082 }
4083 
4084 string CompilerGLSL::declare_temporary(uint32_t result_type, uint32_t result_id)
4085 {
4086 	auto &type = get<SPIRType>(result_type);
4087 	auto &flags = ir.meta[result_id].decoration.decoration_flags;
4088 
4089 	// If we're declaring temporaries inside continue blocks,
4090 	// we must declare the temporary in the loop header so that the continue block can avoid declaring new variables.
4091 	if (current_continue_block && !hoisted_temporaries.count(result_id))
4092 	{
4093 		auto &header = get<SPIRBlock>(current_continue_block->loop_dominator);
4094 		if (find_if(begin(header.declare_temporary), end(header.declare_temporary),
4095 		            [result_type, result_id](const pair<uint32_t, uint32_t> &tmp) {
4096 			            return tmp.first == result_type && tmp.second == result_id;
4097 		            }) == end(header.declare_temporary))
4098 		{
4099 			header.declare_temporary.emplace_back(result_type, result_id);
4100 			hoisted_temporaries.insert(result_id);
4101 			force_recompile();
4102 		}
4103 
4104 		return join(to_name(result_id), " = ");
4105 	}
4106 	else if (hoisted_temporaries.count(result_id))
4107 	{
4108 		// The temporary has already been declared earlier, so just "declare" the temporary by writing to it.
4109 		return join(to_name(result_id), " = ");
4110 	}
4111 	else
4112 	{
4113 		// The result_id has not been made into an expression yet, so use flags interface.
4114 		add_local_variable_name(result_id);
4115 		return join(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(result_id)), " = ");
4116 	}
4117 }
4118 
4119 bool CompilerGLSL::expression_is_forwarded(uint32_t id) const
4120 {
4121 	return forwarded_temporaries.count(id) != 0;
4122 }
4123 
4124 bool CompilerGLSL::expression_suppresses_usage_tracking(uint32_t id) const
4125 {
4126 	return suppressed_usage_tracking.count(id) != 0;
4127 }
4128 
4129 SPIRExpression &CompilerGLSL::emit_op(uint32_t result_type, uint32_t result_id, const string &rhs, bool forwarding,
4130                                       bool suppress_usage_tracking)
4131 {
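	// When forwarding, the RHS string itself becomes the expression for result_id and is inlined at each
	// use site; otherwise a temporary is declared, e.g. (illustrative) "vec4 _15 = a + b;", and later
	// uses refer to "_15".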
4132 	if (forwarding && (forced_temporaries.find(result_id) == end(forced_temporaries)))
4133 	{
4134 		// Just forward it without temporary.
4135 		// If the forward is trivial, we do not force flushing to temporary for this expression.
4136 		forwarded_temporaries.insert(result_id);
4137 		if (suppress_usage_tracking)
4138 			suppressed_usage_tracking.insert(result_id);
4139 
4140 		return set<SPIRExpression>(result_id, rhs, result_type, true);
4141 	}
4142 	else
4143 	{
4144 		// If expression isn't immutable, bind it to a temporary and make the new temporary immutable (they always are).
4145 		statement(declare_temporary(result_type, result_id), rhs, ";");
4146 		return set<SPIRExpression>(result_id, to_name(result_id), result_type, true);
4147 	}
4148 }
4149 
4150 void CompilerGLSL::emit_unary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op)
4151 {
4152 	bool forward = should_forward(op0);
4153 	emit_op(result_type, result_id, join(op, to_enclosed_unpacked_expression(op0)), forward);
4154 	inherit_expression_dependencies(result_id, op0);
4155 }
4156 
4157 void CompilerGLSL::emit_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op)
4158 {
4159 	bool forward = should_forward(op0) && should_forward(op1);
4160 	emit_op(result_type, result_id,
4161 	        join(to_enclosed_unpacked_expression(op0), " ", op, " ", to_enclosed_unpacked_expression(op1)), forward);
4162 
4163 	inherit_expression_dependencies(result_id, op0);
4164 	inherit_expression_dependencies(result_id, op1);
4165 }
4166 
4167 void CompilerGLSL::emit_unrolled_unary_op(uint32_t result_type, uint32_t result_id, uint32_t operand, const char *op)
4168 {
4169 	auto &type = get<SPIRType>(result_type);
4170 	auto expr = type_to_glsl_constructor(type);
4171 	expr += '(';
4172 	for (uint32_t i = 0; i < type.vecsize; i++)
4173 	{
4174 		// Make sure to call to_expression multiple times to ensure
4175 		// that these expressions are properly flushed to temporaries if needed.
4176 		expr += op;
4177 		expr += to_extract_component_expression(operand, i);
4178 
4179 		if (i + 1 < type.vecsize)
4180 			expr += ", ";
4181 	}
4182 	expr += ')';
4183 	emit_op(result_type, result_id, expr, should_forward(operand));
4184 
4185 	inherit_expression_dependencies(result_id, operand);
4186 }
4187 
4188 void CompilerGLSL::emit_unrolled_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
4189                                            const char *op, bool negate, SPIRType::BaseType expected_type)
4190 {
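	// Unrolls the operation per component into a constructor, e.g. (illustrative) a component-wise
	// comparison of two vec2 values may be emitted as bvec2(a.x < b.x, a.y < b.y).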
4191 	auto &type0 = expression_type(op0);
4192 	auto &type1 = expression_type(op1);
4193 
4194 	SPIRType target_type0 = type0;
4195 	SPIRType target_type1 = type1;
4196 	target_type0.basetype = expected_type;
4197 	target_type1.basetype = expected_type;
4198 	target_type0.vecsize = 1;
4199 	target_type1.vecsize = 1;
4200 
4201 	auto &type = get<SPIRType>(result_type);
4202 	auto expr = type_to_glsl_constructor(type);
4203 	expr += '(';
4204 	for (uint32_t i = 0; i < type.vecsize; i++)
4205 	{
4206 		// Make sure to call to_expression multiple times to ensure
4207 		// that these expressions are properly flushed to temporaries if needed.
4208 		if (negate)
4209 			expr += "!(";
4210 
4211 		if (expected_type != SPIRType::Unknown && type0.basetype != expected_type)
4212 			expr += bitcast_expression(target_type0, type0.basetype, to_extract_component_expression(op0, i));
4213 		else
4214 			expr += to_extract_component_expression(op0, i);
4215 
4216 		expr += ' ';
4217 		expr += op;
4218 		expr += ' ';
4219 
4220 		if (expected_type != SPIRType::Unknown && type1.basetype != expected_type)
4221 			expr += bitcast_expression(target_type1, type1.basetype, to_extract_component_expression(op1, i));
4222 		else
4223 			expr += to_extract_component_expression(op1, i);
4224 
4225 		if (negate)
4226 			expr += ")";
4227 
4228 		if (i + 1 < type.vecsize)
4229 			expr += ", ";
4230 	}
4231 	expr += ')';
4232 	emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1));
4233 
4234 	inherit_expression_dependencies(result_id, op0);
4235 	inherit_expression_dependencies(result_id, op1);
4236 }
4237 
4238 SPIRType CompilerGLSL::binary_op_bitcast_helper(string &cast_op0, string &cast_op1, SPIRType::BaseType &input_type,
4239                                                 uint32_t op0, uint32_t op1, bool skip_cast_if_equal_type)
4240 {
4241 	auto &type0 = expression_type(op0);
4242 	auto &type1 = expression_type(op1);
4243 
4244 	// We have to bitcast if our inputs are of different type, or if our types are not equal to expected inputs.
4245 	// For some functions like OpIEqual and INotEqual, we don't care if inputs are of different types than expected
4246 	// since equality test is exactly the same.
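	// For example (illustrative), an OpSLessThan whose operands are int and uint will bitcast both
	// operands to the signed input type before the comparison is emitted.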
4247 	bool cast = (type0.basetype != type1.basetype) || (!skip_cast_if_equal_type && type0.basetype != input_type);
4248 
4249 	// Create a fake type so we can bitcast to it.
4250 	// We only deal with regular arithmetic types here like int, uints and so on.
4251 	SPIRType expected_type;
4252 	expected_type.basetype = input_type;
4253 	expected_type.vecsize = type0.vecsize;
4254 	expected_type.columns = type0.columns;
4255 	expected_type.width = type0.width;
4256 
4257 	if (cast)
4258 	{
4259 		cast_op0 = bitcast_glsl(expected_type, op0);
4260 		cast_op1 = bitcast_glsl(expected_type, op1);
4261 	}
4262 	else
4263 	{
4264 		// If we don't cast, our actual input type is that of the first (or second) argument.
4265 		cast_op0 = to_enclosed_unpacked_expression(op0);
4266 		cast_op1 = to_enclosed_unpacked_expression(op1);
4267 		input_type = type0.basetype;
4268 	}
4269 
4270 	return expected_type;
4271 }
4272 
4273 void CompilerGLSL::emit_binary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
4274                                        const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type)
4275 {
4276 	string cast_op0, cast_op1;
4277 	auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type);
4278 	auto &out_type = get<SPIRType>(result_type);
4279 
4280 	// We might have casted away from the result type, so bitcast again.
4281 	// For example, arithmetic right shift with uint inputs.
4282 	// Special case boolean outputs since relational opcodes output booleans instead of int/uint.
4283 	string expr;
4284 	if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean)
4285 	{
4286 		expected_type.basetype = input_type;
4287 		expr = bitcast_glsl_op(out_type, expected_type);
4288 		expr += '(';
4289 		expr += join(cast_op0, " ", op, " ", cast_op1);
4290 		expr += ')';
4291 	}
4292 	else
4293 		expr += join(cast_op0, " ", op, " ", cast_op1);
4294 
4295 	emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1));
4296 	inherit_expression_dependencies(result_id, op0);
4297 	inherit_expression_dependencies(result_id, op1);
4298 }
4299 
4300 void CompilerGLSL::emit_unary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op)
4301 {
4302 	bool forward = should_forward(op0);
4303 	emit_op(result_type, result_id, join(op, "(", to_unpacked_expression(op0), ")"), forward);
4304 	inherit_expression_dependencies(result_id, op0);
4305 }
4306 
4307 void CompilerGLSL::emit_binary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
4308                                        const char *op)
4309 {
4310 	bool forward = should_forward(op0) && should_forward(op1);
4311 	emit_op(result_type, result_id, join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ")"),
4312 	        forward);
4313 	inherit_expression_dependencies(result_id, op0);
4314 	inherit_expression_dependencies(result_id, op1);
4315 }
4316 
4317 void CompilerGLSL::emit_unary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op,
4318                                            SPIRType::BaseType input_type, SPIRType::BaseType expected_result_type)
4319 {
4320 	auto &out_type = get<SPIRType>(result_type);
4321 	auto &expr_type = expression_type(op0);
4322 	auto expected_type = out_type;
4323 
4324 	// Bit-widths might be different in unary cases because this path is also used for SConvert/UConvert and friends.
4325 	expected_type.basetype = input_type;
4326 	expected_type.width = expr_type.width;
4327 	string cast_op = expr_type.basetype != input_type ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0);
4328 
4329 	string expr;
4330 	if (out_type.basetype != expected_result_type)
4331 	{
4332 		expected_type.basetype = expected_result_type;
4333 		expected_type.width = out_type.width;
4334 		expr = bitcast_glsl_op(out_type, expected_type);
4335 		expr += '(';
4336 		expr += join(op, "(", cast_op, ")");
4337 		expr += ')';
4338 	}
4339 	else
4340 	{
4341 		expr += join(op, "(", cast_op, ")");
4342 	}
4343 
4344 	emit_op(result_type, result_id, expr, should_forward(op0));
4345 	inherit_expression_dependencies(result_id, op0);
4346 }
4347 
4348 // Very special case. Handling bitfieldExtract requires us to deal with different bitcasts of different signs
4349 // and different vector sizes all at once. Need a special purpose method here.
4350 void CompilerGLSL::emit_trinary_func_op_bitextract(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
4351                                                    uint32_t op2, const char *op,
4352                                                    SPIRType::BaseType expected_result_type,
4353                                                    SPIRType::BaseType input_type0, SPIRType::BaseType input_type1,
4354                                                    SPIRType::BaseType input_type2)
4355 {
4356 	auto &out_type = get<SPIRType>(result_type);
4357 	auto expected_type = out_type;
4358 	expected_type.basetype = input_type0;
4359 
4360 	string cast_op0 =
4361 	    expression_type(op0).basetype != input_type0 ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0);
4362 
4363 	auto op1_expr = to_unpacked_expression(op1);
4364 	auto op2_expr = to_unpacked_expression(op2);
4365 
4366 	// Use value casts here instead. Input must be exactly int or uint, but SPIR-V might be 16-bit.
4367 	expected_type.basetype = input_type1;
4368 	expected_type.vecsize = 1;
4369 	string cast_op1 = expression_type(op1).basetype != input_type1 ?
4370 	                      join(type_to_glsl_constructor(expected_type), "(", op1_expr, ")") :
4371 	                      op1_expr;
4372 
4373 	expected_type.basetype = input_type2;
4374 	expected_type.vecsize = 1;
4375 	string cast_op2 = expression_type(op2).basetype != input_type2 ?
4376 	                      join(type_to_glsl_constructor(expected_type), "(", op2_expr, ")") :
4377 	                      op2_expr;
4378 
4379 	string expr;
4380 	if (out_type.basetype != expected_result_type)
4381 	{
4382 		expected_type.vecsize = out_type.vecsize;
4383 		expected_type.basetype = expected_result_type;
4384 		expr = bitcast_glsl_op(out_type, expected_type);
4385 		expr += '(';
4386 		expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")");
4387 		expr += ')';
4388 	}
4389 	else
4390 	{
4391 		expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")");
4392 	}
4393 
4394 	emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1) && should_forward(op2));
4395 	inherit_expression_dependencies(result_id, op0);
4396 	inherit_expression_dependencies(result_id, op1);
4397 	inherit_expression_dependencies(result_id, op2);
4398 }
4399 
4400 void CompilerGLSL::emit_trinary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
4401                                              uint32_t op2, const char *op, SPIRType::BaseType input_type)
4402 {
4403 	auto &out_type = get<SPIRType>(result_type);
4404 	auto expected_type = out_type;
4405 	expected_type.basetype = input_type;
4406 	string cast_op0 =
4407 	    expression_type(op0).basetype != input_type ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0);
4408 	string cast_op1 =
4409 	    expression_type(op1).basetype != input_type ? bitcast_glsl(expected_type, op1) : to_unpacked_expression(op1);
4410 	string cast_op2 =
4411 	    expression_type(op2).basetype != input_type ? bitcast_glsl(expected_type, op2) : to_unpacked_expression(op2);
4412 
4413 	string expr;
4414 	if (out_type.basetype != input_type)
4415 	{
4416 		expr = bitcast_glsl_op(out_type, expected_type);
4417 		expr += '(';
4418 		expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")");
4419 		expr += ')';
4420 	}
4421 	else
4422 	{
4423 		expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")");
4424 	}
4425 
4426 	emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1) && should_forward(op2));
4427 	inherit_expression_dependencies(result_id, op0);
4428 	inherit_expression_dependencies(result_id, op1);
4429 	inherit_expression_dependencies(result_id, op2);
4430 }
4431 
4432 void CompilerGLSL::emit_binary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
4433                                             const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type)
4434 {
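	// For example, a UMin whose operands are declared as int in SPIR-V is typically
	// emitted as int(min(uint(a), uint(b))), forcing the unsigned overload while
	// keeping the declared result type.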
4435 	string cast_op0, cast_op1;
4436 	auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type);
4437 	auto &out_type = get<SPIRType>(result_type);
4438 
4439 	// Special case boolean outputs since relational opcodes output booleans instead of int/uint.
4440 	string expr;
4441 	if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean)
4442 	{
4443 		expected_type.basetype = input_type;
4444 		expr = bitcast_glsl_op(out_type, expected_type);
4445 		expr += '(';
4446 		expr += join(op, "(", cast_op0, ", ", cast_op1, ")");
4447 		expr += ')';
4448 	}
4449 	else
4450 	{
4451 		expr += join(op, "(", cast_op0, ", ", cast_op1, ")");
4452 	}
4453 
4454 	emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1));
4455 	inherit_expression_dependencies(result_id, op0);
4456 	inherit_expression_dependencies(result_id, op1);
4457 }
4458 
4459 void CompilerGLSL::emit_trinary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
4460                                         uint32_t op2, const char *op)
4461 {
4462 	bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2);
4463 	emit_op(result_type, result_id,
4464 	        join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ", ",
4465 	             to_unpacked_expression(op2), ")"),
4466 	        forward);
4467 
4468 	inherit_expression_dependencies(result_id, op0);
4469 	inherit_expression_dependencies(result_id, op1);
4470 	inherit_expression_dependencies(result_id, op2);
4471 }
4472 
4473 void CompilerGLSL::emit_quaternary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
4474                                            uint32_t op2, uint32_t op3, const char *op)
4475 {
4476 	bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2) && should_forward(op3);
4477 	emit_op(result_type, result_id,
4478 	        join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ", ",
4479 	             to_unpacked_expression(op2), ", ", to_unpacked_expression(op3), ")"),
4480 	        forward);
4481 
4482 	inherit_expression_dependencies(result_id, op0);
4483 	inherit_expression_dependencies(result_id, op1);
4484 	inherit_expression_dependencies(result_id, op2);
4485 	inherit_expression_dependencies(result_id, op3);
4486 }
4487 
4488 void CompilerGLSL::emit_bitfield_insert_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
4489                                            uint32_t op2, uint32_t op3, const char *op,
4490                                            SPIRType::BaseType offset_count_type)
4491 {
4492 	// Only need to cast offset/count arguments. Types of base/insert must be same as result type,
4493 	// and bitfieldInsert is sign invariant.
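	// With the default int offset/count type, a 16-bit offset/count pair ends up as, e.g.,
	// bitfieldInsert(base, insert, int(offset), int(count)).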
4494 	bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2) && should_forward(op3);
4495 
4496 	auto op0_expr = to_unpacked_expression(op0);
4497 	auto op1_expr = to_unpacked_expression(op1);
4498 	auto op2_expr = to_unpacked_expression(op2);
4499 	auto op3_expr = to_unpacked_expression(op3);
4500 
4501 	SPIRType target_type;
4502 	target_type.vecsize = 1;
4503 	target_type.basetype = offset_count_type;
4504 
4505 	if (expression_type(op2).basetype != offset_count_type)
4506 	{
4507 		// Value-cast here. Input might be 16-bit. GLSL requires int.
4508 		op2_expr = join(type_to_glsl_constructor(target_type), "(", op2_expr, ")");
4509 	}
4510 
4511 	if (expression_type(op3).basetype != offset_count_type)
4512 	{
4513 		// Value-cast here. Input might be 16-bit. GLSL requires int.
4514 		op3_expr = join(type_to_glsl_constructor(target_type), "(", op3_expr, ")");
4515 	}
4516 
4517 	emit_op(result_type, result_id, join(op, "(", op0_expr, ", ", op1_expr, ", ", op2_expr, ", ", op3_expr, ")"),
4518 	        forward);
4519 
4520 	inherit_expression_dependencies(result_id, op0);
4521 	inherit_expression_dependencies(result_id, op1);
4522 	inherit_expression_dependencies(result_id, op2);
4523 	inherit_expression_dependencies(result_id, op3);
4524 }
4525 
4526 // EXT_shader_texture_lod only concerns fragment shaders so lod tex functions
4527 // are not allowed in ES 2 vertex shaders. But SPIR-V only supports lod tex
4528 // functions in vertex shaders so we revert those back to plain calls when
4529 // the lod is a constant value of zero.
4530 bool CompilerGLSL::check_explicit_lod_allowed(uint32_t lod)
4531 {
4532 	auto &execution = get_entry_point();
4533 	bool allowed = !is_legacy_es() || execution.model == ExecutionModelFragment;
4534 	if (!allowed && lod != 0)
4535 	{
4536 		auto *lod_constant = maybe_get<SPIRConstant>(lod);
4537 		if (!lod_constant || lod_constant->scalar_f32() != 0.0f)
4538 		{
4539 			SPIRV_CROSS_THROW("Explicit lod not allowed in legacy ES non-fragment shaders.");
4540 		}
4541 	}
4542 	return allowed;
4543 }
4544 
4545 string CompilerGLSL::legacy_tex_op(const std::string &op, const SPIRType &imgtype, uint32_t lod, uint32_t tex)
4546 {
4547 	const char *type;
4548 	switch (imgtype.image.dim)
4549 	{
4550 	case spv::Dim1D:
4551 		type = (imgtype.image.arrayed && !options.es) ? "1DArray" : "1D";
4552 		break;
4553 	case spv::Dim2D:
4554 		type = (imgtype.image.arrayed && !options.es) ? "2DArray" : "2D";
4555 		break;
4556 	case spv::Dim3D:
4557 		type = "3D";
4558 		break;
4559 	case spv::DimCube:
4560 		type = "Cube";
4561 		break;
4562 	case spv::DimRect:
4563 		type = "2DRect";
4564 		break;
4565 	case spv::DimBuffer:
4566 		type = "Buffer";
4567 		break;
4568 	case spv::DimSubpassData:
4569 		type = "2D";
4570 		break;
4571 	default:
4572 		type = "";
4573 		break;
4574 	}
4575 
4576 	bool use_explicit_lod = check_explicit_lod_allowed(lod);
4577 
4578 	if (op == "textureLod" || op == "textureProjLod" || op == "textureGrad" || op == "textureProjGrad")
4579 	{
4580 		if (is_legacy_es())
4581 		{
4582 			if (use_explicit_lod)
4583 				require_extension_internal("GL_EXT_shader_texture_lod");
4584 		}
4585 		else if (is_legacy())
4586 			require_extension_internal("GL_ARB_shader_texture_lod");
4587 	}
4588 
4589 	if (op == "textureLodOffset" || op == "textureProjLodOffset")
4590 	{
4591 		if (is_legacy_es())
4592 			SPIRV_CROSS_THROW(join(op, " not allowed in legacy ES"));
4593 
4594 		require_extension_internal("GL_EXT_gpu_shader4");
4595 	}
4596 
4597 	// GLES has very limited support for shadow samplers.
4598 	// Basically shadow2D and shadow2DProj work through EXT_shadow_samplers;
4599 	// everything else has to throw.
4600 	if (image_is_comparison(imgtype, tex) && is_legacy_es())
4601 	{
4602 		if (op == "texture" || op == "textureProj")
4603 			require_extension_internal("GL_EXT_shadow_samplers");
4604 		else
4605 			SPIRV_CROSS_THROW(join(op, " not allowed on depth samplers in legacy ES"));
4606 	}
4607 
4608 	bool is_es_and_depth = is_legacy_es() && image_is_comparison(imgtype, tex);
4609 	std::string type_prefix = image_is_comparison(imgtype, tex) ? "shadow" : "texture";
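	// For example, a plain "texture" op maps to texture2D() for a regular 2D sampler,
	// or shadow2DEXT() for a depth sampler on legacy ES.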
4610 
4611 	if (op == "texture")
4612 		return is_es_and_depth ? join(type_prefix, type, "EXT") : join(type_prefix, type);
4613 	else if (op == "textureLod")
4614 	{
4615 		if (use_explicit_lod)
4616 			return join(type_prefix, type, is_legacy_es() ? "LodEXT" : "Lod");
4617 		else
4618 			return join(type_prefix, type);
4619 	}
4620 	else if (op == "textureProj")
4621 		return join(type_prefix, type, is_es_and_depth ? "ProjEXT" : "Proj");
4622 	else if (op == "textureGrad")
4623 		return join(type_prefix, type, is_legacy_es() ? "GradEXT" : is_legacy_desktop() ? "GradARB" : "Grad");
4624 	else if (op == "textureProjLod")
4625 	{
4626 		if (use_explicit_lod)
4627 			return join(type_prefix, type, is_legacy_es() ? "ProjLodEXT" : "ProjLod");
4628 		else
4629 			return join(type_prefix, type, "Proj");
4630 	}
4631 	else if (op == "textureLodOffset")
4632 	{
4633 		if (use_explicit_lod)
4634 			return join(type_prefix, type, "LodOffset");
4635 		else
4636 			return join(type_prefix, type);
4637 	}
4638 	else if (op == "textureProjGrad")
4639 		return join(type_prefix, type,
4640 		            is_legacy_es() ? "ProjGradEXT" : is_legacy_desktop() ? "ProjGradARB" : "ProjGrad");
4641 	else if (op == "textureProjLodOffset")
4642 	{
4643 		if (use_explicit_lod)
4644 			return join(type_prefix, type, "ProjLodOffset");
4645 		else
4646 			return join(type_prefix, type, "ProjOffset");
4647 	}
4648 	else
4649 	{
4650 		SPIRV_CROSS_THROW(join("Unsupported legacy texture op: ", op));
4651 	}
4652 }
4653 
4654 bool CompilerGLSL::to_trivial_mix_op(const SPIRType &type, string &op, uint32_t left, uint32_t right, uint32_t lerp)
4655 {
4656 	auto *cleft = maybe_get<SPIRConstant>(left);
4657 	auto *cright = maybe_get<SPIRConstant>(right);
4658 	auto &lerptype = expression_type(lerp);
4659 
4660 	// If our targets aren't constants, we cannot use construction.
4661 	if (!cleft || !cright)
4662 		return false;
4663 
4664 	// If our targets are spec constants, we cannot use construction.
4665 	if (cleft->specialization || cright->specialization)
4666 		return false;
4667 
4668 	// We can only use trivial construction if we have a scalar
4669 	// (should be possible to do it for vectors as well, but that is overkill for now).
4670 	if (lerptype.basetype != SPIRType::Boolean || lerptype.vecsize > 1)
4671 		return false;
4672 
4673 	// If our bool selects between 0 and 1, we can cast from bool instead, making this a trivial constructor cast.
4674 	bool ret = false;
4675 	switch (type.basetype)
4676 	{
4677 	case SPIRType::Short:
4678 	case SPIRType::UShort:
4679 		ret = cleft->scalar_u16() == 0 && cright->scalar_u16() == 1;
4680 		break;
4681 
4682 	case SPIRType::Int:
4683 	case SPIRType::UInt:
4684 		ret = cleft->scalar() == 0 && cright->scalar() == 1;
4685 		break;
4686 
4687 	case SPIRType::Half:
4688 		ret = cleft->scalar_f16() == 0.0f && cright->scalar_f16() == 1.0f;
4689 		break;
4690 
4691 	case SPIRType::Float:
4692 		ret = cleft->scalar_f32() == 0.0f && cright->scalar_f32() == 1.0f;
4693 		break;
4694 
4695 	case SPIRType::Double:
4696 		ret = cleft->scalar_f64() == 0.0 && cright->scalar_f64() == 1.0;
4697 		break;
4698 
4699 	case SPIRType::Int64:
4700 	case SPIRType::UInt64:
4701 		ret = cleft->scalar_u64() == 0 && cright->scalar_u64() == 1;
4702 		break;
4703 
4704 	default:
4705 		break;
4706 	}
4707 
4708 	if (ret)
4709 		op = type_to_glsl_constructor(type);
4710 	return ret;
4711 }
4712 
4713 string CompilerGLSL::to_ternary_expression(const SPIRType &restype, uint32_t select, uint32_t true_value,
4714                                            uint32_t false_value)
4715 {
4716 	string expr;
4717 	auto &lerptype = expression_type(select);
4718 
4719 	if (lerptype.vecsize == 1)
4720 		expr = join(to_enclosed_expression(select), " ? ", to_enclosed_pointer_expression(true_value), " : ",
4721 		            to_enclosed_pointer_expression(false_value));
4722 	else
4723 	{
4724 		auto swiz = [this](uint32_t expression, uint32_t i) { return to_extract_component_expression(expression, i); };
4725 
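		// Roughly: vec3(s.x ? a.x : b.x, s.y ? a.y : b.y, s.z ? a.z : b.z) for a three-component select.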
4726 		expr = type_to_glsl_constructor(restype);
4727 		expr += "(";
4728 		for (uint32_t i = 0; i < restype.vecsize; i++)
4729 		{
4730 			expr += swiz(select, i);
4731 			expr += " ? ";
4732 			expr += swiz(true_value, i);
4733 			expr += " : ";
4734 			expr += swiz(false_value, i);
4735 			if (i + 1 < restype.vecsize)
4736 				expr += ", ";
4737 		}
4738 		expr += ")";
4739 	}
4740 
4741 	return expr;
4742 }
4743 
4744 void CompilerGLSL::emit_mix_op(uint32_t result_type, uint32_t id, uint32_t left, uint32_t right, uint32_t lerp)
4745 {
4746 	auto &lerptype = expression_type(lerp);
4747 	auto &restype = get<SPIRType>(result_type);
4748 
4749 	// If this results in a variable pointer, assume it may be written through.
4750 	if (restype.pointer)
4751 	{
4752 		register_write(left);
4753 		register_write(right);
4754 	}
4755 
4756 	string mix_op;
4757 	bool has_boolean_mix = *backend.boolean_mix_function &&
4758 	                       ((options.es && options.version >= 310) || (!options.es && options.version >= 450));
4759 	bool trivial_mix = to_trivial_mix_op(restype, mix_op, left, right, lerp);
4760 
4761 	// Cannot use boolean mix when the lerp argument is just a single boolean;
4762 	// fall back to a regular ternary expression instead.
4763 	if (lerptype.vecsize == 1)
4764 		has_boolean_mix = false;
4765 
4766 	// If we can reduce the mix to a simple cast, do so.
4767 	// This helps for cases like int(bool), uint(bool) which is implemented with
4768 	// OpSelect bool 1 0.
4769 	if (trivial_mix)
4770 	{
4771 		emit_unary_func_op(result_type, id, lerp, mix_op.c_str());
4772 	}
4773 	else if (!has_boolean_mix && lerptype.basetype == SPIRType::Boolean)
4774 	{
4775 		// Boolean mix not supported on desktop without extension.
4776 		// Was added in OpenGL 4.5 with ES 3.1 compat.
4777 		//
4778 		// Could use GL_EXT_shader_integer_mix on desktop at least,
4779 		// but Apple doesn't support it. :(
4780 		// Just implement it as ternary expressions.
4781 		auto expr = to_ternary_expression(get<SPIRType>(result_type), lerp, right, left);
4782 		emit_op(result_type, id, expr, should_forward(left) && should_forward(right) && should_forward(lerp));
4783 		inherit_expression_dependencies(id, left);
4784 		inherit_expression_dependencies(id, right);
4785 		inherit_expression_dependencies(id, lerp);
4786 	}
4787 	else if (lerptype.basetype == SPIRType::Boolean)
4788 		emit_trinary_func_op(result_type, id, left, right, lerp, backend.boolean_mix_function);
4789 	else
4790 		emit_trinary_func_op(result_type, id, left, right, lerp, "mix");
4791 }
4792 
4793 string CompilerGLSL::to_combined_image_sampler(VariableID image_id, VariableID samp_id)
4794 {
4795 	// Keep track of the array indices we have used to load the image.
4796 	// We'll need to use the same array index into the combined image sampler array.
4797 	auto image_expr = to_expression(image_id);
4798 	string array_expr;
4799 	auto array_index = image_expr.find_first_of('[');
4800 	if (array_index != string::npos)
4801 		array_expr = image_expr.substr(array_index, string::npos);
4802 
4803 	auto &args = current_function->arguments;
4804 
4805 	// For GLSL and ESSL targets, we must enumerate every (texture2D, sampler) combination used as a sampler2D
4806 	// and redirect each such combination to a new sampler2D uniform.
4807 	auto *image = maybe_get_backing_variable(image_id);
4808 	auto *samp = maybe_get_backing_variable(samp_id);
4809 	if (image)
4810 		image_id = image->self;
4811 	if (samp)
4812 		samp_id = samp->self;
4813 
4814 	auto image_itr = find_if(begin(args), end(args),
4815 	                         [image_id](const SPIRFunction::Parameter &param) { return image_id == param.id; });
4816 
4817 	auto sampler_itr = find_if(begin(args), end(args),
4818 	                           [samp_id](const SPIRFunction::Parameter &param) { return samp_id == param.id; });
4819 
4820 	if (image_itr != end(args) || sampler_itr != end(args))
4821 	{
4822 		// If either the image or the sampler originates from a function parameter, we will find it in our argument list.
4823 		bool global_image = image_itr == end(args);
4824 		bool global_sampler = sampler_itr == end(args);
4825 		VariableID iid = global_image ? image_id : VariableID(uint32_t(image_itr - begin(args)));
4826 		VariableID sid = global_sampler ? samp_id : VariableID(uint32_t(sampler_itr - begin(args)));
4827 
4828 		auto &combined = current_function->combined_parameters;
4829 		auto itr = find_if(begin(combined), end(combined), [=](const SPIRFunction::CombinedImageSamplerParameter &p) {
4830 			return p.global_image == global_image && p.global_sampler == global_sampler && p.image_id == iid &&
4831 			       p.sampler_id == sid;
4832 		});
4833 
4834 		if (itr != end(combined))
4835 			return to_expression(itr->id) + array_expr;
4836 		else
4837 		{
4838 			SPIRV_CROSS_THROW(
4839 			    "Cannot find mapping for combined sampler parameter, was build_combined_image_samplers() used "
4840 			    "before compile() was called?");
4841 		}
4842 	}
4843 	else
4844 	{
4845 		// For global sampler2D, look directly at the global remapping table.
4846 		auto &mapping = combined_image_samplers;
4847 		auto itr = find_if(begin(mapping), end(mapping), [image_id, samp_id](const CombinedImageSampler &combined) {
4848 			return combined.image_id == image_id && combined.sampler_id == samp_id;
4849 		});
4850 
4851 		if (itr != end(combined_image_samplers))
4852 			return to_expression(itr->combined_id) + array_expr;
4853 		else
4854 		{
4855 			SPIRV_CROSS_THROW("Cannot find mapping for combined sampler, was build_combined_image_samplers() used "
4856 			                  "before compile() was called?");
4857 		}
4858 	}
4859 }
4860 
4861 void CompilerGLSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id)
4862 {
4863 	if (options.vulkan_semantics && combined_image_samplers.empty())
4864 	{
4865 		emit_binary_func_op(result_type, result_id, image_id, samp_id,
4866 		                    type_to_glsl(get<SPIRType>(result_type), result_id).c_str());
4867 	}
4868 	else
4869 	{
4870 		// Make sure to suppress usage tracking. It is illegal to create temporaries of opaque types.
4871 		emit_op(result_type, result_id, to_combined_image_sampler(image_id, samp_id), true, true);
4872 	}
4873 
4874 	// Make sure to suppress usage tracking and any expression invalidation.
4875 	// It is illegal to create temporaries of opaque types.
4876 	forwarded_temporaries.erase(result_id);
4877 }
4878 
4879 static inline bool image_opcode_is_sample_no_dref(Op op)
4880 {
4881 	switch (op)
4882 	{
4883 	case OpImageSampleExplicitLod:
4884 	case OpImageSampleImplicitLod:
4885 	case OpImageSampleProjExplicitLod:
4886 	case OpImageSampleProjImplicitLod:
4887 	case OpImageFetch:
4888 	case OpImageRead:
4889 	case OpImageSparseSampleExplicitLod:
4890 	case OpImageSparseSampleImplicitLod:
4891 	case OpImageSparseSampleProjExplicitLod:
4892 	case OpImageSparseSampleProjImplicitLod:
4893 	case OpImageSparseFetch:
4894 	case OpImageSparseRead:
4895 		return true;
4896 
4897 	default:
4898 		return false;
4899 	}
4900 }
4901 
4902 void CompilerGLSL::emit_texture_op(const Instruction &i)
4903 {
4904 	auto *ops = stream(i);
4905 	auto op = static_cast<Op>(i.op);
4906 
4907 	SmallVector<uint32_t> inherited_expressions;
4908 
4909 	uint32_t result_type_id = ops[0];
4910 	uint32_t id = ops[1];
4911 
4912 	bool forward = false;
4913 	string expr = to_texture_op(i, &forward, inherited_expressions);
4914 	emit_op(result_type_id, id, expr, forward);
4915 	for (auto &inherit : inherited_expressions)
4916 		inherit_expression_dependencies(id, inherit);
4917 
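	// Implicit-LOD sampling depends on implicit derivatives, so these results are
	// control-dependent and must not be moved across control flow.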
4918 	switch (op)
4919 	{
4920 	case OpImageSampleDrefImplicitLod:
4921 	case OpImageSampleImplicitLod:
4922 	case OpImageSampleProjImplicitLod:
4923 	case OpImageSampleProjDrefImplicitLod:
4924 		register_control_dependent_expression(id);
4925 		break;
4926 
4927 	default:
4928 		break;
4929 	}
4930 }
4931 
4932 std::string CompilerGLSL::to_texture_op(const Instruction &i, bool *forward,
4933                                         SmallVector<uint32_t> &inherited_expressions)
4934 {
4935 	auto *ops = stream(i);
4936 	auto op = static_cast<Op>(i.op);
4937 	uint32_t length = i.length;
4938 
4939 	uint32_t result_type_id = ops[0];
4940 	VariableID img = ops[2];
4941 	uint32_t coord = ops[3];
4942 	uint32_t dref = 0;
4943 	uint32_t comp = 0;
4944 	bool gather = false;
4945 	bool proj = false;
4946 	bool fetch = false;
4947 	const uint32_t *opt = nullptr;
4948 
4949 	auto &result_type = get<SPIRType>(result_type_id);
4950 
4951 	inherited_expressions.push_back(coord);
4952 
4953 	// Make sure non-uniform decoration is back-propagated to where it needs to be.
4954 	if (has_decoration(img, DecorationNonUniformEXT))
4955 		propagate_nonuniform_qualifier(img);
4956 
4957 	switch (op)
4958 	{
4959 	case OpImageSampleDrefImplicitLod:
4960 	case OpImageSampleDrefExplicitLod:
4961 		dref = ops[4];
4962 		opt = &ops[5];
4963 		length -= 5;
4964 		break;
4965 
4966 	case OpImageSampleProjDrefImplicitLod:
4967 	case OpImageSampleProjDrefExplicitLod:
4968 		dref = ops[4];
4969 		opt = &ops[5];
4970 		length -= 5;
4971 		proj = true;
4972 		break;
4973 
4974 	case OpImageDrefGather:
4975 		dref = ops[4];
4976 		opt = &ops[5];
4977 		length -= 5;
4978 		gather = true;
4979 		break;
4980 
4981 	case OpImageGather:
4982 		comp = ops[4];
4983 		opt = &ops[5];
4984 		length -= 5;
4985 		gather = true;
4986 		break;
4987 
4988 	case OpImageFetch:
4989 	case OpImageRead: // Reads == fetches in Metal (other langs will not get here)
4990 		opt = &ops[4];
4991 		length -= 4;
4992 		fetch = true;
4993 		break;
4994 
4995 	case OpImageSampleProjImplicitLod:
4996 	case OpImageSampleProjExplicitLod:
4997 		opt = &ops[4];
4998 		length -= 4;
4999 		proj = true;
5000 		break;
5001 
5002 	default:
5003 		opt = &ops[4];
5004 		length -= 4;
5005 		break;
5006 	}
5007 
5008 	// Bypass pointers because we need the real image struct
5009 	auto &type = expression_type(img);
5010 	auto &imgtype = get<SPIRType>(type.self);
5011 
5012 	uint32_t coord_components = 0;
5013 	switch (imgtype.image.dim)
5014 	{
5015 	case spv::Dim1D:
5016 		coord_components = 1;
5017 		break;
5018 	case spv::Dim2D:
5019 		coord_components = 2;
5020 		break;
5021 	case spv::Dim3D:
5022 		coord_components = 3;
5023 		break;
5024 	case spv::DimCube:
5025 		coord_components = 3;
5026 		break;
5027 	case spv::DimBuffer:
5028 		coord_components = 1;
5029 		break;
5030 	default:
5031 		coord_components = 2;
5032 		break;
5033 	}
5034 
5035 	if (dref)
5036 		inherited_expressions.push_back(dref);
5037 
5038 	if (proj)
5039 		coord_components++;
5040 	if (imgtype.image.arrayed)
5041 		coord_components++;
5042 
5043 	uint32_t bias = 0;
5044 	uint32_t lod = 0;
5045 	uint32_t grad_x = 0;
5046 	uint32_t grad_y = 0;
5047 	uint32_t coffset = 0;
5048 	uint32_t offset = 0;
5049 	uint32_t coffsets = 0;
5050 	uint32_t sample = 0;
5051 	uint32_t minlod = 0;
5052 	uint32_t flags = 0;
5053 
5054 	if (length)
5055 	{
5056 		flags = *opt++;
5057 		length--;
5058 	}
5059 
5060 	auto test = [&](uint32_t &v, uint32_t flag) {
5061 		if (length && (flags & flag))
5062 		{
5063 			v = *opt++;
5064 			inherited_expressions.push_back(v);
5065 			length--;
5066 		}
5067 	};
5068 
5069 	test(bias, ImageOperandsBiasMask);
5070 	test(lod, ImageOperandsLodMask);
5071 	test(grad_x, ImageOperandsGradMask);
5072 	test(grad_y, ImageOperandsGradMask);
5073 	test(coffset, ImageOperandsConstOffsetMask);
5074 	test(offset, ImageOperandsOffsetMask);
5075 	test(coffsets, ImageOperandsConstOffsetsMask);
5076 	test(sample, ImageOperandsSampleMask);
5077 	test(minlod, ImageOperandsMinLodMask);
5078 
5079 	string expr;
5080 	expr += to_function_name(img, imgtype, !!fetch, !!gather, !!proj, !!coffsets, (!!coffset || !!offset),
5081 	                         (!!grad_x || !!grad_y), !!dref, lod, minlod);
5082 	expr += "(";
5083 	expr += to_function_args(img, imgtype, fetch, gather, proj, coord, coord_components, dref, grad_x, grad_y, lod,
5084 	                         coffset, offset, bias, comp, sample, minlod, forward);
5085 	expr += ")";
5086 
5087 	// texture(samplerXShadow) returns float. shadowX() returns vec4. Swizzle here.
5088 	if (is_legacy() && image_is_comparison(imgtype, img))
5089 		expr += ".r";
5090 
5091 	// Sampling from a texture which was deduced to be a depth image might actually return 1 component here.
5092 	// Remap back to 4 components as sampling opcodes expect.
5093 	if (backend.comparison_image_samples_scalar && image_opcode_is_sample_no_dref(op))
5094 	{
5095 		bool image_is_depth = false;
5096 		const auto *combined = maybe_get<SPIRCombinedImageSampler>(img);
5097 		VariableID image_id = combined ? combined->image : img;
5098 
5099 		if (combined && image_is_comparison(imgtype, combined->image))
5100 			image_is_depth = true;
5101 		else if (image_is_comparison(imgtype, img))
5102 			image_is_depth = true;
5103 
5104 		// We must also check the backing variable for the image.
5105 		// We might have loaded an OpImage, and used that handle for two different purposes.
5106 		// Once with comparison, once without.
5107 		auto *image_variable = maybe_get_backing_variable(image_id);
5108 		if (image_variable && image_is_comparison(get<SPIRType>(image_variable->basetype), image_variable->self))
5109 			image_is_depth = true;
5110 
5111 		if (image_is_depth)
5112 			expr = remap_swizzle(result_type, 1, expr);
5113 	}
5114 
5115 	if (!backend.support_small_type_sampling_result && result_type.width < 32)
5116 	{
5117 		// Just value cast (narrowing) to the expected type since we cannot rely on narrowing to work automatically.
5118 		// Hopefully the compiler picks this up and converts the texturing instruction to the appropriate precision.
5119 		expr = join(type_to_glsl_constructor(result_type), "(", expr, ")");
5120 	}
5121 
5122 	// Deals with reads from MSL. We might need to downconvert to fewer components.
5123 	if (op == OpImageRead)
5124 		expr = remap_swizzle(result_type, 4, expr);
5125 
5126 	return expr;
5127 }
5128 
5129 bool CompilerGLSL::expression_is_constant_null(uint32_t id) const
5130 {
5131 	auto *c = maybe_get<SPIRConstant>(id);
5132 	if (!c)
5133 		return false;
5134 	return c->constant_is_null();
5135 }
5136 
5137 // Returns the function name for a texture sampling function for the specified image and sampling characteristics.
5138 // For some subclasses, the function is a method on the specified image.
5139 string CompilerGLSL::to_function_name(VariableID tex, const SPIRType &imgtype, bool is_fetch, bool is_gather,
5140                                       bool is_proj, bool has_array_offsets, bool has_offset, bool has_grad, bool,
5141                                       uint32_t lod, uint32_t minlod)
5142 {
5143 	if (minlod != 0)
5144 		SPIRV_CROSS_THROW("Sparse texturing not yet supported.");
5145 
5146 	string fname;
5147 
5148 	// textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason.
5149 	// To emulate this, we will have to use textureGrad with a constant gradient of 0.
5150 	// The workaround will assert that the LOD is in fact constant 0, or we cannot emit correct code.
5151 	// This happens for HLSL SampleCmpLevelZero on Texture2DArray and TextureCube.
5152 	bool workaround_lod_array_shadow_as_grad = false;
5153 	if (((imgtype.image.arrayed && imgtype.image.dim == Dim2D) || imgtype.image.dim == DimCube) &&
5154 	    image_is_comparison(imgtype, tex) && lod)
5155 	{
5156 		if (!expression_is_constant_null(lod))
5157 		{
5158 			SPIRV_CROSS_THROW(
5159 			    "textureLod on sampler2DArrayShadow is not constant 0.0. This cannot be expressed in GLSL.");
5160 		}
5161 		workaround_lod_array_shadow_as_grad = true;
5162 	}
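	// In that case the "Lod" suffix is replaced with "Grad" below, and to_function_args()
	// supplies the zero gradients, e.g. textureGrad(..., vec3(0.0), vec3(0.0)) for a cube map.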
5163 
5164 	if (is_fetch)
5165 		fname += "texelFetch";
5166 	else
5167 	{
5168 		fname += "texture";
5169 
5170 		if (is_gather)
5171 			fname += "Gather";
5172 		if (has_array_offsets)
5173 			fname += "Offsets";
5174 		if (is_proj)
5175 			fname += "Proj";
5176 		if (has_grad || workaround_lod_array_shadow_as_grad)
5177 			fname += "Grad";
5178 		if (!!lod && !workaround_lod_array_shadow_as_grad)
5179 			fname += "Lod";
5180 	}
5181 
5182 	if (has_offset)
5183 		fname += "Offset";
5184 
5185 	return is_legacy() ? legacy_tex_op(fname, imgtype, lod, tex) : fname;
5186 }
5187 
5188 std::string CompilerGLSL::convert_separate_image_to_expression(uint32_t id)
5189 {
5190 	auto *var = maybe_get_backing_variable(id);
5191 
5192 	// If we are fetching from a plain OpTypeImage, we must combine with a dummy sampler in GLSL.
5193 	// In Vulkan GLSL, we can make use of the newer GL_EXT_samplerless_texture_functions.
5194 	if (var)
5195 	{
5196 		auto &type = get<SPIRType>(var->basetype);
5197 		if (type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer)
5198 		{
5199 			if (options.vulkan_semantics)
5200 			{
5201 				if (dummy_sampler_id)
5202 				{
5203 					// Don't need to consider Shadow state since the dummy sampler is always non-shadow.
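					// This produces something like sampler2D(tex, dummySampler) (names illustrative),
					// so that e.g. texelFetch() has a valid combined sampler to operate on.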
5204 					auto sampled_type = type;
5205 					sampled_type.basetype = SPIRType::SampledImage;
5206 					return join(type_to_glsl(sampled_type), "(", to_expression(id), ", ",
5207 					            to_expression(dummy_sampler_id), ")");
5208 				}
5209 				else
5210 				{
5211 					// Newer glslang supports this extension to deal with texture2D as argument to texture functions.
5212 					require_extension_internal("GL_EXT_samplerless_texture_functions");
5213 				}
5214 			}
5215 			else
5216 			{
5217 				if (!dummy_sampler_id)
5218 					SPIRV_CROSS_THROW(
5219 					    "Cannot find dummy sampler ID. Was build_dummy_sampler_for_combined_images() called?");
5220 
5221 				return to_combined_image_sampler(id, dummy_sampler_id);
5222 			}
5223 		}
5224 	}
5225 
5226 	return to_expression(id);
5227 }
5228 
5229 // Returns the function args for a texture sampling function for the specified image and sampling characteristics.
5230 string CompilerGLSL::to_function_args(VariableID img, const SPIRType &imgtype, bool is_fetch, bool is_gather,
5231                                       bool is_proj, uint32_t coord, uint32_t coord_components, uint32_t dref,
5232                                       uint32_t grad_x, uint32_t grad_y, uint32_t lod, uint32_t coffset, uint32_t offset,
5233                                       uint32_t bias, uint32_t comp, uint32_t sample, uint32_t /*minlod*/,
5234                                       bool *p_forward)
5235 {
5236 	string farg_str;
5237 	if (is_fetch)
5238 		farg_str = convert_separate_image_to_expression(img);
5239 	else
5240 		farg_str = to_expression(img);
5241 
5242 	bool swizz_func = backend.swizzle_is_function;
5243 	auto swizzle = [swizz_func](uint32_t comps, uint32_t in_comps) -> const char * {
5244 		if (comps == in_comps)
5245 			return "";
5246 
5247 		switch (comps)
5248 		{
5249 		case 1:
5250 			return ".x";
5251 		case 2:
5252 			return swizz_func ? ".xy()" : ".xy";
5253 		case 3:
5254 			return swizz_func ? ".xyz()" : ".xyz";
5255 		default:
5256 			return "";
5257 		}
5258 	};
5259 
5260 	bool forward = should_forward(coord);
5261 
5262 	// The IR can give us more components than we need, so chop them off as needed.
5263 	auto swizzle_expr = swizzle(coord_components, expression_type(coord).vecsize);
5264 	// Only enclose the UV expression if needed.
5265 	auto coord_expr = (*swizzle_expr == '\0') ? to_expression(coord) : (to_enclosed_expression(coord) + swizzle_expr);
5266 
5267 	// texelFetch only takes int, not uint.
5268 	auto &coord_type = expression_type(coord);
5269 	if (coord_type.basetype == SPIRType::UInt)
5270 	{
5271 		auto expected_type = coord_type;
5272 		expected_type.vecsize = coord_components;
5273 		expected_type.basetype = SPIRType::Int;
5274 		coord_expr = bitcast_expression(expected_type, coord_type.basetype, coord_expr);
5275 	}
5276 
5277 	// textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason.
5278 	// To emulate this, we will have to use textureGrad with a constant gradient of 0.
5279 	// The workaround will assert that the LOD is in fact constant 0, or we cannot emit correct code.
5280 	// This happens for HLSL SampleCmpLevelZero on Texture2DArray and TextureCube.
5281 	bool workaround_lod_array_shadow_as_grad =
5282 	    ((imgtype.image.arrayed && imgtype.image.dim == Dim2D) || imgtype.image.dim == DimCube) &&
5283 	    image_is_comparison(imgtype, img) && lod;
5284 
5285 	if (dref)
5286 	{
5287 		forward = forward && should_forward(dref);
5288 
5289 		// SPIR-V splits dref and coordinate.
5290 		if (is_gather || coord_components == 4) // GLSL also splits the arguments in two. Same for textureGather.
5291 		{
5292 			farg_str += ", ";
5293 			farg_str += to_expression(coord);
5294 			farg_str += ", ";
5295 			farg_str += to_expression(dref);
5296 		}
5297 		else if (is_proj)
5298 		{
5299 			// Have to reshuffle so we get vec4(coord, dref, proj); special case.
5300 			// Other shading languages split up the arguments for coord and compare value like SPIR-V does.
5301 			// The coordinate type for textureProj shadow is always vec4 even for sampler1DShadow.
5302 			farg_str += ", vec4(";
5303 
5304 			if (imgtype.image.dim == Dim1D)
5305 			{
5306 				// Could reuse coord_expr, but we will mess up the temporary usage checking.
5307 				farg_str += to_enclosed_expression(coord) + ".x";
5308 				farg_str += ", ";
5309 				farg_str += "0.0, ";
5310 				farg_str += to_expression(dref);
5311 				farg_str += ", ";
5312 				farg_str += to_enclosed_expression(coord) + ".y)";
5313 			}
5314 			else if (imgtype.image.dim == Dim2D)
5315 			{
5316 				// Could reuse coord_expr, but we will mess up the temporary usage checking.
5317 				farg_str += to_enclosed_expression(coord) + (swizz_func ? ".xy()" : ".xy");
5318 				farg_str += ", ";
5319 				farg_str += to_expression(dref);
5320 				farg_str += ", ";
5321 				farg_str += to_enclosed_expression(coord) + ".z)";
5322 			}
5323 			else
5324 				SPIRV_CROSS_THROW("Invalid type for textureProj with shadow.");
5325 		}
5326 		else
5327 		{
5328 			// Create a composite which merges coord/dref into a single vector.
5329 			auto type = expression_type(coord);
5330 			type.vecsize = coord_components + 1;
5331 			farg_str += ", ";
5332 			farg_str += type_to_glsl_constructor(type);
5333 			farg_str += "(";
5334 			farg_str += coord_expr;
5335 			farg_str += ", ";
5336 			farg_str += to_expression(dref);
5337 			farg_str += ")";
5338 		}
5339 	}
5340 	else
5341 	{
5342 		farg_str += ", ";
5343 		farg_str += coord_expr;
5344 	}
5345 
5346 	if (grad_x || grad_y)
5347 	{
5348 		forward = forward && should_forward(grad_x);
5349 		forward = forward && should_forward(grad_y);
5350 		farg_str += ", ";
5351 		farg_str += to_expression(grad_x);
5352 		farg_str += ", ";
5353 		farg_str += to_expression(grad_y);
5354 	}
5355 
5356 	if (lod)
5357 	{
5358 		if (workaround_lod_array_shadow_as_grad)
5359 		{
5360 			// Implement textureGrad() instead. LOD == 0.0 is implemented as gradient of 0.0.
5361 			// Implementing this as plain texture() is not safe on some implementations.
5362 			if (imgtype.image.dim == Dim2D)
5363 				farg_str += ", vec2(0.0), vec2(0.0)";
5364 			else if (imgtype.image.dim == DimCube)
5365 				farg_str += ", vec3(0.0), vec3(0.0)";
5366 		}
5367 		else
5368 		{
5369 			if (check_explicit_lod_allowed(lod))
5370 			{
5371 				forward = forward && should_forward(lod);
5372 				farg_str += ", ";
5373 
5374 				auto &lod_expr_type = expression_type(lod);
5375 
5376 				// Lod expression for TexelFetch in GLSL must be int, and only int.
5377 				if (is_fetch && imgtype.image.dim != DimBuffer && !imgtype.image.ms &&
5378 				    lod_expr_type.basetype != SPIRType::Int)
5379 				{
5380 					farg_str += join("int(", to_expression(lod), ")");
5381 				}
5382 				else
5383 				{
5384 					farg_str += to_expression(lod);
5385 				}
5386 			}
5387 		}
5388 	}
5389 	else if (is_fetch && imgtype.image.dim != DimBuffer && !imgtype.image.ms)
5390 	{
5391 		// Lod argument is optional in OpImageFetch, but we require a LOD value, so pick 0 as the default.
5392 		farg_str += ", 0";
5393 	}
5394 
5395 	if (coffset)
5396 	{
5397 		forward = forward && should_forward(coffset);
5398 		farg_str += ", ";
5399 		farg_str += to_expression(coffset);
5400 	}
5401 	else if (offset)
5402 	{
5403 		forward = forward && should_forward(offset);
5404 		farg_str += ", ";
5405 		farg_str += to_expression(offset);
5406 	}
5407 
5408 	if (bias)
5409 	{
5410 		forward = forward && should_forward(bias);
5411 		farg_str += ", ";
5412 		farg_str += to_expression(bias);
5413 	}
5414 
5415 	if (comp)
5416 	{
5417 		forward = forward && should_forward(comp);
5418 		farg_str += ", ";
5419 		farg_str += to_expression(comp);
5420 	}
5421 
5422 	if (sample)
5423 	{
5424 		farg_str += ", ";
5425 		farg_str += to_expression(sample);
5426 	}
5427 
5428 	*p_forward = forward;
5429 
5430 	return farg_str;
5431 }
5432 
5433 void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t length)
5434 {
5435 	auto op = static_cast<GLSLstd450>(eop);
5436 
5437 	if (is_legacy() && is_unsigned_glsl_opcode(op))
5438 		SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy GLSL targets.");
5439 
5440 	// If we need to do implicit bitcasts, make sure we do it with the correct type.
5441 	uint32_t integer_width = get_integer_width_for_glsl_instruction(op, args, length);
5442 	auto int_type = to_signed_basetype(integer_width);
5443 	auto uint_type = to_unsigned_basetype(integer_width);
5444 
5445 	switch (op)
5446 	{
5447 	// FP fiddling
5448 	case GLSLstd450Round:
5449 		emit_unary_func_op(result_type, id, args[0], "round");
5450 		break;
5451 
5452 	case GLSLstd450RoundEven:
5453 		if ((options.es && options.version >= 300) || (!options.es && options.version >= 130))
5454 			emit_unary_func_op(result_type, id, args[0], "roundEven");
5455 		else
5456 			SPIRV_CROSS_THROW("roundEven supported only in ESSL 300 and GLSL 130 and up.");
5457 		break;
5458 
5459 	case GLSLstd450Trunc:
5460 		emit_unary_func_op(result_type, id, args[0], "trunc");
5461 		break;
5462 	case GLSLstd450SAbs:
5463 		emit_unary_func_op_cast(result_type, id, args[0], "abs", int_type, int_type);
5464 		break;
5465 	case GLSLstd450FAbs:
5466 		emit_unary_func_op(result_type, id, args[0], "abs");
5467 		break;
5468 	case GLSLstd450SSign:
5469 		emit_unary_func_op_cast(result_type, id, args[0], "sign", int_type, int_type);
5470 		break;
5471 	case GLSLstd450FSign:
5472 		emit_unary_func_op(result_type, id, args[0], "sign");
5473 		break;
5474 	case GLSLstd450Floor:
5475 		emit_unary_func_op(result_type, id, args[0], "floor");
5476 		break;
5477 	case GLSLstd450Ceil:
5478 		emit_unary_func_op(result_type, id, args[0], "ceil");
5479 		break;
5480 	case GLSLstd450Fract:
5481 		emit_unary_func_op(result_type, id, args[0], "fract");
5482 		break;
5483 	case GLSLstd450Radians:
5484 		emit_unary_func_op(result_type, id, args[0], "radians");
5485 		break;
5486 	case GLSLstd450Degrees:
5487 		emit_unary_func_op(result_type, id, args[0], "degrees");
5488 		break;
5489 	case GLSLstd450Fma:
5490 		if ((!options.es && options.version < 400) || (options.es && options.version < 320))
5491 		{
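			// fma() requires GLSL 4.00 or ESSL 3.20; on older targets, expand to the
			// equivalent a * b + c expression instead.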
5492 			auto expr = join(to_enclosed_expression(args[0]), " * ", to_enclosed_expression(args[1]), " + ",
5493 			                 to_enclosed_expression(args[2]));
5494 
5495 			emit_op(result_type, id, expr,
5496 			        should_forward(args[0]) && should_forward(args[1]) && should_forward(args[2]));
5497 			for (uint32_t i = 0; i < 3; i++)
5498 				inherit_expression_dependencies(id, args[i]);
5499 		}
5500 		else
5501 			emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "fma");
5502 		break;
5503 	case GLSLstd450Modf:
5504 		register_call_out_argument(args[1]);
5505 		forced_temporaries.insert(id);
5506 		emit_binary_func_op(result_type, id, args[0], args[1], "modf");
5507 		break;
5508 
5509 	case GLSLstd450ModfStruct:
5510 	{
5511 		auto &type = get<SPIRType>(result_type);
5512 		emit_uninitialized_temporary_expression(result_type, id);
5513 		statement(to_expression(id), ".", to_member_name(type, 0), " = ", "modf(", to_expression(args[0]), ", ",
5514 		          to_expression(id), ".", to_member_name(type, 1), ");");
5515 		break;
5516 	}
5517 
5518 	// Minmax
5519 	case GLSLstd450UMin:
5520 		emit_binary_func_op_cast(result_type, id, args[0], args[1], "min", uint_type, false);
5521 		break;
5522 
5523 	case GLSLstd450SMin:
5524 		emit_binary_func_op_cast(result_type, id, args[0], args[1], "min", int_type, false);
5525 		break;
5526 
5527 	case GLSLstd450FMin:
5528 		emit_binary_func_op(result_type, id, args[0], args[1], "min");
5529 		break;
5530 
5531 	case GLSLstd450FMax:
5532 		emit_binary_func_op(result_type, id, args[0], args[1], "max");
5533 		break;
5534 
5535 	case GLSLstd450UMax:
5536 		emit_binary_func_op_cast(result_type, id, args[0], args[1], "max", uint_type, false);
5537 		break;
5538 
5539 	case GLSLstd450SMax:
5540 		emit_binary_func_op_cast(result_type, id, args[0], args[1], "max", int_type, false);
5541 		break;
5542 
5543 	case GLSLstd450FClamp:
5544 		emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "clamp");
5545 		break;
5546 
5547 	case GLSLstd450UClamp:
5548 		emit_trinary_func_op_cast(result_type, id, args[0], args[1], args[2], "clamp", uint_type);
5549 		break;
5550 
5551 	case GLSLstd450SClamp:
5552 		emit_trinary_func_op_cast(result_type, id, args[0], args[1], args[2], "clamp", int_type);
5553 		break;
5554 
5555 	// Trig
5556 	case GLSLstd450Sin:
5557 		emit_unary_func_op(result_type, id, args[0], "sin");
5558 		break;
5559 	case GLSLstd450Cos:
5560 		emit_unary_func_op(result_type, id, args[0], "cos");
5561 		break;
5562 	case GLSLstd450Tan:
5563 		emit_unary_func_op(result_type, id, args[0], "tan");
5564 		break;
5565 	case GLSLstd450Asin:
5566 		emit_unary_func_op(result_type, id, args[0], "asin");
5567 		break;
5568 	case GLSLstd450Acos:
5569 		emit_unary_func_op(result_type, id, args[0], "acos");
5570 		break;
5571 	case GLSLstd450Atan:
5572 		emit_unary_func_op(result_type, id, args[0], "atan");
5573 		break;
5574 	case GLSLstd450Sinh:
5575 		emit_unary_func_op(result_type, id, args[0], "sinh");
5576 		break;
5577 	case GLSLstd450Cosh:
5578 		emit_unary_func_op(result_type, id, args[0], "cosh");
5579 		break;
5580 	case GLSLstd450Tanh:
5581 		emit_unary_func_op(result_type, id, args[0], "tanh");
5582 		break;
5583 	case GLSLstd450Asinh:
5584 		emit_unary_func_op(result_type, id, args[0], "asinh");
5585 		break;
5586 	case GLSLstd450Acosh:
5587 		emit_unary_func_op(result_type, id, args[0], "acosh");
5588 		break;
5589 	case GLSLstd450Atanh:
5590 		emit_unary_func_op(result_type, id, args[0], "atanh");
5591 		break;
5592 	case GLSLstd450Atan2:
5593 		emit_binary_func_op(result_type, id, args[0], args[1], "atan");
5594 		break;
5595 
5596 	// Exponentials
5597 	case GLSLstd450Pow:
5598 		emit_binary_func_op(result_type, id, args[0], args[1], "pow");
5599 		break;
5600 	case GLSLstd450Exp:
5601 		emit_unary_func_op(result_type, id, args[0], "exp");
5602 		break;
5603 	case GLSLstd450Log:
5604 		emit_unary_func_op(result_type, id, args[0], "log");
5605 		break;
5606 	case GLSLstd450Exp2:
5607 		emit_unary_func_op(result_type, id, args[0], "exp2");
5608 		break;
5609 	case GLSLstd450Log2:
5610 		emit_unary_func_op(result_type, id, args[0], "log2");
5611 		break;
5612 	case GLSLstd450Sqrt:
5613 		emit_unary_func_op(result_type, id, args[0], "sqrt");
5614 		break;
5615 	case GLSLstd450InverseSqrt:
5616 		emit_unary_func_op(result_type, id, args[0], "inversesqrt");
5617 		break;
5618 
5619 	// Matrix math
5620 	case GLSLstd450Determinant:
5621 		emit_unary_func_op(result_type, id, args[0], "determinant");
5622 		break;
5623 	case GLSLstd450MatrixInverse:
5624 		emit_unary_func_op(result_type, id, args[0], "inverse");
5625 		break;
5626 
5627 	// Lerping
5628 	case GLSLstd450FMix:
5629 	case GLSLstd450IMix:
5630 	{
5631 		emit_mix_op(result_type, id, args[0], args[1], args[2]);
5632 		break;
5633 	}
5634 	case GLSLstd450Step:
5635 		emit_binary_func_op(result_type, id, args[0], args[1], "step");
5636 		break;
5637 	case GLSLstd450SmoothStep:
5638 		emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "smoothstep");
5639 		break;
5640 
5641 	// Packing
5642 	case GLSLstd450Frexp:
5643 		register_call_out_argument(args[1]);
5644 		forced_temporaries.insert(id);
5645 		emit_binary_func_op(result_type, id, args[0], args[1], "frexp");
5646 		break;
5647 
5648 	case GLSLstd450FrexpStruct:
5649 	{
5650 		auto &type = get<SPIRType>(result_type);
5651 		emit_uninitialized_temporary_expression(result_type, id);
5652 		statement(to_expression(id), ".", to_member_name(type, 0), " = ", "frexp(", to_expression(args[0]), ", ",
5653 		          to_expression(id), ".", to_member_name(type, 1), ");");
5654 		break;
5655 	}
5656 
5657 	case GLSLstd450Ldexp:
5658 	{
5659 		bool forward = should_forward(args[0]) && should_forward(args[1]);
5660 
5661 		auto op0 = to_unpacked_expression(args[0]);
5662 		auto op1 = to_unpacked_expression(args[1]);
5663 		auto &op1_type = expression_type(args[1]);
5664 		if (op1_type.basetype != SPIRType::Int)
5665 		{
5666 			// Need a value cast here.
5667 			auto target_type = op1_type;
5668 			target_type.basetype = SPIRType::Int;
5669 			op1 = join(type_to_glsl_constructor(target_type), "(", op1, ")");
5670 		}
5671 
5672 		auto expr = join("ldexp(", op0, ", ", op1, ")");
5673 
5674 		emit_op(result_type, id, expr, forward);
5675 		inherit_expression_dependencies(id, args[0]);
5676 		inherit_expression_dependencies(id, args[1]);
5677 		break;
5678 	}
5679 
5680 	case GLSLstd450PackSnorm4x8:
5681 		emit_unary_func_op(result_type, id, args[0], "packSnorm4x8");
5682 		break;
5683 	case GLSLstd450PackUnorm4x8:
5684 		emit_unary_func_op(result_type, id, args[0], "packUnorm4x8");
5685 		break;
5686 	case GLSLstd450PackSnorm2x16:
5687 		emit_unary_func_op(result_type, id, args[0], "packSnorm2x16");
5688 		break;
5689 	case GLSLstd450PackUnorm2x16:
5690 		emit_unary_func_op(result_type, id, args[0], "packUnorm2x16");
5691 		break;
5692 	case GLSLstd450PackHalf2x16:
5693 		emit_unary_func_op(result_type, id, args[0], "packHalf2x16");
5694 		break;
5695 	case GLSLstd450UnpackSnorm4x8:
5696 		emit_unary_func_op(result_type, id, args[0], "unpackSnorm4x8");
5697 		break;
5698 	case GLSLstd450UnpackUnorm4x8:
5699 		emit_unary_func_op(result_type, id, args[0], "unpackUnorm4x8");
5700 		break;
5701 	case GLSLstd450UnpackSnorm2x16:
5702 		emit_unary_func_op(result_type, id, args[0], "unpackSnorm2x16");
5703 		break;
5704 	case GLSLstd450UnpackUnorm2x16:
5705 		emit_unary_func_op(result_type, id, args[0], "unpackUnorm2x16");
5706 		break;
5707 	case GLSLstd450UnpackHalf2x16:
5708 		emit_unary_func_op(result_type, id, args[0], "unpackHalf2x16");
5709 		break;
5710 
5711 	case GLSLstd450PackDouble2x32:
5712 		emit_unary_func_op(result_type, id, args[0], "packDouble2x32");
5713 		break;
5714 	case GLSLstd450UnpackDouble2x32:
5715 		emit_unary_func_op(result_type, id, args[0], "unpackDouble2x32");
5716 		break;
5717 
5718 	// Vector math
5719 	case GLSLstd450Length:
5720 		emit_unary_func_op(result_type, id, args[0], "length");
5721 		break;
5722 	case GLSLstd450Distance:
5723 		emit_binary_func_op(result_type, id, args[0], args[1], "distance");
5724 		break;
5725 	case GLSLstd450Cross:
5726 		emit_binary_func_op(result_type, id, args[0], args[1], "cross");
5727 		break;
5728 	case GLSLstd450Normalize:
5729 		emit_unary_func_op(result_type, id, args[0], "normalize");
5730 		break;
5731 	case GLSLstd450FaceForward:
5732 		emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "faceforward");
5733 		break;
5734 	case GLSLstd450Reflect:
5735 		emit_binary_func_op(result_type, id, args[0], args[1], "reflect");
5736 		break;
5737 	case GLSLstd450Refract:
5738 		emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "refract");
5739 		break;
5740 
5741 	// Bit-fiddling
5742 	case GLSLstd450FindILsb:
5743 		// findLSB always returns int.
5744 		emit_unary_func_op_cast(result_type, id, args[0], "findLSB", expression_type(args[0]).basetype, int_type);
5745 		break;
5746 
5747 	case GLSLstd450FindSMsb:
5748 		emit_unary_func_op_cast(result_type, id, args[0], "findMSB", int_type, int_type);
5749 		break;
5750 
5751 	case GLSLstd450FindUMsb:
5752 		emit_unary_func_op_cast(result_type, id, args[0], "findMSB", uint_type,
5753 		                        int_type); // findMSB always returns int.
5754 		break;
5755 
5756 	// Multisampled varying
5757 	case GLSLstd450InterpolateAtCentroid:
5758 		emit_unary_func_op(result_type, id, args[0], "interpolateAtCentroid");
5759 		break;
5760 	case GLSLstd450InterpolateAtSample:
5761 		emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtSample");
5762 		break;
5763 	case GLSLstd450InterpolateAtOffset:
5764 		emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtOffset");
5765 		break;
5766 
5767 	case GLSLstd450NMin:
5768 	case GLSLstd450NMax:
5769 	{
5770 		emit_nminmax_op(result_type, id, args[0], args[1], op);
5771 		break;
5772 	}
5773 
5774 	case GLSLstd450NClamp:
5775 	{
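		// NClamp(x, minVal, maxVal) is emulated as NMin(NMax(x, minVal), maxVal) below.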
5776 		// Make sure we have a unique ID here to avoid aliasing the extra sub-expressions between clamp and NMin sub-op.
5777 		// IDs cannot exceed 24 bits, so we can make use of the higher bits for some unique flags.
5778 		uint32_t &max_id = extra_sub_expressions[id | 0x80000000u];
5779 		if (!max_id)
5780 			max_id = ir.increase_bound_by(1);
5781 
5782 		// Inherit precision qualifiers.
5783 		ir.meta[max_id] = ir.meta[id];
5784 
5785 		emit_nminmax_op(result_type, max_id, args[0], args[1], GLSLstd450NMax);
5786 		emit_nminmax_op(result_type, id, max_id, args[2], GLSLstd450NMin);
5787 		break;
5788 	}
5789 
5790 	default:
5791 		statement("// unimplemented GLSL op ", eop);
5792 		break;
5793 	}
5794 }
5795 
5796 void CompilerGLSL::emit_nminmax_op(uint32_t result_type, uint32_t id, uint32_t op0, uint32_t op1, GLSLstd450 op)
5797 {
5798 	// Need to emulate this call.
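	// NMin/NMax must return the non-NaN operand when exactly one input is NaN, so expand to roughly:
	//   tmp = min(a, b); tmp = isnan(a) ? b : tmp; result = isnan(b) ? a : tmp;
	// (with max() for NMax), using emit_mix_op() for the selects.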
5799 	uint32_t &ids = extra_sub_expressions[id];
5800 	if (!ids)
5801 	{
5802 		ids = ir.increase_bound_by(5);
5803 		auto btype = get<SPIRType>(result_type);
5804 		btype.basetype = SPIRType::Boolean;
5805 		set<SPIRType>(ids, btype);
5806 	}
5807 
5808 	uint32_t btype_id = ids + 0;
5809 	uint32_t left_nan_id = ids + 1;
5810 	uint32_t right_nan_id = ids + 2;
5811 	uint32_t tmp_id = ids + 3;
5812 	uint32_t mixed_first_id = ids + 4;
5813 
5814 	// Inherit precision qualifiers.
5815 	ir.meta[tmp_id] = ir.meta[id];
5816 	ir.meta[mixed_first_id] = ir.meta[id];
5817 
5818 	emit_unary_func_op(btype_id, left_nan_id, op0, "isnan");
5819 	emit_unary_func_op(btype_id, right_nan_id, op1, "isnan");
5820 	emit_binary_func_op(result_type, tmp_id, op0, op1, op == GLSLstd450NMin ? "min" : "max");
5821 	emit_mix_op(result_type, mixed_first_id, tmp_id, op1, left_nan_id);
5822 	emit_mix_op(result_type, id, mixed_first_id, op0, right_nan_id);
5823 }
5824 
5825 void CompilerGLSL::emit_spv_amd_shader_ballot_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args,
5826                                                  uint32_t)
5827 {
5828 	require_extension_internal("GL_AMD_shader_ballot");
5829 
5830 	enum AMDShaderBallot
5831 	{
5832 		SwizzleInvocationsAMD = 1,
5833 		SwizzleInvocationsMaskedAMD = 2,
5834 		WriteInvocationAMD = 3,
5835 		MbcntAMD = 4
5836 	};
5837 
5838 	auto op = static_cast<AMDShaderBallot>(eop);
5839 
5840 	switch (op)
5841 	{
5842 	case SwizzleInvocationsAMD:
5843 		emit_binary_func_op(result_type, id, args[0], args[1], "swizzleInvocationsAMD");
5844 		register_control_dependent_expression(id);
5845 		break;
5846 
5847 	case SwizzleInvocationsMaskedAMD:
5848 		emit_binary_func_op(result_type, id, args[0], args[1], "swizzleInvocationsMaskedAMD");
5849 		register_control_dependent_expression(id);
5850 		break;
5851 
5852 	case WriteInvocationAMD:
5853 		emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "writeInvocationAMD");
5854 		register_control_dependent_expression(id);
5855 		break;
5856 
5857 	case MbcntAMD:
5858 		emit_unary_func_op(result_type, id, args[0], "mbcntAMD");
5859 		register_control_dependent_expression(id);
5860 		break;
5861 
5862 	default:
5863 		statement("// unimplemented SPV AMD shader ballot op ", eop);
5864 		break;
5865 	}
5866 }
5867 
5868 void CompilerGLSL::emit_spv_amd_shader_explicit_vertex_parameter_op(uint32_t result_type, uint32_t id, uint32_t eop,
5869                                                                     const uint32_t *args, uint32_t)
5870 {
5871 	require_extension_internal("GL_AMD_shader_explicit_vertex_parameter");
5872 
5873 	enum AMDShaderExplicitVertexParameter
5874 	{
5875 		InterpolateAtVertexAMD = 1
5876 	};
5877 
5878 	auto op = static_cast<AMDShaderExplicitVertexParameter>(eop);
5879 
5880 	switch (op)
5881 	{
5882 	case InterpolateAtVertexAMD:
5883 		emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtVertexAMD");
5884 		break;
5885 
5886 	default:
5887 		statement("// unimplemented SPV AMD shader explicit vertex parameter op ", eop);
5888 		break;
5889 	}
5890 }
5891 
5892 void CompilerGLSL::emit_spv_amd_shader_trinary_minmax_op(uint32_t result_type, uint32_t id, uint32_t eop,
5893                                                          const uint32_t *args, uint32_t)
5894 {
5895 	require_extension_internal("GL_AMD_shader_trinary_minmax");
5896 
5897 	enum AMDShaderTrinaryMinMax
5898 	{
5899 		FMin3AMD = 1,
5900 		UMin3AMD = 2,
5901 		SMin3AMD = 3,
5902 		FMax3AMD = 4,
5903 		UMax3AMD = 5,
5904 		SMax3AMD = 6,
5905 		FMid3AMD = 7,
5906 		UMid3AMD = 8,
5907 		SMid3AMD = 9
5908 	};
5909 
5910 	auto op = static_cast<AMDShaderTrinaryMinMax>(eop);
5911 
5912 	switch (op)
5913 	{
5914 	case FMin3AMD:
5915 	case UMin3AMD:
5916 	case SMin3AMD:
5917 		emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "min3");
5918 		break;
5919 
5920 	case FMax3AMD:
5921 	case UMax3AMD:
5922 	case SMax3AMD:
5923 		emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "max3");
5924 		break;
5925 
5926 	case FMid3AMD:
5927 	case UMid3AMD:
5928 	case SMid3AMD:
5929 		emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "mid3");
5930 		break;
5931 
5932 	default:
5933 		statement("// unimplemented SPV AMD shader trinary minmax op ", eop);
5934 		break;
5935 	}
5936 }
5937 
5938 void CompilerGLSL::emit_spv_amd_gcn_shader_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args,
5939                                               uint32_t)
5940 {
5941 	require_extension_internal("GL_AMD_gcn_shader");
5942 
5943 	enum AMDGCNShader
5944 	{
5945 		CubeFaceIndexAMD = 1,
5946 		CubeFaceCoordAMD = 2,
5947 		TimeAMD = 3
5948 	};
5949 
5950 	auto op = static_cast<AMDGCNShader>(eop);
5951 
5952 	switch (op)
5953 	{
5954 	case CubeFaceIndexAMD:
5955 		emit_unary_func_op(result_type, id, args[0], "cubeFaceIndexAMD");
5956 		break;
5957 	case CubeFaceCoordAMD:
5958 		emit_unary_func_op(result_type, id, args[0], "cubeFaceCoordAMD");
5959 		break;
5960 	case TimeAMD:
5961 	{
5962 		string expr = "timeAMD()";
5963 		emit_op(result_type, id, expr, true);
5964 		register_control_dependent_expression(id);
5965 		break;
5966 	}
5967 
5968 	default:
5969 		statement("// unimplemented SPV AMD gcn shader op ", eop);
5970 		break;
5971 	}
5972 }
5973 
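// Translates SPIR-V OpGroupNonUniform* instructions into GL_KHR_shader_subgroup_*
// built-ins. Requires Vulkan GLSL semantics and Subgroup scope: the first switch pulls
// in the appropriate subgroup extension, the second emits the actual subgroup*() call,
// and the result is registered as control-dependent so it stays in its basic block.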
5974 void CompilerGLSL::emit_subgroup_op(const Instruction &i)
5975 {
5976 	const uint32_t *ops = stream(i);
5977 	auto op = static_cast<Op>(i.op);
5978 
5979 	if (!options.vulkan_semantics)
5980 		SPIRV_CROSS_THROW("Can only use subgroup operations in Vulkan semantics.");
5981 
5982 	switch (op)
5983 	{
5984 	case OpGroupNonUniformElect:
5985 		require_extension_internal("GL_KHR_shader_subgroup_basic");
5986 		break;
5987 
5988 	case OpGroupNonUniformBroadcast:
5989 	case OpGroupNonUniformBroadcastFirst:
5990 	case OpGroupNonUniformBallot:
5991 	case OpGroupNonUniformInverseBallot:
5992 	case OpGroupNonUniformBallotBitExtract:
5993 	case OpGroupNonUniformBallotBitCount:
5994 	case OpGroupNonUniformBallotFindLSB:
5995 	case OpGroupNonUniformBallotFindMSB:
5996 		require_extension_internal("GL_KHR_shader_subgroup_ballot");
5997 		break;
5998 
5999 	case OpGroupNonUniformShuffle:
6000 	case OpGroupNonUniformShuffleXor:
6001 		require_extension_internal("GL_KHR_shader_subgroup_shuffle");
6002 		break;
6003 
6004 	case OpGroupNonUniformShuffleUp:
6005 	case OpGroupNonUniformShuffleDown:
6006 		require_extension_internal("GL_KHR_shader_subgroup_shuffle_relative");
6007 		break;
6008 
6009 	case OpGroupNonUniformAll:
6010 	case OpGroupNonUniformAny:
6011 	case OpGroupNonUniformAllEqual:
6012 		require_extension_internal("GL_KHR_shader_subgroup_vote");
6013 		break;
6014 
6015 	case OpGroupNonUniformFAdd:
6016 	case OpGroupNonUniformFMul:
6017 	case OpGroupNonUniformFMin:
6018 	case OpGroupNonUniformFMax:
6019 	case OpGroupNonUniformIAdd:
6020 	case OpGroupNonUniformIMul:
6021 	case OpGroupNonUniformSMin:
6022 	case OpGroupNonUniformSMax:
6023 	case OpGroupNonUniformUMin:
6024 	case OpGroupNonUniformUMax:
6025 	case OpGroupNonUniformBitwiseAnd:
6026 	case OpGroupNonUniformBitwiseOr:
6027 	case OpGroupNonUniformBitwiseXor:
6028 	{
6029 		auto operation = static_cast<GroupOperation>(ops[3]);
6030 		if (operation == GroupOperationClusteredReduce)
6031 		{
6032 			require_extension_internal("GL_KHR_shader_subgroup_clustered");
6033 		}
6034 		else if (operation == GroupOperationExclusiveScan || operation == GroupOperationInclusiveScan ||
6035 		         operation == GroupOperationReduce)
6036 		{
6037 			require_extension_internal("GL_KHR_shader_subgroup_arithmetic");
6038 		}
6039 		else
6040 			SPIRV_CROSS_THROW("Invalid group operation.");
6041 		break;
6042 	}
6043 
6044 	case OpGroupNonUniformQuadSwap:
6045 	case OpGroupNonUniformQuadBroadcast:
6046 		require_extension_internal("GL_KHR_shader_subgroup_quad");
6047 		break;
6048 
6049 	default:
6050 		SPIRV_CROSS_THROW("Invalid opcode for subgroup.");
6051 	}
6052 
6053 	uint32_t result_type = ops[0];
6054 	uint32_t id = ops[1];
6055 
6056 	auto scope = static_cast<Scope>(get<SPIRConstant>(ops[2]).scalar());
6057 	if (scope != ScopeSubgroup)
6058 		SPIRV_CROSS_THROW("Only subgroup scope is supported.");
6059 
6060 	switch (op)
6061 	{
6062 	case OpGroupNonUniformElect:
6063 		emit_op(result_type, id, "subgroupElect()", true);
6064 		break;
6065 
6066 	case OpGroupNonUniformBroadcast:
6067 		emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupBroadcast");
6068 		break;
6069 
6070 	case OpGroupNonUniformBroadcastFirst:
6071 		emit_unary_func_op(result_type, id, ops[3], "subgroupBroadcastFirst");
6072 		break;
6073 
6074 	case OpGroupNonUniformBallot:
6075 		emit_unary_func_op(result_type, id, ops[3], "subgroupBallot");
6076 		break;
6077 
6078 	case OpGroupNonUniformInverseBallot:
6079 		emit_unary_func_op(result_type, id, ops[3], "subgroupInverseBallot");
6080 		break;
6081 
6082 	case OpGroupNonUniformBallotBitExtract:
6083 		emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupBallotBitExtract");
6084 		break;
6085 
6086 	case OpGroupNonUniformBallotFindLSB:
6087 		emit_unary_func_op(result_type, id, ops[3], "subgroupBallotFindLSB");
6088 		break;
6089 
6090 	case OpGroupNonUniformBallotFindMSB:
6091 		emit_unary_func_op(result_type, id, ops[3], "subgroupBallotFindMSB");
6092 		break;
6093 
6094 	case OpGroupNonUniformBallotBitCount:
6095 	{
6096 		auto operation = static_cast<GroupOperation>(ops[3]);
6097 		if (operation == GroupOperationReduce)
6098 			emit_unary_func_op(result_type, id, ops[4], "subgroupBallotBitCount");
6099 		else if (operation == GroupOperationInclusiveScan)
6100 			emit_unary_func_op(result_type, id, ops[4], "subgroupBallotInclusiveBitCount");
6101 		else if (operation == GroupOperationExclusiveScan)
6102 			emit_unary_func_op(result_type, id, ops[4], "subgroupBallotExclusiveBitCount");
6103 		else
6104 			SPIRV_CROSS_THROW("Invalid BitCount operation.");
6105 		break;
6106 	}
6107 
6108 	case OpGroupNonUniformShuffle:
6109 		emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffle");
6110 		break;
6111 
6112 	case OpGroupNonUniformShuffleXor:
6113 		emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleXor");
6114 		break;
6115 
6116 	case OpGroupNonUniformShuffleUp:
6117 		emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleUp");
6118 		break;
6119 
6120 	case OpGroupNonUniformShuffleDown:
6121 		emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleDown");
6122 		break;
6123 
6124 	case OpGroupNonUniformAll:
6125 		emit_unary_func_op(result_type, id, ops[3], "subgroupAll");
6126 		break;
6127 
6128 	case OpGroupNonUniformAny:
6129 		emit_unary_func_op(result_type, id, ops[3], "subgroupAny");
6130 		break;
6131 
6132 	case OpGroupNonUniformAllEqual:
6133 		emit_unary_func_op(result_type, id, ops[3], "subgroupAllEqual");
6134 		break;
6135 
6136 		// clang-format off
6137 #define GLSL_GROUP_OP(op, glsl_op) \
6138 case OpGroupNonUniform##op: \
6139 	{ \
6140 		auto operation = static_cast<GroupOperation>(ops[3]); \
6141 		if (operation == GroupOperationReduce) \
6142 			emit_unary_func_op(result_type, id, ops[4], "subgroup" #glsl_op); \
6143 		else if (operation == GroupOperationInclusiveScan) \
6144 			emit_unary_func_op(result_type, id, ops[4], "subgroupInclusive" #glsl_op); \
6145 		else if (operation == GroupOperationExclusiveScan) \
6146 			emit_unary_func_op(result_type, id, ops[4], "subgroupExclusive" #glsl_op); \
6147 		else if (operation == GroupOperationClusteredReduce) \
6148 			emit_binary_func_op(result_type, id, ops[4], ops[5], "subgroupClustered" #glsl_op); \
6149 		else \
6150 			SPIRV_CROSS_THROW("Invalid group operation."); \
6151 		break; \
6152 	}
6153 	GLSL_GROUP_OP(FAdd, Add)
6154 	GLSL_GROUP_OP(FMul, Mul)
6155 	GLSL_GROUP_OP(FMin, Min)
6156 	GLSL_GROUP_OP(FMax, Max)
6157 	GLSL_GROUP_OP(IAdd, Add)
6158 	GLSL_GROUP_OP(IMul, Mul)
6159 	GLSL_GROUP_OP(SMin, Min)
6160 	GLSL_GROUP_OP(SMax, Max)
6161 	GLSL_GROUP_OP(UMin, Min)
6162 	GLSL_GROUP_OP(UMax, Max)
6163 	GLSL_GROUP_OP(BitwiseAnd, And)
6164 	GLSL_GROUP_OP(BitwiseOr, Or)
6165 	GLSL_GROUP_OP(BitwiseXor, Xor)
6166 #undef GLSL_GROUP_OP
6167 		// clang-format on
6168 
6169 	case OpGroupNonUniformQuadSwap:
6170 	{
6171 		uint32_t direction = get<SPIRConstant>(ops[4]).scalar();
6172 		if (direction == 0)
6173 			emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapHorizontal");
6174 		else if (direction == 1)
6175 			emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapVertical");
6176 		else if (direction == 2)
6177 			emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapDiagonal");
6178 		else
6179 			SPIRV_CROSS_THROW("Invalid quad swap direction.");
6180 		break;
6181 	}
6182 
6183 	case OpGroupNonUniformQuadBroadcast:
6184 	{
6185 		emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupQuadBroadcast");
6186 		break;
6187 	}
6188 
6189 	default:
6190 		SPIRV_CROSS_THROW("Invalid opcode for subgroup.");
6191 	}
6192 
6193 	register_control_dependent_expression(id);
6194 }
6195 
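// Returns the GLSL function or constructor used to reinterpret the bits of in_type as
// out_type (e.g. floatBitsToUint, uint64BitsToDouble, packUint2x32). Returns an empty
// string when the base types already match, and falls back to a plain constructor cast
// for pointer bitcasts and same-width integer bitcasts.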
6196 string CompilerGLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type)
6197 {
6198 	// OpBitcast can deal with pointers.
6199 	if (out_type.pointer || in_type.pointer)
6200 		return type_to_glsl(out_type);
6201 
6202 	if (out_type.basetype == in_type.basetype)
6203 		return "";
6204 
6205 	assert(out_type.basetype != SPIRType::Boolean);
6206 	assert(in_type.basetype != SPIRType::Boolean);
6207 
6208 	bool integral_cast = type_is_integral(out_type) && type_is_integral(in_type);
6209 	bool same_size_cast = out_type.width == in_type.width;
6210 
6211 	// Trivial bitcast case, casts between integers.
6212 	if (integral_cast && same_size_cast)
6213 		return type_to_glsl(out_type);
6214 
6215 	// Catch-all 8-bit arithmetic casts (GL_EXT_shader_explicit_arithmetic_types).
6216 	if (out_type.width == 8 && in_type.width >= 16 && integral_cast && in_type.vecsize == 1)
6217 		return "unpack8";
6218 	else if (in_type.width == 8 && out_type.width == 16 && integral_cast && out_type.vecsize == 1)
6219 		return "pack16";
6220 	else if (in_type.width == 8 && out_type.width == 32 && integral_cast && out_type.vecsize == 1)
6221 		return "pack32";
6222 
6223 	// Floating <-> Integer special casts. Just have to enumerate all cases. :(
6224 	// 16-bit, 32-bit and 64-bit floats.
6225 	if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Float)
6226 	{
6227 		if (is_legacy_es())
6228 			SPIRV_CROSS_THROW("Float -> Uint bitcast not supported on legacy ESSL.");
6229 		else if (!options.es && options.version < 330)
6230 			require_extension_internal("GL_ARB_shader_bit_encoding");
6231 		return "floatBitsToUint";
6232 	}
6233 	else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Float)
6234 	{
6235 		if (is_legacy_es())
6236 			SPIRV_CROSS_THROW("Float -> Int bitcast not supported on legacy ESSL.");
6237 		else if (!options.es && options.version < 330)
6238 			require_extension_internal("GL_ARB_shader_bit_encoding");
6239 		return "floatBitsToInt";
6240 	}
6241 	else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::UInt)
6242 	{
6243 		if (is_legacy_es())
6244 			SPIRV_CROSS_THROW("Uint -> Float bitcast not supported on legacy ESSL.");
6245 		else if (!options.es && options.version < 330)
6246 			require_extension_internal("GL_ARB_shader_bit_encoding");
6247 		return "uintBitsToFloat";
6248 	}
6249 	else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::Int)
6250 	{
6251 		if (is_legacy_es())
6252 			SPIRV_CROSS_THROW("Int -> Float bitcast not supported on legacy ESSL.");
6253 		else if (!options.es && options.version < 330)
6254 			require_extension_internal("GL_ARB_shader_bit_encoding");
6255 		return "intBitsToFloat";
6256 	}
6257 
6258 	else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Double)
6259 		return "doubleBitsToInt64";
6260 	else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::Double)
6261 		return "doubleBitsToUint64";
6262 	else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::Int64)
6263 		return "int64BitsToDouble";
6264 	else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::UInt64)
6265 		return "uint64BitsToDouble";
6266 	else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Half)
6267 		return "float16BitsToInt16";
6268 	else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::Half)
6269 		return "float16BitsToUint16";
6270 	else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::Short)
6271 		return "int16BitsToFloat16";
6272 	else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UShort)
6273 		return "uint16BitsToFloat16";
6274 
6275 	// And finally, some even more special purpose casts.
6276 	if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::UInt && in_type.vecsize == 2)
6277 		return "packUint2x32";
6278 	else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1)
6279 		return "unpackFloat2x16";
6280 	else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Half && in_type.vecsize == 2)
6281 		return "packFloat2x16";
6282 	else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Short && in_type.vecsize == 2)
6283 		return "packInt2x16";
6284 	else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Int && in_type.vecsize == 1)
6285 		return "unpackInt2x16";
6286 	else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::UShort && in_type.vecsize == 2)
6287 		return "packUint2x16";
6288 	else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1)
6289 		return "unpackUint2x16";
6290 	else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Short && in_type.vecsize == 4)
6291 		return "packInt4x16";
6292 	else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Int64 && in_type.vecsize == 1)
6293 		return "unpackInt4x16";
6294 	else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::UShort && in_type.vecsize == 4)
6295 		return "packUint4x16";
6296 	else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::UInt64 && in_type.vecsize == 1)
6297 		return "unpackUint4x16";
6298 
6299 	return "";
6300 }
6301 
6302 string CompilerGLSL::bitcast_glsl(const SPIRType &result_type, uint32_t argument)
6303 {
6304 	auto op = bitcast_glsl_op(result_type, expression_type(argument));
6305 	if (op.empty())
6306 		return to_enclosed_unpacked_expression(argument);
6307 	else
6308 		return join(op, "(", to_unpacked_expression(argument), ")");
6309 }
6310 
6311 std::string CompilerGLSL::bitcast_expression(SPIRType::BaseType target_type, uint32_t arg)
6312 {
6313 	auto expr = to_expression(arg);
6314 	auto &src_type = expression_type(arg);
6315 	if (src_type.basetype != target_type)
6316 	{
6317 		auto target = src_type;
6318 		target.basetype = target_type;
6319 		expr = join(bitcast_glsl_op(target, src_type), "(", expr, ")");
6320 	}
6321 
6322 	return expr;
6323 }
6324 
6325 std::string CompilerGLSL::bitcast_expression(const SPIRType &target_type, SPIRType::BaseType expr_type,
6326                                              const std::string &expr)
6327 {
6328 	if (target_type.basetype == expr_type)
6329 		return expr;
6330 
6331 	auto src_type = target_type;
6332 	src_type.basetype = expr_type;
6333 	return join(bitcast_glsl_op(target_type, src_type), "(", expr, ")");
6334 }
6335 
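// Maps a SPIR-V BuiltIn to its GLSL gl_* name, adding any extension or version
// requirements along the way (e.g. GL_ARB_shader_draw_parameters for gl_BaseVertex
// before GLSL 460). The storage class disambiguates builtins such as gl_SampleMask
// vs. gl_SampleMaskIn and gl_PrimitiveID vs. gl_PrimitiveIDIn.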
6336 string CompilerGLSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage)
6337 {
6338 	switch (builtin)
6339 	{
6340 	case BuiltInPosition:
6341 		return "gl_Position";
6342 	case BuiltInPointSize:
6343 		return "gl_PointSize";
6344 	case BuiltInClipDistance:
6345 		return "gl_ClipDistance";
6346 	case BuiltInCullDistance:
6347 		return "gl_CullDistance";
6348 	case BuiltInVertexId:
6349 		if (options.vulkan_semantics)
6350 			SPIRV_CROSS_THROW(
6351 			    "Cannot implement gl_VertexID in Vulkan GLSL. This shader was created with GL semantics.");
6352 		return "gl_VertexID";
6353 	case BuiltInInstanceId:
6354 		if (options.vulkan_semantics)
6355 			SPIRV_CROSS_THROW(
6356 			    "Cannot implement gl_InstanceID in Vulkan GLSL. This shader was created with GL semantics.");
6357 		return "gl_InstanceID";
6358 	case BuiltInVertexIndex:
6359 		if (options.vulkan_semantics)
6360 			return "gl_VertexIndex";
6361 		else
6362 			return "gl_VertexID"; // gl_VertexID already has the base offset applied.
6363 	case BuiltInInstanceIndex:
6364 		if (options.vulkan_semantics)
6365 			return "gl_InstanceIndex";
6366 		else if (options.vertex.support_nonzero_base_instance)
6367 			return "(gl_InstanceID + SPIRV_Cross_BaseInstance)"; // ... but not gl_InstanceID.
6368 		else
6369 			return "gl_InstanceID";
6370 	case BuiltInPrimitiveId:
6371 		if (storage == StorageClassInput && get_entry_point().model == ExecutionModelGeometry)
6372 			return "gl_PrimitiveIDIn";
6373 		else
6374 			return "gl_PrimitiveID";
6375 	case BuiltInInvocationId:
6376 		return "gl_InvocationID";
6377 	case BuiltInLayer:
6378 		return "gl_Layer";
6379 	case BuiltInViewportIndex:
6380 		return "gl_ViewportIndex";
6381 	case BuiltInTessLevelOuter:
6382 		return "gl_TessLevelOuter";
6383 	case BuiltInTessLevelInner:
6384 		return "gl_TessLevelInner";
6385 	case BuiltInTessCoord:
6386 		return "gl_TessCoord";
6387 	case BuiltInFragCoord:
6388 		return "gl_FragCoord";
6389 	case BuiltInPointCoord:
6390 		return "gl_PointCoord";
6391 	case BuiltInFrontFacing:
6392 		return "gl_FrontFacing";
6393 	case BuiltInFragDepth:
6394 		return "gl_FragDepth";
6395 	case BuiltInNumWorkgroups:
6396 		return "gl_NumWorkGroups";
6397 	case BuiltInWorkgroupSize:
6398 		return "gl_WorkGroupSize";
6399 	case BuiltInWorkgroupId:
6400 		return "gl_WorkGroupID";
6401 	case BuiltInLocalInvocationId:
6402 		return "gl_LocalInvocationID";
6403 	case BuiltInGlobalInvocationId:
6404 		return "gl_GlobalInvocationID";
6405 	case BuiltInLocalInvocationIndex:
6406 		return "gl_LocalInvocationIndex";
6407 	case BuiltInHelperInvocation:
6408 		return "gl_HelperInvocation";
6409 	case BuiltInBaseVertex:
6410 		if (options.es)
6411 			SPIRV_CROSS_THROW("BaseVertex not supported in ES profile.");
6412 		if (options.version < 460)
6413 		{
6414 			require_extension_internal("GL_ARB_shader_draw_parameters");
6415 			return "gl_BaseVertexARB";
6416 		}
6417 		return "gl_BaseVertex";
6418 	case BuiltInBaseInstance:
6419 		if (options.es)
6420 			SPIRV_CROSS_THROW("BaseInstance not supported in ES profile.");
6421 		if (options.version < 460)
6422 		{
6423 			require_extension_internal("GL_ARB_shader_draw_parameters");
6424 			return "gl_BaseInstanceARB";
6425 		}
6426 		return "gl_BaseInstance";
6427 	case BuiltInDrawIndex:
6428 		if (options.es)
6429 			SPIRV_CROSS_THROW("DrawIndex not supported in ES profile.");
6430 		if (options.version < 460)
6431 		{
6432 			require_extension_internal("GL_ARB_shader_draw_parameters");
6433 			return "gl_DrawIDARB";
6434 		}
6435 		return "gl_DrawID";
6436 
6437 	case BuiltInSampleId:
6438 		if (options.es && options.version < 320)
6439 			require_extension_internal("GL_OES_sample_variables");
6440 		if (!options.es && options.version < 400)
6441 			SPIRV_CROSS_THROW("gl_SampleID not supported before GLSL 400.");
6442 		return "gl_SampleID";
6443 
6444 	case BuiltInSampleMask:
6445 		if (options.es && options.version < 320)
6446 			require_extension_internal("GL_OES_sample_variables");
6447 		if (!options.es && options.version < 400)
6448 			SPIRV_CROSS_THROW("gl_SampleMask/gl_SampleMaskIn not supported before GLSL 400.");
6449 
6450 		if (storage == StorageClassInput)
6451 			return "gl_SampleMaskIn";
6452 		else
6453 			return "gl_SampleMask";
6454 
6455 	case BuiltInSamplePosition:
6456 		if (options.es && options.version < 320)
6457 			require_extension_internal("GL_OES_sample_variables");
6458 		if (!options.es && options.version < 400)
6459 			SPIRV_CROSS_THROW("gl_SamplePosition not supported before GLSL 400.");
6460 		return "gl_SamplePosition";
6461 
6462 	case BuiltInViewIndex:
6463 		if (options.vulkan_semantics)
6464 		{
6465 			require_extension_internal("GL_EXT_multiview");
6466 			return "gl_ViewIndex";
6467 		}
6468 		else
6469 		{
6470 			require_extension_internal("GL_OVR_multiview2");
6471 			return "gl_ViewID_OVR";
6472 		}
6473 
6474 	case BuiltInNumSubgroups:
6475 		if (!options.vulkan_semantics)
6476 			SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup.");
6477 		require_extension_internal("GL_KHR_shader_subgroup_basic");
6478 		return "gl_NumSubgroups";
6479 
6480 	case BuiltInSubgroupId:
6481 		if (!options.vulkan_semantics)
6482 			SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup.");
6483 		require_extension_internal("GL_KHR_shader_subgroup_basic");
6484 		return "gl_SubgroupID";
6485 
6486 	case BuiltInSubgroupSize:
6487 		if (!options.vulkan_semantics)
6488 			SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup.");
6489 		require_extension_internal("GL_KHR_shader_subgroup_basic");
6490 		return "gl_SubgroupSize";
6491 
6492 	case BuiltInSubgroupLocalInvocationId:
6493 		if (!options.vulkan_semantics)
6494 			SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup.");
6495 		require_extension_internal("GL_KHR_shader_subgroup_basic");
6496 		return "gl_SubgroupInvocationID";
6497 
6498 	case BuiltInSubgroupEqMask:
6499 		if (!options.vulkan_semantics)
6500 			SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup.");
6501 		require_extension_internal("GL_KHR_shader_subgroup_ballot");
6502 		return "gl_SubgroupEqMask";
6503 
6504 	case BuiltInSubgroupGeMask:
6505 		if (!options.vulkan_semantics)
6506 			SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup.");
6507 		require_extension_internal("GL_KHR_shader_subgroup_ballot");
6508 		return "gl_SubgroupGeMask";
6509 
6510 	case BuiltInSubgroupGtMask:
6511 		if (!options.vulkan_semantics)
6512 			SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup.");
6513 		require_extension_internal("GL_KHR_shader_subgroup_ballot");
6514 		return "gl_SubgroupGtMask";
6515 
6516 	case BuiltInSubgroupLeMask:
6517 		if (!options.vulkan_semantics)
6518 			SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup.");
6519 		require_extension_internal("GL_KHR_shader_subgroup_ballot");
6520 		return "gl_SubgroupLeMask";
6521 
6522 	case BuiltInSubgroupLtMask:
6523 		if (!options.vulkan_semantics)
6524 			SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup.");
6525 		require_extension_internal("GL_KHR_shader_subgroup_ballot");
6526 		return "gl_SubgroupLtMask";
6527 
6528 	case BuiltInLaunchIdNV:
6529 		return "gl_LaunchIDNV";
6530 	case BuiltInLaunchSizeNV:
6531 		return "gl_LaunchSizeNV";
6532 	case BuiltInWorldRayOriginNV:
6533 		return "gl_WorldRayOriginNV";
6534 	case BuiltInWorldRayDirectionNV:
6535 		return "gl_WorldRayDirectionNV";
6536 	case BuiltInObjectRayOriginNV:
6537 		return "gl_ObjectRayOriginNV";
6538 	case BuiltInObjectRayDirectionNV:
6539 		return "gl_ObjectRayDirectionNV";
6540 	case BuiltInRayTminNV:
6541 		return "gl_RayTminNV";
6542 	case BuiltInRayTmaxNV:
6543 		return "gl_RayTmaxNV";
6544 	case BuiltInInstanceCustomIndexNV:
6545 		return "gl_InstanceCustomIndexNV";
6546 	case BuiltInObjectToWorldNV:
6547 		return "gl_ObjectToWorldNV";
6548 	case BuiltInWorldToObjectNV:
6549 		return "gl_WorldToObjectNV";
6550 	case BuiltInHitTNV:
6551 		return "gl_HitTNV";
6552 	case BuiltInHitKindNV:
6553 		return "gl_HitKindNV";
6554 	case BuiltInIncomingRayFlagsNV:
6555 		return "gl_IncomingRayFlagsNV";
6556 
6557 	case BuiltInBaryCoordNV:
6558 	{
6559 		if (options.es && options.version < 320)
6560 			SPIRV_CROSS_THROW("gl_BaryCoordNV requires ESSL 320.");
6561 		else if (!options.es && options.version < 450)
6562 			SPIRV_CROSS_THROW("gl_BaryCoordNV requires GLSL 450.");
6563 		require_extension_internal("GL_NV_fragment_shader_barycentric");
6564 		return "gl_BaryCoordNV";
6565 	}
6566 
6567 	case BuiltInBaryCoordNoPerspNV:
6568 	{
6569 		if (options.es && options.version < 320)
6570 			SPIRV_CROSS_THROW("gl_BaryCoordNoPerspNV requires ESSL 320.");
6571 		else if (!options.es && options.version < 450)
6572 			SPIRV_CROSS_THROW("gl_BaryCoordNoPerspNV requires GLSL 450.");
6573 		require_extension_internal("GL_NV_fragment_shader_barycentric");
6574 		return "gl_BaryCoordNoPerspNV";
6575 	}
6576 
6577 	case BuiltInFragStencilRefEXT:
6578 	{
6579 		if (!options.es)
6580 		{
6581 			require_extension_internal("GL_ARB_shader_stencil_export");
6582 			return "gl_FragStencilRefARB";
6583 		}
6584 		else
6585 			SPIRV_CROSS_THROW("Stencil export not supported in GLES.");
6586 	}
6587 
6588 	case BuiltInDeviceIndex:
6589 		if (!options.vulkan_semantics)
6590 			SPIRV_CROSS_THROW("Need Vulkan semantics for device group support.");
6591 		require_extension_internal("GL_EXT_device_group");
6592 		return "gl_DeviceIndex";
6593 
6594 	default:
6595 		return join("gl_BuiltIn_", convert_to_string(builtin));
6596 	}
6597 }
6598 
6599 const char *CompilerGLSL::index_to_swizzle(uint32_t index)
6600 {
6601 	switch (index)
6602 	{
6603 	case 0:
6604 		return "x";
6605 	case 1:
6606 		return "y";
6607 	case 2:
6608 		return "z";
6609 	case 3:
6610 		return "w";
6611 	default:
6612 		SPIRV_CROSS_THROW("Swizzle index out of range");
6613 	}
6614 }
6615 
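// Appends a single "[index]" term to an access chain expression. Literal indices are
// emitted as numbers, dynamic ones as expressions, and indices into arrays of
// UBOs/SSBOs decorated NonUniformEXT are wrapped in the backend's nonuniform qualifier.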
6616 void CompilerGLSL::access_chain_internal_append_index(std::string &expr, uint32_t /*base*/, const SPIRType *type,
6617                                                       AccessChainFlags flags, bool & /*access_chain_is_arrayed*/,
6618                                                       uint32_t index)
6619 {
6620 	bool index_is_literal = (flags & ACCESS_CHAIN_INDEX_IS_LITERAL_BIT) != 0;
6621 	bool register_expression_read = (flags & ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT) == 0;
6622 
6623 	expr += "[";
6624 
6625 	// If we are indexing into an array of SSBOs or UBOs, we need to index it with a non-uniform qualifier.
6626 	bool nonuniform_index =
6627 	    has_decoration(index, DecorationNonUniformEXT) &&
6628 	    (has_decoration(type->self, DecorationBlock) || has_decoration(type->self, DecorationBufferBlock));
6629 	if (nonuniform_index)
6630 	{
6631 		expr += backend.nonuniform_qualifier;
6632 		expr += "(";
6633 	}
6634 
6635 	if (index_is_literal)
6636 		expr += convert_to_string(index);
6637 	else
6638 		expr += to_expression(index, register_expression_read);
6639 
6640 	if (nonuniform_index)
6641 		expr += ")";
6642 
6643 	expr += "]";
6644 }
6645 
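// Builds the textual GLSL expression for an OpAccessChain starting at 'base'. Each index
// peels one level off the type hierarchy (array, struct member, matrix column, vector
// component), handling flattened multidimensional arrays, gl_in/gl_out builtin blocks,
// row-major matrices and packed vectors along the way. Resulting metadata (transpose,
// packing, invariance, physical type) is reported back through 'meta'.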
6646 string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count,
6647                                            AccessChainFlags flags, AccessChainMeta *meta)
6648 {
6649 	string expr;
6650 
6651 	bool index_is_literal = (flags & ACCESS_CHAIN_INDEX_IS_LITERAL_BIT) != 0;
6652 	bool chain_only = (flags & ACCESS_CHAIN_CHAIN_ONLY_BIT) != 0;
6653 	bool ptr_chain = (flags & ACCESS_CHAIN_PTR_CHAIN_BIT) != 0;
6654 	bool register_expression_read = (flags & ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT) == 0;
6655 
6656 	if (!chain_only)
6657 	{
6658 		// We handle transpose explicitly, so don't resolve that here.
6659 		auto *e = maybe_get<SPIRExpression>(base);
6660 		bool old_transpose = e && e->need_transpose;
6661 		if (e)
6662 			e->need_transpose = false;
6663 		expr = to_enclosed_expression(base, register_expression_read);
6664 		if (e)
6665 			e->need_transpose = old_transpose;
6666 	}
6667 
6668 	// Start traversing type hierarchy at the proper non-pointer types,
6669 	// but keep type_id referencing the original pointer for use below.
6670 	uint32_t type_id = expression_type_id(base);
6671 
6672 	if (!backend.native_pointers)
6673 	{
6674 		if (ptr_chain)
6675 			SPIRV_CROSS_THROW("Backend does not support native pointers and does not support OpPtrAccessChain.");
6676 
6677 		// Wrapped buffer reference pointer types will need to poke into the internal "value" member before
6678 		// continuing the access chain.
6679 		if (should_dereference(base))
6680 		{
6681 			auto &type = get<SPIRType>(type_id);
6682 			expr = dereference_expression(type, expr);
6683 		}
6684 	}
6685 
6686 	const auto *type = &get_pointee_type(type_id);
6687 
6688 	bool access_chain_is_arrayed = expr.find_first_of('[') != string::npos;
6689 	bool row_major_matrix_needs_conversion = is_non_native_row_major_matrix(base);
6690 	bool is_packed = has_extended_decoration(base, SPIRVCrossDecorationPhysicalTypePacked);
6691 	uint32_t physical_type = get_extended_decoration(base, SPIRVCrossDecorationPhysicalTypeID);
6692 	bool is_invariant = has_decoration(base, DecorationInvariant);
6693 	bool pending_array_enclose = false;
6694 	bool dimension_flatten = false;
6695 
6696 	const auto append_index = [&](uint32_t index) {
6697 		access_chain_internal_append_index(expr, base, type, flags, access_chain_is_arrayed, index);
6698 	};
6699 
6700 	for (uint32_t i = 0; i < count; i++)
6701 	{
6702 		uint32_t index = indices[i];
6703 
6704 		// Pointer chains
6705 		if (ptr_chain && i == 0)
6706 		{
6707 			// If we are flattening multidimensional arrays, only create opening bracket on first
6708 			// array index.
6709 			if (options.flatten_multidimensional_arrays)
6710 			{
6711 				dimension_flatten = type->array.size() >= 1;
6712 				pending_array_enclose = dimension_flatten;
6713 				if (pending_array_enclose)
6714 					expr += "[";
6715 			}
6716 
6717 			if (options.flatten_multidimensional_arrays && dimension_flatten)
6718 			{
6719 				// If we are flattening multidimensional arrays, do manual stride computation.
6720 				if (index_is_literal)
6721 					expr += convert_to_string(index);
6722 				else
6723 					expr += to_enclosed_expression(index, register_expression_read);
6724 
6725 				for (auto j = uint32_t(type->array.size()); j; j--)
6726 				{
6727 					expr += " * ";
6728 					expr += enclose_expression(to_array_size(*type, j - 1));
6729 				}
6730 
6731 				if (type->array.empty())
6732 					pending_array_enclose = false;
6733 				else
6734 					expr += " + ";
6735 
6736 				if (!pending_array_enclose)
6737 					expr += "]";
6738 			}
6739 			else
6740 			{
6741 				append_index(index);
6742 			}
6743 
6744 			if (type->basetype == SPIRType::ControlPointArray)
6745 			{
6746 				type_id = type->parent_type;
6747 				type = &get<SPIRType>(type_id);
6748 			}
6749 
6750 			access_chain_is_arrayed = true;
6751 		}
6752 		// Arrays
6753 		else if (!type->array.empty())
6754 		{
6755 			// If we are flattening multidimensional arrays, only create opening bracket on first
6756 			// array index.
6757 			if (options.flatten_multidimensional_arrays && !pending_array_enclose)
6758 			{
6759 				dimension_flatten = type->array.size() > 1;
6760 				pending_array_enclose = dimension_flatten;
6761 				if (pending_array_enclose)
6762 					expr += "[";
6763 			}
6764 
6765 			assert(type->parent_type);
6766 
6767 			auto *var = maybe_get<SPIRVariable>(base);
6768 			if (backend.force_gl_in_out_block && i == 0 && var && is_builtin_variable(*var) &&
6769 			    !has_decoration(type->self, DecorationBlock))
6770 			{
6771 				// This deals with scenarios for tesc/geom where arrays of gl_Position[] are declared.
6772 				// Normally, these variables live in blocks when compiled from GLSL,
6773 				// but HLSL seems to just emit straight arrays here.
6774 				// We must pretend this access goes through gl_in/gl_out arrays
6775 				// to be able to access certain builtins as arrays.
6776 				auto builtin = ir.meta[base].decoration.builtin_type;
6777 				switch (builtin)
6778 				{
6779 				// case BuiltInCullDistance: // These are already arrays, need to figure out rules for these in tess/geom.
6780 				// case BuiltInClipDistance:
6781 				case BuiltInPosition:
6782 				case BuiltInPointSize:
6783 					if (var->storage == StorageClassInput)
6784 						expr = join("gl_in[", to_expression(index, register_expression_read), "].", expr);
6785 					else if (var->storage == StorageClassOutput)
6786 						expr = join("gl_out[", to_expression(index, register_expression_read), "].", expr);
6787 					else
6788 						append_index(index);
6789 					break;
6790 
6791 				default:
6792 					append_index(index);
6793 					break;
6794 				}
6795 			}
6796 			else if (options.flatten_multidimensional_arrays && dimension_flatten)
6797 			{
6798 				// If we are flattening multidimensional arrays, do manual stride computation.
6799 				auto &parent_type = get<SPIRType>(type->parent_type);
6800 
6801 				if (index_is_literal)
6802 					expr += convert_to_string(index);
6803 				else
6804 					expr += to_enclosed_expression(index, register_expression_read);
6805 
6806 				for (auto j = uint32_t(parent_type.array.size()); j; j--)
6807 				{
6808 					expr += " * ";
6809 					expr += enclose_expression(to_array_size(parent_type, j - 1));
6810 				}
6811 
6812 				if (parent_type.array.empty())
6813 					pending_array_enclose = false;
6814 				else
6815 					expr += " + ";
6816 
6817 				if (!pending_array_enclose)
6818 					expr += "]";
6819 			}
6820 			// Some builtins are arrays in SPIR-V but not in other languages, e.g. gl_SampleMask[] is an array in SPIR-V but not in Metal.
6821 			// By throwing away the index, we imply the index was 0, which it must be for gl_SampleMask.
6822 			else if (!builtin_translates_to_nonarray(BuiltIn(get_decoration(base, DecorationBuiltIn))))
6823 			{
6824 				append_index(index);
6825 			}
6826 
6827 			type_id = type->parent_type;
6828 			type = &get<SPIRType>(type_id);
6829 
6830 			access_chain_is_arrayed = true;
6831 		}
6832 		// For structs, the index refers to a constant, which indexes into the members.
6833 		// We also check if this member is a builtin, since we then replace the entire expression with the builtin one.
6834 		else if (type->basetype == SPIRType::Struct)
6835 		{
6836 			if (!index_is_literal)
6837 				index = get<SPIRConstant>(index).scalar();
6838 
6839 			if (index >= type->member_types.size())
6840 				SPIRV_CROSS_THROW("Member index is out of bounds!");
6841 
6842 			BuiltIn builtin;
6843 			if (is_member_builtin(*type, index, &builtin))
6844 			{
6845 				if (access_chain_is_arrayed)
6846 				{
6847 					expr += ".";
6848 					expr += builtin_to_glsl(builtin, type->storage);
6849 				}
6850 				else
6851 					expr = builtin_to_glsl(builtin, type->storage);
6852 			}
6853 			else
6854 			{
6855 				// If the member has a qualified name, use it as the entire chain
6856 				string qual_mbr_name = get_member_qualified_name(type_id, index);
6857 				if (!qual_mbr_name.empty())
6858 					expr = qual_mbr_name;
6859 				else
6860 					expr += to_member_reference(base, *type, index, ptr_chain);
6861 			}
6862 
6863 			if (has_member_decoration(type->self, index, DecorationInvariant))
6864 				is_invariant = true;
6865 
6866 			is_packed = member_is_packed_physical_type(*type, index);
6867 			if (member_is_remapped_physical_type(*type, index))
6868 				physical_type = get_extended_member_decoration(type->self, index, SPIRVCrossDecorationPhysicalTypeID);
6869 			else
6870 				physical_type = 0;
6871 
6872 			row_major_matrix_needs_conversion = member_is_non_native_row_major_matrix(*type, index);
6873 			type = &get<SPIRType>(type->member_types[index]);
6874 		}
6875 		// Matrix -> Vector
6876 		else if (type->columns > 1)
6877 		{
6878 			// If we have a row-major matrix here, we need to defer any transpose in case this access chain
6879 			// is used to store a column. We can resolve it right here and now if we access a scalar directly,
6880 			// by flipping indexing order of the matrix.
6881 
6882 			expr += "[";
6883 			if (index_is_literal)
6884 				expr += convert_to_string(index);
6885 			else
6886 				expr += to_expression(index, register_expression_read);
6887 			expr += "]";
6888 
6889 			type_id = type->parent_type;
6890 			type = &get<SPIRType>(type_id);
6891 		}
6892 		// Vector -> Scalar
6893 		else if (type->vecsize > 1)
6894 		{
6895 			string deferred_index;
6896 			if (row_major_matrix_needs_conversion)
6897 			{
6898 				// Flip indexing order.
6899 				auto column_index = expr.find_last_of('[');
6900 				if (column_index != string::npos)
6901 				{
6902 					deferred_index = expr.substr(column_index);
6903 					expr.resize(column_index);
6904 				}
6905 			}
6906 
6907 			if (index_is_literal && !is_packed && !row_major_matrix_needs_conversion)
6908 			{
6909 				expr += ".";
6910 				expr += index_to_swizzle(index);
6911 			}
6912 			else if (ir.ids[index].get_type() == TypeConstant && !is_packed && !row_major_matrix_needs_conversion)
6913 			{
6914 				auto &c = get<SPIRConstant>(index);
6915 				if (c.specialization)
6916 				{
6917 					// If the index is a spec constant, we cannot turn extract into a swizzle.
6918 					expr += join("[", to_expression(index), "]");
6919 				}
6920 				else
6921 				{
6922 					expr += ".";
6923 					expr += index_to_swizzle(c.scalar());
6924 				}
6925 			}
6926 			else if (index_is_literal)
6927 			{
6928 				// For packed vectors, we can only access them as an array, not by swizzle.
6929 				expr += join("[", index, "]");
6930 			}
6931 			else
6932 			{
6933 				expr += "[";
6934 				expr += to_expression(index, register_expression_read);
6935 				expr += "]";
6936 			}
6937 
6938 			expr += deferred_index;
6939 			row_major_matrix_needs_conversion = false;
6940 
6941 			is_packed = false;
6942 			physical_type = 0;
6943 			type_id = type->parent_type;
6944 			type = &get<SPIRType>(type_id);
6945 		}
6946 		else if (!backend.allow_truncated_access_chain)
6947 			SPIRV_CROSS_THROW("Cannot subdivide a scalar value!");
6948 	}
6949 
6950 	if (pending_array_enclose)
6951 	{
6952 		SPIRV_CROSS_THROW("Flattening of multidimensional arrays was enabled, "
6953 		                  "but the access chain was terminated in the middle of a multidimensional array. "
6954 		                  "This is not supported.");
6955 	}
6956 
6957 	if (meta)
6958 	{
6959 		meta->need_transpose = row_major_matrix_needs_conversion;
6960 		meta->storage_is_packed = is_packed;
6961 		meta->storage_is_invariant = is_invariant;
6962 		meta->storage_physical_type = physical_type;
6963 	}
6964 
6965 	return expr;
6966 }
6967 
6968 string CompilerGLSL::to_flattened_struct_member(const SPIRVariable &var, uint32_t index)
6969 {
6970 	auto &type = get<SPIRType>(var.basetype);
6971 	return sanitize_underscores(join(to_name(var.self), "_", to_member_name(type, index)));
6972 }
6973 
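// Front-end for access chain generation: redirects to the flattened paths when the base
// is a flattened uniform/push-constant block or a flattened I/O struct, and otherwise
// defers to access_chain_internal.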
6974 string CompilerGLSL::access_chain(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type,
6975                                   AccessChainMeta *meta, bool ptr_chain)
6976 {
6977 	if (flattened_buffer_blocks.count(base))
6978 	{
6979 		uint32_t matrix_stride = 0;
6980 		bool need_transpose = false;
6981 		flattened_access_chain_offset(expression_type(base), indices, count, 0, 16, &need_transpose, &matrix_stride,
6982 		                              ptr_chain);
6983 
6984 		if (meta)
6985 		{
6986 			meta->need_transpose = target_type.columns > 1 && need_transpose;
6987 			meta->storage_is_packed = false;
6988 		}
6989 
6990 		return flattened_access_chain(base, indices, count, target_type, 0, matrix_stride, need_transpose);
6991 	}
6992 	else if (flattened_structs.count(base) && count > 0)
6993 	{
6994 		AccessChainFlags flags = ACCESS_CHAIN_CHAIN_ONLY_BIT | ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT;
6995 		if (ptr_chain)
6996 			flags |= ACCESS_CHAIN_PTR_CHAIN_BIT;
6997 
6998 		auto chain = access_chain_internal(base, indices, count, flags, nullptr).substr(1);
6999 		if (meta)
7000 		{
7001 			meta->need_transpose = false;
7002 			meta->storage_is_packed = false;
7003 		}
7004 		return sanitize_underscores(join(to_name(base), "_", chain));
7005 	}
7006 	else
7007 	{
7008 		AccessChainFlags flags = ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT;
7009 		if (ptr_chain)
7010 			flags |= ACCESS_CHAIN_PTR_CHAIN_BIT;
7011 		return access_chain_internal(base, indices, count, flags, meta);
7012 	}
7013 }
7014 
7015 string CompilerGLSL::load_flattened_struct(SPIRVariable &var)
7016 {
7017 	auto expr = type_to_glsl_constructor(get<SPIRType>(var.basetype));
7018 	expr += '(';
7019 
7020 	auto &type = get<SPIRType>(var.basetype);
7021 	for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
7022 	{
7023 		if (i)
7024 			expr += ", ";
7025 
7026 		// Flatten the varyings.
7027 		// Apply name transformation for flattened I/O blocks.
7028 		expr += to_flattened_struct_member(var, i);
7029 	}
7030 	expr += ')';
7031 	return expr;
7032 }
7033 
7034 void CompilerGLSL::store_flattened_struct(SPIRVariable &var, uint32_t value)
7035 {
7036 	// We're trying to store a structure which has been flattened.
7037 	// Need to copy members one by one.
7038 	auto rhs = to_expression(value);
7039 
7040 	// Store result locally.
7041 	// Since we're declaring a variable potentially multiple times here,
7042 	// store the variable in an isolated scope.
7043 	begin_scope();
7044 	statement(variable_decl_function_local(var), " = ", rhs, ";");
7045 
7046 	auto &type = get<SPIRType>(var.basetype);
7047 	for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
7048 	{
7049 		// Flatten the varyings.
7050 		// Apply name transformation for flattened I/O blocks.
7051 
7052 		auto lhs = sanitize_underscores(join(to_name(var.self), "_", to_member_name(type, i)));
7053 		rhs = join(to_name(var.self), ".", to_member_name(type, i));
7054 		statement(lhs, " = ", rhs, ";");
7055 	}
7056 	end_scope();
7057 }
7058 
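// Emits a load expression for an access chain into a flattened buffer block, where the
// block is exposed as an array of vec4s. Dispatches on the target type: structs and
// matrices are reconstructed member-by-member / column-by-column, while vectors and
// scalars are read directly with the appropriate swizzle.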
7059 std::string CompilerGLSL::flattened_access_chain(uint32_t base, const uint32_t *indices, uint32_t count,
7060                                                  const SPIRType &target_type, uint32_t offset, uint32_t matrix_stride,
7061                                                  bool need_transpose)
7062 {
7063 	if (!target_type.array.empty())
7064 		SPIRV_CROSS_THROW("Access chains that result in an array cannot be flattened");
7065 	else if (target_type.basetype == SPIRType::Struct)
7066 		return flattened_access_chain_struct(base, indices, count, target_type, offset);
7067 	else if (target_type.columns > 1)
7068 		return flattened_access_chain_matrix(base, indices, count, target_type, offset, matrix_stride, need_transpose);
7069 	else
7070 		return flattened_access_chain_vector(base, indices, count, target_type, offset, matrix_stride, need_transpose);
7071 }
7072 
7073 std::string CompilerGLSL::flattened_access_chain_struct(uint32_t base, const uint32_t *indices, uint32_t count,
7074                                                         const SPIRType &target_type, uint32_t offset)
7075 {
7076 	std::string expr;
7077 
7078 	expr += type_to_glsl_constructor(target_type);
7079 	expr += "(";
7080 
7081 	for (uint32_t i = 0; i < uint32_t(target_type.member_types.size()); ++i)
7082 	{
7083 		if (i != 0)
7084 			expr += ", ";
7085 
7086 		const SPIRType &member_type = get<SPIRType>(target_type.member_types[i]);
7087 		uint32_t member_offset = type_struct_member_offset(target_type, i);
7088 
7089 		// The access chain terminates at the struct, so we need to find matrix strides and row-major information
7090 		// ahead of time.
7091 		bool need_transpose = false;
7092 		uint32_t matrix_stride = 0;
7093 		if (member_type.columns > 1)
7094 		{
7095 			need_transpose = combined_decoration_for_member(target_type, i).get(DecorationRowMajor);
7096 			matrix_stride = type_struct_member_matrix_stride(target_type, i);
7097 		}
7098 
7099 		auto tmp = flattened_access_chain(base, indices, count, member_type, offset + member_offset, matrix_stride,
7100 		                                  need_transpose);
7101 
7102 		// Cannot forward transpositions, so resolve them here.
7103 		if (need_transpose)
7104 			expr += convert_row_major_matrix(tmp, member_type, 0, false);
7105 		else
7106 			expr += tmp;
7107 	}
7108 
7109 	expr += ")";
7110 
7111 	return expr;
7112 }
7113 
7114 std::string CompilerGLSL::flattened_access_chain_matrix(uint32_t base, const uint32_t *indices, uint32_t count,
7115                                                         const SPIRType &target_type, uint32_t offset,
7116                                                         uint32_t matrix_stride, bool need_transpose)
7117 {
7118 	assert(matrix_stride);
7119 	SPIRType tmp_type = target_type;
7120 	if (need_transpose)
7121 		swap(tmp_type.vecsize, tmp_type.columns);
7122 
7123 	std::string expr;
7124 
7125 	expr += type_to_glsl_constructor(tmp_type);
7126 	expr += "(";
7127 
7128 	for (uint32_t i = 0; i < tmp_type.columns; i++)
7129 	{
7130 		if (i != 0)
7131 			expr += ", ";
7132 
7133 		expr += flattened_access_chain_vector(base, indices, count, tmp_type, offset + i * matrix_stride, matrix_stride,
7134 		                                      /* need_transpose= */ false);
7135 	}
7136 
7137 	expr += ")";
7138 
7139 	return expr;
7140 }
7141 
7142 std::string CompilerGLSL::flattened_access_chain_vector(uint32_t base, const uint32_t *indices, uint32_t count,
7143                                                         const SPIRType &target_type, uint32_t offset,
7144                                                         uint32_t matrix_stride, bool need_transpose)
7145 {
7146 	auto result = flattened_access_chain_offset(expression_type(base), indices, count, offset, 16);
7147 
7148 	auto buffer_name = to_name(expression_type(base).self);
7149 
7150 	if (need_transpose)
7151 	{
7152 		std::string expr;
7153 
7154 		if (target_type.vecsize > 1)
7155 		{
7156 			expr += type_to_glsl_constructor(target_type);
7157 			expr += "(";
7158 		}
7159 
7160 		for (uint32_t i = 0; i < target_type.vecsize; ++i)
7161 		{
7162 			if (i != 0)
7163 				expr += ", ";
7164 
7165 			uint32_t component_offset = result.second + i * matrix_stride;
7166 
7167 			assert(component_offset % (target_type.width / 8) == 0);
7168 			uint32_t index = component_offset / (target_type.width / 8);
7169 
7170 			expr += buffer_name;
7171 			expr += "[";
7172 			expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... that is either empty or ends with a +
7173 			expr += convert_to_string(index / 4);
7174 			expr += "]";
7175 
7176 			expr += vector_swizzle(1, index % 4);
7177 		}
7178 
7179 		if (target_type.vecsize > 1)
7180 		{
7181 			expr += ")";
7182 		}
7183 
7184 		return expr;
7185 	}
7186 	else
7187 	{
7188 		assert(result.second % (target_type.width / 8) == 0);
7189 		uint32_t index = result.second / (target_type.width / 8);
7190 
7191 		std::string expr;
7192 
7193 		expr += buffer_name;
7194 		expr += "[";
7195 		expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... that is either empty or ends with a +
7196 		expr += convert_to_string(index / 4);
7197 		expr += "]";
7198 
7199 		expr += vector_swizzle(target_type.vecsize, index % 4);
7200 
7201 		return expr;
7202 	}
7203 }
7204 
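// Computes the byte offset of an access chain into a flattened buffer block. Constant
// indices fold into the returned numeric offset, while dynamic indices accumulate into
// the returned expression string (scaled by word_stride). Also reports the matrix stride
// and whether a row-major transpose is still pending for the caller to resolve.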
7205 std::pair<std::string, uint32_t> CompilerGLSL::flattened_access_chain_offset(
7206     const SPIRType &basetype, const uint32_t *indices, uint32_t count, uint32_t offset, uint32_t word_stride,
7207     bool *need_transpose, uint32_t *out_matrix_stride, bool ptr_chain)
7208 {
7209 	// Start traversing type hierarchy at the proper non-pointer types.
7210 	const auto *type = &get_pointee_type(basetype);
7211 
7212 	// This holds the type of the current pointer which we are traversing through.
7213 	// We always start out from a struct type which is the block.
7214 	// This is primarily used to reflect the array strides and matrix strides later.
7215 	// For the first access chain index, type_id won't be needed, so just keep it as 0, it will be set
7216 	// accordingly as members of structs are accessed.
7217 	assert(type->basetype == SPIRType::Struct);
7218 	uint32_t type_id = 0;
7219 
7220 	std::string expr;
7221 
7222 	// Inherit matrix information in case we are access chaining a vector which might have come from a row major layout.
7223 	bool row_major_matrix_needs_conversion = need_transpose ? *need_transpose : false;
7224 	uint32_t matrix_stride = out_matrix_stride ? *out_matrix_stride : 0;
7225 
7226 	for (uint32_t i = 0; i < count; i++)
7227 	{
7228 		uint32_t index = indices[i];
7229 
7230 		// Pointers
7231 		if (ptr_chain && i == 0)
7232 		{
7233 			// Here, the pointer type will be decorated with an array stride.
7234 			uint32_t array_stride = get_decoration(basetype.self, DecorationArrayStride);
7235 			if (!array_stride)
7236 				SPIRV_CROSS_THROW("SPIR-V does not define ArrayStride for buffer block.");
7237 
7238 			auto *constant = maybe_get<SPIRConstant>(index);
7239 			if (constant)
7240 			{
7241 				// Constant array access.
7242 				offset += constant->scalar() * array_stride;
7243 			}
7244 			else
7245 			{
7246 				// Dynamic array access.
7247 				if (array_stride % word_stride)
7248 				{
7249 					SPIRV_CROSS_THROW(
7250 					    "Array stride for dynamic indexing must be divisible by the size of a 4-component vector. "
7251 					    "Likely culprit here is a float or vec2 array inside a push constant block which is std430. "
7252 					    "This cannot be flattened. Try using std140 layout instead.");
7253 				}
7254 
7255 				expr += to_enclosed_expression(index);
7256 				expr += " * ";
7257 				expr += convert_to_string(array_stride / word_stride);
7258 				expr += " + ";
7259 			}
7260 			// Type ID is unchanged.
7261 		}
7262 		// Arrays
7263 		else if (!type->array.empty())
7264 		{
7265 			// Here, the type_id will be a type ID for the array type itself.
7266 			uint32_t array_stride = get_decoration(type_id, DecorationArrayStride);
7267 			if (!array_stride)
7268 				SPIRV_CROSS_THROW("SPIR-V does not define ArrayStride for buffer block.");
7269 
7270 			auto *constant = maybe_get<SPIRConstant>(index);
7271 			if (constant)
7272 			{
7273 				// Constant array access.
7274 				offset += constant->scalar() * array_stride;
7275 			}
7276 			else
7277 			{
7278 				// Dynamic array access.
7279 				if (array_stride % word_stride)
7280 				{
7281 					SPIRV_CROSS_THROW(
7282 					    "Array stride for dynamic indexing must be divisible by the size of a 4-component vector. "
7283 					    "Likely culprit here is a float or vec2 array inside a push constant block which is std430. "
7284 					    "This cannot be flattened. Try using std140 layout instead.");
7285 				}
7286 
7287 				expr += to_enclosed_expression(index, false);
7288 				expr += " * ";
7289 				expr += convert_to_string(array_stride / word_stride);
7290 				expr += " + ";
7291 			}
7292 
7293 			uint32_t parent_type = type->parent_type;
7294 			type = &get<SPIRType>(parent_type);
7295 			type_id = parent_type;
7296 
7297 			// Type ID now refers to the array type with one less dimension.
7298 		}
7299 		// For structs, the index refers to a constant, which indexes into the members.
7300 		// We also check if this member is a builtin, since we then replace the entire expression with the builtin one.
7301 		else if (type->basetype == SPIRType::Struct)
7302 		{
7303 			index = get<SPIRConstant>(index).scalar();
7304 
7305 			if (index >= type->member_types.size())
7306 				SPIRV_CROSS_THROW("Member index is out of bounds!");
7307 
7308 			offset += type_struct_member_offset(*type, index);
7309 			type_id = type->member_types[index];
7310 
7311 			auto &struct_type = *type;
7312 			type = &get<SPIRType>(type->member_types[index]);
7313 
7314 			if (type->columns > 1)
7315 			{
7316 				matrix_stride = type_struct_member_matrix_stride(struct_type, index);
7317 				row_major_matrix_needs_conversion =
7318 				    combined_decoration_for_member(struct_type, index).get(DecorationRowMajor);
7319 			}
7320 			else
7321 				row_major_matrix_needs_conversion = false;
7322 		}
7323 		// Matrix -> Vector
7324 		else if (type->columns > 1)
7325 		{
7326 			auto *constant = maybe_get<SPIRConstant>(index);
7327 			if (constant)
7328 			{
7329 				index = get<SPIRConstant>(index).scalar();
7330 				offset += index * (row_major_matrix_needs_conversion ? (type->width / 8) : matrix_stride);
7331 			}
7332 			else
7333 			{
7334 				uint32_t indexing_stride = row_major_matrix_needs_conversion ? (type->width / 8) : matrix_stride;
7335 				// Dynamic array access.
7336 				if (indexing_stride % word_stride)
7337 				{
7338 					SPIRV_CROSS_THROW(
7339 					    "Matrix stride for dynamic indexing must be divisible by the size of a 4-component vector. "
7340 					    "Likely culprit here is a row-major matrix being accessed dynamically. "
7341 					    "This cannot be flattened. Try using std140 layout instead.");
7342 				}
7343 
7344 				expr += to_enclosed_expression(index, false);
7345 				expr += " * ";
7346 				expr += convert_to_string(indexing_stride / word_stride);
7347 				expr += " + ";
7348 			}
7349 
7350 			uint32_t parent_type = type->parent_type;
7351 			type = &get<SPIRType>(type->parent_type);
7352 			type_id = parent_type;
7353 		}
7354 		// Vector -> Scalar
7355 		else if (type->vecsize > 1)
7356 		{
7357 			auto *constant = maybe_get<SPIRConstant>(index);
7358 			if (constant)
7359 			{
7360 				index = get<SPIRConstant>(index).scalar();
7361 				offset += index * (row_major_matrix_needs_conversion ? matrix_stride : (type->width / 8));
7362 			}
7363 			else
7364 			{
7365 				uint32_t indexing_stride = row_major_matrix_needs_conversion ? matrix_stride : (type->width / 8);
7366 
7367 				// Dynamic array access.
7368 				if (indexing_stride % word_stride)
7369 				{
7370 					SPIRV_CROSS_THROW(
7371 					    "Stride for dynamic vector indexing must be divisible by the size of a 4-component vector. "
7372 					    "This cannot be flattened in legacy targets.");
7373 				}
7374 
7375 				expr += to_enclosed_expression(index, false);
7376 				expr += " * ";
7377 				expr += convert_to_string(indexing_stride / word_stride);
7378 				expr += " + ";
7379 			}
7380 
7381 			uint32_t parent_type = type->parent_type;
7382 			type = &get<SPIRType>(type->parent_type);
7383 			type_id = parent_type;
7384 		}
7385 		else
7386 			SPIRV_CROSS_THROW("Cannot subdivide a scalar value!");
7387 	}
7388 
7389 	if (need_transpose)
7390 		*need_transpose = row_major_matrix_needs_conversion;
7391 	if (out_matrix_stride)
7392 		*out_matrix_stride = matrix_stride;
7393 
7394 	return std::make_pair(expr, offset);
7395 }
7396 
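// Decides whether an expression of pointer type must be explicitly dereferenced when
// used as a value. Plain variables (except phi variables) and access chains already
// behave like values, so only the remaining pointer expressions need the dereference.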
7397 bool CompilerGLSL::should_dereference(uint32_t id)
7398 {
7399 	const auto &type = expression_type(id);
7400 	// Non-pointer expressions don't need to be dereferenced.
7401 	if (!type.pointer)
7402 		return false;
7403 
7404 	// Handles shouldn't be dereferenced either.
7405 	if (!expression_is_lvalue(id))
7406 		return false;
7407 
7408 	// If id is a variable but not a phi variable, we should not dereference it.
7409 	if (auto *var = maybe_get<SPIRVariable>(id))
7410 		return var->phi_variable;
7411 
7412 	// If id is an access chain, we should not dereference it.
7413 	if (auto *expr = maybe_get<SPIRExpression>(id))
7414 		return !expr->access_chain;
7415 
7416 	// Otherwise, we should dereference this pointer expression.
7417 	return true;
7418 }
7419 
7420 bool CompilerGLSL::should_forward(uint32_t id) const
7421 {
7422 	// If id is a variable, we try to forward it regardless of the force_temporary check below.
7423 	// This is important because otherwise we would emit local sampler copies (highp sampler2D foo = bar), which are invalid in OpenGL GLSL.
7424 	auto *var = maybe_get<SPIRVariable>(id);
7425 	if (var && var->forwardable)
7426 		return true;
7427 
7428 	// For debugging emit temporary variables for all expressions
7429 	if (options.force_temporary)
7430 		return false;
7431 
7432 	// Immutable expression can always be forwarded.
7433 	if (is_immutable(id))
7434 		return true;
7435 
7436 	return false;
7437 }
7438 
7439 bool CompilerGLSL::should_suppress_usage_tracking(uint32_t id) const
7440 {
7441 	// Used only by opcodes which don't do any real "work", they just swizzle data in some fashion.
7442 	return !expression_is_forwarded(id) || expression_suppresses_usage_tracking(id);
7443 }
7444 
7445 void CompilerGLSL::track_expression_read(uint32_t id)
7446 {
7447 	switch (ir.ids[id].get_type())
7448 	{
7449 	case TypeExpression:
7450 	{
7451 		auto &e = get<SPIRExpression>(id);
7452 		for (auto implied_read : e.implied_read_expressions)
7453 			track_expression_read(implied_read);
7454 		break;
7455 	}
7456 
7457 	case TypeAccessChain:
7458 	{
7459 		auto &e = get<SPIRAccessChain>(id);
7460 		for (auto implied_read : e.implied_read_expressions)
7461 			track_expression_read(implied_read);
7462 		break;
7463 	}
7464 
7465 	default:
7466 		break;
7467 	}
7468 
7469 	// If we try to read a forwarded temporary more than once we will stamp out possibly complex code twice.
7470 	// In this case, it's better to just bind the complex expression to the temporary and read that temporary twice.
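	// E.g. (a sketch with hypothetical names): if a forwarded "texture(uSampler, vUV)" were read twice,
	// we would otherwise stamp out the texture fetch twice; bumping it to a temporary keeps a single fetch.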
7471 	if (expression_is_forwarded(id) && !expression_suppresses_usage_tracking(id))
7472 	{
7473 		auto &v = expression_usage_counts[id];
7474 		v++;
7475 
7476 		if (v >= 2)
7477 		{
7478 			//if (v == 2)
7479 			//    fprintf(stderr, "ID %u was forced to temporary due to more than 1 expression use!\n", id);
7480 
7481 			forced_temporaries.insert(id);
7482 			// Force a recompile after this pass to avoid forwarding this variable.
7483 			force_recompile();
7484 		}
7485 	}
7486 }
7487 
7488 bool CompilerGLSL::args_will_forward(uint32_t id, const uint32_t *args, uint32_t num_args, bool pure)
7489 {
7490 	if (forced_temporaries.find(id) != end(forced_temporaries))
7491 		return false;
7492 
7493 	for (uint32_t i = 0; i < num_args; i++)
7494 		if (!should_forward(args[i]))
7495 			return false;
7496 
7497 	// We need to forward globals as well.
7498 	if (!pure)
7499 	{
7500 		for (auto global : global_variables)
7501 			if (!should_forward(global))
7502 				return false;
7503 		for (auto aliased : aliased_variables)
7504 			if (!should_forward(aliased))
7505 				return false;
7506 	}
7507 
7508 	return true;
7509 }
7510 
7511 void CompilerGLSL::register_impure_function_call()
7512 {
7513 	// Impure functions can modify globals and aliased variables, so invalidate them as well.
7514 	for (auto global : global_variables)
7515 		flush_dependees(get<SPIRVariable>(global));
7516 	for (auto aliased : aliased_variables)
7517 		flush_dependees(get<SPIRVariable>(aliased));
7518 }
7519 
7520 void CompilerGLSL::register_call_out_argument(uint32_t id)
7521 {
7522 	register_write(id);
7523 
7524 	auto *var = maybe_get<SPIRVariable>(id);
7525 	if (var)
7526 		flush_variable_declaration(var->self);
7527 }
7528 
7529 string CompilerGLSL::variable_decl_function_local(SPIRVariable &var)
7530 {
7531 	// These variables are always function local,
7532 	// so make sure we emit the variable without storage qualifiers.
7533 	// Some backends will inject custom variables locally in a function
7534 	// with a storage qualifier which is not function-local.
7535 	auto old_storage = var.storage;
7536 	var.storage = StorageClassFunction;
7537 	auto expr = variable_decl(var);
7538 	var.storage = old_storage;
7539 	return expr;
7540 }
7541 
7542 void CompilerGLSL::emit_variable_temporary_copies(const SPIRVariable &var)
7543 {
7544 	// Ensure that we declare phi-variable copies even if the original declaration isn't deferred
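	// E.g. (a sketch, type chosen arbitrarily): a phi variable with ID 42 gets a sibling declaration such as
	// "int _42_copy;" so the phi "parallel copy" has somewhere to live when blocks are flushed.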
7545 	if (var.allocate_temporary_copy && !flushed_phi_variables.count(var.self))
7546 	{
7547 		auto &type = get<SPIRType>(var.basetype);
7548 		auto &flags = get_decoration_bitset(var.self);
7549 		statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, join("_", var.self, "_copy")), ";");
7550 		flushed_phi_variables.insert(var.self);
7551 	}
7552 }
7553 
7554 void CompilerGLSL::flush_variable_declaration(uint32_t id)
7555 {
7556 	// Ensure that we declare phi-variable copies even if the original declaration isn't deferred
7557 	auto *var = maybe_get<SPIRVariable>(id);
7558 	if (var && var->deferred_declaration)
7559 	{
7560 		statement(variable_decl_function_local(*var), ";");
7561 		var->deferred_declaration = false;
7562 	}
7563 	if (var)
7564 	{
7565 		emit_variable_temporary_copies(*var);
7566 	}
7567 }
7568 
7569 bool CompilerGLSL::remove_duplicate_swizzle(string &op)
7570 {
7571 	auto pos = op.find_last_of('.');
7572 	if (pos == string::npos || pos == 0)
7573 		return false;
7574 
7575 	string final_swiz = op.substr(pos + 1, string::npos);
7576 
7577 	if (backend.swizzle_is_function)
7578 	{
7579 		if (final_swiz.size() < 2)
7580 			return false;
7581 
7582 		if (final_swiz.substr(final_swiz.size() - 2, string::npos) == "()")
7583 			final_swiz.erase(final_swiz.size() - 2, string::npos);
7584 		else
7585 			return false;
7586 	}
7587 
7588 	// Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar.
7589 	// If so, and previous swizzle is of same length,
7590 	// we can drop the final swizzle altogether.
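	// E.g. (a sketch): "foo.wyx.xyz" reduces to "foo.wyx", since the trailing ".xyz" just
	// re-reads the first three components of the preceding swizzle.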
7591 	for (uint32_t i = 0; i < final_swiz.size(); i++)
7592 	{
7593 		static const char expected[] = { 'x', 'y', 'z', 'w' };
7594 		if (i >= 4 || final_swiz[i] != expected[i])
7595 			return false;
7596 	}
7597 
7598 	auto prevpos = op.find_last_of('.', pos - 1);
7599 	if (prevpos == string::npos)
7600 		return false;
7601 
7602 	prevpos++;
7603 
7604 	// Make sure there are only swizzles here ...
7605 	for (auto i = prevpos; i < pos; i++)
7606 	{
7607 		if (op[i] < 'w' || op[i] > 'z')
7608 		{
7609 			// If swizzles are foo.xyz() like in C++ backend for example, check for that.
7610 			if (backend.swizzle_is_function && i + 2 == pos && op[i] == '(' && op[i + 1] == ')')
7611 				break;
7612 			return false;
7613 		}
7614 	}
7615 
7616 	// If original swizzle is large enough, just carve out the components we need.
7617 	// E.g. foobar.wyx.xy will turn into foobar.wy.
7618 	if (pos - prevpos >= final_swiz.size())
7619 	{
7620 		op.erase(prevpos + final_swiz.size(), string::npos);
7621 
7622 		// Add back the function call ...
7623 		if (backend.swizzle_is_function)
7624 			op += "()";
7625 	}
7626 	return true;
7627 }
7628 
7629 // Optimizes away vector swizzles where we have something like
7630 // vec3 foo;
7631 // foo.xyz <-- swizzle expression does nothing.
7632 // This is a very common pattern after OpCompositeConstruct.
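// A sketch of the intended effect (assuming a plain GLSL backend): for a vec3 base,
// "foo.xyz" collapses to just "foo", while a partial swizzle like "foo.xy" is left alone.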
7633 bool CompilerGLSL::remove_unity_swizzle(uint32_t base, string &op)
7634 {
7635 	auto pos = op.find_last_of('.');
7636 	if (pos == string::npos || pos == 0)
7637 		return false;
7638 
7639 	string final_swiz = op.substr(pos + 1, string::npos);
7640 
7641 	if (backend.swizzle_is_function)
7642 	{
7643 		if (final_swiz.size() < 2)
7644 			return false;
7645 
7646 		if (final_swiz.substr(final_swiz.size() - 2, string::npos) == "()")
7647 			final_swiz.erase(final_swiz.size() - 2, string::npos);
7648 		else
7649 			return false;
7650 	}
7651 
7652 	// Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar.
7653 	// If so, and previous swizzle is of same length,
7654 	// we can drop the final swizzle altogether.
7655 	for (uint32_t i = 0; i < final_swiz.size(); i++)
7656 	{
7657 		static const char expected[] = { 'x', 'y', 'z', 'w' };
7658 		if (i >= 4 || final_swiz[i] != expected[i])
7659 			return false;
7660 	}
7661 
7662 	auto &type = expression_type(base);
7663 
7664 	// Sanity checking ...
7665 	assert(type.columns == 1 && type.array.empty());
7666 
7667 	if (type.vecsize == final_swiz.size())
7668 		op.erase(pos, string::npos);
7669 	return true;
7670 }
7671 
7672 string CompilerGLSL::build_composite_combiner(uint32_t return_type, const uint32_t *elems, uint32_t length)
7673 {
7674 	ID base = 0;
7675 	string op;
7676 	string subop;
7677 
7678 	// Can only merge swizzles for vectors.
7679 	auto &type = get<SPIRType>(return_type);
7680 	bool can_apply_swizzle_opt = type.basetype != SPIRType::Struct && type.array.empty() && type.columns == 1;
7681 	bool swizzle_optimization = false;
7682 
7683 	for (uint32_t i = 0; i < length; i++)
7684 	{
7685 		auto *e = maybe_get<SPIRExpression>(elems[i]);
7686 
7687 		// If we're merging another scalar which belongs to the same base
7688 		// object, just merge the swizzles so we avoid triggering more than one expression read wherever possible.
7689 		if (can_apply_swizzle_opt && e && e->base_expression && e->base_expression == base)
7690 		{
7691 			// Only supposed to be used for vector swizzle -> scalar.
7692 			assert(!e->expression.empty() && e->expression.front() == '.');
7693 			subop += e->expression.substr(1, string::npos);
7694 			swizzle_optimization = true;
7695 		}
7696 		else
7697 		{
7698 			// We'll likely end up with duplicated swizzles, e.g.
7699 			// foobar.xyz.xyz from patterns like
7700 			// OpVectorShuffle
7701 			// OpCompositeExtract x 3
7702 			// OpCompositeConstruct 3x + other scalar.
7703 			// Just modify op in-place.
7704 			if (swizzle_optimization)
7705 			{
7706 				if (backend.swizzle_is_function)
7707 					subop += "()";
7708 
7709 				// Don't attempt to remove unity swizzling if we managed to remove duplicate swizzles.
7710 				// The base "foo" might be vec4, while foo.xyz is vec3 (OpVectorShuffle) and looks like a vec3 due to the .xyz tacked on.
7711 				// We only want to remove the swizzles if we're certain that the resulting base will be the same vecsize.
7712 				// Essentially, we can only remove one set of swizzles, since that's what we have control over ...
7713 				// Case 1:
7714 				//  foo.yxz.xyz: Duplicate swizzle kicks in, giving foo.yxz, we are done.
7715 				//               foo.yxz was the result of OpVectorShuffle and we don't know the type of foo.
7716 				// Case 2:
7717 				//  foo.xyz: Duplicate swizzle won't kick in.
7718 				//           If foo is vec3, we can remove xyz, giving just foo.
7719 				if (!remove_duplicate_swizzle(subop))
7720 					remove_unity_swizzle(base, subop);
7721 
7722 				// Strips away redundant parens if we created them during component extraction.
7723 				strip_enclosed_expression(subop);
7724 				swizzle_optimization = false;
7725 				op += subop;
7726 			}
7727 			else
7728 				op += subop;
7729 
7730 			if (i)
7731 				op += ", ";
7732 			subop = to_composite_constructor_expression(elems[i]);
7733 		}
7734 
7735 		base = e ? e->base_expression : ID(0);
7736 	}
7737 
7738 	if (swizzle_optimization)
7739 	{
7740 		if (backend.swizzle_is_function)
7741 			subop += "()";
7742 
7743 		if (!remove_duplicate_swizzle(subop))
7744 			remove_unity_swizzle(base, subop);
7745 		// Strips away redundant parens if we created them during component extraction.
7746 		strip_enclosed_expression(subop);
7747 	}
7748 
7749 	op += subop;
7750 	return op;
7751 }
7752 
7753 bool CompilerGLSL::skip_argument(uint32_t id) const
7754 {
7755 	if (!combined_image_samplers.empty() || !options.vulkan_semantics)
7756 	{
7757 		auto &type = expression_type(id);
7758 		if (type.basetype == SPIRType::Sampler || (type.basetype == SPIRType::Image && type.image.sampled == 1))
7759 			return true;
7760 	}
7761 	return false;
7762 }
7763 
7764 bool CompilerGLSL::optimize_read_modify_write(const SPIRType &type, const string &lhs, const string &rhs)
7765 {
7766 	// Do this with strings because we have a very clear pattern we can check for and it avoids
7767 	// adding lots of special cases to the code emission.
7768 	if (rhs.size() < lhs.size() + 3)
7769 		return false;
7770 
7771 	// Do not optimize matrices. They are a bit awkward to reason about in general
7772 	// (in which order does the operation happen?), and it does not work on MSL anyway.
7773 	if (type.vecsize > 1 && type.columns > 1)
7774 		return false;
7775 
7776 	auto index = rhs.find(lhs);
7777 	if (index != 0)
7778 		return false;
7779 
7780 	// TODO: Shift operators, but it's not important for now.
7781 	auto op = rhs.find_first_of("+-/*%|&^", lhs.size() + 1);
7782 	if (op != lhs.size() + 1)
7783 		return false;
7784 
7785 	// Check that the op is followed by space. This excludes && and ||.
7786 	if (rhs[op + 1] != ' ')
7787 		return false;
7788 
7789 	char bop = rhs[op];
7790 	auto expr = rhs.substr(lhs.size() + 3);
7791 	// Try to find increments and decrements. Makes it look neater as += 1, -= 1 is fairly rare to see in real code.
7792 	// Find some common patterns which are equivalent.
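	// E.g. (a sketch): "a = a + 1;" is emitted as "a++;", and "a = a * b;" as "a *= b;",
	// provided the RHS text literally begins with the LHS text.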
7793 	if ((bop == '+' || bop == '-') && (expr == "1" || expr == "uint(1)" || expr == "1u" || expr == "int(1u)"))
7794 		statement(lhs, bop, bop, ";");
7795 	else
7796 		statement(lhs, " ", bop, "= ", expr, ";");
7797 	return true;
7798 }
7799 
7800 void CompilerGLSL::register_control_dependent_expression(uint32_t expr)
7801 {
7802 	if (forwarded_temporaries.find(expr) == end(forwarded_temporaries))
7803 		return;
7804 
7805 	assert(current_emitting_block);
7806 	current_emitting_block->invalidate_expressions.push_back(expr);
7807 }
7808 
7809 void CompilerGLSL::emit_block_instructions(SPIRBlock &block)
7810 {
7811 	current_emitting_block = &block;
7812 	for (auto &op : block.ops)
7813 		emit_instruction(op);
7814 	current_emitting_block = nullptr;
7815 }
7816 
7817 void CompilerGLSL::disallow_forwarding_in_expression_chain(const SPIRExpression &expr)
7818 {
7819 	// Allow trivially forwarded expressions like OpLoad or trivial shuffles,
7820 	// these will be marked as having suppressed usage tracking.
7821 	// Our only concern is to make sure arithmetic operations are done in similar ways.
7822 	if (expression_is_forwarded(expr.self) && !expression_suppresses_usage_tracking(expr.self) &&
7823 	    forced_invariant_temporaries.count(expr.self) == 0)
7824 	{
7825 		forced_temporaries.insert(expr.self);
7826 		forced_invariant_temporaries.insert(expr.self);
7827 		force_recompile();
7828 
7829 		for (auto &dependent : expr.expression_dependencies)
7830 			disallow_forwarding_in_expression_chain(get<SPIRExpression>(dependent));
7831 	}
7832 }
7833 
7834 void CompilerGLSL::handle_store_to_invariant_variable(uint32_t store_id, uint32_t value_id)
7835 {
7836 	// Variables or access chains marked invariant are complicated. We will need to make sure the code-gen leading up to
7837 	// this variable is consistent. The failure case for SPIRV-Cross is when an expression is forced to a temporary
7838 	// in one translation unit, but not another, e.g. due to multiple use of an expression.
7839 	// This causes variance despite the output variable being marked invariant, so the solution here is to force all dependent
7840 	// expressions to be temporaries.
7841 	// It is uncertain if this is enough to support invariant in all possible cases, but it should be good enough
7842 	// for all reasonable uses of invariant.
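	// E.g. (a sketch): if the expression feeding an invariant gl_Position is inlined in one shader
	// but hoisted into a temporary in another, drivers may optimize the two forms differently;
	// forcing the whole chain to temporaries keeps both sides structurally identical.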
7843 	if (!has_decoration(store_id, DecorationInvariant))
7844 		return;
7845 
7846 	auto *expr = maybe_get<SPIRExpression>(value_id);
7847 	if (!expr)
7848 		return;
7849 
7850 	disallow_forwarding_in_expression_chain(*expr);
7851 }
7852 
7853 void CompilerGLSL::emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression)
7854 {
7855 	auto rhs = to_pointer_expression(rhs_expression);
7856 
7857 	// Statements to OpStore may be empty if it is a struct with zero members. Just forward the store to /dev/null.
7858 	if (!rhs.empty())
7859 	{
7860 		handle_store_to_invariant_variable(lhs_expression, rhs_expression);
7861 
7862 		auto lhs = to_dereferenced_expression(lhs_expression);
7863 
7864 		// We might need to bitcast in order to store to a builtin.
7865 		bitcast_to_builtin_store(lhs_expression, rhs, expression_type(rhs_expression));
7866 
7867 		// Tries to optimize assignments like "<lhs> = <lhs> op expr".
7868 		// While this is purely cosmetic, this is important for legacy ESSL where loop
7869 		// variable increments must be in either i++ or i += const-expr.
7870 		// Without this, we end up with i = i + 1, which is correct GLSL, but not correct GLES 2.0.
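		// E.g. (a sketch): "i = i + 1;" becomes "i++;" so the loop header stays in the
		// restricted form GLES 2.0 requires for loop induction variables.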
7871 		if (!optimize_read_modify_write(expression_type(rhs_expression), lhs, rhs))
7872 			statement(lhs, " = ", rhs, ";");
7873 		register_write(lhs_expression);
7874 	}
7875 }
7876 
7877 uint32_t CompilerGLSL::get_integer_width_for_instruction(const Instruction &instr) const
7878 {
7879 	if (instr.length < 3)
7880 		return 32;
7881 
7882 	auto *ops = stream(instr);
7883 
7884 	switch (instr.op)
7885 	{
7886 	case OpSConvert:
7887 	case OpConvertSToF:
7888 	case OpUConvert:
7889 	case OpConvertUToF:
7890 	case OpIEqual:
7891 	case OpINotEqual:
7892 	case OpSLessThan:
7893 	case OpSLessThanEqual:
7894 	case OpSGreaterThan:
7895 	case OpSGreaterThanEqual:
7896 	case OpULessThan:
7897 	case OpULessThanEqual:
7898 	case OpUGreaterThan:
7899 	case OpUGreaterThanEqual:
7900 		return expression_type(ops[2]).width;
7901 
7902 	default:
7903 	{
7904 		// We can look at result type which is more robust.
7905 		auto *type = maybe_get<SPIRType>(ops[0]);
7906 		if (type && type_is_integral(*type))
7907 			return type->width;
7908 		else
7909 			return 32;
7910 	}
7911 	}
7912 }
7913 
7914 uint32_t CompilerGLSL::get_integer_width_for_glsl_instruction(GLSLstd450 op, const uint32_t *ops, uint32_t length) const
7915 {
7916 	if (length < 1)
7917 		return 32;
7918 
7919 	switch (op)
7920 	{
7921 	case GLSLstd450SAbs:
7922 	case GLSLstd450SSign:
7923 	case GLSLstd450UMin:
7924 	case GLSLstd450SMin:
7925 	case GLSLstd450UMax:
7926 	case GLSLstd450SMax:
7927 	case GLSLstd450UClamp:
7928 	case GLSLstd450SClamp:
7929 	case GLSLstd450FindSMsb:
7930 	case GLSLstd450FindUMsb:
7931 		return expression_type(ops[0]).width;
7932 
7933 	default:
7934 	{
7935 		// We don't need to care about other opcodes, just return 32.
7936 		return 32;
7937 	}
7938 	}
7939 }
7940 
7941 void CompilerGLSL::emit_instruction(const Instruction &instruction)
7942 {
7943 	auto ops = stream(instruction);
7944 	auto opcode = static_cast<Op>(instruction.op);
7945 	uint32_t length = instruction.length;
7946 
7947 #define GLSL_BOP(op) emit_binary_op(ops[0], ops[1], ops[2], ops[3], #op)
7948 #define GLSL_BOP_CAST(op, type) \
7949 	emit_binary_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode))
7950 #define GLSL_UOP(op) emit_unary_op(ops[0], ops[1], ops[2], #op)
7951 #define GLSL_QFOP(op) emit_quaternary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], #op)
7952 #define GLSL_TFOP(op) emit_trinary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], #op)
7953 #define GLSL_BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op)
7954 #define GLSL_BFOP_CAST(op, type) \
7955 	emit_binary_func_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode))
7957 #define GLSL_UFOP(op) emit_unary_func_op(ops[0], ops[1], ops[2], #op)
7958 
7959 	// If we need to do implicit bitcasts, make sure we do it with the correct type.
7960 	uint32_t integer_width = get_integer_width_for_instruction(instruction);
7961 	auto int_type = to_signed_basetype(integer_width);
7962 	auto uint_type = to_unsigned_basetype(integer_width);
7963 
7964 	switch (opcode)
7965 	{
7966 	// Dealing with memory
7967 	case OpLoad:
7968 	{
7969 		uint32_t result_type = ops[0];
7970 		uint32_t id = ops[1];
7971 		uint32_t ptr = ops[2];
7972 
7973 		flush_variable_declaration(ptr);
7974 
7975 		// If we're loading from memory that cannot be changed by the shader,
7976 		// just forward the expression directly to avoid needless temporaries.
7977 		// If an expression is mutable and forwardable, we speculate that it is immutable.
7978 		bool forward = should_forward(ptr) && forced_temporaries.find(id) == end(forced_temporaries);
7979 
7980 		// If loading a non-native row-major matrix, mark the expression as need_transpose.
7981 		bool need_transpose = false;
7982 		bool old_need_transpose = false;
7983 
7984 		auto *ptr_expression = maybe_get<SPIRExpression>(ptr);
7985 
7986 		if (forward)
7987 		{
7988 			// If we're forwarding the load, we're also going to forward transpose state, so don't transpose while
7989 			// taking the expression.
7990 			if (ptr_expression && ptr_expression->need_transpose)
7991 			{
7992 				old_need_transpose = true;
7993 				ptr_expression->need_transpose = false;
7994 				need_transpose = true;
7995 			}
7996 			else if (is_non_native_row_major_matrix(ptr))
7997 				need_transpose = true;
7998 		}
7999 
8000 		// If we are forwarding this load,
8001 		// don't register the read to access chain here, defer that to when we actually use the expression,
8002 		// using the add_implied_read_expression mechanism.
8003 		string expr;
8004 
8005 		bool is_packed = has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypePacked);
8006 		bool is_remapped = has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID);
8007 		if (forward || (!is_packed && !is_remapped))
8008 		{
8009 			// For the simple case, we do not need to deal with repacking.
8010 			expr = to_dereferenced_expression(ptr, false);
8011 		}
8012 		else
8013 		{
8014 			// If we are not forwarding the expression, we need to unpack and resolve any physical type remapping here before
8015 			// storing the expression to a temporary.
8016 			expr = to_unpacked_expression(ptr);
8017 		}
8018 
8019 		// We might need to bitcast in order to load from a builtin.
8020 		bitcast_from_builtin_load(ptr, expr, get<SPIRType>(result_type));
8021 
8022 		// We might be trying to load a gl_Position[N], where we should be
8023 		// doing float4[](gl_in[i].gl_Position, ...) instead.
8024 		// Similar workarounds are required for input arrays in tessellation.
8025 		unroll_array_from_complex_load(id, ptr, expr);
8026 
8027 		auto &type = get<SPIRType>(result_type);
8028 		// Shouldn't need to check for ID, but current glslang codegen requires it in some cases
8029 		// when loading Image/Sampler descriptors. It does not hurt to check ID as well.
8030 		if (has_decoration(id, DecorationNonUniformEXT) || has_decoration(ptr, DecorationNonUniformEXT))
8031 		{
8032 			propagate_nonuniform_qualifier(ptr);
8033 			convert_non_uniform_expression(type, expr);
8034 		}
8035 
8036 		if (forward && ptr_expression)
8037 			ptr_expression->need_transpose = old_need_transpose;
8038 
8039 		// By default, suppress usage tracking since using same expression multiple times does not imply any extra work.
8040 		// However, if we try to load a complex, composite object from a flattened buffer,
8041 		// we should avoid emitting the same code over and over and lower the result to a temporary.
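		// E.g. (a sketch): a mat4 or struct loaded from a buffer block that was flattened into a plain
		// uniform array expands into many element reads, so re-emitting that expansion per use would bloat the output.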
8042 		bool usage_tracking = ptr_expression && flattened_buffer_blocks.count(ptr_expression->loaded_from) != 0 &&
8043 		                      (type.basetype == SPIRType::Struct || (type.columns > 1));
8044 
8045 		SPIRExpression *e = nullptr;
8046 		if (!backend.array_is_value_type && !type.array.empty() && !forward)
8047 		{
8048 			// Complicated load case where we need to make a copy of ptr, but we cannot, because
8049 			// it is an array, and our backend does not support arrays as value types.
8050 			// Emit the temporary, and copy it explicitly.
8051 			e = &emit_uninitialized_temporary_expression(result_type, id);
8052 			emit_array_copy(to_expression(id), ptr, StorageClassFunction, get_backing_variable_storage(ptr));
8053 		}
8054 		else
8055 			e = &emit_op(result_type, id, expr, forward, !usage_tracking);
8056 
8057 		e->need_transpose = need_transpose;
8058 		register_read(id, ptr, forward);
8059 
8060 		if (forward)
8061 		{
8062 			// Pass through whether the result is of a packed type and the physical type ID.
8063 			if (has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypePacked))
8064 				set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
8065 			if (has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID))
8066 			{
8067 				set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID,
8068 				                        get_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID));
8069 			}
8070 		}
8071 		else
8072 		{
8073 			// This might have been set on an earlier compilation iteration, force it to be unset.
8074 			unset_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
8075 			unset_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID);
8076 		}
8077 
8078 		inherit_expression_dependencies(id, ptr);
8079 		if (forward)
8080 			add_implied_read_expression(*e, ptr);
8081 		break;
8082 	}
8083 
8084 	case OpInBoundsAccessChain:
8085 	case OpAccessChain:
8086 	case OpPtrAccessChain:
8087 	{
8088 		auto *var = maybe_get<SPIRVariable>(ops[2]);
8089 		if (var)
8090 			flush_variable_declaration(var->self);
8091 
8092 		// If the base is immutable, the access chain pointer must also be.
8093 		// If an expression is mutable and forwardable, we speculate that it is immutable.
8094 		AccessChainMeta meta;
8095 		bool ptr_chain = opcode == OpPtrAccessChain;
8096 		auto e = access_chain(ops[2], &ops[3], length - 3, get<SPIRType>(ops[0]), &meta, ptr_chain);
8097 
8098 		auto &expr = set<SPIRExpression>(ops[1], move(e), ops[0], should_forward(ops[2]));
8099 
8100 		auto *backing_variable = maybe_get_backing_variable(ops[2]);
8101 		expr.loaded_from = backing_variable ? backing_variable->self : ID(ops[2]);
8102 		expr.need_transpose = meta.need_transpose;
8103 		expr.access_chain = true;
8104 
8105 		// Mark the result as being packed. Some platforms handle packed vectors differently from non-packed ones.
8106 		if (meta.storage_is_packed)
8107 			set_extended_decoration(ops[1], SPIRVCrossDecorationPhysicalTypePacked);
8108 		if (meta.storage_physical_type != 0)
8109 			set_extended_decoration(ops[1], SPIRVCrossDecorationPhysicalTypeID, meta.storage_physical_type);
8110 		if (meta.storage_is_invariant)
8111 			set_decoration(ops[1], DecorationInvariant);
8112 
8113 		// If we have some expression dependencies in our access chain, this access chain is technically a forwarded
8114 		// temporary which could be subject to invalidation.
8115 		// Need to assume we're forwarded while calling inherit_expression_dependencies.
8116 		forwarded_temporaries.insert(ops[1]);
8117 		// The access chain itself is never forced to a temporary, but its dependencies might.
8118 		suppressed_usage_tracking.insert(ops[1]);
8119 
8120 		for (uint32_t i = 2; i < length; i++)
8121 		{
8122 			inherit_expression_dependencies(ops[1], ops[i]);
8123 			add_implied_read_expression(expr, ops[i]);
8124 		}
8125 
8126 		// If we have no dependencies after all, i.e., all indices in the access chain are immutable temporaries,
8127 		// we're not forwarded after all.
8128 		if (expr.expression_dependencies.empty())
8129 			forwarded_temporaries.erase(ops[1]);
8130 
8131 		break;
8132 	}
8133 
8134 	case OpStore:
8135 	{
8136 		auto *var = maybe_get<SPIRVariable>(ops[0]);
8137 
8138 		if (has_decoration(ops[0], DecorationNonUniformEXT))
8139 			propagate_nonuniform_qualifier(ops[0]);
8140 
8141 		if (var && var->statically_assigned)
8142 			var->static_expression = ops[1];
8143 		else if (var && var->loop_variable && !var->loop_variable_enable)
8144 			var->static_expression = ops[1];
8145 		else if (var && var->remapped_variable)
8146 		{
8147 			// Skip the write.
8148 		}
8149 		else if (var && flattened_structs.count(ops[0]))
8150 		{
8151 			store_flattened_struct(*var, ops[1]);
8152 			register_write(ops[0]);
8153 		}
8154 		else
8155 		{
8156 			emit_store_statement(ops[0], ops[1]);
8157 		}
8158 
8159 		// Storing a pointer results in a variable pointer, so we must conservatively assume
8160 		// we can write through it.
8161 		if (expression_type(ops[1]).pointer)
8162 			register_write(ops[1]);
8163 		break;
8164 	}
8165 
8166 	case OpArrayLength:
8167 	{
8168 		uint32_t result_type = ops[0];
8169 		uint32_t id = ops[1];
8170 		auto e = access_chain_internal(ops[2], &ops[3], length - 3, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr);
8171 		set<SPIRExpression>(id, join(type_to_glsl(get<SPIRType>(result_type)), "(", e, ".length())"), result_type,
8172 		                    true);
8173 		break;
8174 	}
8175 
8176 	// Function calls
8177 	case OpFunctionCall:
8178 	{
8179 		uint32_t result_type = ops[0];
8180 		uint32_t id = ops[1];
8181 		uint32_t func = ops[2];
8182 		const auto *arg = &ops[3];
8183 		length -= 3;
8184 
8185 		auto &callee = get<SPIRFunction>(func);
8186 		auto &return_type = get<SPIRType>(callee.return_type);
8187 		bool pure = function_is_pure(callee);
8188 
8189 		bool callee_has_out_variables = false;
8190 		bool emit_return_value_as_argument = false;
8191 
8192 		// Invalidate out variables passed to functions since they can be OpStore'd to.
8193 		for (uint32_t i = 0; i < length; i++)
8194 		{
8195 			if (callee.arguments[i].write_count)
8196 			{
8197 				register_call_out_argument(arg[i]);
8198 				callee_has_out_variables = true;
8199 			}
8200 
8201 			flush_variable_declaration(arg[i]);
8202 		}
8203 
8204 		if (!return_type.array.empty() && !backend.can_return_array)
8205 		{
8206 			callee_has_out_variables = true;
8207 			emit_return_value_as_argument = true;
8208 		}
8209 
8210 		if (!pure)
8211 			register_impure_function_call();
8212 
8213 		string funexpr;
8214 		SmallVector<string> arglist;
8215 		funexpr += to_name(func) + "(";
8216 
8217 		if (emit_return_value_as_argument)
8218 		{
8219 			statement(type_to_glsl(return_type), " ", to_name(id), type_to_array_glsl(return_type), ";");
8220 			arglist.push_back(to_name(id));
8221 		}
8222 
8223 		for (uint32_t i = 0; i < length; i++)
8224 		{
8225 			// Do not pass in separate images or samplers if we're remapping
8226 			// to combined image samplers.
8227 			if (skip_argument(arg[i]))
8228 				continue;
8229 
8230 			arglist.push_back(to_func_call_arg(callee.arguments[i], arg[i]));
8231 		}
8232 
8233 		for (auto &combined : callee.combined_parameters)
8234 		{
8235 			auto image_id = combined.global_image ? combined.image_id : VariableID(arg[combined.image_id]);
8236 			auto sampler_id = combined.global_sampler ? combined.sampler_id : VariableID(arg[combined.sampler_id]);
8237 			arglist.push_back(to_combined_image_sampler(image_id, sampler_id));
8238 		}
8239 
8240 		append_global_func_args(callee, length, arglist);
8241 
8242 		funexpr += merge(arglist);
8243 		funexpr += ")";
8244 
8245 		// Check for function call constraints.
8246 		check_function_call_constraints(arg, length);
8247 
8248 		if (return_type.basetype != SPIRType::Void)
8249 		{
8250 			// If the function actually writes to an out variable,
8251 			// take the conservative route and do not forward.
8252 			// The problem is that we might not read the function
8253 			// result (and emit the function) before an out variable
8254 			// is read (a common case when the return value is ignored).
8255 			// To avoid having to start tracking invalid variables,
8256 			// just avoid the forwarding problem altogether.
8257 			bool forward = args_will_forward(id, arg, length, pure) && !callee_has_out_variables && pure &&
8258 			               (forced_temporaries.find(id) == end(forced_temporaries));
8259 
8260 			if (emit_return_value_as_argument)
8261 			{
8262 				statement(funexpr, ";");
8263 				set<SPIRExpression>(id, to_name(id), result_type, true);
8264 			}
8265 			else
8266 				emit_op(result_type, id, funexpr, forward);
8267 
8268 			// Function calls are implicit loads from all variables in question.
8269 			// Set dependencies for them.
8270 			for (uint32_t i = 0; i < length; i++)
8271 				register_read(id, arg[i], forward);
8272 
8273 			// If we're going to forward the temporary result,
8274 			// put dependencies on every variable that must not change.
8275 			if (forward)
8276 				register_global_read_dependencies(callee, id);
8277 		}
8278 		else
8279 			statement(funexpr, ";");
8280 
8281 		break;
8282 	}
8283 
8284 	// Composite munging
8285 	case OpCompositeConstruct:
8286 	{
8287 		uint32_t result_type = ops[0];
8288 		uint32_t id = ops[1];
8289 		const auto *const elems = &ops[2];
8290 		length -= 2;
8291 
8292 		bool forward = true;
8293 		for (uint32_t i = 0; i < length; i++)
8294 			forward = forward && should_forward(elems[i]);
8295 
8296 		auto &out_type = get<SPIRType>(result_type);
8297 		auto *in_type = length > 0 ? &expression_type(elems[0]) : nullptr;
8298 
8299 		// Only splat if we have vector constructors.
8300 		// Arrays and structs must be initialized properly in full.
8301 		bool composite = !out_type.array.empty() || out_type.basetype == SPIRType::Struct;
8302 
8303 		bool splat = false;
8304 		bool swizzle_splat = false;
8305 
8306 		if (in_type)
8307 		{
8308 			splat = in_type->vecsize == 1 && in_type->columns == 1 && !composite && backend.use_constructor_splatting;
8309 			swizzle_splat = in_type->vecsize == 1 && in_type->columns == 1 && backend.can_swizzle_scalar;
8310 
8311 			if (ir.ids[elems[0]].get_type() == TypeConstant && !type_is_floating_point(*in_type))
8312 			{
8313 				// Cannot swizzle literal integers as a special case.
8314 				swizzle_splat = false;
8315 			}
8316 		}
8317 
8318 		if (splat || swizzle_splat)
8319 		{
8320 			uint32_t input = elems[0];
8321 			for (uint32_t i = 0; i < length; i++)
8322 			{
8323 				if (input != elems[i])
8324 				{
8325 					splat = false;
8326 					swizzle_splat = false;
8327 				}
8328 			}
8329 		}
8330 
8331 		if (out_type.basetype == SPIRType::Struct && !backend.can_declare_struct_inline)
8332 			forward = false;
8333 		if (!out_type.array.empty() && !backend.can_declare_arrays_inline)
8334 			forward = false;
8335 		if (type_is_empty(out_type) && !backend.supports_empty_struct)
8336 			forward = false;
8337 
8338 		string constructor_op;
8339 		if (backend.use_initializer_list && composite)
8340 		{
8341 			bool needs_trailing_bracket = false;
8342 			// Only use this path if we are building composites.
8343 			// This path cannot be used for arithmetic.
8344 			if (backend.use_typed_initializer_list && out_type.basetype == SPIRType::Struct && out_type.array.empty())
8345 				constructor_op += type_to_glsl_constructor(get<SPIRType>(result_type));
8346 			else if (backend.use_typed_initializer_list && !out_type.array.empty())
8347 			{
8348 				// MSL path. Array constructor is baked into type here, do not use _constructor variant.
8349 				constructor_op += type_to_glsl_constructor(get<SPIRType>(result_type)) + "(";
8350 				needs_trailing_bracket = true;
8351 			}
8352 			constructor_op += "{ ";
8353 
8354 			if (type_is_empty(out_type) && !backend.supports_empty_struct)
8355 				constructor_op += "0";
8356 			else if (splat)
8357 				constructor_op += to_unpacked_expression(elems[0]);
8358 			else
8359 				constructor_op += build_composite_combiner(result_type, elems, length);
8360 			constructor_op += " }";
8361 			if (needs_trailing_bracket)
8362 				constructor_op += ")";
8363 		}
8364 		else if (swizzle_splat && !composite)
8365 		{
8366 			constructor_op = remap_swizzle(get<SPIRType>(result_type), 1, to_unpacked_expression(elems[0]));
8367 		}
8368 		else
8369 		{
8370 			constructor_op = type_to_glsl_constructor(get<SPIRType>(result_type)) + "(";
8371 			if (type_is_empty(out_type) && !backend.supports_empty_struct)
8372 				constructor_op += "0";
8373 			else if (splat)
8374 				constructor_op += to_unpacked_expression(elems[0]);
8375 			else
8376 				constructor_op += build_composite_combiner(result_type, elems, length);
8377 			constructor_op += ")";
8378 		}
8379 
8380 		if (!constructor_op.empty())
8381 		{
8382 			emit_op(result_type, id, constructor_op, forward);
8383 			for (uint32_t i = 0; i < length; i++)
8384 				inherit_expression_dependencies(id, elems[i]);
8385 		}
8386 		break;
8387 	}
8388 
8389 	case OpVectorInsertDynamic:
8390 	{
8391 		uint32_t result_type = ops[0];
8392 		uint32_t id = ops[1];
8393 		uint32_t vec = ops[2];
8394 		uint32_t comp = ops[3];
8395 		uint32_t index = ops[4];
8396 
8397 		flush_variable_declaration(vec);
8398 
8399 		// Make a copy, then use access chain to store the variable.
8400 		statement(declare_temporary(result_type, id), to_expression(vec), ";");
8401 		set<SPIRExpression>(id, to_name(id), result_type, true);
8402 		auto chain = access_chain_internal(id, &index, 1, 0, nullptr);
8403 		statement(chain, " = ", to_expression(comp), ";");
8404 		break;
8405 	}
8406 
8407 	case OpVectorExtractDynamic:
8408 	{
8409 		uint32_t result_type = ops[0];
8410 		uint32_t id = ops[1];
8411 
8412 		auto expr = access_chain_internal(ops[2], &ops[3], 1, 0, nullptr);
8413 		emit_op(result_type, id, expr, should_forward(ops[2]));
8414 		inherit_expression_dependencies(id, ops[2]);
8415 		inherit_expression_dependencies(id, ops[3]);
8416 		break;
8417 	}
8418 
8419 	case OpCompositeExtract:
8420 	{
8421 		uint32_t result_type = ops[0];
8422 		uint32_t id = ops[1];
8423 		length -= 3;
8424 
8425 		auto &type = get<SPIRType>(result_type);
8426 
8427 		// We can only split the expression here if our expression is forwarded as a temporary.
8428 		bool allow_base_expression = forced_temporaries.find(id) == end(forced_temporaries);
8429 
8430 		// Do not allow base expression for struct members. We risk doing "swizzle" optimizations in this case.
8431 		auto &composite_type = expression_type(ops[2]);
8432 		if (composite_type.basetype == SPIRType::Struct || !composite_type.array.empty())
8433 			allow_base_expression = false;
8434 
8435 		// Packed expressions cannot be split up.
8436 		if (has_extended_decoration(ops[2], SPIRVCrossDecorationPhysicalTypePacked))
8437 			allow_base_expression = false;
8438 
8439 		// Cannot use base expression for row-major matrix row-extraction since we need to interleave access pattern
8440 		// into the base expression.
8441 		if (is_non_native_row_major_matrix(ops[2]))
8442 			allow_base_expression = false;
8443 
8444 		AccessChainMeta meta;
8445 		SPIRExpression *e = nullptr;
8446 
8447 		// Only apply this optimization if result is scalar.
8448 		if (allow_base_expression && should_forward(ops[2]) && type.vecsize == 1 && type.columns == 1 && length == 1)
8449 		{
8450 			// We want to split the access chain from the base.
8451 			// This is so we can later combine different CompositeExtract results
8452 			// with CompositeConstruct without emitting code like
8453 			//
8454 			// vec3 temp = texture(...).xyz
8455 			// vec4(temp.x, temp.y, temp.z, 1.0).
8456 			//
8457 			// when we actually wanted to emit this
8458 			// vec4(texture(...).xyz, 1.0).
8459 			//
8460 			// Including the base will prevent this and would trigger multiple reads
8461 			// from expression causing it to be forced to an actual temporary in GLSL.
8462 			auto expr = access_chain_internal(ops[2], &ops[3], length,
8463 			                                  ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_CHAIN_ONLY_BIT, &meta);
8464 			e = &emit_op(result_type, id, expr, true, should_suppress_usage_tracking(ops[2]));
8465 			inherit_expression_dependencies(id, ops[2]);
8466 			e->base_expression = ops[2];
8467 		}
8468 		else
8469 		{
8470 			auto expr = access_chain_internal(ops[2], &ops[3], length, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &meta);
8471 			e = &emit_op(result_type, id, expr, should_forward(ops[2]), should_suppress_usage_tracking(ops[2]));
8472 			inherit_expression_dependencies(id, ops[2]);
8473 		}
8474 
8475 		// Pass through some meta information to the loaded expression.
8476 		// We can still end up loading a buffer type to a variable, then CompositeExtract from it
8477 		// instead of loading everything through an access chain.
8478 		e->need_transpose = meta.need_transpose;
8479 		if (meta.storage_is_packed)
8480 			set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
8481 		if (meta.storage_physical_type != 0)
8482 			set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID, meta.storage_physical_type);
8483 		if (meta.storage_is_invariant)
8484 			set_decoration(id, DecorationInvariant);
8485 
8486 		break;
8487 	}
8488 
8489 	case OpCompositeInsert:
8490 	{
8491 		uint32_t result_type = ops[0];
8492 		uint32_t id = ops[1];
8493 		uint32_t obj = ops[2];
8494 		uint32_t composite = ops[3];
8495 		const auto *elems = &ops[4];
8496 		length -= 4;
8497 
8498 		flush_variable_declaration(composite);
8499 
8500 		// Make a copy, then use access chain to store the variable.
8501 		statement(declare_temporary(result_type, id), to_expression(composite), ";");
8502 		set<SPIRExpression>(id, to_name(id), result_type, true);
8503 		auto chain = access_chain_internal(id, elems, length, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr);
8504 		statement(chain, " = ", to_expression(obj), ";");
8505 
8506 		break;
8507 	}
8508 
8509 	case OpCopyMemory:
8510 	{
8511 		uint32_t lhs = ops[0];
8512 		uint32_t rhs = ops[1];
8513 		if (lhs != rhs)
8514 		{
8515 			flush_variable_declaration(lhs);
8516 			flush_variable_declaration(rhs);
8517 			statement(to_expression(lhs), " = ", to_expression(rhs), ";");
8518 			register_write(lhs);
8519 		}
8520 		break;
8521 	}
8522 
8523 	case OpCopyObject:
8524 	{
8525 		uint32_t result_type = ops[0];
8526 		uint32_t id = ops[1];
8527 		uint32_t rhs = ops[2];
8528 		bool pointer = get<SPIRType>(result_type).pointer;
8529 
8530 		auto *chain = maybe_get<SPIRAccessChain>(rhs);
8531 		if (chain)
8532 		{
8533 			// Cannot lower to a SPIRExpression, just copy the object.
8534 			auto &e = set<SPIRAccessChain>(id, *chain);
8535 			e.self = id;
8536 		}
8537 		else if (expression_is_lvalue(rhs) && !pointer)
8538 		{
8539 			// Need a copy.
8540 			// For pointer types, we copy the pointer itself.
8541 			statement(declare_temporary(result_type, id), to_unpacked_expression(rhs), ";");
8542 			set<SPIRExpression>(id, to_name(id), result_type, true);
8543 		}
8544 		else
8545 		{
8546 			// RHS expression is immutable, so just forward it.
8547 			// Copying these things really makes no sense, but
8548 			// it seems to be allowed anyway.
8549 			auto &e = set<SPIRExpression>(id, to_expression(rhs), result_type, true);
8550 			if (pointer)
8551 			{
8552 				auto *var = maybe_get_backing_variable(rhs);
8553 				e.loaded_from = var ? var->self : ID(0);
8554 			}
8555 
8556 			// If we're copying an access chain, need to inherit the read expressions.
8557 			auto *rhs_expr = maybe_get<SPIRExpression>(rhs);
8558 			if (rhs_expr)
8559 			{
8560 				e.implied_read_expressions = rhs_expr->implied_read_expressions;
8561 				e.expression_dependencies = rhs_expr->expression_dependencies;
8562 			}
8563 		}
8564 		break;
8565 	}
8566 
8567 	case OpVectorShuffle:
8568 	{
8569 		uint32_t result_type = ops[0];
8570 		uint32_t id = ops[1];
8571 		uint32_t vec0 = ops[2];
8572 		uint32_t vec1 = ops[3];
8573 		const auto *elems = &ops[4];
8574 		length -= 4;
8575 
8576 		auto &type0 = expression_type(vec0);
8577 
8578 		// If we have the undefined swizzle index -1, we need to swizzle in undefined data,
8579 		// or in our case, T(0).
8580 		bool shuffle = false;
8581 		for (uint32_t i = 0; i < length; i++)
8582 			if (elems[i] >= type0.vecsize || elems[i] == 0xffffffffu)
8583 				shuffle = true;
8584 
8585 		// Cannot use swizzles with packed expressions, force shuffle path.
8586 		if (!shuffle && has_extended_decoration(vec0, SPIRVCrossDecorationPhysicalTypePacked))
8587 			shuffle = true;
8588 
8589 		string expr;
8590 		bool should_fwd, trivial_forward;
8591 
8592 		if (shuffle)
8593 		{
8594 			should_fwd = should_forward(vec0) && should_forward(vec1);
8595 			trivial_forward = should_suppress_usage_tracking(vec0) && should_suppress_usage_tracking(vec1);
8596 
8597 			// Constructor style and shuffling from two different vectors.
8598 			SmallVector<string> args;
8599 			for (uint32_t i = 0; i < length; i++)
8600 			{
8601 				if (elems[i] == 0xffffffffu)
8602 				{
8603 					// Use a constant 0 here.
8604 					// We could use the first component or similar, but then we risk propagating
8605 					// a value we might not need, and bogging down codegen.
8606 					SPIRConstant c;
8607 					c.constant_type = type0.parent_type;
8608 					assert(type0.parent_type != ID(0));
8609 					args.push_back(constant_expression(c));
8610 				}
8611 				else if (elems[i] >= type0.vecsize)
8612 					args.push_back(to_extract_component_expression(vec1, elems[i] - type0.vecsize));
8613 				else
8614 					args.push_back(to_extract_component_expression(vec0, elems[i]));
8615 			}
8616 			expr += join(type_to_glsl_constructor(get<SPIRType>(result_type)), "(", merge(args), ")");
8617 		}
8618 		else
8619 		{
8620 			should_fwd = should_forward(vec0);
8621 			trivial_forward = should_suppress_usage_tracking(vec0);
8622 
8623 			// We only source from first vector, so can use swizzle.
8624 			// If the vector is packed, unpack it before applying a swizzle (needed for MSL)
8625 			expr += to_enclosed_unpacked_expression(vec0);
8626 			expr += ".";
8627 			for (uint32_t i = 0; i < length; i++)
8628 			{
8629 				assert(elems[i] != 0xffffffffu);
8630 				expr += index_to_swizzle(elems[i]);
8631 			}
8632 
8633 			if (backend.swizzle_is_function && length > 1)
8634 				expr += "()";
8635 		}
8636 
8637 		// A shuffle is trivial in that it doesn't actually *do* anything.
8638 		// We inherit the forwardedness from our arguments to avoid flushing out to temporaries when it's not really needed.
8639 
8640 		emit_op(result_type, id, expr, should_fwd, trivial_forward);
8641 
8642 		inherit_expression_dependencies(id, vec0);
8643 		if (vec0 != vec1)
8644 			inherit_expression_dependencies(id, vec1);
8645 		break;
8646 	}
8647 
8648 	// ALU
8649 	case OpIsNan:
8650 		GLSL_UFOP(isnan);
8651 		break;
8652 
8653 	case OpIsInf:
8654 		GLSL_UFOP(isinf);
8655 		break;
8656 
8657 	case OpSNegate:
8658 	case OpFNegate:
8659 		GLSL_UOP(-);
8660 		break;
8661 
8662 	case OpIAdd:
8663 	{
8664 		// For simple arith ops, prefer the output type if there's a mismatch to avoid extra bitcasts.
8665 		auto type = get<SPIRType>(ops[0]).basetype;
8666 		GLSL_BOP_CAST(+, type);
8667 		break;
8668 	}
8669 
8670 	case OpFAdd:
8671 		GLSL_BOP(+);
8672 		break;
8673 
8674 	case OpISub:
8675 	{
8676 		auto type = get<SPIRType>(ops[0]).basetype;
8677 		GLSL_BOP_CAST(-, type);
8678 		break;
8679 	}
8680 
8681 	case OpFSub:
8682 		GLSL_BOP(-);
8683 		break;
8684 
8685 	case OpIMul:
8686 	{
8687 		auto type = get<SPIRType>(ops[0]).basetype;
8688 		GLSL_BOP_CAST(*, type);
8689 		break;
8690 	}
8691 
8692 	case OpVectorTimesMatrix:
8693 	case OpMatrixTimesVector:
8694 	{
8695 		// If the matrix needs transpose, just flip the multiply order.
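		// (In GLSL, "v * M" equals "transpose(M) * v", so flipping the operand order stands in
		// for the transpose without emitting an explicit transpose() call.)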
8696 		auto *e = maybe_get<SPIRExpression>(ops[opcode == OpMatrixTimesVector ? 2 : 3]);
8697 		if (e && e->need_transpose)
8698 		{
8699 			e->need_transpose = false;
8700 			string expr;
8701 
8702 			if (opcode == OpMatrixTimesVector)
8703 				expr = join(to_enclosed_unpacked_expression(ops[3]), " * ",
8704 				            enclose_expression(to_unpacked_row_major_matrix_expression(ops[2])));
8705 			else
8706 				expr = join(enclose_expression(to_unpacked_row_major_matrix_expression(ops[3])), " * ",
8707 				            to_enclosed_unpacked_expression(ops[2]));
8708 
8709 			bool forward = should_forward(ops[2]) && should_forward(ops[3]);
8710 			emit_op(ops[0], ops[1], expr, forward);
8711 			e->need_transpose = true;
8712 			inherit_expression_dependencies(ops[1], ops[2]);
8713 			inherit_expression_dependencies(ops[1], ops[3]);
8714 		}
8715 		else
8716 			GLSL_BOP(*);
8717 		break;
8718 	}
8719 
8720 	case OpMatrixTimesMatrix:
8721 	{
8722 		auto *a = maybe_get<SPIRExpression>(ops[2]);
8723 		auto *b = maybe_get<SPIRExpression>(ops[3]);
8724 
8725 		// If both matrices need transpose, we can multiply in flipped order and tag the expression as transposed.
8726 		// a^T * b^T = (b * a)^T.
8727 		if (a && b && a->need_transpose && b->need_transpose)
8728 		{
8729 			a->need_transpose = false;
8730 			b->need_transpose = false;
8731 			auto expr = join(enclose_expression(to_unpacked_row_major_matrix_expression(ops[3])), " * ",
8732 			                 enclose_expression(to_unpacked_row_major_matrix_expression(ops[2])));
8733 			bool forward = should_forward(ops[2]) && should_forward(ops[3]);
8734 			auto &e = emit_op(ops[0], ops[1], expr, forward);
8735 			e.need_transpose = true;
8736 			a->need_transpose = true;
8737 			b->need_transpose = true;
8738 			inherit_expression_dependencies(ops[1], ops[2]);
8739 			inherit_expression_dependencies(ops[1], ops[3]);
8740 		}
8741 		else
8742 			GLSL_BOP(*);
8743 
8744 		break;
8745 	}
8746 
8747 	case OpFMul:
8748 	case OpMatrixTimesScalar:
8749 	case OpVectorTimesScalar:
8750 		GLSL_BOP(*);
8751 		break;
8752 
8753 	case OpOuterProduct:
8754 		GLSL_BFOP(outerProduct);
8755 		break;
8756 
8757 	case OpDot:
8758 		GLSL_BFOP(dot);
8759 		break;
8760 
8761 	case OpTranspose:
8762 		GLSL_UFOP(transpose);
8763 		break;
8764 
8765 	case OpSRem:
8766 	{
8767 		uint32_t result_type = ops[0];
8768 		uint32_t result_id = ops[1];
8769 		uint32_t op0 = ops[2];
8770 		uint32_t op1 = ops[3];
8771 
8772 		// Needs special handling.
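		// E.g. (a sketch): OpSRem a, b is emitted as "a - b * (a / b)"; since integer division
		// truncates toward zero, the result keeps the sign of a, as SPIR-V requires.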
8773 		bool forward = should_forward(op0) && should_forward(op1);
8774 		auto expr = join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ", "(",
8775 		                 to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), ")");
8776 
8777 		emit_op(result_type, result_id, expr, forward);
8778 		inherit_expression_dependencies(result_id, op0);
8779 		inherit_expression_dependencies(result_id, op1);
8780 		break;
8781 	}
8782 
8783 	case OpSDiv:
8784 		GLSL_BOP_CAST(/, int_type);
8785 		break;
8786 
8787 	case OpUDiv:
8788 		GLSL_BOP_CAST(/, uint_type);
8789 		break;
8790 
8791 	case OpIAddCarry:
8792 	case OpISubBorrow:
8793 	{
8794 		if (options.es && options.version < 310)
8795 			SPIRV_CROSS_THROW("Extended arithmetic is only available from ESSL 310.");
8796 		else if (!options.es && options.version < 400)
8797 			SPIRV_CROSS_THROW("Extended arithmetic is only available from GLSL 400.");
8798 
8799 		uint32_t result_type = ops[0];
8800 		uint32_t result_id = ops[1];
8801 		uint32_t op0 = ops[2];
8802 		uint32_t op1 = ops[3];
8803 		auto &type = get<SPIRType>(result_type);
8804 		emit_uninitialized_temporary_expression(result_type, result_id);
8805 		const char *op = opcode == OpIAddCarry ? "uaddCarry" : "usubBorrow";
8806 
8807 		statement(to_expression(result_id), ".", to_member_name(type, 0), " = ", op, "(", to_expression(op0), ", ",
8808 		          to_expression(op1), ", ", to_expression(result_id), ".", to_member_name(type, 1), ");");
8809 		break;
8810 	}
8811 
8812 	case OpUMulExtended:
8813 	case OpSMulExtended:
8814 	{
8815 		if (options.es && options.version < 310)
8816 			SPIRV_CROSS_THROW("Extended arithmetic is only available from ESSL 310.");
8817 		else if (!options.es && options.version < 400)
8818 			SPIRV_CROSS_THROW("Extended arithmetic is only available from GLSL 400.");
8819 
8820 		uint32_t result_type = ops[0];
8821 		uint32_t result_id = ops[1];
8822 		uint32_t op0 = ops[2];
8823 		uint32_t op1 = ops[3];
8824 		auto &type = get<SPIRType>(result_type);
8825 		emit_uninitialized_temporary_expression(result_type, result_id);
8826 		const char *op = opcode == OpUMulExtended ? "umulExtended" : "imulExtended";
8827 
8828 		statement(op, "(", to_expression(op0), ", ", to_expression(op1), ", ", to_expression(result_id), ".",
8829 		          to_member_name(type, 1), ", ", to_expression(result_id), ".", to_member_name(type, 0), ");");
8830 		break;
8831 	}
8832 
8833 	case OpFDiv:
8834 		GLSL_BOP(/);
8835 		break;
8836 
8837 	case OpShiftRightLogical:
8838 		GLSL_BOP_CAST(>>, uint_type);
8839 		break;
8840 
8841 	case OpShiftRightArithmetic:
8842 		GLSL_BOP_CAST(>>, int_type);
8843 		break;
8844 
8845 	case OpShiftLeftLogical:
8846 	{
8847 		auto type = get<SPIRType>(ops[0]).basetype;
8848 		GLSL_BOP_CAST(<<, type);
8849 		break;
8850 	}
8851 
8852 	case OpBitwiseOr:
8853 	{
8854 		auto type = get<SPIRType>(ops[0]).basetype;
8855 		GLSL_BOP_CAST(|, type);
8856 		break;
8857 	}
8858 
8859 	case OpBitwiseXor:
8860 	{
8861 		auto type = get<SPIRType>(ops[0]).basetype;
8862 		GLSL_BOP_CAST(^, type);
8863 		break;
8864 	}
8865 
8866 	case OpBitwiseAnd:
8867 	{
8868 		auto type = get<SPIRType>(ops[0]).basetype;
8869 		GLSL_BOP_CAST(&, type);
8870 		break;
8871 	}
8872 
8873 	case OpNot:
8874 		GLSL_UOP(~);
8875 		break;
8876 
8877 	case OpUMod:
8878 		GLSL_BOP_CAST(%, uint_type);
8879 		break;
8880 
8881 	case OpSMod:
8882 		GLSL_BOP_CAST(%, int_type);
8883 		break;
8884 
8885 	case OpFMod:
8886 		GLSL_BFOP(mod);
8887 		break;
8888 
8889 	case OpFRem:
8890 	{
8891 		if (is_legacy())
8892 			SPIRV_CROSS_THROW("OpFRem requires trunc() and is only supported on non-legacy targets. A workaround is "
8893 			                  "needed for legacy.");
8894 
8895 		uint32_t result_type = ops[0];
8896 		uint32_t result_id = ops[1];
8897 		uint32_t op0 = ops[2];
8898 		uint32_t op1 = ops[3];
8899 
8900 		// Needs special handling.
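		// E.g. (a sketch): OpFRem a, b is emitted as "a - b * trunc(a / b)", which keeps the
		// sign of a, whereas GLSL mod() would follow the sign of b.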
8901 		bool forward = should_forward(op0) && should_forward(op1);
8902 		auto expr = join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ", "trunc(",
8903 		                 to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), ")");
8904 
8905 		emit_op(result_type, result_id, expr, forward);
8906 		inherit_expression_dependencies(result_id, op0);
8907 		inherit_expression_dependencies(result_id, op1);
8908 		break;
8909 	}
8910 
8911 	// Relational
8912 	case OpAny:
8913 		GLSL_UFOP(any);
8914 		break;
8915 
8916 	case OpAll:
8917 		GLSL_UFOP(all);
8918 		break;
8919 
8920 	case OpSelect:
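		// Note on the argument order (an editorial aside): OpSelect takes (condition, true-value, false-value),
		// while mix(x, y, a) with a boolean selector returns y where a is true, hence ops[4], ops[3], ops[2].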
8921 		emit_mix_op(ops[0], ops[1], ops[4], ops[3], ops[2]);
8922 		break;
8923 
8924 	case OpLogicalOr:
8925 	{
8926 		// No vector variant in GLSL for logical OR.
8927 		auto result_type = ops[0];
8928 		auto id = ops[1];
8929 		auto &type = get<SPIRType>(result_type);
8930 
8931 		if (type.vecsize > 1)
8932 			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "||", false, SPIRType::Unknown);
8933 		else
8934 			GLSL_BOP(||);
8935 		break;
8936 	}
8937 
8938 	case OpLogicalAnd:
8939 	{
8940 		// No vector variant in GLSL for logical AND.
8941 		auto result_type = ops[0];
8942 		auto id = ops[1];
8943 		auto &type = get<SPIRType>(result_type);
8944 
8945 		if (type.vecsize > 1)
8946 			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "&&", false, SPIRType::Unknown);
8947 		else
8948 			GLSL_BOP(&&);
8949 		break;
8950 	}
8951 
8952 	case OpLogicalNot:
8953 	{
8954 		auto &type = get<SPIRType>(ops[0]);
8955 		if (type.vecsize > 1)
8956 			GLSL_UFOP(not);
8957 		else
8958 			GLSL_UOP(!);
8959 		break;
8960 	}
8961 
8962 	case OpIEqual:
8963 	{
8964 		if (expression_type(ops[2]).vecsize > 1)
8965 			GLSL_BFOP_CAST(equal, int_type);
8966 		else
8967 			GLSL_BOP_CAST(==, int_type);
8968 		break;
8969 	}
8970 
8971 	case OpLogicalEqual:
8972 	case OpFOrdEqual:
8973 	{
8974 		if (expression_type(ops[2]).vecsize > 1)
8975 			GLSL_BFOP(equal);
8976 		else
8977 			GLSL_BOP(==);
8978 		break;
8979 	}
8980 
8981 	case OpINotEqual:
8982 	{
8983 		if (expression_type(ops[2]).vecsize > 1)
8984 			GLSL_BFOP_CAST(notEqual, int_type);
8985 		else
8986 			GLSL_BOP_CAST(!=, int_type);
8987 		break;
8988 	}
8989 
8990 	case OpLogicalNotEqual:
8991 	case OpFOrdNotEqual:
8992 	{
8993 		if (expression_type(ops[2]).vecsize > 1)
8994 			GLSL_BFOP(notEqual);
8995 		else
8996 			GLSL_BOP(!=);
8997 		break;
8998 	}
8999 
9000 	case OpUGreaterThan:
9001 	case OpSGreaterThan:
9002 	{
9003 		auto type = opcode == OpUGreaterThan ? uint_type : int_type;
9004 		if (expression_type(ops[2]).vecsize > 1)
9005 			GLSL_BFOP_CAST(greaterThan, type);
9006 		else
9007 			GLSL_BOP_CAST(>, type);
9008 		break;
9009 	}
9010 
9011 	case OpFOrdGreaterThan:
9012 	{
9013 		if (expression_type(ops[2]).vecsize > 1)
9014 			GLSL_BFOP(greaterThan);
9015 		else
9016 			GLSL_BOP(>);
9017 		break;
9018 	}
9019 
9020 	case OpUGreaterThanEqual:
9021 	case OpSGreaterThanEqual:
9022 	{
9023 		auto type = opcode == OpUGreaterThanEqual ? uint_type : int_type;
9024 		if (expression_type(ops[2]).vecsize > 1)
9025 			GLSL_BFOP_CAST(greaterThanEqual, type);
9026 		else
9027 			GLSL_BOP_CAST(>=, type);
9028 		break;
9029 	}
9030 
9031 	case OpFOrdGreaterThanEqual:
9032 	{
9033 		if (expression_type(ops[2]).vecsize > 1)
9034 			GLSL_BFOP(greaterThanEqual);
9035 		else
9036 			GLSL_BOP(>=);
9037 		break;
9038 	}
9039 
9040 	case OpULessThan:
9041 	case OpSLessThan:
9042 	{
9043 		auto type = opcode == OpULessThan ? uint_type : int_type;
9044 		if (expression_type(ops[2]).vecsize > 1)
9045 			GLSL_BFOP_CAST(lessThan, type);
9046 		else
9047 			GLSL_BOP_CAST(<, type);
9048 		break;
9049 	}
9050 
9051 	case OpFOrdLessThan:
9052 	{
9053 		if (expression_type(ops[2]).vecsize > 1)
9054 			GLSL_BFOP(lessThan);
9055 		else
9056 			GLSL_BOP(<);
9057 		break;
9058 	}
9059 
9060 	case OpULessThanEqual:
9061 	case OpSLessThanEqual:
9062 	{
9063 		auto type = opcode == OpULessThanEqual ? uint_type : int_type;
9064 		if (expression_type(ops[2]).vecsize > 1)
9065 			GLSL_BFOP_CAST(lessThanEqual, type);
9066 		else
9067 			GLSL_BOP_CAST(<=, type);
9068 		break;
9069 	}
9070 
9071 	case OpFOrdLessThanEqual:
9072 	{
9073 		if (expression_type(ops[2]).vecsize > 1)
9074 			GLSL_BFOP(lessThanEqual);
9075 		else
9076 			GLSL_BOP(<=);
9077 		break;
9078 	}
9079 
9080 	// Conversion
9081 	case OpSConvert:
9082 	case OpConvertSToF:
9083 	case OpUConvert:
9084 	case OpConvertUToF:
9085 	{
9086 		auto input_type = opcode == OpSConvert || opcode == OpConvertSToF ? int_type : uint_type;
9087 		uint32_t result_type = ops[0];
9088 		uint32_t id = ops[1];
9089 
9090 		auto &type = get<SPIRType>(result_type);
9091 		auto &arg_type = expression_type(ops[2]);
9092 		auto func = type_to_glsl_constructor(type);
9093 
9094 		// If we're sign-extending or zero-extending, we need to make sure we cast from the correct type.
9095 		// For truncation, it does not matter, so don't emit useless casts.
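		// For example, widening a 16-bit uint to a 32-bit uint must read the operand as an unsigned
		// type so the zero-extension is well defined, while truncating 32-bit down to 16-bit needs no such cast.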
9096 		if (arg_type.width < type.width)
9097 			emit_unary_func_op_cast(result_type, id, ops[2], func.c_str(), input_type, type.basetype);
9098 		else
9099 			emit_unary_func_op(result_type, id, ops[2], func.c_str());
9100 		break;
9101 	}
9102 
9103 	case OpConvertFToU:
9104 	case OpConvertFToS:
9105 	{
9106 		// Cast to expected arithmetic type, then potentially bitcast away to desired signedness.
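		// E.g. OpConvertFToS with a uint result type first emits int(x) (the arithmetic conversion),
		// then bitcasts the signed result over to the declared uint type.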
9107 		uint32_t result_type = ops[0];
9108 		uint32_t id = ops[1];
9109 		auto &type = get<SPIRType>(result_type);
9110 		auto expected_type = type;
9111 		auto &float_type = expression_type(ops[2]);
9112 		expected_type.basetype =
9113 		    opcode == OpConvertFToS ? to_signed_basetype(type.width) : to_unsigned_basetype(type.width);
9114 
9115 		auto func = type_to_glsl_constructor(expected_type);
9116 		emit_unary_func_op_cast(result_type, id, ops[2], func.c_str(), float_type.basetype, expected_type.basetype);
9117 		break;
9118 	}
9119 
9120 	case OpFConvert:
9121 	{
9122 		uint32_t result_type = ops[0];
9123 		uint32_t id = ops[1];
9124 
9125 		auto func = type_to_glsl_constructor(get<SPIRType>(result_type));
9126 		emit_unary_func_op(result_type, id, ops[2], func.c_str());
9127 		break;
9128 	}
9129 
9130 	case OpBitcast:
9131 	{
9132 		uint32_t result_type = ops[0];
9133 		uint32_t id = ops[1];
9134 		uint32_t arg = ops[2];
9135 
9136 		auto op = bitcast_glsl_op(get<SPIRType>(result_type), expression_type(arg));
9137 		emit_unary_func_op(result_type, id, arg, op.c_str());
9138 		break;
9139 	}
9140 
9141 	case OpQuantizeToF16:
9142 	{
9143 		uint32_t result_type = ops[0];
9144 		uint32_t id = ops[1];
9145 		uint32_t arg = ops[2];
9146 
9147 		string op;
9148 		auto &type = get<SPIRType>(result_type);
9149 
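		// Quantization is implemented as a round trip through packHalf2x16()/unpackHalf2x16(),
		// padding or splitting the vector so each pack operates on exactly two components.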
9150 		switch (type.vecsize)
9151 		{
9152 		case 1:
9153 			op = join("unpackHalf2x16(packHalf2x16(vec2(", to_expression(arg), "))).x");
9154 			break;
9155 		case 2:
9156 			op = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), "))");
9157 			break;
9158 		case 3:
9159 		{
9160 			auto op0 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".xy))");
9161 			auto op1 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".zz)).x");
9162 			op = join("vec3(", op0, ", ", op1, ")");
9163 			break;
9164 		}
9165 		case 4:
9166 		{
9167 			auto op0 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".xy))");
9168 			auto op1 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".zw))");
9169 			op = join("vec4(", op0, ", ", op1, ")");
9170 			break;
9171 		}
9172 		default:
9173 			SPIRV_CROSS_THROW("Illegal argument to OpQuantizeToF16.");
9174 		}
9175 
9176 		emit_op(result_type, id, op, should_forward(arg));
9177 		inherit_expression_dependencies(id, arg);
9178 		break;
9179 	}
9180 
9181 	// Derivatives
9182 	case OpDPdx:
9183 		GLSL_UFOP(dFdx);
9184 		if (is_legacy_es())
9185 			require_extension_internal("GL_OES_standard_derivatives");
9186 		register_control_dependent_expression(ops[1]);
9187 		break;
9188 
9189 	case OpDPdy:
9190 		GLSL_UFOP(dFdy);
9191 		if (is_legacy_es())
9192 			require_extension_internal("GL_OES_standard_derivatives");
9193 		register_control_dependent_expression(ops[1]);
9194 		break;
9195 
9196 	case OpDPdxFine:
9197 		GLSL_UFOP(dFdxFine);
9198 		if (options.es)
9199 		{
9200 			SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
9201 		}
9202 		if (options.version < 450)
9203 			require_extension_internal("GL_ARB_derivative_control");
9204 		register_control_dependent_expression(ops[1]);
9205 		break;
9206 
9207 	case OpDPdyFine:
9208 		GLSL_UFOP(dFdyFine);
9209 		if (options.es)
9210 		{
9211 			SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
9212 		}
9213 		if (options.version < 450)
9214 			require_extension_internal("GL_ARB_derivative_control");
9215 		register_control_dependent_expression(ops[1]);
9216 		break;
9217 
9218 	case OpDPdxCoarse:
9219 		if (options.es)
9220 		{
9221 			SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
9222 		}
9223 		GLSL_UFOP(dFdxCoarse);
9224 		if (options.version < 450)
9225 			require_extension_internal("GL_ARB_derivative_control");
9226 		register_control_dependent_expression(ops[1]);
9227 		break;
9228 
9229 	case OpDPdyCoarse:
9230 		GLSL_UFOP(dFdyCoarse);
9231 		if (options.es)
9232 		{
9233 			SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
9234 		}
9235 		if (options.version < 450)
9236 			require_extension_internal("GL_ARB_derivative_control");
9237 		register_control_dependent_expression(ops[1]);
9238 		break;
9239 
9240 	case OpFwidth:
9241 		GLSL_UFOP(fwidth);
9242 		if (is_legacy_es())
9243 			require_extension_internal("GL_OES_standard_derivatives");
9244 		register_control_dependent_expression(ops[1]);
9245 		break;
9246 
9247 	case OpFwidthCoarse:
9248 		GLSL_UFOP(fwidthCoarse);
9249 		if (options.es)
9250 		{
9251 			SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
9252 		}
9253 		if (options.version < 450)
9254 			require_extension_internal("GL_ARB_derivative_control");
9255 		register_control_dependent_expression(ops[1]);
9256 		break;
9257 
9258 	case OpFwidthFine:
9259 		GLSL_UFOP(fwidthFine);
9260 		if (options.es)
9261 		{
9262 			SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
9263 		}
9264 		if (options.version < 450)
9265 			require_extension_internal("GL_ARB_derivative_control");
9266 		register_control_dependent_expression(ops[1]);
9267 		break;
9268 
9269 	// Bitfield
9270 	case OpBitFieldInsert:
9271 	{
9272 		emit_bitfield_insert_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], "bitfieldInsert", SPIRType::Int);
9273 		break;
9274 	}
9275 
9276 	case OpBitFieldSExtract:
9277 	{
9278 		emit_trinary_func_op_bitextract(ops[0], ops[1], ops[2], ops[3], ops[4], "bitfieldExtract", int_type, int_type,
9279 		                                SPIRType::Int, SPIRType::Int);
9280 		break;
9281 	}
9282 
9283 	case OpBitFieldUExtract:
9284 	{
9285 		emit_trinary_func_op_bitextract(ops[0], ops[1], ops[2], ops[3], ops[4], "bitfieldExtract", uint_type, uint_type,
9286 		                                SPIRType::Int, SPIRType::Int);
9287 		break;
9288 	}
9289 
9290 	case OpBitReverse:
9291 		// BitReverse does not have issues with sign since result type must match input type.
9292 		GLSL_UFOP(bitfieldReverse);
9293 		break;
9294 
9295 	case OpBitCount:
9296 	{
9297 		auto basetype = expression_type(ops[2]).basetype;
9298 		emit_unary_func_op_cast(ops[0], ops[1], ops[2], "bitCount", basetype, int_type);
9299 		break;
9300 	}
9301 
9302 	// Atomics
9303 	case OpAtomicExchange:
9304 	{
9305 		uint32_t result_type = ops[0];
9306 		uint32_t id = ops[1];
9307 		uint32_t ptr = ops[2];
9308 		// Ignore memory semantics for now; they are probably only relevant to OpenCL.
9309 		uint32_t val = ops[5];
9310 		const char *op = check_atomic_image(ptr) ? "imageAtomicExchange" : "atomicExchange";
9311 		forced_temporaries.insert(id);
9312 		emit_binary_func_op(result_type, id, ptr, val, op);
9313 		flush_all_atomic_capable_variables();
9314 		break;
9315 	}
9316 
9317 	case OpAtomicCompareExchange:
9318 	{
9319 		uint32_t result_type = ops[0];
9320 		uint32_t id = ops[1];
9321 		uint32_t ptr = ops[2];
9322 		uint32_t val = ops[6];
9323 		uint32_t comp = ops[7];
9324 		const char *op = check_atomic_image(ptr) ? "imageAtomicCompSwap" : "atomicCompSwap";
9325 
9326 		forced_temporaries.insert(id);
9327 		emit_trinary_func_op(result_type, id, ptr, comp, val, op);
9328 		flush_all_atomic_capable_variables();
9329 		break;
9330 	}
9331 
9332 	case OpAtomicLoad:
9333 		flush_all_atomic_capable_variables();
9334 		// FIXME: Image?
9335 		// OpAtomicLoad seems to only be relevant for atomic counters.
9336 		forced_temporaries.insert(ops[1]);
9337 		GLSL_UFOP(atomicCounter);
9338 		break;
9339 
9340 	case OpAtomicStore:
9341 		SPIRV_CROSS_THROW("Unsupported opcode OpAtomicStore.");
9342 
9343 	case OpAtomicIIncrement:
9344 	case OpAtomicIDecrement:
9345 	{
9346 		forced_temporaries.insert(ops[1]);
9347 		auto &type = expression_type(ops[2]);
9348 		if (type.storage == StorageClassAtomicCounter)
9349 		{
9350 			// Legacy GLSL atomic counters; unclear whether this path is still relevant to support.
9351 			if (opcode == OpAtomicIIncrement)
9352 				GLSL_UFOP(atomicCounterIncrement);
9353 			else
9354 				GLSL_UFOP(atomicCounterDecrement);
9355 		}
9356 		else
9357 		{
9358 			bool atomic_image = check_atomic_image(ops[2]);
9359 			bool unsigned_type = (type.basetype == SPIRType::UInt) ||
9360 			                     (atomic_image && get<SPIRType>(type.image.type).basetype == SPIRType::UInt);
9361 			const char *op = atomic_image ? "imageAtomicAdd" : "atomicAdd";
9362 
9363 			const char *increment = nullptr;
9364 			if (opcode == OpAtomicIIncrement && unsigned_type)
9365 				increment = "1u";
9366 			else if (opcode == OpAtomicIIncrement)
9367 				increment = "1";
9368 			else if (unsigned_type)
9369 				increment = "uint(-1)";
9370 			else
9371 				increment = "-1";
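			// E.g. an unsigned OpAtomicIDecrement becomes atomicAdd(x, uint(-1)), i.e. a wrapping decrement.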
9372 
9373 			emit_op(ops[0], ops[1], join(op, "(", to_expression(ops[2]), ", ", increment, ")"), false);
9374 		}
9375 
9376 		flush_all_atomic_capable_variables();
9377 		break;
9378 	}
9379 
9380 	case OpAtomicIAdd:
9381 	{
9382 		const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd";
9383 		forced_temporaries.insert(ops[1]);
9384 		emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
9385 		flush_all_atomic_capable_variables();
9386 		break;
9387 	}
9388 
9389 	case OpAtomicISub:
9390 	{
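		// GLSL has no atomicSub(); negate the operand and reuse atomicAdd()/imageAtomicAdd().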
9391 		const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd";
9392 		forced_temporaries.insert(ops[1]);
9393 		auto expr = join(op, "(", to_expression(ops[2]), ", -", to_enclosed_expression(ops[5]), ")");
9394 		emit_op(ops[0], ops[1], expr, should_forward(ops[2]) && should_forward(ops[5]));
9395 		flush_all_atomic_capable_variables();
9396 		break;
9397 	}
9398 
9399 	case OpAtomicSMin:
9400 	case OpAtomicUMin:
9401 	{
9402 		const char *op = check_atomic_image(ops[2]) ? "imageAtomicMin" : "atomicMin";
9403 		forced_temporaries.insert(ops[1]);
9404 		emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
9405 		flush_all_atomic_capable_variables();
9406 		break;
9407 	}
9408 
9409 	case OpAtomicSMax:
9410 	case OpAtomicUMax:
9411 	{
9412 		const char *op = check_atomic_image(ops[2]) ? "imageAtomicMax" : "atomicMax";
9413 		forced_temporaries.insert(ops[1]);
9414 		emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
9415 		flush_all_atomic_capable_variables();
9416 		break;
9417 	}
9418 
9419 	case OpAtomicAnd:
9420 	{
9421 		const char *op = check_atomic_image(ops[2]) ? "imageAtomicAnd" : "atomicAnd";
9422 		forced_temporaries.insert(ops[1]);
9423 		emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
9424 		flush_all_atomic_capable_variables();
9425 		break;
9426 	}
9427 
9428 	case OpAtomicOr:
9429 	{
9430 		const char *op = check_atomic_image(ops[2]) ? "imageAtomicOr" : "atomicOr";
9431 		forced_temporaries.insert(ops[1]);
9432 		emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
9433 		flush_all_atomic_capable_variables();
9434 		break;
9435 	}
9436 
9437 	case OpAtomicXor:
9438 	{
9439 		const char *op = check_atomic_image(ops[2]) ? "imageAtomicXor" : "atomicXor";
9440 		forced_temporaries.insert(ops[1]);
9441 		emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
9442 		flush_all_atomic_capable_variables();
9443 		break;
9444 	}
9445 
9446 	// Geometry shaders
9447 	case OpEmitVertex:
9448 		statement("EmitVertex();");
9449 		break;
9450 
9451 	case OpEndPrimitive:
9452 		statement("EndPrimitive();");
9453 		break;
9454 
9455 	case OpEmitStreamVertex:
9456 		statement("EmitStreamVertex();");
9457 		break;
9458 
9459 	case OpEndStreamPrimitive:
9460 		statement("EndStreamPrimitive();");
9461 		break;
9462 
9463 	// Textures
9464 	case OpImageSampleExplicitLod:
9465 	case OpImageSampleProjExplicitLod:
9466 	case OpImageSampleDrefExplicitLod:
9467 	case OpImageSampleProjDrefExplicitLod:
9468 	case OpImageSampleImplicitLod:
9469 	case OpImageSampleProjImplicitLod:
9470 	case OpImageSampleDrefImplicitLod:
9471 	case OpImageSampleProjDrefImplicitLod:
9472 	case OpImageFetch:
9473 	case OpImageGather:
9474 	case OpImageDrefGather:
9475 		// Gets a bit hairy, so move this to a separate instruction.
9476 		emit_texture_op(instruction);
9477 		break;
9478 
9479 	case OpImage:
9480 	{
9481 		uint32_t result_type = ops[0];
9482 		uint32_t id = ops[1];
9483 
9484 		// Suppress usage tracking.
9485 		auto &e = emit_op(result_type, id, to_expression(ops[2]), true, true);
9486 
9487 		// When using the image, we need to know which variable it is actually loaded from.
9488 		auto *var = maybe_get_backing_variable(ops[2]);
9489 		e.loaded_from = var ? var->self : ID(0);
9490 		break;
9491 	}
9492 
9493 	case OpImageQueryLod:
9494 	{
9495 		if (!options.es && options.version < 400)
9496 		{
9497 			require_extension_internal("GL_ARB_texture_query_lod");
9498 			// The ARB extension spells the built-in with all-caps LOD: textureQueryLOD().
9499 			GLSL_BFOP(textureQueryLOD);
9500 		}
9501 		else if (options.es)
9502 			SPIRV_CROSS_THROW("textureQueryLod not supported in ES profile.");
9503 		else
9504 			GLSL_BFOP(textureQueryLod);
9505 		register_control_dependent_expression(ops[1]);
9506 		break;
9507 	}
9508 
9509 	case OpImageQueryLevels:
9510 	{
9511 		uint32_t result_type = ops[0];
9512 		uint32_t id = ops[1];
9513 
9514 		if (!options.es && options.version < 430)
9515 			require_extension_internal("GL_ARB_texture_query_levels");
9516 		if (options.es)
9517 			SPIRV_CROSS_THROW("textureQueryLevels not supported in ES profile.");
9518 
9519 		auto expr = join("textureQueryLevels(", convert_separate_image_to_expression(ops[2]), ")");
9520 		auto &restype = get<SPIRType>(ops[0]);
9521 		expr = bitcast_expression(restype, SPIRType::Int, expr);
9522 		emit_op(result_type, id, expr, true);
9523 		break;
9524 	}
9525 
9526 	case OpImageQuerySamples:
9527 	{
9528 		auto &type = expression_type(ops[2]);
9529 		uint32_t result_type = ops[0];
9530 		uint32_t id = ops[1];
9531 
9532 		string expr;
9533 		if (type.image.sampled == 2)
9534 			expr = join("imageSamples(", to_expression(ops[2]), ")");
9535 		else
9536 			expr = join("textureSamples(", convert_separate_image_to_expression(ops[2]), ")");
9537 
9538 		auto &restype = get<SPIRType>(ops[0]);
9539 		expr = bitcast_expression(restype, SPIRType::Int, expr);
9540 		emit_op(result_type, id, expr, true);
9541 		break;
9542 	}
9543 
9544 	case OpSampledImage:
9545 	{
9546 		uint32_t result_type = ops[0];
9547 		uint32_t id = ops[1];
9548 		emit_sampled_image_op(result_type, id, ops[2], ops[3]);
9549 		inherit_expression_dependencies(id, ops[2]);
9550 		inherit_expression_dependencies(id, ops[3]);
9551 		break;
9552 	}
9553 
9554 	case OpImageQuerySizeLod:
9555 	{
9556 		uint32_t result_type = ops[0];
9557 		uint32_t id = ops[1];
9558 
9559 		auto expr = join("textureSize(", convert_separate_image_to_expression(ops[2]), ", ",
9560 		                 bitcast_expression(SPIRType::Int, ops[3]), ")");
9561 		auto &restype = get<SPIRType>(ops[0]);
9562 		expr = bitcast_expression(restype, SPIRType::Int, expr);
9563 		emit_op(result_type, id, expr, true);
9564 		break;
9565 	}
9566 
9567 	// Image load/store
9568 	case OpImageRead:
9569 	{
9570 		// We added Nonreadable speculatively to the OpImage variable due to glslangValidator
9571 		// not adding the proper qualifiers.
9572 		// If it turns out we need to read the image after all, remove the qualifier and recompile.
9573 		auto *var = maybe_get_backing_variable(ops[2]);
9574 		if (var)
9575 		{
9576 			auto &flags = ir.meta[var->self].decoration.decoration_flags;
9577 			if (flags.get(DecorationNonReadable))
9578 			{
9579 				flags.clear(DecorationNonReadable);
9580 				force_recompile();
9581 			}
9582 		}
9583 
9584 		uint32_t result_type = ops[0];
9585 		uint32_t id = ops[1];
9586 
9587 		bool pure;
9588 		string imgexpr;
9589 		auto &type = expression_type(ops[2]);
9590 
9591 		if (var && var->remapped_variable) // Remapped input, just read as-is without any op-code
9592 		{
9593 			if (type.image.ms)
9594 				SPIRV_CROSS_THROW("Trying to remap multisampled image to variable, this is not possible.");
9595 
9596 			auto itr =
9597 			    find_if(begin(pls_inputs), end(pls_inputs), [var](const PlsRemap &pls) { return pls.id == var->self; });
9598 
9599 			if (itr == end(pls_inputs))
9600 			{
9601 				// For non-PLS inputs, we rely on subpass type remapping information to get it right
9602 				// since ImageRead always returns 4-component vectors and the backing type is opaque.
9603 				if (!var->remapped_components)
9604 					SPIRV_CROSS_THROW("subpassInput was remapped, but remap_components is not set correctly.");
9605 				imgexpr = remap_swizzle(get<SPIRType>(result_type), var->remapped_components, to_expression(ops[2]));
9606 			}
9607 			else
9608 			{
9609 				// A PLS input could have a different number of components than what SPIR-V expects;
9610 				// swizzle to the appropriate vector size.
9611 				uint32_t components = pls_format_to_components(itr->format);
9612 				imgexpr = remap_swizzle(get<SPIRType>(result_type), components, to_expression(ops[2]));
9613 			}
9614 			pure = true;
9615 		}
9616 		else if (type.image.dim == DimSubpassData)
9617 		{
9618 			if (options.vulkan_semantics)
9619 			{
9620 				// With Vulkan semantics, use the proper Vulkan GLSL construct.
9621 				if (type.image.ms)
9622 				{
9623 					uint32_t operands = ops[4];
9624 					if (operands != ImageOperandsSampleMask || length != 6)
9625 						SPIRV_CROSS_THROW(
9626 						    "Multisampled image used in OpImageRead, but unexpected operand mask was used.");
9627 
9628 					uint32_t samples = ops[5];
9629 					imgexpr = join("subpassLoad(", to_expression(ops[2]), ", ", to_expression(samples), ")");
9630 				}
9631 				else
9632 					imgexpr = join("subpassLoad(", to_expression(ops[2]), ")");
9633 			}
9634 			else
9635 			{
9636 				if (type.image.ms)
9637 				{
9638 					uint32_t operands = ops[4];
9639 					if (operands != ImageOperandsSampleMask || length != 6)
9640 						SPIRV_CROSS_THROW(
9641 						    "Multisampled image used in OpImageRead, but unexpected operand mask was used.");
9642 
9643 					uint32_t samples = ops[5];
9644 					imgexpr = join("texelFetch(", to_expression(ops[2]), ", ivec2(gl_FragCoord.xy), ",
9645 					               to_expression(samples), ")");
9646 				}
9647 				else
9648 				{
9649 					// Implement subpass loads via texture barrier style sampling.
9650 					imgexpr = join("texelFetch(", to_expression(ops[2]), ", ivec2(gl_FragCoord.xy), 0)");
9651 				}
9652 			}
9653 			imgexpr = remap_swizzle(get<SPIRType>(result_type), 4, imgexpr);
9654 			pure = true;
9655 		}
9656 		else
9657 		{
9658 			// imageLoad only accepts int coords, not uint.
9659 			auto coord_expr = to_expression(ops[3]);
9660 			auto target_coord_type = expression_type(ops[3]);
9661 			target_coord_type.basetype = SPIRType::Int;
9662 			coord_expr = bitcast_expression(target_coord_type, expression_type(ops[3]).basetype, coord_expr);
9663 
9664 			// Plain image load/store.
9665 			if (type.image.ms)
9666 			{
9667 				uint32_t operands = ops[4];
9668 				if (operands != ImageOperandsSampleMask || length != 6)
9669 					SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected operand mask was used.");
9670 
9671 				uint32_t samples = ops[5];
9672 				imgexpr =
9673 				    join("imageLoad(", to_expression(ops[2]), ", ", coord_expr, ", ", to_expression(samples), ")");
9674 			}
9675 			else
9676 				imgexpr = join("imageLoad(", to_expression(ops[2]), ", ", coord_expr, ")");
9677 
9678 			imgexpr = remap_swizzle(get<SPIRType>(result_type), 4, imgexpr);
9679 			pure = false;
9680 		}
9681 
9682 		if (var && var->forwardable)
9683 		{
9684 			bool forward = forced_temporaries.find(id) == end(forced_temporaries);
9685 			auto &e = emit_op(result_type, id, imgexpr, forward);
9686 
9687 			// We only need to track dependencies if we're reading from image load/store.
9688 			if (!pure)
9689 			{
9690 				e.loaded_from = var->self;
9691 				if (forward)
9692 					var->dependees.push_back(id);
9693 			}
9694 		}
9695 		else
9696 			emit_op(result_type, id, imgexpr, false);
9697 
9698 		inherit_expression_dependencies(id, ops[2]);
9699 		if (type.image.ms)
9700 			inherit_expression_dependencies(id, ops[5]);
9701 		break;
9702 	}
9703 
9704 	case OpImageTexelPointer:
9705 	{
9706 		uint32_t result_type = ops[0];
9707 		uint32_t id = ops[1];
9708 
9709 		auto coord_expr = to_expression(ops[3]);
9710 		auto target_coord_type = expression_type(ops[3]);
9711 		target_coord_type.basetype = SPIRType::Int;
9712 		coord_expr = bitcast_expression(target_coord_type, expression_type(ops[3]).basetype, coord_expr);
9713 
9714 		auto &e = set<SPIRExpression>(id, join(to_expression(ops[2]), ", ", coord_expr), result_type, true);
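		// The "expression" here is just "image, coord"; atomic ops that see this texel pointer
		// splice it directly into imageAtomic*() calls (see check_atomic_image()).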
9715 
9716 		// When using the pointer, we need to know which variable it is actually loaded from.
9717 		auto *var = maybe_get_backing_variable(ops[2]);
9718 		e.loaded_from = var ? var->self : ID(0);
9719 		inherit_expression_dependencies(id, ops[3]);
9720 		break;
9721 	}
9722 
9723 	case OpImageWrite:
9724 	{
9725 		// We added Nonwritable speculatively to the OpImage variable due to glslangValidator
9726 		// not adding the proper qualifiers.
9727 		// If it turns out we need to write to the image after all, remove the qualifier and recompile.
9728 		auto *var = maybe_get_backing_variable(ops[0]);
9729 		if (var)
9730 		{
9731 			auto &flags = ir.meta[var->self].decoration.decoration_flags;
9732 			if (flags.get(DecorationNonWritable))
9733 			{
9734 				flags.clear(DecorationNonWritable);
9735 				force_recompile();
9736 			}
9737 		}
9738 
9739 		auto &type = expression_type(ops[0]);
9740 		auto &value_type = expression_type(ops[2]);
9741 		auto store_type = value_type;
9742 		store_type.vecsize = 4;
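		// imageStore() always takes a 4-component value, so remap_swizzle() below pads
		// narrower value types up to the 4-component store type.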
9743 
9744 		// imageStore only accepts int coords, not uint.
9745 		auto coord_expr = to_expression(ops[1]);
9746 		auto target_coord_type = expression_type(ops[1]);
9747 		target_coord_type.basetype = SPIRType::Int;
9748 		coord_expr = bitcast_expression(target_coord_type, expression_type(ops[1]).basetype, coord_expr);
9749 
9750 		if (type.image.ms)
9751 		{
9752 			uint32_t operands = ops[3];
9753 			if (operands != ImageOperandsSampleMask || length != 5)
9754 				SPIRV_CROSS_THROW("Multisampled image used in OpImageWrite, but unexpected operand mask was used.");
9755 			uint32_t samples = ops[4];
9756 			statement("imageStore(", to_expression(ops[0]), ", ", coord_expr, ", ", to_expression(samples), ", ",
9757 			          remap_swizzle(store_type, value_type.vecsize, to_expression(ops[2])), ");");
9758 		}
9759 		else
9760 			statement("imageStore(", to_expression(ops[0]), ", ", coord_expr, ", ",
9761 			          remap_swizzle(store_type, value_type.vecsize, to_expression(ops[2])), ");");
9762 
9763 		if (var && variable_storage_is_aliased(*var))
9764 			flush_all_aliased_variables();
9765 		break;
9766 	}
9767 
9768 	case OpImageQuerySize:
9769 	{
9770 		auto &type = expression_type(ops[2]);
9771 		uint32_t result_type = ops[0];
9772 		uint32_t id = ops[1];
9773 
9774 		if (type.basetype == SPIRType::Image)
9775 		{
9776 			string expr;
9777 			if (type.image.sampled == 2)
9778 			{
9779 				// The size of an image is always constant.
9780 				expr = join("imageSize(", to_expression(ops[2]), ")");
9781 			}
9782 			else
9783 			{
9784 				// This path is hit for samplerBuffers and multisampled images which do not have LOD.
9785 				expr = join("textureSize(", convert_separate_image_to_expression(ops[2]), ")");
9786 			}
9787 
9788 			auto &restype = get<SPIRType>(ops[0]);
9789 			expr = bitcast_expression(restype, SPIRType::Int, expr);
9790 			emit_op(result_type, id, expr, true);
9791 		}
9792 		else
9793 			SPIRV_CROSS_THROW("Invalid type for OpImageQuerySize.");
9794 		break;
9795 	}
9796 
9797 	// Compute
9798 	case OpControlBarrier:
9799 	case OpMemoryBarrier:
9800 	{
9801 		uint32_t execution_scope = 0;
9802 		uint32_t memory;
9803 		uint32_t semantics;
9804 
9805 		if (opcode == OpMemoryBarrier)
9806 		{
9807 			memory = get<SPIRConstant>(ops[0]).scalar();
9808 			semantics = get<SPIRConstant>(ops[1]).scalar();
9809 		}
9810 		else
9811 		{
9812 			execution_scope = get<SPIRConstant>(ops[0]).scalar();
9813 			memory = get<SPIRConstant>(ops[1]).scalar();
9814 			semantics = get<SPIRConstant>(ops[2]).scalar();
9815 		}
9816 
9817 		if (execution_scope == ScopeSubgroup || memory == ScopeSubgroup)
9818 		{
9819 			if (!options.vulkan_semantics)
9820 				SPIRV_CROSS_THROW("Can only use subgroup operations in Vulkan semantics.");
9821 			require_extension_internal("GL_KHR_shader_subgroup_basic");
9822 		}
9823 
9824 		if (execution_scope != ScopeSubgroup && get_entry_point().model == ExecutionModelTessellationControl)
9825 		{
9826 			// Tessellation control shaders only have barrier(), which implies the necessary memory barriers.
9827 			if (opcode == OpControlBarrier)
9828 				statement("barrier();");
9829 			break;
9830 		}
9831 
9832 		// We only care about these flags, acquire/release and friends are not relevant to GLSL.
9833 		semantics = mask_relevant_memory_semantics(semantics);
9834 
9835 		if (opcode == OpMemoryBarrier)
9836 		{
9837 			// If we are a memory barrier, and the next instruction is a control barrier, check whether that
9838 			// control barrier's implied memory semantics already cover what we need, so we can skip a redundant barrier.
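			// E.g. OpMemoryBarrier(Workgroup, WorkgroupMemory) immediately followed by an
			// OpControlBarrier with the same scope and semantics is dropped here, since the
			// barrier() path will emit the matching memoryBarrierShared() anyway.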
9839 			const Instruction *next = get_next_instruction_in_block(instruction);
9840 			if (next && next->op == OpControlBarrier)
9841 			{
9842 				auto *next_ops = stream(*next);
9843 				uint32_t next_memory = get<SPIRConstant>(next_ops[1]).scalar();
9844 				uint32_t next_semantics = get<SPIRConstant>(next_ops[2]).scalar();
9845 				next_semantics = mask_relevant_memory_semantics(next_semantics);
9846 
9847 				bool memory_scope_covered = false;
9848 				if (next_memory == memory)
9849 					memory_scope_covered = true;
9850 				else if (next_semantics == MemorySemanticsWorkgroupMemoryMask)
9851 				{
9852 					// If we only care about workgroup memory, either Device or Workgroup scope is fine,
9853 					// scope does not have to match.
9854 					if ((next_memory == ScopeDevice || next_memory == ScopeWorkgroup) &&
9855 					    (memory == ScopeDevice || memory == ScopeWorkgroup))
9856 					{
9857 						memory_scope_covered = true;
9858 					}
9859 				}
9860 				else if (memory == ScopeWorkgroup && next_memory == ScopeDevice)
9861 				{
9862 					// The control barrier has device scope, but the memory barrier just has workgroup scope.
9863 					memory_scope_covered = true;
9864 				}
9865 
9866 				// If we have the same memory scope, and all memory types are covered, we're good.
9867 				if (memory_scope_covered && (semantics & next_semantics) == semantics)
9868 					break;
9869 			}
9870 		}
9871 
9872 		// We are synchronizing some memory or syncing execution,
9873 		// so we cannot forward any loads beyond the memory barrier.
9874 		if (semantics || opcode == OpControlBarrier)
9875 		{
9876 			assert(current_emitting_block);
9877 			flush_control_dependent_expressions(current_emitting_block->self);
9878 			flush_all_active_variables();
9879 		}
9880 
9881 		if (memory == ScopeWorkgroup) // Only need to consider memory within a group
9882 		{
9883 			if (semantics == MemorySemanticsWorkgroupMemoryMask)
9884 				statement("memoryBarrierShared();");
9885 			else if (semantics != 0)
9886 				statement("groupMemoryBarrier();");
9887 		}
9888 		else if (memory == ScopeSubgroup)
9889 		{
9890 			const uint32_t all_barriers =
9891 			    MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | MemorySemanticsImageMemoryMask;
9892 
9893 			if (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask))
9894 			{
9895 				// These are not relevant for GLSL, but assume they imply a full subgroupMemoryBarrier().
9896 				// subgroupMemoryBarrier() covers everything, so no need to test anything else.
9897 				statement("subgroupMemoryBarrier();");
9898 			}
9899 			else if ((semantics & all_barriers) == all_barriers)
9900 			{
9901 				// Short-hand instead of emitting 3 barriers.
9902 				statement("subgroupMemoryBarrier();");
9903 			}
9904 			else
9905 			{
9906 				// Pick out individual barriers.
9907 				if (semantics & MemorySemanticsWorkgroupMemoryMask)
9908 					statement("subgroupMemoryBarrierShared();");
9909 				if (semantics & MemorySemanticsUniformMemoryMask)
9910 					statement("subgroupMemoryBarrierBuffer();");
9911 				if (semantics & MemorySemanticsImageMemoryMask)
9912 					statement("subgroupMemoryBarrierImage();");
9913 			}
9914 		}
9915 		else
9916 		{
9917 			const uint32_t all_barriers = MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask |
9918 			                              MemorySemanticsImageMemoryMask | MemorySemanticsAtomicCounterMemoryMask;
9919 
9920 			if (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask))
9921 			{
9922 				// These are not relevant for GLSL, but assume it means memoryBarrier().
9923 				// memoryBarrier() does everything, so no need to test anything else.
9924 				statement("memoryBarrier();");
9925 			}
9926 			else if ((semantics & all_barriers) == all_barriers)
9927 			{
9928 				// Short-hand instead of emitting 4 barriers.
9929 				statement("memoryBarrier();");
9930 			}
9931 			else
9932 			{
9933 				// Pick out individual barriers.
9934 				if (semantics & MemorySemanticsWorkgroupMemoryMask)
9935 					statement("memoryBarrierShared();");
9936 				if (semantics & MemorySemanticsUniformMemoryMask)
9937 					statement("memoryBarrierBuffer();");
9938 				if (semantics & MemorySemanticsImageMemoryMask)
9939 					statement("memoryBarrierImage();");
9940 				if (semantics & MemorySemanticsAtomicCounterMemoryMask)
9941 					statement("memoryBarrierAtomicCounter();");
9942 			}
9943 		}
9944 
9945 		if (opcode == OpControlBarrier)
9946 		{
9947 			if (execution_scope == ScopeSubgroup)
9948 				statement("subgroupBarrier();");
9949 			else
9950 				statement("barrier();");
9951 		}
9952 		break;
9953 	}
9954 
9955 	case OpExtInst:
9956 	{
9957 		uint32_t extension_set = ops[2];
9958 
9959 		if (get<SPIRExtension>(extension_set).ext == SPIRExtension::GLSL)
9960 		{
9961 			emit_glsl_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
9962 		}
9963 		else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_shader_ballot)
9964 		{
9965 			emit_spv_amd_shader_ballot_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
9966 		}
9967 		else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_shader_explicit_vertex_parameter)
9968 		{
9969 			emit_spv_amd_shader_explicit_vertex_parameter_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
9970 		}
9971 		else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_shader_trinary_minmax)
9972 		{
9973 			emit_spv_amd_shader_trinary_minmax_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
9974 		}
9975 		else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_gcn_shader)
9976 		{
9977 			emit_spv_amd_gcn_shader_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
9978 		}
9979 		else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_debug_info)
9980 		{
9981 			break; // Ignore SPIR-V debug information extended instructions.
9982 		}
9983 		else
9984 		{
9985 			statement("// unimplemented ext op ", instruction.op);
9986 			break;
9987 		}
9988 
9989 		break;
9990 	}
9991 
9992 	// Legacy sub-group stuff ...
9993 	case OpSubgroupBallotKHR:
9994 	{
9995 		uint32_t result_type = ops[0];
9996 		uint32_t id = ops[1];
9997 		string expr;
9998 		expr = join("uvec4(unpackUint2x32(ballotARB(" + to_expression(ops[2]) + ")), 0u, 0u)");
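		// ballotARB() returns a 64-bit mask; unpack it into two 32-bit words and pad with zeros
		// to form the uvec4 ballot result SPIR-V expects.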
9999 		emit_op(result_type, id, expr, should_forward(ops[2]));
10000 
10001 		require_extension_internal("GL_ARB_shader_ballot");
10002 		inherit_expression_dependencies(id, ops[2]);
10003 		register_control_dependent_expression(ops[1]);
10004 		break;
10005 	}
10006 
10007 	case OpSubgroupFirstInvocationKHR:
10008 	{
10009 		uint32_t result_type = ops[0];
10010 		uint32_t id = ops[1];
10011 		emit_unary_func_op(result_type, id, ops[2], "readFirstInvocationARB");
10012 
10013 		require_extension_internal("GL_ARB_shader_ballot");
10014 		register_control_dependent_expression(ops[1]);
10015 		break;
10016 	}
10017 
10018 	case OpSubgroupReadInvocationKHR:
10019 	{
10020 		uint32_t result_type = ops[0];
10021 		uint32_t id = ops[1];
10022 		emit_binary_func_op(result_type, id, ops[2], ops[3], "readInvocationARB");
10023 
10024 		require_extension_internal("GL_ARB_shader_ballot");
10025 		register_control_dependent_expression(ops[1]);
10026 		break;
10027 	}
10028 
10029 	case OpSubgroupAllKHR:
10030 	{
10031 		uint32_t result_type = ops[0];
10032 		uint32_t id = ops[1];
10033 		emit_unary_func_op(result_type, id, ops[2], "allInvocationsARB");
10034 
10035 		require_extension_internal("GL_ARB_shader_group_vote");
10036 		register_control_dependent_expression(ops[1]);
10037 		break;
10038 	}
10039 
10040 	case OpSubgroupAnyKHR:
10041 	{
10042 		uint32_t result_type = ops[0];
10043 		uint32_t id = ops[1];
10044 		emit_unary_func_op(result_type, id, ops[2], "anyInvocationARB");
10045 
10046 		require_extension_internal("GL_ARB_shader_group_vote");
10047 		register_control_dependent_expression(ops[1]);
10048 		break;
10049 	}
10050 
10051 	case OpSubgroupAllEqualKHR:
10052 	{
10053 		uint32_t result_type = ops[0];
10054 		uint32_t id = ops[1];
10055 		emit_unary_func_op(result_type, id, ops[2], "allInvocationsEqualARB");
10056 
10057 		require_extension_internal("GL_ARB_shader_group_vote");
10058 		register_control_dependent_expression(ops[1]);
10059 		break;
10060 	}
10061 
10062 	case OpGroupIAddNonUniformAMD:
10063 	case OpGroupFAddNonUniformAMD:
10064 	{
10065 		uint32_t result_type = ops[0];
10066 		uint32_t id = ops[1];
10067 		emit_unary_func_op(result_type, id, ops[4], "addInvocationsNonUniformAMD");
10068 
10069 		require_extension_internal("GL_AMD_shader_ballot");
10070 		register_control_dependent_expression(ops[1]);
10071 		break;
10072 	}
10073 
10074 	case OpGroupFMinNonUniformAMD:
10075 	case OpGroupUMinNonUniformAMD:
10076 	case OpGroupSMinNonUniformAMD:
10077 	{
10078 		uint32_t result_type = ops[0];
10079 		uint32_t id = ops[1];
10080 		emit_unary_func_op(result_type, id, ops[4], "minInvocationsNonUniformAMD");
10081 
10082 		require_extension_internal("GL_AMD_shader_ballot");
10083 		register_control_dependent_expression(ops[1]);
10084 		break;
10085 	}
10086 
10087 	case OpGroupFMaxNonUniformAMD:
10088 	case OpGroupUMaxNonUniformAMD:
10089 	case OpGroupSMaxNonUniformAMD:
10090 	{
10091 		uint32_t result_type = ops[0];
10092 		uint32_t id = ops[1];
10093 		emit_unary_func_op(result_type, id, ops[4], "maxInvocationsNonUniformAMD");
10094 
10095 		require_extension_internal("GL_AMD_shader_ballot");
10096 		register_control_dependent_expression(ops[1]);
10097 		break;
10098 	}
10099 
10100 	case OpFragmentMaskFetchAMD:
10101 	{
10102 		auto &type = expression_type(ops[2]);
10103 		uint32_t result_type = ops[0];
10104 		uint32_t id = ops[1];
10105 
10106 		if (type.image.dim == spv::DimSubpassData)
10107 		{
10108 			emit_unary_func_op(result_type, id, ops[2], "fragmentMaskFetchAMD");
10109 		}
10110 		else
10111 		{
10112 			emit_binary_func_op(result_type, id, ops[2], ops[3], "fragmentMaskFetchAMD");
10113 		}
10114 
10115 		require_extension_internal("GL_AMD_shader_fragment_mask");
10116 		break;
10117 	}
10118 
10119 	case OpFragmentFetchAMD:
10120 	{
10121 		auto &type = expression_type(ops[2]);
10122 		uint32_t result_type = ops[0];
10123 		uint32_t id = ops[1];
10124 
10125 		if (type.image.dim == spv::DimSubpassData)
10126 		{
10127 			emit_binary_func_op(result_type, id, ops[2], ops[4], "fragmentFetchAMD");
10128 		}
10129 		else
10130 		{
10131 			emit_trinary_func_op(result_type, id, ops[2], ops[3], ops[4], "fragmentFetchAMD");
10132 		}
10133 
10134 		require_extension_internal("GL_AMD_shader_fragment_mask");
10135 		break;
10136 	}
10137 
10138 	// Vulkan 1.1 sub-group stuff ...
10139 	case OpGroupNonUniformElect:
10140 	case OpGroupNonUniformBroadcast:
10141 	case OpGroupNonUniformBroadcastFirst:
10142 	case OpGroupNonUniformBallot:
10143 	case OpGroupNonUniformInverseBallot:
10144 	case OpGroupNonUniformBallotBitExtract:
10145 	case OpGroupNonUniformBallotBitCount:
10146 	case OpGroupNonUniformBallotFindLSB:
10147 	case OpGroupNonUniformBallotFindMSB:
10148 	case OpGroupNonUniformShuffle:
10149 	case OpGroupNonUniformShuffleXor:
10150 	case OpGroupNonUniformShuffleUp:
10151 	case OpGroupNonUniformShuffleDown:
10152 	case OpGroupNonUniformAll:
10153 	case OpGroupNonUniformAny:
10154 	case OpGroupNonUniformAllEqual:
10155 	case OpGroupNonUniformFAdd:
10156 	case OpGroupNonUniformIAdd:
10157 	case OpGroupNonUniformFMul:
10158 	case OpGroupNonUniformIMul:
10159 	case OpGroupNonUniformFMin:
10160 	case OpGroupNonUniformFMax:
10161 	case OpGroupNonUniformSMin:
10162 	case OpGroupNonUniformSMax:
10163 	case OpGroupNonUniformUMin:
10164 	case OpGroupNonUniformUMax:
10165 	case OpGroupNonUniformBitwiseAnd:
10166 	case OpGroupNonUniformBitwiseOr:
10167 	case OpGroupNonUniformBitwiseXor:
10168 	case OpGroupNonUniformQuadSwap:
10169 	case OpGroupNonUniformQuadBroadcast:
10170 		emit_subgroup_op(instruction);
10171 		break;
10172 
10173 	case OpFUnordEqual:
10174 	case OpFUnordNotEqual:
10175 	case OpFUnordLessThan:
10176 	case OpFUnordGreaterThan:
10177 	case OpFUnordLessThanEqual:
10178 	case OpFUnordGreaterThanEqual:
10179 	{
10180 		// GLSL doesn't specify if floating point comparisons are ordered or unordered,
10181 		// but glslang always emits ordered floating point compares for GLSL.
10182 		// To get unordered compares, we can test the opposite, ordered condition and invert the result.
10183 		// This way, we force true when there is any NaN present.
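		// E.g. OpFUnordLessThan(a, b) becomes !(a >= b), which is true when a < b or when either
		// operand is NaN.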
10184 		uint32_t op0 = ops[2];
10185 		uint32_t op1 = ops[3];
10186 
10187 		string expr;
10188 		if (expression_type(op0).vecsize > 1)
10189 		{
10190 			const char *comp_op = nullptr;
10191 			switch (opcode)
10192 			{
10193 			case OpFUnordEqual:
10194 				comp_op = "notEqual";
10195 				break;
10196 
10197 			case OpFUnordNotEqual:
10198 				comp_op = "equal";
10199 				break;
10200 
10201 			case OpFUnordLessThan:
10202 				comp_op = "greaterThanEqual";
10203 				break;
10204 
10205 			case OpFUnordLessThanEqual:
10206 				comp_op = "greaterThan";
10207 				break;
10208 
10209 			case OpFUnordGreaterThan:
10210 				comp_op = "lessThanEqual";
10211 				break;
10212 
10213 			case OpFUnordGreaterThanEqual:
10214 				comp_op = "lessThan";
10215 				break;
10216 
10217 			default:
10218 				assert(0);
10219 				break;
10220 			}
10221 
10222 			expr = join("not(", comp_op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), "))");
10223 		}
10224 		else
10225 		{
10226 			const char *comp_op = nullptr;
10227 			switch (opcode)
10228 			{
10229 			case OpFUnordEqual:
10230 				comp_op = " != ";
10231 				break;
10232 
10233 			case OpFUnordNotEqual:
10234 				comp_op = " == ";
10235 				break;
10236 
10237 			case OpFUnordLessThan:
10238 				comp_op = " >= ";
10239 				break;
10240 
10241 			case OpFUnordLessThanEqual:
10242 				comp_op = " > ";
10243 				break;
10244 
10245 			case OpFUnordGreaterThan:
10246 				comp_op = " <= ";
10247 				break;
10248 
10249 			case OpFUnordGreaterThanEqual:
10250 				comp_op = " < ";
10251 				break;
10252 
10253 			default:
10254 				assert(0);
10255 				break;
10256 			}
10257 
10258 			expr = join("!(", to_enclosed_unpacked_expression(op0), comp_op, to_enclosed_unpacked_expression(op1), ")");
10259 		}
10260 
10261 		emit_op(ops[0], ops[1], expr, should_forward(op0) && should_forward(op1));
10262 		inherit_expression_dependencies(ops[1], op0);
10263 		inherit_expression_dependencies(ops[1], op1);
10264 		break;
10265 	}
10266 
10267 	case OpReportIntersectionNV:
10268 		statement("reportIntersectionNV(", to_expression(ops[0]), ", ", to_expression(ops[1]), ");");
10269 		break;
10270 	case OpIgnoreIntersectionNV:
10271 		statement("ignoreIntersectionNV();");
10272 		break;
10273 	case OpTerminateRayNV:
10274 		statement("terminateRayNV();");
10275 		break;
10276 	case OpTraceNV:
10277 		statement("traceNV(", to_expression(ops[0]), ", ", to_expression(ops[1]), ", ", to_expression(ops[2]), ", ",
10278 		          to_expression(ops[3]), ", ", to_expression(ops[4]), ", ", to_expression(ops[5]), ", ",
10279 		          to_expression(ops[6]), ", ", to_expression(ops[7]), ", ", to_expression(ops[8]), ", ",
10280 		          to_expression(ops[9]), ", ", to_expression(ops[10]), ");");
10281 		break;
10282 	case OpExecuteCallableNV:
10283 		statement("executeCallableNV(", to_expression(ops[0]), ", ", to_expression(ops[1]), ");");
10284 		break;
10285 
10286 	case OpConvertUToPtr:
10287 	{
10288 		auto &type = get<SPIRType>(ops[0]);
10289 		if (type.storage != StorageClassPhysicalStorageBufferEXT)
10290 			SPIRV_CROSS_THROW("Only StorageClassPhysicalStorageBufferEXT is supported by OpConvertUToPtr.");
10291 
10292 		auto op = type_to_glsl(type);
10293 		emit_unary_func_op(ops[0], ops[1], ops[2], op.c_str());
10294 		break;
10295 	}
10296 
10297 	case OpConvertPtrToU:
10298 	{
10299 		auto &type = get<SPIRType>(ops[0]);
10300 		auto &ptr_type = expression_type(ops[2]);
10301 		if (ptr_type.storage != StorageClassPhysicalStorageBufferEXT)
10302 			SPIRV_CROSS_THROW("Only StorageClassPhysicalStorageBufferEXT is supported by OpConvertPtrToU.");
10303 
10304 		auto op = type_to_glsl(type);
10305 		emit_unary_func_op(ops[0], ops[1], ops[2], op.c_str());
10306 		break;
10307 	}
10308 
10309 	case OpUndef:
10310 		// Undefined value has been declared.
10311 		break;
10312 
10313 	case OpLine:
10314 	{
10315 		emit_line_directive(ops[0], ops[1]);
10316 		break;
10317 	}
10318 
10319 	case OpNoLine:
10320 		break;
10321 
10322 	case OpDemoteToHelperInvocationEXT:
10323 		if (!options.vulkan_semantics)
10324 			SPIRV_CROSS_THROW("GL_EXT_demote_to_helper_invocation is only supported in Vulkan GLSL.");
10325 		require_extension_internal("GL_EXT_demote_to_helper_invocation");
10326 		statement(backend.demote_literal, ";");
10327 		break;
10328 
10329 	case OpIsHelperInvocationEXT:
10330 		if (!options.vulkan_semantics)
10331 			SPIRV_CROSS_THROW("GL_EXT_demote_to_helper_invocation is only supported in Vulkan GLSL.");
10332 		require_extension_internal("GL_EXT_demote_to_helper_invocation");
10333 		emit_op(ops[0], ops[1], "helperInvocationEXT()", false);
10334 		break;
10335 
10336 	case OpBeginInvocationInterlockEXT:
10337 		// If the interlock is complex, we emit this elsewhere.
10338 		if (!interlocked_is_complex)
10339 		{
10340 			if (options.es)
10341 				statement("beginInvocationInterlockNV();");
10342 			else
10343 				statement("beginInvocationInterlockARB();");
10344 
10345 			flush_all_active_variables();
10346 			// Make sure forwarding doesn't propagate outside interlock region.
10347 		}
10348 		break;
10349 
10350 	case OpEndInvocationInterlockEXT:
10351 		// If the interlock is complex, we emit this elsewhere.
10352 		if (!interlocked_is_complex)
10353 		{
10354 			if (options.es)
10355 				statement("endInvocationInterlockNV();");
10356 			else
10357 				statement("endInvocationInterlockARB();");
10358 
10359 			flush_all_active_variables();
10360 			// Make sure forwarding doesn't propagate outside interlock region.
10361 		}
10362 		break;
10363 
10364 	default:
10365 		statement("// unimplemented op ", instruction.op);
10366 		break;
10367 	}
10368 }
10369 
10370 // Appends function arguments, mapped from global variables, beyond the specified arg index.
10371 // This is used when a function call uses fewer arguments than the function defines.
10372 // This situation may occur if the function signature has been dynamically modified to
10373 // extract global variables referenced from within the function, and convert them to
10374 // function arguments. This is necessary for shader languages that do not support global
10375 // access to shader input content from within a function (eg. Metal). Each additional
10376 // access to shader input content from within a function (e.g. Metal). Each additional
10377 // function arg uses the name of the global variable. Function nesting will modify the
10378 void CompilerGLSL::append_global_func_args(const SPIRFunction &func, uint32_t index, SmallVector<string> &arglist)
10379 {
10380 	auto &args = func.arguments;
10381 	uint32_t arg_cnt = uint32_t(args.size());
10382 	for (uint32_t arg_idx = index; arg_idx < arg_cnt; arg_idx++)
10383 	{
10384 		auto &arg = args[arg_idx];
10385 		assert(arg.alias_global_variable);
10386 
10387 		// If the underlying variable needs to be declared
10388 		// (ie. a local variable with deferred declaration), do so now.
10389 		uint32_t var_id = get<SPIRVariable>(arg.id).basevariable;
10390 		if (var_id)
10391 			flush_variable_declaration(var_id);
10392 
10393 		arglist.push_back(to_func_call_arg(arg, arg.id));
10394 	}
10395 }
10396 
10397 string CompilerGLSL::to_member_name(const SPIRType &type, uint32_t index)
10398 {
10399 	if (type.type_alias != TypeID(0) &&
10400 	    !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked))
10401 	{
10402 		return to_member_name(get<SPIRType>(type.type_alias), index);
10403 	}
10404 
10405 	auto &memb = ir.meta[type.self].members;
10406 	if (index < memb.size() && !memb[index].alias.empty())
10407 		return memb[index].alias;
10408 	else
10409 		return join("_m", index);
10410 }
10411 
10412 string CompilerGLSL::to_member_reference(uint32_t, const SPIRType &type, uint32_t index, bool)
10413 {
10414 	return join(".", to_member_name(type, index));
10415 }
10416 
10417 void CompilerGLSL::add_member_name(SPIRType &type, uint32_t index)
10418 {
10419 	auto &memb = ir.meta[type.self].members;
10420 	if (index < memb.size() && !memb[index].alias.empty())
10421 	{
10422 		auto &name = memb[index].alias;
10423 		if (name.empty())
10424 			return;
10425 
10426 		// Reserved for temporaries.
10427 		if (name[0] == '_' && name.size() >= 2 && isdigit(name[1]))
10428 		{
10429 			name.clear();
10430 			return;
10431 		}
10432 
10433 		update_name_cache(type.member_name_cache, name);
10434 	}
10435 }
10436 
10437 // Checks whether the ID is a row_major matrix that requires conversion before use
10438 bool CompilerGLSL::is_non_native_row_major_matrix(uint32_t id)
10439 {
10440 	// Natively supported row-major matrices do not need to be converted.
10441 	// Legacy targets do not support row major.
10442 	if (backend.native_row_major_matrix && !is_legacy())
10443 		return false;
10444 
10445 	// Non-matrix or column-major matrix types do not need to be converted.
10446 	if (!has_decoration(id, DecorationRowMajor))
10447 		return false;
10448 
10449 	// Only square row-major matrices can be converted at this time.
10450 	// Converting non-square matrices will require defining a custom GLSL function that
10451 	// swaps matrix elements while retaining the original dimensional form of the matrix.
10452 	const auto type = expression_type(id);
10453 	if (type.columns != type.vecsize)
10454 		SPIRV_CROSS_THROW("Row-major matrices must be square on this platform.");
10455 
10456 	return true;
10457 }
10458 
10459 // Checks whether the member is a row_major matrix that requires conversion before use
10460 bool CompilerGLSL::member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index)
10461 {
10462 	// Natively supported row-major matrices do not need to be converted.
10463 	if (backend.native_row_major_matrix && !is_legacy())
10464 		return false;
10465 
10466 	// Non-matrix or column-major matrix types do not need to be converted.
10467 	if (!has_member_decoration(type.self, index, DecorationRowMajor))
10468 		return false;
10469 
10470 	// Only square row-major matrices can be converted at this time.
10471 	// Converting non-square matrices will require defining a custom GLSL function that
10472 	// swaps matrix elements while retaining the original dimensional form of the matrix.
10473 	const auto mbr_type = get<SPIRType>(type.member_types[index]);
10474 	if (mbr_type.columns != mbr_type.vecsize)
10475 		SPIRV_CROSS_THROW("Row-major matrices must be square on this platform.");
10476 
10477 	return true;
10478 }
10479 
10480 // Checks if we need to remap physical type IDs when declaring the type in a buffer.
10481 bool CompilerGLSL::member_is_remapped_physical_type(const SPIRType &type, uint32_t index) const
10482 {
10483 	return has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPhysicalTypeID);
10484 }
10485 
10486 // Checks whether the member is in packed data type, that might need to be unpacked.
10487 bool CompilerGLSL::member_is_packed_physical_type(const SPIRType &type, uint32_t index) const
10488 {
10489 	return has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPhysicalTypePacked);
10490 }
10491 
10492 // Wraps the expression string in a function call that converts the
10493 // row_major matrix result of the expression to a column_major matrix.
10494 // Base implementation uses the standard library transpose() function.
10495 // Subclasses may override to use a different function.
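// For example, loading column 1 of a row-major mat3 "M" is unrolled to
// vec3(M[0][1], M[1][1], M[2][1]) rather than going through transpose().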
10496 string CompilerGLSL::convert_row_major_matrix(string exp_str, const SPIRType &exp_type, uint32_t /* physical_type_id */,
10497                                               bool /*is_packed*/)
10498 {
10499 	strip_enclosed_expression(exp_str);
10500 	if (!is_matrix(exp_type))
10501 	{
10502 		auto column_index = exp_str.find_last_of('[');
10503 		if (column_index == string::npos)
10504 			return exp_str;
10505 
10506 		auto column_expr = exp_str.substr(column_index);
10507 		exp_str.resize(column_index);
10508 
10509 		auto transposed_expr = type_to_glsl_constructor(exp_type) + "(";
10510 
10511 		// Loading a column from a row-major matrix. Unroll the load.
10512 		for (uint32_t c = 0; c < exp_type.vecsize; c++)
10513 		{
10514 			transposed_expr += join(exp_str, '[', c, ']', column_expr);
10515 			if (c + 1 < exp_type.vecsize)
10516 				transposed_expr += ", ";
10517 		}
10518 
10519 		transposed_expr += ")";
10520 		return transposed_expr;
10521 	}
10522 	else
10523 		return join("transpose(", exp_str, ")");
10524 }
10525 
10526 string CompilerGLSL::variable_decl(const SPIRType &type, const string &name, uint32_t id)
10527 {
10528 	string type_name = type_to_glsl(type, id);
10529 	remap_variable_type_name(type, name, type_name);
10530 	return join(type_name, " ", name, type_to_array_glsl(type));
10531 }
10532 
10533 // Emit a structure member. Subclasses may override to modify output,
10534 // or to dynamically add a padding member if needed.
10535 void CompilerGLSL::emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index,
10536                                       const string &qualifier, uint32_t)
10537 {
10538 	auto &membertype = get<SPIRType>(member_type_id);
10539 
10540 	Bitset memberflags;
10541 	auto &memb = ir.meta[type.self].members;
10542 	if (index < memb.size())
10543 		memberflags = memb[index].decoration_flags;
10544 
10545 	string qualifiers;
10546 	bool is_block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) ||
10547 	                ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
10548 
10549 	if (is_block)
10550 		qualifiers = to_interpolation_qualifiers(memberflags);
10551 
10552 	statement(layout_for_member(type, index), qualifiers, qualifier, flags_to_qualifiers_glsl(membertype, memberflags),
10553 	          variable_decl(membertype, to_member_name(type, index)), ";");
10554 }
10555 
10556 void CompilerGLSL::emit_struct_padding_target(const SPIRType &)
10557 {
10558 }
10559 
10560 const char *CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, const Bitset &flags)
10561 {
10562 	// GL_EXT_buffer_reference variables can be marked as restrict.
10563 	if (flags.get(DecorationRestrictPointerEXT))
10564 		return "restrict ";
10565 
10566 	// Structs do not have precision qualifiers, and neither do doubles (desktop only anyway, so no mediump/highp).
10567 	if (type.basetype != SPIRType::Float && type.basetype != SPIRType::Int && type.basetype != SPIRType::UInt &&
10568 	    type.basetype != SPIRType::Image && type.basetype != SPIRType::SampledImage &&
10569 	    type.basetype != SPIRType::Sampler)
10570 		return "";
10571 
10572 	if (options.es)
10573 	{
10574 		auto &execution = get_entry_point();
10575 
10576 		if (flags.get(DecorationRelaxedPrecision))
10577 		{
10578 			bool implied_fmediump = type.basetype == SPIRType::Float &&
10579 			                        options.fragment.default_float_precision == Options::Mediump &&
10580 			                        execution.model == ExecutionModelFragment;
10581 
10582 			bool implied_imediump = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) &&
10583 			                        options.fragment.default_int_precision == Options::Mediump &&
10584 			                        execution.model == ExecutionModelFragment;
10585 
10586 			return implied_fmediump || implied_imediump ? "" : "mediump ";
10587 		}
10588 		else
10589 		{
10590 			bool implied_fhighp =
10591 			    type.basetype == SPIRType::Float && ((options.fragment.default_float_precision == Options::Highp &&
10592 			                                          execution.model == ExecutionModelFragment) ||
10593 			                                         (execution.model != ExecutionModelFragment));
10594 
10595 			bool implied_ihighp = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) &&
10596 			                      ((options.fragment.default_int_precision == Options::Highp &&
10597 			                        execution.model == ExecutionModelFragment) ||
10598 			                       (execution.model != ExecutionModelFragment));
10599 
10600 			return implied_fhighp || implied_ihighp ? "" : "highp ";
10601 		}
10602 	}
10603 	else if (backend.allow_precision_qualifiers)
10604 	{
10605 		// Vulkan GLSL supports precision qualifiers, even in desktop profiles, which is convenient.
10606 		// The default is highp however, so only emit mediump in the rare case that a shader has these.
10607 		if (flags.get(DecorationRelaxedPrecision))
10608 			return "mediump ";
10609 		else
10610 			return "";
10611 	}
10612 	else
10613 		return "";
10614 }
10615 
const char *CompilerGLSL::to_precision_qualifiers_glsl(uint32_t id)
10617 {
10618 	auto &type = expression_type(id);
10619 	bool use_precision_qualifiers = backend.allow_precision_qualifiers || options.es;
10620 	if (use_precision_qualifiers && (type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage))
10621 	{
10622 		// Force mediump for the sampler type. We cannot declare 16-bit or smaller image types.
10623 		auto &result_type = get<SPIRType>(type.image.type);
10624 		if (result_type.width < 32)
10625 			return "mediump ";
10626 	}
10627 	return flags_to_qualifiers_glsl(type, ir.meta[id].decoration.decoration_flags);
10628 }
10629 
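// Rough example of the combined qualifier string built below, assuming a storage image marked
// Coherent and NonWritable in an ES shader: the result might look something like
// "coherent readonly highp " (memory qualifiers in the order emitted below, followed by the
// precision part from to_precision_qualifiers_glsl).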
string CompilerGLSL::to_qualifiers_glsl(uint32_t id)
10631 {
10632 	auto &flags = ir.meta[id].decoration.decoration_flags;
10633 	string res;
10634 
10635 	auto *var = maybe_get<SPIRVariable>(id);
10636 
10637 	if (var && var->storage == StorageClassWorkgroup && !backend.shared_is_implied)
10638 		res += "shared ";
10639 
10640 	res += to_interpolation_qualifiers(flags);
10641 	if (var)
10642 		res += to_storage_qualifiers_glsl(*var);
10643 
10644 	auto &type = expression_type(id);
10645 	if (type.image.dim != DimSubpassData && type.image.sampled == 2)
10646 	{
10647 		if (flags.get(DecorationCoherent))
10648 			res += "coherent ";
10649 		if (flags.get(DecorationRestrict))
10650 			res += "restrict ";
10651 		if (flags.get(DecorationNonWritable))
10652 			res += "readonly ";
10653 		if (flags.get(DecorationNonReadable))
10654 			res += "writeonly ";
10655 	}
10656 
10657 	res += to_precision_qualifiers_glsl(id);
10658 
10659 	return res;
10660 }
10661 
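// Sketch of a resulting parameter declaration (names are illustrative): a pointer argument
// that is both read and written would come out roughly as "inout mediump vec3 v", i.e.
// direction, then qualifiers, then the plain variable_decl.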
string CompilerGLSL::argument_decl(const SPIRFunction::Parameter &arg)
10663 {
	// glslangValidator seems to make all arguments pointers no matter what, which is rather bizarre ...
10665 	auto &type = expression_type(arg.id);
10666 	const char *direction = "";
10667 
10668 	if (type.pointer)
10669 	{
10670 		if (arg.write_count && arg.read_count)
10671 			direction = "inout ";
10672 		else if (arg.write_count)
10673 			direction = "out ";
10674 	}
10675 
10676 	return join(direction, to_qualifiers_glsl(arg.id), variable_decl(type, to_name(arg.id), arg.id));
10677 }
10678 
string CompilerGLSL::to_initializer_expression(const SPIRVariable &var)
10680 {
10681 	return to_expression(var.initializer);
10682 }
10683 
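// Example of a full local declaration produced below (hypothetical names and values): a loop
// variable with a known static expression might come out as "int i = 0", while a variable with
// a regular initializer becomes something like "highp vec4 color = vec4(0.0)".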
string CompilerGLSL::variable_decl(const SPIRVariable &variable)
10685 {
10686 	// Ignore the pointer type since GLSL doesn't have pointers.
10687 	auto &type = get_variable_data_type(variable);
10688 
10689 	if (type.pointer_depth > 1)
10690 		SPIRV_CROSS_THROW("Cannot declare pointer-to-pointer types.");
10691 
10692 	auto res = join(to_qualifiers_glsl(variable.self), variable_decl(type, to_name(variable.self), variable.self));
10693 
10694 	if (variable.loop_variable && variable.static_expression)
10695 	{
10696 		uint32_t expr = variable.static_expression;
10697 		if (ir.ids[expr].get_type() != TypeUndef)
10698 			res += join(" = ", to_expression(variable.static_expression));
10699 	}
10700 	else if (variable.initializer)
10701 	{
10702 		uint32_t expr = variable.initializer;
10703 		if (ir.ids[expr].get_type() != TypeUndef)
10704 			res += join(" = ", to_initializer_expression(variable));
10705 	}
10706 	return res;
10707 }
10708 
const char *CompilerGLSL::to_pls_qualifiers_glsl(const SPIRVariable &variable)
10710 {
10711 	auto &flags = ir.meta[variable.self].decoration.decoration_flags;
10712 	if (flags.get(DecorationRelaxedPrecision))
10713 		return "mediump ";
10714 	else
10715 		return "highp ";
10716 }
10717 
string CompilerGLSL::pls_decl(const PlsRemap &var)
10719 {
10720 	auto &variable = get<SPIRVariable>(var.id);
10721 
10722 	SPIRType type;
10723 	type.vecsize = pls_format_to_components(var.format);
10724 	type.basetype = pls_format_to_basetype(var.format);
10725 
10726 	return join(to_pls_layout(var.format), to_pls_qualifiers_glsl(variable), type_to_glsl(type), " ",
10727 	            to_name(variable.self));
10728 }
10729 
uint32_t CompilerGLSL::to_array_size_literal(const SPIRType &type) const
10731 {
10732 	return to_array_size_literal(type, uint32_t(type.array.size() - 1));
10733 }
10734 
uint32_t CompilerGLSL::to_array_size_literal(const SPIRType &type, uint32_t index) const
10736 {
10737 	assert(type.array.size() == type.array_size_literal.size());
10738 
10739 	if (type.array_size_literal[index])
10740 	{
10741 		return type.array[index];
10742 	}
10743 	else
10744 	{
10745 		// Use the default spec constant value.
10746 		// This is the best we can do.
10747 		uint32_t array_size_id = type.array[index];
10748 
10749 		// Explicitly check for this case. The error message you would get (bad cast) makes no sense otherwise.
10750 		if (ir.ids[array_size_id].get_type() == TypeConstantOp)
10751 			SPIRV_CROSS_THROW("An array size was found to be an OpSpecConstantOp. This is not supported since "
10752 			                  "SPIRV-Cross cannot deduce the actual size here.");
10753 
10754 		uint32_t array_size = get<SPIRConstant>(array_size_id).scalar();
10755 		return array_size;
10756 	}
10757 }
10758 
string CompilerGLSL::to_array_size(const SPIRType &type, uint32_t index)
10760 {
10761 	assert(type.array.size() == type.array_size_literal.size());
10762 
10763 	// Tessellation control and evaluation shaders must have either gl_MaxPatchVertices or unsized arrays for input arrays.
10764 	// Opt for unsized as it's the more "correct" variant to use.
10765 	if (type.storage == StorageClassInput &&
10766 	    (get_entry_point().model == ExecutionModelTessellationControl ||
10767 	     get_entry_point().model == ExecutionModelTessellationEvaluation) &&
10768 	    index == uint32_t(type.array.size() - 1))
10769 		return "";
10770 
10771 	auto &size = type.array[index];
10772 	if (!type.array_size_literal[index])
10773 		return to_expression(size);
10774 	else if (size)
10775 		return convert_to_string(size);
10776 	else if (!backend.unsized_array_supported)
10777 	{
10778 		// For runtime-sized arrays, we can work around
10779 		// lack of standard support for this by simply having
		// a single-element array.
		//
		// Runtime-sized arrays must always be the last element
		// in an interface block.
10784 		return "1";
10785 	}
10786 	else
10787 		return "";
10788 }
10789 
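// Illustrative example (assuming a two-dimensional array declared as float v[4][2]): the
// nested form below emits "[4][2]", while options.flatten_multidimensional_arrays collapses
// the dimensions into a single subscript along the lines of "[4 * 2]".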
string CompilerGLSL::type_to_array_glsl(const SPIRType &type)
10791 {
10792 	if (type.pointer && type.storage == StorageClassPhysicalStorageBufferEXT && type.basetype != SPIRType::Struct)
10793 	{
10794 		// We are using a wrapped pointer type, and we should not emit any array declarations here.
10795 		return "";
10796 	}
10797 
10798 	if (type.array.empty())
10799 		return "";
10800 
10801 	if (options.flatten_multidimensional_arrays)
10802 	{
10803 		string res;
10804 		res += "[";
10805 		for (auto i = uint32_t(type.array.size()); i; i--)
10806 		{
10807 			res += enclose_expression(to_array_size(type, i - 1));
10808 			if (i > 1)
10809 				res += " * ";
10810 		}
10811 		res += "]";
10812 		return res;
10813 	}
10814 	else
10815 	{
10816 		if (type.array.size() > 1)
10817 		{
10818 			if (!options.es && options.version < 430)
10819 				require_extension_internal("GL_ARB_arrays_of_arrays");
10820 			else if (options.es && options.version < 310)
10821 				SPIRV_CROSS_THROW("Arrays of arrays not supported before ESSL version 310. "
10822 				                  "Try using --flatten-multidimensional-arrays or set "
10823 				                  "options.flatten_multidimensional_arrays to true.");
10824 		}
10825 
10826 		string res;
10827 		for (auto i = uint32_t(type.array.size()); i; i--)
10828 		{
10829 			res += "[";
10830 			res += to_array_size(type, i - 1);
10831 			res += "]";
10832 		}
10833 		return res;
10834 	}
10835 }
10836 
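// A few assumed examples of the naming scheme implemented below: a sampled 2D float texture
// becomes "sampler2D", an unsigned-integer 2D storage image "uimage2D", a multisampled array
// "sampler2DMSArray", and a depth-comparison sampler or combined image sampler gains the
// trailing "Shadow" suffix.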
string CompilerGLSL::image_type_glsl(const SPIRType &type, uint32_t id)
10838 {
10839 	auto &imagetype = get<SPIRType>(type.image.type);
10840 	string res;
10841 
10842 	switch (imagetype.basetype)
10843 	{
10844 	case SPIRType::Int:
10845 	case SPIRType::Short:
10846 	case SPIRType::SByte:
10847 		res = "i";
10848 		break;
10849 	case SPIRType::UInt:
10850 	case SPIRType::UShort:
10851 	case SPIRType::UByte:
10852 		res = "u";
10853 		break;
10854 	default:
10855 		break;
10856 	}
10857 
	// For half image types, we will force mediump for the sampler, and cast to f16 after any sampling operation.
	// We cannot express a true half texture type in GLSL, nor short integer formats for that matter.
10860 
10861 	if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData && options.vulkan_semantics)
10862 		return res + "subpassInput" + (type.image.ms ? "MS" : "");
10863 
10864 	// If we're emulating subpassInput with samplers, force sampler2D
10865 	// so we don't have to specify format.
10866 	if (type.basetype == SPIRType::Image && type.image.dim != DimSubpassData)
10867 	{
10868 		// Sampler buffers are always declared as samplerBuffer even though they might be separate images in the SPIR-V.
10869 		if (type.image.dim == DimBuffer && type.image.sampled == 1)
10870 			res += "sampler";
10871 		else
10872 			res += type.image.sampled == 2 ? "image" : "texture";
10873 	}
10874 	else
10875 		res += "sampler";
10876 
10877 	switch (type.image.dim)
10878 	{
10879 	case Dim1D:
10880 		res += "1D";
10881 		break;
10882 	case Dim2D:
10883 		res += "2D";
10884 		break;
10885 	case Dim3D:
10886 		res += "3D";
10887 		break;
10888 	case DimCube:
10889 		res += "Cube";
10890 		break;
10891 	case DimRect:
10892 		if (options.es)
10893 			SPIRV_CROSS_THROW("Rectangle textures are not supported on OpenGL ES.");
10894 
10895 		if (is_legacy_desktop())
10896 			require_extension_internal("GL_ARB_texture_rectangle");
10897 
10898 		res += "2DRect";
10899 		break;
10900 
10901 	case DimBuffer:
10902 		if (options.es && options.version < 320)
10903 			require_extension_internal("GL_OES_texture_buffer");
10904 		else if (!options.es && options.version < 300)
10905 			require_extension_internal("GL_EXT_texture_buffer_object");
10906 		res += "Buffer";
10907 		break;
10908 
10909 	case DimSubpassData:
10910 		res += "2D";
10911 		break;
10912 	default:
10913 		SPIRV_CROSS_THROW("Only 1D, 2D, 2DRect, 3D, Buffer, InputTarget and Cube textures supported.");
10914 	}
10915 
10916 	if (type.image.ms)
10917 		res += "MS";
10918 	if (type.image.arrayed)
10919 	{
10920 		if (is_legacy_desktop())
10921 			require_extension_internal("GL_EXT_texture_array");
10922 		res += "Array";
10923 	}
10924 
10925 	// "Shadow" state in GLSL only exists for samplers and combined image samplers.
10926 	if (((type.basetype == SPIRType::SampledImage) || (type.basetype == SPIRType::Sampler)) &&
10927 	    image_is_comparison(type, id))
10928 	{
10929 		res += "Shadow";
10930 	}
10931 
10932 	return res;
10933 }
10934 
string CompilerGLSL::type_to_glsl_constructor(const SPIRType &type)
10936 {
10937 	if (backend.use_array_constructor && type.array.size() > 1)
10938 	{
10939 		if (options.flatten_multidimensional_arrays)
10940 			SPIRV_CROSS_THROW("Cannot flatten constructors of multidimensional array constructors, e.g. float[][]().");
10941 		else if (!options.es && options.version < 430)
10942 			require_extension_internal("GL_ARB_arrays_of_arrays");
10943 		else if (options.es && options.version < 310)
10944 			SPIRV_CROSS_THROW("Arrays of arrays not supported before ESSL version 310.");
10945 	}
10946 
10947 	auto e = type_to_glsl(type);
10948 	if (backend.use_array_constructor)
10949 	{
10950 		for (uint32_t i = 0; i < type.array.size(); i++)
10951 			e += "[]";
10952 	}
10953 	return e;
10954 }
10955 
// The optional id parameter indicates the object whose type we are trying
// to find the description for. Most type descriptions do not depend on a
// specific object's use of that type.
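// As an illustration of the mapping below: a 4-component float vector becomes "vec4",
// a 3-column/4-row half matrix "f16mat3x4", and a 64-bit unsigned scalar "uint64_t".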
string CompilerGLSL::type_to_glsl(const SPIRType &type, uint32_t id)
10960 {
10961 	if (type.pointer && type.storage == StorageClassPhysicalStorageBufferEXT && type.basetype != SPIRType::Struct)
10962 	{
10963 		// Need to create a magic type name which compacts the entire type information.
10964 		string name = type_to_glsl(get_pointee_type(type));
10965 		for (size_t i = 0; i < type.array.size(); i++)
10966 		{
10967 			if (type.array_size_literal[i])
10968 				name += join(type.array[i], "_");
10969 			else
10970 				name += join("id", type.array[i], "_");
10971 		}
10972 		name += "Pointer";
10973 		return name;
10974 	}
10975 
10976 	switch (type.basetype)
10977 	{
10978 	case SPIRType::Struct:
10979 		// Need OpName lookup here to get a "sensible" name for a struct.
10980 		if (backend.explicit_struct_type)
10981 			return join("struct ", to_name(type.self));
10982 		else
10983 			return to_name(type.self);
10984 
10985 	case SPIRType::Image:
10986 	case SPIRType::SampledImage:
10987 		return image_type_glsl(type, id);
10988 
10989 	case SPIRType::Sampler:
10990 		// The depth field is set by calling code based on the variable ID of the sampler, effectively reintroducing
10991 		// this distinction into the type system.
10992 		return comparison_ids.count(id) ? "samplerShadow" : "sampler";
10993 
10994 	case SPIRType::AccelerationStructureNV:
10995 		return "accelerationStructureNV";
10996 
10997 	case SPIRType::Void:
10998 		return "void";
10999 
11000 	default:
11001 		break;
11002 	}
11003 
11004 	if (type.basetype == SPIRType::UInt && is_legacy())
11005 		SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy targets.");
11006 
11007 	if (type.vecsize == 1 && type.columns == 1) // Scalar builtin
11008 	{
11009 		switch (type.basetype)
11010 		{
11011 		case SPIRType::Boolean:
11012 			return "bool";
11013 		case SPIRType::SByte:
11014 			return backend.basic_int8_type;
11015 		case SPIRType::UByte:
11016 			return backend.basic_uint8_type;
11017 		case SPIRType::Short:
11018 			return backend.basic_int16_type;
11019 		case SPIRType::UShort:
11020 			return backend.basic_uint16_type;
11021 		case SPIRType::Int:
11022 			return backend.basic_int_type;
11023 		case SPIRType::UInt:
11024 			return backend.basic_uint_type;
11025 		case SPIRType::AtomicCounter:
11026 			return "atomic_uint";
11027 		case SPIRType::Half:
11028 			return "float16_t";
11029 		case SPIRType::Float:
11030 			return "float";
11031 		case SPIRType::Double:
11032 			return "double";
11033 		case SPIRType::Int64:
11034 			return "int64_t";
11035 		case SPIRType::UInt64:
11036 			return "uint64_t";
11037 		default:
11038 			return "???";
11039 		}
11040 	}
11041 	else if (type.vecsize > 1 && type.columns == 1) // Vector builtin
11042 	{
11043 		switch (type.basetype)
11044 		{
11045 		case SPIRType::Boolean:
11046 			return join("bvec", type.vecsize);
11047 		case SPIRType::SByte:
11048 			return join("i8vec", type.vecsize);
11049 		case SPIRType::UByte:
11050 			return join("u8vec", type.vecsize);
11051 		case SPIRType::Short:
11052 			return join("i16vec", type.vecsize);
11053 		case SPIRType::UShort:
11054 			return join("u16vec", type.vecsize);
11055 		case SPIRType::Int:
11056 			return join("ivec", type.vecsize);
11057 		case SPIRType::UInt:
11058 			return join("uvec", type.vecsize);
11059 		case SPIRType::Half:
11060 			return join("f16vec", type.vecsize);
11061 		case SPIRType::Float:
11062 			return join("vec", type.vecsize);
11063 		case SPIRType::Double:
11064 			return join("dvec", type.vecsize);
11065 		case SPIRType::Int64:
11066 			return join("i64vec", type.vecsize);
11067 		case SPIRType::UInt64:
11068 			return join("u64vec", type.vecsize);
11069 		default:
11070 			return "???";
11071 		}
11072 	}
11073 	else if (type.vecsize == type.columns) // Simple Matrix builtin
11074 	{
11075 		switch (type.basetype)
11076 		{
11077 		case SPIRType::Boolean:
11078 			return join("bmat", type.vecsize);
11079 		case SPIRType::Int:
11080 			return join("imat", type.vecsize);
11081 		case SPIRType::UInt:
11082 			return join("umat", type.vecsize);
11083 		case SPIRType::Half:
11084 			return join("f16mat", type.vecsize);
11085 		case SPIRType::Float:
11086 			return join("mat", type.vecsize);
11087 		case SPIRType::Double:
11088 			return join("dmat", type.vecsize);
11089 		// Matrix types not supported for int64/uint64.
11090 		default:
11091 			return "???";
11092 		}
11093 	}
11094 	else
11095 	{
11096 		switch (type.basetype)
11097 		{
11098 		case SPIRType::Boolean:
11099 			return join("bmat", type.columns, "x", type.vecsize);
11100 		case SPIRType::Int:
11101 			return join("imat", type.columns, "x", type.vecsize);
11102 		case SPIRType::UInt:
11103 			return join("umat", type.columns, "x", type.vecsize);
11104 		case SPIRType::Half:
11105 			return join("f16mat", type.columns, "x", type.vecsize);
11106 		case SPIRType::Float:
11107 			return join("mat", type.columns, "x", type.vecsize);
11108 		case SPIRType::Double:
11109 			return join("dmat", type.columns, "x", type.vecsize);
11110 		// Matrix types not supported for int64/uint64.
11111 		default:
11112 			return "???";
11113 		}
11114 	}
11115 }
11116 
void CompilerGLSL::add_variable(unordered_set<string> &variables_primary,
                                const unordered_set<string> &variables_secondary, string &name)
11119 {
11120 	if (name.empty())
11121 		return;
11122 
11123 	// Reserved for temporaries.
11124 	if (name[0] == '_' && name.size() >= 2 && isdigit(name[1]))
11125 	{
11126 		name.clear();
11127 		return;
11128 	}
11129 
11130 	// Avoid double underscores.
11131 	name = sanitize_underscores(name);
11132 
11133 	update_name_cache(variables_primary, variables_secondary, name);
11134 }
11135 
void CompilerGLSL::add_local_variable_name(uint32_t id)
11137 {
11138 	add_variable(local_variable_names, block_names, ir.meta[id].decoration.alias);
11139 }
11140 
void CompilerGLSL::add_resource_name(uint32_t id)
11142 {
11143 	add_variable(resource_names, block_names, ir.meta[id].decoration.alias);
11144 }
11145 
void CompilerGLSL::add_header_line(const std::string &line)
11147 {
11148 	header_lines.push_back(line);
11149 }
11150 
bool CompilerGLSL::has_extension(const std::string &ext) const
11152 {
11153 	auto itr = find(begin(forced_extensions), end(forced_extensions), ext);
11154 	return itr != end(forced_extensions);
11155 }
11156 
void CompilerGLSL::require_extension(const std::string &ext)
11158 {
11159 	if (!has_extension(ext))
11160 		forced_extensions.push_back(ext);
11161 }
11162 
void CompilerGLSL::require_extension_internal(const string &ext)
11164 {
11165 	if (backend.supports_extensions && !has_extension(ext))
11166 	{
11167 		forced_extensions.push_back(ext);
11168 		force_recompile();
11169 	}
11170 }
11171 
void CompilerGLSL::flatten_buffer_block(VariableID id)
11173 {
11174 	auto &var = get<SPIRVariable>(id);
11175 	auto &type = get<SPIRType>(var.basetype);
11176 	auto name = to_name(type.self, false);
11177 	auto &flags = ir.meta[type.self].decoration.decoration_flags;
11178 
11179 	if (!type.array.empty())
11180 		SPIRV_CROSS_THROW(name + " is an array of UBOs.");
11181 	if (type.basetype != SPIRType::Struct)
11182 		SPIRV_CROSS_THROW(name + " is not a struct.");
11183 	if (!flags.get(DecorationBlock))
11184 		SPIRV_CROSS_THROW(name + " is not a block.");
11185 	if (type.member_types.empty())
11186 		SPIRV_CROSS_THROW(name + " is an empty struct.");
11187 
11188 	flattened_buffer_blocks.insert(id);
11189 }
11190 
bool CompilerGLSL::builtin_translates_to_nonarray(spv::BuiltIn /*builtin*/) const
11192 {
11193 	return false; // GLSL itself does not need to translate array builtin types to non-array builtin types
11194 }
11195 
bool CompilerGLSL::check_atomic_image(uint32_t id)
11197 {
11198 	auto &type = expression_type(id);
11199 	if (type.storage == StorageClassImage)
11200 	{
11201 		if (options.es && options.version < 320)
11202 			require_extension_internal("GL_OES_shader_image_atomic");
11203 
11204 		auto *var = maybe_get_backing_variable(id);
11205 		if (var)
11206 		{
11207 			auto &flags = ir.meta[var->self].decoration.decoration_flags;
11208 			if (flags.get(DecorationNonWritable) || flags.get(DecorationNonReadable))
11209 			{
11210 				flags.clear(DecorationNonWritable);
11211 				flags.clear(DecorationNonReadable);
11212 				force_recompile();
11213 			}
11214 		}
11215 		return true;
11216 	}
11217 	else
11218 		return false;
11219 }
11220 
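// Sketch of the behavior below with assumed names: two SPIR-V functions both named "foo", one
// taking (float) and one taking (int), can keep the same GLSL name as overloads, whereas a
// second "foo" whose argument-type hash collides with an existing one is renamed to avoid a
// redefinition.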
void CompilerGLSL::add_function_overload(const SPIRFunction &func)
11222 {
11223 	Hasher hasher;
11224 	for (auto &arg : func.arguments)
11225 	{
11226 		// Parameters can vary with pointer type or not,
11227 		// but that will not change the signature in GLSL/HLSL,
11228 		// so strip the pointer type before hashing.
11229 		uint32_t type_id = get_pointee_type_id(arg.type);
11230 		auto &type = get<SPIRType>(type_id);
11231 
11232 		if (!combined_image_samplers.empty())
11233 		{
11234 			// If we have combined image samplers, we cannot really trust the image and sampler arguments
11235 			// we pass down to callees, because they may be shuffled around.
11236 			// Ignore these arguments, to make sure that functions need to differ in some other way
11237 			// to be considered different overloads.
11238 			if (type.basetype == SPIRType::SampledImage ||
11239 			    (type.basetype == SPIRType::Image && type.image.sampled == 1) || type.basetype == SPIRType::Sampler)
11240 			{
11241 				continue;
11242 			}
11243 		}
11244 
11245 		hasher.u32(type_id);
11246 	}
11247 	uint64_t types_hash = hasher.get();
11248 
11249 	auto function_name = to_name(func.self);
11250 	auto itr = function_overloads.find(function_name);
11251 	if (itr != end(function_overloads))
11252 	{
11253 		// There exists a function with this name already.
11254 		auto &overloads = itr->second;
11255 		if (overloads.count(types_hash) != 0)
11256 		{
11257 			// Overload conflict, assign a new name.
11258 			add_resource_name(func.self);
11259 			function_overloads[to_name(func.self)].insert(types_hash);
11260 		}
11261 		else
11262 		{
11263 			// Can reuse the name.
11264 			overloads.insert(types_hash);
11265 		}
11266 	}
11267 	else
11268 	{
11269 		// First time we see this function name.
11270 		add_resource_name(func.self);
11271 		function_overloads[to_name(func.self)].insert(types_hash);
11272 	}
11273 }
11274 
void CompilerGLSL::emit_function_prototype(SPIRFunction &func, const Bitset &return_flags)
11276 {
11277 	if (func.self != ir.default_entry_point)
11278 		add_function_overload(func);
11279 
11280 	// Avoid shadow declarations.
11281 	local_variable_names = resource_names;
11282 
11283 	string decl;
11284 
11285 	auto &type = get<SPIRType>(func.return_type);
11286 	decl += flags_to_qualifiers_glsl(type, return_flags);
11287 	decl += type_to_glsl(type);
11288 	decl += type_to_array_glsl(type);
11289 	decl += " ";
11290 
11291 	if (func.self == ir.default_entry_point)
11292 	{
11293 		// If we need complex fallback in GLSL, we just wrap main() in a function
11294 		// and interlock the entire shader ...
11295 		if (interlocked_is_complex)
11296 			decl += "spvMainInterlockedBody";
11297 		else
11298 			decl += "main";
11299 
11300 		processing_entry_point = true;
11301 	}
11302 	else
11303 		decl += to_name(func.self);
11304 
11305 	decl += "(";
11306 	SmallVector<string> arglist;
11307 	for (auto &arg : func.arguments)
11308 	{
11309 		// Do not pass in separate images or samplers if we're remapping
11310 		// to combined image samplers.
11311 		if (skip_argument(arg.id))
11312 			continue;
11313 
11314 		// Might change the variable name if it already exists in this function.
		// SPIR-V OpName doesn't have any semantic effect, so it's valid for an implementation
		// to use the same name for multiple variables.
11317 		// Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates.
11318 		add_local_variable_name(arg.id);
11319 
11320 		arglist.push_back(argument_decl(arg));
11321 
11322 		// Hold a pointer to the parameter so we can invalidate the readonly field if needed.
11323 		auto *var = maybe_get<SPIRVariable>(arg.id);
11324 		if (var)
11325 			var->parameter = &arg;
11326 	}
11327 
11328 	for (auto &arg : func.shadow_arguments)
11329 	{
11330 		// Might change the variable name if it already exists in this function.
		// SPIR-V OpName doesn't have any semantic effect, so it's valid for an implementation
		// to use the same name for multiple variables.
11333 		// Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates.
11334 		add_local_variable_name(arg.id);
11335 
11336 		arglist.push_back(argument_decl(arg));
11337 
11338 		// Hold a pointer to the parameter so we can invalidate the readonly field if needed.
11339 		auto *var = maybe_get<SPIRVariable>(arg.id);
11340 		if (var)
11341 			var->parameter = &arg;
11342 	}
11343 
11344 	decl += merge(arglist);
11345 	decl += ")";
11346 	statement(decl);
11347 }
11348 
void CompilerGLSL::emit_function(SPIRFunction &func, const Bitset &return_flags)
11350 {
11351 	// Avoid potential cycles.
11352 	if (func.active)
11353 		return;
11354 	func.active = true;
11355 
11356 	// If we depend on a function, emit that function before we emit our own function.
11357 	for (auto block : func.blocks)
11358 	{
11359 		auto &b = get<SPIRBlock>(block);
11360 		for (auto &i : b.ops)
11361 		{
11362 			auto ops = stream(i);
11363 			auto op = static_cast<Op>(i.op);
11364 
11365 			if (op == OpFunctionCall)
11366 			{
11367 				// Recursively emit functions which are called.
11368 				uint32_t id = ops[2];
11369 				emit_function(get<SPIRFunction>(id), ir.meta[ops[1]].decoration.decoration_flags);
11370 			}
11371 		}
11372 	}
11373 
11374 	if (func.entry_line.file_id != 0)
11375 		emit_line_directive(func.entry_line.file_id, func.entry_line.line_literal);
11376 	emit_function_prototype(func, return_flags);
11377 	begin_scope();
11378 
11379 	if (func.self == ir.default_entry_point)
11380 		emit_entry_point_declarations();
11381 
11382 	current_function = &func;
11383 	auto &entry_block = get<SPIRBlock>(func.entry_block);
11384 
11385 	for (auto &v : func.local_variables)
11386 	{
11387 		auto &var = get<SPIRVariable>(v);
11388 		var.deferred_declaration = false;
11389 
11390 		if (var.storage == StorageClassWorkgroup)
11391 		{
			// Special variable type which cannot have an initializer,
			// so it needs to be declared as a standalone variable.
			// Comes from MSL, which can push global variables as local variables in the main function.
11395 			add_local_variable_name(var.self);
11396 			statement(variable_decl(var), ";");
11397 			var.deferred_declaration = false;
11398 		}
11399 		else if (var.storage == StorageClassPrivate)
11400 		{
11401 			// These variables will not have had their CFG usage analyzed, so move it to the entry block.
			// Comes from MSL, which can push global variables as local variables in the main function.
11403 			// We could just declare them right now, but we would miss out on an important initialization case which is
11404 			// LUT declaration in MSL.
11405 			// If we don't declare the variable when it is assigned we're forced to go through a helper function
11406 			// which copies elements one by one.
11407 			add_local_variable_name(var.self);
11408 			auto &dominated = entry_block.dominated_variables;
11409 			if (find(begin(dominated), end(dominated), var.self) == end(dominated))
11410 				entry_block.dominated_variables.push_back(var.self);
11411 			var.deferred_declaration = true;
11412 		}
11413 		else if (var.storage == StorageClassFunction && var.remapped_variable && var.static_expression)
11414 		{
11415 			// No need to declare this variable, it has a static expression.
11416 			var.deferred_declaration = false;
11417 		}
11418 		else if (expression_is_lvalue(v))
11419 		{
11420 			add_local_variable_name(var.self);
11421 
11422 			if (var.initializer)
11423 				statement(variable_decl_function_local(var), ";");
11424 			else
11425 			{
11426 				// Don't declare variable until first use to declutter the GLSL output quite a lot.
11427 				// If we don't touch the variable before first branch,
11428 				// declare it then since we need variable declaration to be in top scope.
11429 				var.deferred_declaration = true;
11430 			}
11431 		}
11432 		else
11433 		{
11434 			// HACK: SPIR-V in older glslang output likes to use samplers and images as local variables, but GLSL does not allow this.
11435 			// For these types (non-lvalue), we enforce forwarding through a shadowed variable.
11436 			// This means that when we OpStore to these variables, we just write in the expression ID directly.
11437 			// This breaks any kind of branching, since the variable must be statically assigned.
11438 			// Branching on samplers and images would be pretty much impossible to fake in GLSL.
11439 			var.statically_assigned = true;
11440 		}
11441 
11442 		var.loop_variable_enable = false;
11443 
11444 		// Loop variables are never declared outside their for-loop, so block any implicit declaration.
11445 		if (var.loop_variable)
11446 			var.deferred_declaration = false;
11447 	}
11448 
11449 	// Enforce declaration order for regression testing purposes.
11450 	for (auto &block_id : func.blocks)
11451 	{
11452 		auto &block = get<SPIRBlock>(block_id);
11453 		sort(begin(block.dominated_variables), end(block.dominated_variables));
11454 	}
11455 
11456 	for (auto &line : current_function->fixup_hooks_in)
11457 		line();
11458 
11459 	emit_block_chain(entry_block);
11460 
11461 	end_scope();
11462 	processing_entry_point = false;
11463 	statement("");
11464 
11465 	// Make sure deferred declaration state for local variables is cleared when we are done with function.
11466 	// We risk declaring Private/Workgroup variables in places we are not supposed to otherwise.
11467 	for (auto &v : func.local_variables)
11468 	{
11469 		auto &var = get<SPIRVariable>(v);
11470 		var.deferred_declaration = false;
11471 	}
11472 }
11473 
void CompilerGLSL::emit_fixup()
11475 {
11476 	auto &execution = get_entry_point();
11477 	if (execution.model == ExecutionModelVertex)
11478 	{
11479 		if (options.vertex.fixup_clipspace)
11480 		{
11481 			const char *suffix = backend.float_literal_suffix ? "f" : "";
11482 			statement("gl_Position.z = 2.0", suffix, " * gl_Position.z - gl_Position.w;");
11483 		}
11484 
11485 		if (options.vertex.flip_vert_y)
11486 			statement("gl_Position.y = -gl_Position.y;");
11487 	}
11488 }
11489 
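// Hedged example of what flushing a phi looks like in the output (identifiers are made up):
// for a phi that writes function variable "x" from incoming value "y", this emits "x = y;".
// If a later phi in the same target block still needs the old value of "x", a saved copy such
// as "_25_copy = x;" is emitted first and used as the right-hand side of that later phi.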
void CompilerGLSL::flush_phi(BlockID from, BlockID to)
11491 {
11492 	auto &child = get<SPIRBlock>(to);
11493 	if (child.ignore_phi_from_block == from)
11494 		return;
11495 
11496 	unordered_set<uint32_t> temporary_phi_variables;
11497 
11498 	for (auto itr = begin(child.phi_variables); itr != end(child.phi_variables); ++itr)
11499 	{
11500 		auto &phi = *itr;
11501 
11502 		if (phi.parent == from)
11503 		{
11504 			auto &var = get<SPIRVariable>(phi.function_variable);
11505 
11506 			// A Phi variable might be a loop variable, so flush to static expression.
11507 			if (var.loop_variable && !var.loop_variable_enable)
11508 				var.static_expression = phi.local_variable;
11509 			else
11510 			{
11511 				flush_variable_declaration(phi.function_variable);
11512 
11513 				// Check if we are going to write to a Phi variable that another statement will read from
11514 				// as part of another Phi node in our target block.
11515 				// For this case, we will need to copy phi.function_variable to a temporary, and use that for future reads.
11516 				// This is judged to be extremely rare, so deal with it here using a simple, but suboptimal algorithm.
11517 				bool need_saved_temporary =
11518 				    find_if(itr + 1, end(child.phi_variables), [&](const SPIRBlock::Phi &future_phi) -> bool {
11519 					    return future_phi.local_variable == ID(phi.function_variable) && future_phi.parent == from;
11520 				    }) != end(child.phi_variables);
11521 
11522 				if (need_saved_temporary)
11523 				{
11524 					// Need to make sure we declare the phi variable with a copy at the right scope.
11525 					// We cannot safely declare a temporary here since we might be inside a continue block.
11526 					if (!var.allocate_temporary_copy)
11527 					{
11528 						var.allocate_temporary_copy = true;
11529 						force_recompile();
11530 					}
11531 					statement("_", phi.function_variable, "_copy", " = ", to_name(phi.function_variable), ";");
11532 					temporary_phi_variables.insert(phi.function_variable);
11533 				}
11534 
11535 				// This might be called in continue block, so make sure we
11536 				// use this to emit ESSL 1.0 compliant increments/decrements.
11537 				auto lhs = to_expression(phi.function_variable);
11538 
11539 				string rhs;
11540 				if (temporary_phi_variables.count(phi.local_variable))
11541 					rhs = join("_", phi.local_variable, "_copy");
11542 				else
11543 					rhs = to_pointer_expression(phi.local_variable);
11544 
11545 				if (!optimize_read_modify_write(get<SPIRType>(var.basetype), lhs, rhs))
11546 					statement(lhs, " = ", rhs, ";");
11547 			}
11548 
11549 			register_write(phi.function_variable);
11550 		}
11551 	}
11552 }
11553 
void CompilerGLSL::branch_to_continue(BlockID from, BlockID to)
11555 {
11556 	auto &to_block = get<SPIRBlock>(to);
11557 	if (from == to)
11558 		return;
11559 
11560 	assert(is_continue(to));
11561 	if (to_block.complex_continue)
11562 	{
11563 		// Just emit the whole block chain as is.
11564 		auto usage_counts = expression_usage_counts;
11565 
11566 		emit_block_chain(to_block);
11567 
11568 		// Expression usage counts are moot after returning from the continue block.
11569 		expression_usage_counts = usage_counts;
11570 	}
11571 	else
11572 	{
11573 		auto &from_block = get<SPIRBlock>(from);
11574 		bool outside_control_flow = false;
11575 		uint32_t loop_dominator = 0;
11576 
11577 		// FIXME: Refactor this to not use the old loop_dominator tracking.
11578 		if (from_block.merge_block)
11579 		{
11580 			// If we are a loop header, we don't set the loop dominator,
11581 			// so just use "self" here.
11582 			loop_dominator = from;
11583 		}
11584 		else if (from_block.loop_dominator != BlockID(SPIRBlock::NoDominator))
11585 		{
11586 			loop_dominator = from_block.loop_dominator;
11587 		}
11588 
11589 		if (loop_dominator != 0)
11590 		{
11591 			auto &cfg = get_cfg_for_current_function();
11592 
11593 			// For non-complex continue blocks, we implicitly branch to the continue block
11594 			// by having the continue block be part of the loop header in for (; ; continue-block).
11595 			outside_control_flow = cfg.node_terminates_control_flow_in_sub_graph(loop_dominator, from);
11596 		}
11597 
		// Some simplification for for-loops: we always end up with a useless continue;
		// statement since we branch to a loop block.
		// Walk the CFG: if we unconditionally execute the block calling continue (assuming we're in the loop block),
		// we can avoid writing out an explicit continue statement.
		// A similar optimization applies to return statements if we know we're outside flow control.
11603 		if (!outside_control_flow)
11604 			statement("continue;");
11605 	}
11606 }
11607 
void CompilerGLSL::branch(BlockID from, BlockID to)
11609 {
11610 	flush_phi(from, to);
11611 	flush_control_dependent_expressions(from);
11612 
11613 	bool to_is_continue = is_continue(to);
11614 
11615 	// This is only a continue if we branch to our loop dominator.
11616 	if ((ir.block_meta[to] & ParsedIR::BLOCK_META_LOOP_HEADER_BIT) != 0 && get<SPIRBlock>(from).loop_dominator == to)
11617 	{
11618 		// This can happen if we had a complex continue block which was emitted.
11619 		// Once the continue block tries to branch to the loop header, just emit continue;
11620 		// and end the chain here.
11621 		statement("continue;");
11622 	}
11623 	else if (is_break(to))
11624 	{
11625 		// Very dirty workaround.
11626 		// Switch constructs are able to break, but they cannot break out of a loop at the same time.
11627 		// Only sensible solution is to make a ladder variable, which we declare at the top of the switch block,
11628 		// write to the ladder here, and defer the break.
11629 		// The loop we're breaking out of must dominate the switch block, or there is no ladder breaking case.
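		// Sketch of the emitted ladder (the variable name is derived from the switch block ID,
		// "42" here is just an illustration):
		//   _42_ladder_break = true;
		//   break;
		// with the real loop break typically performed after the switch once the ladder flag is checked.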
11630 		if (current_emitting_switch && is_loop_break(to) &&
11631 		    current_emitting_switch->loop_dominator != BlockID(SPIRBlock::NoDominator) &&
11632 		    get<SPIRBlock>(current_emitting_switch->loop_dominator).merge_block == to)
11633 		{
11634 			if (!current_emitting_switch->need_ladder_break)
11635 			{
11636 				force_recompile();
11637 				current_emitting_switch->need_ladder_break = true;
11638 			}
11639 
11640 			statement("_", current_emitting_switch->self, "_ladder_break = true;");
11641 		}
11642 		statement("break;");
11643 	}
11644 	else if (to_is_continue || from == to)
11645 	{
		// The from == to case can happen for a do-while loop which branches into itself.
		// We don't mark these cases as continue blocks, but the only possible way to branch into
		// ourselves is by means of continue blocks.
11649 
11650 		// If we are merging to a continue block, there is no need to emit the block chain for continue here.
11651 		// We can branch to the continue block after we merge execution.
11652 
11653 		// Here we make use of structured control flow rules from spec:
11654 		// 2.11: - the merge block declared by a header block cannot be a merge block declared by any other header block
11655 		//       - each header block must strictly dominate its merge block, unless the merge block is unreachable in the CFG
11656 		// If we are branching to a merge block, we must be inside a construct which dominates the merge block.
11657 		auto &block_meta = ir.block_meta[to];
11658 		bool branching_to_merge =
11659 		    (block_meta & (ParsedIR::BLOCK_META_SELECTION_MERGE_BIT | ParsedIR::BLOCK_META_MULTISELECT_MERGE_BIT |
11660 		                   ParsedIR::BLOCK_META_LOOP_MERGE_BIT)) != 0;
11661 		if (!to_is_continue || !branching_to_merge)
11662 			branch_to_continue(from, to);
11663 	}
11664 	else if (!is_conditional(to))
11665 		emit_block_chain(get<SPIRBlock>(to));
11666 
11667 	// It is important that we check for break before continue.
11668 	// A block might serve two purposes, a break block for the inner scope, and
11669 	// a continue block in the outer scope.
11670 	// Inner scope always takes precedence.
11671 }
11672 
void CompilerGLSL::branch(BlockID from, uint32_t cond, BlockID true_block, BlockID false_block)
11674 {
11675 	auto &from_block = get<SPIRBlock>(from);
11676 	BlockID merge_block = from_block.merge == SPIRBlock::MergeSelection ? from_block.next_block : BlockID(0);
11677 
11678 	// If we branch directly to a selection merge target, we don't need a code path.
11679 	// This covers both merge out of if () / else () as well as a break for switch blocks.
11680 	bool true_sub = !is_conditional(true_block);
11681 	bool false_sub = !is_conditional(false_block);
11682 
11683 	bool true_block_is_selection_merge = true_block == merge_block;
11684 	bool false_block_is_selection_merge = false_block == merge_block;
11685 
11686 	if (true_sub)
11687 	{
11688 		emit_block_hints(get<SPIRBlock>(from));
11689 		statement("if (", to_expression(cond), ")");
11690 		begin_scope();
11691 		branch(from, true_block);
11692 		end_scope();
11693 
11694 		// If we merge to continue, we handle that explicitly in emit_block_chain(),
11695 		// so there is no need to branch to it directly here.
11696 		// break; is required to handle ladder fallthrough cases, so keep that in for now, even
11697 		// if we could potentially handle it in emit_block_chain().
11698 		if (false_sub || (!false_block_is_selection_merge && is_continue(false_block)) || is_break(false_block))
11699 		{
11700 			statement("else");
11701 			begin_scope();
11702 			branch(from, false_block);
11703 			end_scope();
11704 		}
11705 		else if (flush_phi_required(from, false_block))
11706 		{
11707 			statement("else");
11708 			begin_scope();
11709 			flush_phi(from, false_block);
11710 			end_scope();
11711 		}
11712 	}
11713 	else if (false_sub)
11714 	{
11715 		// Only need false path, use negative conditional.
11716 		emit_block_hints(get<SPIRBlock>(from));
11717 		statement("if (!", to_enclosed_expression(cond), ")");
11718 		begin_scope();
11719 		branch(from, false_block);
11720 		end_scope();
11721 
11722 		if ((!true_block_is_selection_merge && is_continue(true_block)) || is_break(true_block))
11723 		{
11724 			statement("else");
11725 			begin_scope();
11726 			branch(from, true_block);
11727 			end_scope();
11728 		}
11729 		else if (flush_phi_required(from, true_block))
11730 		{
11731 			statement("else");
11732 			begin_scope();
11733 			flush_phi(from, true_block);
11734 			end_scope();
11735 		}
11736 	}
11737 }
11738 
11739 // FIXME: This currently cannot handle complex continue blocks
11740 // as in do-while.
11741 // This should be seen as a "trivial" continue block.
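// As an illustrative example (made-up statements): a continue block containing "i++;" and
// "j += 2;" gets captured, the trailing semicolons are stripped, and the statements are merged
// with ", " so the loop header can be emitted as "for (...; ...; i++, j += 2)".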
string CompilerGLSL::emit_continue_block(uint32_t continue_block, bool follow_true_block, bool follow_false_block)
11743 {
11744 	auto *block = &get<SPIRBlock>(continue_block);
11745 
11746 	// While emitting the continue block, declare_temporary will check this
11747 	// if we have to emit temporaries.
11748 	current_continue_block = block;
11749 
11750 	SmallVector<string> statements;
11751 
11752 	// Capture all statements into our list.
11753 	auto *old = redirect_statement;
11754 	redirect_statement = &statements;
11755 
11756 	// Stamp out all blocks one after each other.
11757 	while ((ir.block_meta[block->self] & ParsedIR::BLOCK_META_LOOP_HEADER_BIT) == 0)
11758 	{
11759 		// Write out all instructions we have in this block.
11760 		emit_block_instructions(*block);
11761 
11762 		// For plain branchless for/while continue blocks.
11763 		if (block->next_block)
11764 		{
11765 			flush_phi(continue_block, block->next_block);
11766 			block = &get<SPIRBlock>(block->next_block);
11767 		}
11768 		// For do while blocks. The last block will be a select block.
11769 		else if (block->true_block && follow_true_block)
11770 		{
11771 			flush_phi(continue_block, block->true_block);
11772 			block = &get<SPIRBlock>(block->true_block);
11773 		}
11774 		else if (block->false_block && follow_false_block)
11775 		{
11776 			flush_phi(continue_block, block->false_block);
11777 			block = &get<SPIRBlock>(block->false_block);
11778 		}
11779 		else
11780 		{
11781 			SPIRV_CROSS_THROW("Invalid continue block detected!");
11782 		}
11783 	}
11784 
11785 	// Restore old pointer.
11786 	redirect_statement = old;
11787 
11788 	// Somewhat ugly, strip off the last ';' since we use ',' instead.
11789 	// Ideally, we should select this behavior in statement().
11790 	for (auto &s : statements)
11791 	{
11792 		if (!s.empty() && s.back() == ';')
11793 			s.erase(s.size() - 1, 1);
11794 	}
11795 
11796 	current_continue_block = nullptr;
11797 	return merge(statements);
11798 }
11799 
void CompilerGLSL::emit_while_loop_initializers(const SPIRBlock &block)
11801 {
11802 	// While loops do not take initializers, so declare all of them outside.
11803 	for (auto &loop_var : block.loop_variables)
11804 	{
11805 		auto &var = get<SPIRVariable>(loop_var);
11806 		statement(variable_decl(var), ";");
11807 	}
11808 }
11809 
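// Hedged example of the initializer string built below: when all loop variables share a type,
// something like "int i = 0, j = 0" is returned for use inside the for header; with mixed
// types or missing initializers, plain declarations are emitted before the loop instead and
// an empty string is returned.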
string CompilerGLSL::emit_for_loop_initializers(const SPIRBlock &block)
11811 {
11812 	if (block.loop_variables.empty())
11813 		return "";
11814 
11815 	bool same_types = for_loop_initializers_are_same_type(block);
11816 	// We can only declare for loop initializers if all variables are of same type.
11817 	// If we cannot do this, declare individual variables before the loop header.
11818 
11819 	// We might have a loop variable candidate which was not assigned to for some reason.
11820 	uint32_t missing_initializers = 0;
11821 	for (auto &variable : block.loop_variables)
11822 	{
11823 		uint32_t expr = get<SPIRVariable>(variable).static_expression;
11824 
11825 		// Sometimes loop variables are initialized with OpUndef, but we can just declare
11826 		// a plain variable without initializer in this case.
11827 		if (expr == 0 || ir.ids[expr].get_type() == TypeUndef)
11828 			missing_initializers++;
11829 	}
11830 
11831 	if (block.loop_variables.size() == 1 && missing_initializers == 0)
11832 	{
11833 		return variable_decl(get<SPIRVariable>(block.loop_variables.front()));
11834 	}
11835 	else if (!same_types || missing_initializers == uint32_t(block.loop_variables.size()))
11836 	{
11837 		for (auto &loop_var : block.loop_variables)
11838 			statement(variable_decl(get<SPIRVariable>(loop_var)), ";");
11839 		return "";
11840 	}
11841 	else
11842 	{
11843 		// We have a mix of loop variables, either ones with a clear initializer, or ones without.
11844 		// Separate the two streams.
11845 		string expr;
11846 
11847 		for (auto &loop_var : block.loop_variables)
11848 		{
11849 			uint32_t static_expr = get<SPIRVariable>(loop_var).static_expression;
11850 			if (static_expr == 0 || ir.ids[static_expr].get_type() == TypeUndef)
11851 			{
11852 				statement(variable_decl(get<SPIRVariable>(loop_var)), ";");
11853 			}
11854 			else
11855 			{
11856 				auto &var = get<SPIRVariable>(loop_var);
11857 				auto &type = get_variable_data_type(var);
11858 				if (expr.empty())
11859 				{
11860 					// For loop initializers are of the form <type id = value, id = value, id = value, etc ...
11861 					expr = join(to_qualifiers_glsl(var.self), type_to_glsl(type), " ");
11862 				}
11863 				else
11864 				{
11865 					expr += ", ";
11866 					// In MSL, being based on C++, the asterisk marking a pointer
11867 					// binds to the identifier, not the type.
11868 					if (type.pointer)
11869 						expr += "* ";
11870 				}
11871 
11872 				expr += join(to_name(loop_var), " = ", to_pointer_expression(var.static_expression));
11873 			}
11874 		}
11875 		return expr;
11876 	}
11877 }
11878 
bool CompilerGLSL::for_loop_initializers_are_same_type(const SPIRBlock &block)
11880 {
11881 	if (block.loop_variables.size() <= 1)
11882 		return true;
11883 
11884 	uint32_t expected = 0;
11885 	Bitset expected_flags;
11886 	for (auto &var : block.loop_variables)
11887 	{
11888 		// Don't care about uninitialized variables as they will not be part of the initializers.
11889 		uint32_t expr = get<SPIRVariable>(var).static_expression;
11890 		if (expr == 0 || ir.ids[expr].get_type() == TypeUndef)
11891 			continue;
11892 
11893 		if (expected == 0)
11894 		{
11895 			expected = get<SPIRVariable>(var).basetype;
11896 			expected_flags = get_decoration_bitset(var);
11897 		}
11898 		else if (expected != get<SPIRVariable>(var).basetype)
11899 			return false;
11900 
11901 		// Precision flags and things like that must also match.
11902 		if (expected_flags != get_decoration_bitset(var))
11903 			return false;
11904 	}
11905 
11906 	return true;
11907 }
11908 
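// Sketch of the two loop shapes this tries to produce (initializer/condition text is illustrative):
//   for (int i = 0; i < count; i++) { ... }   // ForLoop continue block
//   while (cond) { ... }                      // WhileLoop continue block
// If any instruction before the branch would emit real statements, the optimization is abandoned
// (disable_block_optimization) and the loop falls back to the generic emission in emit_block_chain.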
bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method method)
11910 {
11911 	SPIRBlock::ContinueBlockType continue_type = continue_block_type(get<SPIRBlock>(block.continue_block));
11912 
11913 	if (method == SPIRBlock::MergeToSelectForLoop || method == SPIRBlock::MergeToSelectContinueForLoop)
11914 	{
11915 		uint32_t current_count = statement_count;
11916 		// If we're trying to create a true for loop,
11917 		// we need to make sure that all opcodes before branch statement do not actually emit any code.
11918 		// We can then take the condition expression and create a for (; cond ; ) { body; } structure instead.
11919 		emit_block_instructions(block);
11920 
11921 		bool condition_is_temporary = forced_temporaries.find(block.condition) == end(forced_temporaries);
11922 
11923 		// This can work! We only did trivial things which could be forwarded in block body!
11924 		if (current_count == statement_count && condition_is_temporary)
11925 		{
11926 			switch (continue_type)
11927 			{
11928 			case SPIRBlock::ForLoop:
11929 			{
11930 				// This block may be a dominating block, so make sure we flush undeclared variables before building the for loop header.
11931 				flush_undeclared_variables(block);
11932 
11933 				// Important that we do this in this order because
11934 				// emitting the continue block can invalidate the condition expression.
11935 				auto initializer = emit_for_loop_initializers(block);
11936 				auto condition = to_expression(block.condition);
11937 
11938 				// Condition might have to be inverted.
11939 				if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block)))
11940 					condition = join("!", enclose_expression(condition));
11941 
11942 				emit_block_hints(block);
11943 				if (method != SPIRBlock::MergeToSelectContinueForLoop)
11944 				{
11945 					auto continue_block = emit_continue_block(block.continue_block, false, false);
11946 					statement("for (", initializer, "; ", condition, "; ", continue_block, ")");
11947 				}
11948 				else
11949 					statement("for (", initializer, "; ", condition, "; )");
11950 				break;
11951 			}
11952 
11953 			case SPIRBlock::WhileLoop:
11954 			{
11955 				// This block may be a dominating block, so make sure we flush undeclared variables before building the while loop header.
11956 				flush_undeclared_variables(block);
11957 				emit_while_loop_initializers(block);
11958 				emit_block_hints(block);
11959 
11960 				auto condition = to_expression(block.condition);
11961 				// Condition might have to be inverted.
11962 				if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block)))
11963 					condition = join("!", enclose_expression(condition));
11964 
11965 				statement("while (", condition, ")");
11966 				break;
11967 			}
11968 
11969 			default:
11970 				block.disable_block_optimization = true;
11971 				force_recompile();
11972 				begin_scope(); // We'll see an end_scope() later.
11973 				return false;
11974 			}
11975 
11976 			begin_scope();
11977 			return true;
11978 		}
11979 		else
11980 		{
11981 			block.disable_block_optimization = true;
11982 			force_recompile();
11983 			begin_scope(); // We'll see an end_scope() later.
11984 			return false;
11985 		}
11986 	}
11987 	else if (method == SPIRBlock::MergeToDirectForLoop)
11988 	{
11989 		auto &child = get<SPIRBlock>(block.next_block);
11990 
11991 		// This block may be a dominating block, so make sure we flush undeclared variables before building the for loop header.
11992 		flush_undeclared_variables(child);
11993 
11994 		uint32_t current_count = statement_count;
11995 
11996 		// If we're trying to create a true for loop,
11997 		// we need to make sure that all opcodes before branch statement do not actually emit any code.
11998 		// We can then take the condition expression and create a for (; cond ; ) { body; } structure instead.
11999 		emit_block_instructions(child);
12000 
12001 		bool condition_is_temporary = forced_temporaries.find(child.condition) == end(forced_temporaries);
12002 
12003 		if (current_count == statement_count && condition_is_temporary)
12004 		{
12005 			uint32_t target_block = child.true_block;
12006 
12007 			switch (continue_type)
12008 			{
12009 			case SPIRBlock::ForLoop:
12010 			{
12011 				// Important that we do this in this order because
12012 				// emitting the continue block can invalidate the condition expression.
12013 				auto initializer = emit_for_loop_initializers(block);
12014 				auto condition = to_expression(child.condition);
12015 
12016 				// Condition might have to be inverted.
12017 				if (execution_is_noop(get<SPIRBlock>(child.true_block), get<SPIRBlock>(block.merge_block)))
12018 				{
12019 					condition = join("!", enclose_expression(condition));
12020 					target_block = child.false_block;
12021 				}
12022 
12023 				auto continue_block = emit_continue_block(block.continue_block, false, false);
12024 				emit_block_hints(block);
12025 				statement("for (", initializer, "; ", condition, "; ", continue_block, ")");
12026 				break;
12027 			}
12028 
12029 			case SPIRBlock::WhileLoop:
12030 			{
12031 				emit_while_loop_initializers(block);
12032 				emit_block_hints(block);
12033 
12034 				auto condition = to_expression(child.condition);
12035 				// Condition might have to be inverted.
12036 				if (execution_is_noop(get<SPIRBlock>(child.true_block), get<SPIRBlock>(block.merge_block)))
12037 				{
12038 					condition = join("!", enclose_expression(condition));
12039 					target_block = child.false_block;
12040 				}
12041 
12042 				statement("while (", condition, ")");
12043 				break;
12044 			}
12045 
12046 			default:
12047 				block.disable_block_optimization = true;
12048 				force_recompile();
12049 				begin_scope(); // We'll see an end_scope() later.
12050 				return false;
12051 			}
12052 
12053 			begin_scope();
12054 			branch(child.self, target_block);
12055 			return true;
12056 		}
12057 		else
12058 		{
12059 			block.disable_block_optimization = true;
12060 			force_recompile();
12061 			begin_scope(); // We'll see an end_scope() later.
12062 			return false;
12063 		}
12064 	}
12065 	else
12066 		return false;
12067 }
12068 
void CompilerGLSL::flush_undeclared_variables(SPIRBlock &block)
12070 {
12071 	for (auto &v : block.dominated_variables)
12072 		flush_variable_declaration(v);
12073 }
12074 
void CompilerGLSL::emit_hoisted_temporaries(SmallVector<pair<TypeID, ID>> &temporaries)
12076 {
12077 	// If we need to force temporaries for certain IDs due to continue blocks, do it before starting loop header.
12078 	// Need to sort these to ensure that reference output is stable.
12079 	sort(begin(temporaries), end(temporaries),
12080 	     [](const pair<TypeID, ID> &a, const pair<TypeID, ID> &b) { return a.second < b.second; });
12081 
12082 	for (auto &tmp : temporaries)
12083 	{
12084 		add_local_variable_name(tmp.second);
12085 		auto &flags = ir.meta[tmp.second].decoration.decoration_flags;
12086 		auto &type = get<SPIRType>(tmp.first);
12087 		statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(tmp.second)), ";");
12088 
12089 		hoisted_temporaries.insert(tmp.second);
12090 		forced_temporaries.insert(tmp.second);
12091 
12092 		// The temporary might be read from before it's assigned, set up the expression now.
12093 		set<SPIRExpression>(tmp.second, to_name(tmp.second), tmp.first, true);
12094 	}
12095 }
12096 
12097 void CompilerGLSL::emit_block_chain(SPIRBlock &block)
12098 {
12099 	bool select_branch_to_true_block = false;
12100 	bool select_branch_to_false_block = false;
12101 	bool skip_direct_branch = false;
12102 	bool emitted_loop_header_variables = false;
12103 	bool force_complex_continue_block = false;
12104 
12105 	emit_hoisted_temporaries(block.declare_temporary);
12106 
12107 	SPIRBlock::ContinueBlockType continue_type = SPIRBlock::ContinueNone;
12108 	if (block.continue_block)
12109 		continue_type = continue_block_type(get<SPIRBlock>(block.continue_block));
12110 
12111 	// If we have loop variables, stop masking out access to those variables now.
12112 	for (auto var_id : block.loop_variables)
12113 	{
12114 		auto &var = get<SPIRVariable>(var_id);
12115 		var.loop_variable_enable = true;
12116 		// We're not going to declare the variable directly, so emit a copy here.
12117 		emit_variable_temporary_copies(var);
12118 	}
12119 
12120 	// Remember deferred declaration state. We will restore it before returning.
12121 	SmallVector<bool, 64> rearm_dominated_variables(block.dominated_variables.size());
12122 	for (size_t i = 0; i < block.dominated_variables.size(); i++)
12123 	{
12124 		uint32_t var_id = block.dominated_variables[i];
12125 		auto &var = get<SPIRVariable>(var_id);
12126 		rearm_dominated_variables[i] = var.deferred_declaration;
12127 	}
12128 
12129 	// This is the method often used by spirv-opt to implement loops.
12130 	// The loop header goes straight into the continue block.
12131 	// However, don't attempt this on ESSL 1.0: it places strict requirements on how a loop variable
12132 	// may be used in the continue block, which this loop method cannot guarantee, so it will not work there.
12133 	if (!is_legacy_es() && block_is_loop_candidate(block, SPIRBlock::MergeToSelectContinueForLoop))
12134 	{
12135 		flush_undeclared_variables(block);
12136 		if (attempt_emit_loop_header(block, SPIRBlock::MergeToSelectContinueForLoop))
12137 		{
12138 			if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block)))
12139 				select_branch_to_false_block = true;
12140 			else
12141 				select_branch_to_true_block = true;
12142 
12143 			emitted_loop_header_variables = true;
12144 			force_complex_continue_block = true;
12145 		}
12146 	}
12147 	// This is the older loop behavior in glslang which branches to loop body directly from the loop header.
12148 	else if (block_is_loop_candidate(block, SPIRBlock::MergeToSelectForLoop))
12149 	{
12150 		flush_undeclared_variables(block);
12151 		if (attempt_emit_loop_header(block, SPIRBlock::MergeToSelectForLoop))
12152 		{
12153 			// The body of the while loop is actually just the true (or false) block, so always branch there unconditionally.
12154 			if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block)))
12155 				select_branch_to_false_block = true;
12156 			else
12157 				select_branch_to_true_block = true;
12158 
12159 			emitted_loop_header_variables = true;
12160 		}
12161 	}
12162 	// This is the newer loop behavior in glslang which branches from the loop header directly to
12163 	// a new block, which in turn has an OpBranchConditional without a selection merge.
12164 	else if (block_is_loop_candidate(block, SPIRBlock::MergeToDirectForLoop))
12165 	{
12166 		flush_undeclared_variables(block);
12167 		if (attempt_emit_loop_header(block, SPIRBlock::MergeToDirectForLoop))
12168 		{
12169 			skip_direct_branch = true;
12170 			emitted_loop_header_variables = true;
12171 		}
12172 	}
12173 	else if (continue_type == SPIRBlock::DoWhileLoop)
12174 	{
12175 		flush_undeclared_variables(block);
12176 		emit_while_loop_initializers(block);
12177 		emitted_loop_header_variables = true;
12178 		// We have some temporaries where the loop header is the dominator.
12179 		// We risk a case where we have code like:
12180 		// for (;;) { create-temporary; break; } consume-temporary;
12181 		// so force-declare temporaries here.
12182 		emit_hoisted_temporaries(block.potential_declare_temporary);
12183 		statement("do");
12184 		begin_scope();
12185 
12186 		emit_block_instructions(block);
12187 	}
12188 	else if (block.merge == SPIRBlock::MergeLoop)
12189 	{
12190 		flush_undeclared_variables(block);
12191 		emit_while_loop_initializers(block);
12192 		emitted_loop_header_variables = true;
12193 
12194 		// We have a generic loop without any distinguishable pattern like for, while or do while.
12195 		get<SPIRBlock>(block.continue_block).complex_continue = true;
12196 		continue_type = SPIRBlock::ComplexLoop;
12197 
12198 		// We have some temporaries where the loop header is the dominator.
12199 		// We risk a case where we have code like:
12200 		// for (;;) { create-temporary; break; } consume-temporary;
12201 		// so force-declare temporaries here.
12202 		emit_hoisted_temporaries(block.potential_declare_temporary);
12203 		statement("for (;;)");
12204 		begin_scope();
12205 
12206 		emit_block_instructions(block);
12207 	}
12208 	else
12209 	{
12210 		emit_block_instructions(block);
12211 	}
12212 
12213 	// If we didn't successfully emit a loop header and we had loop variable candidates, we have a problem:
12214 	// writes to said loop variables might have been masked out, so we need a recompile.
12215 	if (!emitted_loop_header_variables && !block.loop_variables.empty())
12216 	{
12217 		force_recompile();
12218 		for (auto var : block.loop_variables)
12219 			get<SPIRVariable>(var).loop_variable = false;
12220 		block.loop_variables.clear();
12221 	}
12222 
12223 	flush_undeclared_variables(block);
12224 	bool emit_next_block = true;
12225 
12226 	// Handle end of block.
12227 	switch (block.terminator)
12228 	{
12229 	case SPIRBlock::Direct:
12230 		// True when emitting complex continue block.
12231 		if (block.loop_dominator == block.next_block)
12232 		{
12233 			branch(block.self, block.next_block);
12234 			emit_next_block = false;
12235 		}
12236 		// True if MergeToDirectForLoop succeeded.
12237 		else if (skip_direct_branch)
12238 			emit_next_block = false;
12239 		else if (is_continue(block.next_block) || is_break(block.next_block) || is_conditional(block.next_block))
12240 		{
12241 			branch(block.self, block.next_block);
12242 			emit_next_block = false;
12243 		}
12244 		break;
12245 
12246 	case SPIRBlock::Select:
12247 		// True if MergeToSelectForLoop or MergeToSelectContinueForLoop succeeded.
12248 		if (select_branch_to_true_block)
12249 		{
12250 			if (force_complex_continue_block)
12251 			{
12252 				assert(block.true_block == block.continue_block);
12253 
12254 				// We're going to emit a continue block directly here, so make sure it's marked as complex.
12255 				auto &complex_continue = get<SPIRBlock>(block.continue_block).complex_continue;
12256 				bool old_complex = complex_continue;
12257 				complex_continue = true;
12258 				branch(block.self, block.true_block);
12259 				complex_continue = old_complex;
12260 			}
12261 			else
12262 				branch(block.self, block.true_block);
12263 		}
12264 		else if (select_branch_to_false_block)
12265 		{
12266 			if (force_complex_continue_block)
12267 			{
12268 				assert(block.false_block == block.continue_block);
12269 
12270 				// We're going to emit a continue block directly here, so make sure it's marked as complex.
12271 				auto &complex_continue = get<SPIRBlock>(block.continue_block).complex_continue;
12272 				bool old_complex = complex_continue;
12273 				complex_continue = true;
12274 				branch(block.self, block.false_block);
12275 				complex_continue = old_complex;
12276 			}
12277 			else
12278 				branch(block.self, block.false_block);
12279 		}
12280 		else
12281 			branch(block.self, block.condition, block.true_block, block.false_block);
12282 		break;
12283 
12284 	case SPIRBlock::MultiSelect:
12285 	{
12286 		auto &type = expression_type(block.condition);
12287 		bool unsigned_case =
12288 		    type.basetype == SPIRType::UInt || type.basetype == SPIRType::UShort || type.basetype == SPIRType::UByte;
12289 
12290 		if (block.merge == SPIRBlock::MergeNone)
12291 			SPIRV_CROSS_THROW("Switch statement is not structured");
12292 
12293 		if (type.basetype == SPIRType::UInt64 || type.basetype == SPIRType::Int64)
12294 		{
12295 			// SPIR-V spec suggests this is allowed, but we cannot support it in higher level languages.
12296 			SPIRV_CROSS_THROW("Cannot use 64-bit switch selectors.");
12297 		}
12298 
12299 		const char *label_suffix = "";
12300 		if (type.basetype == SPIRType::UInt && backend.uint32_t_literal_suffix)
12301 			label_suffix = "u";
12302 		else if (type.basetype == SPIRType::UShort)
12303 			label_suffix = backend.uint16_t_literal_suffix;
12304 		else if (type.basetype == SPIRType::Short)
12305 			label_suffix = backend.int16_t_literal_suffix;
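		// The suffix is appended to every case label emitted below, e.g. "case 10u:" for a uint selector.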
12306 
12307 		SPIRBlock *old_emitting_switch = current_emitting_switch;
12308 		current_emitting_switch = &block;
12309 
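		// The ladder break flag is set by branches that need to break out of an enclosing loop from inside this switch;
		// it is tested right after the switch scope is closed below.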
12310 		if (block.need_ladder_break)
12311 			statement("bool _", block.self, "_ladder_break = false;");
12312 
12313 		// Find all unique case constructs.
12314 		unordered_map<uint32_t, SmallVector<uint32_t>> case_constructs;
12315 		SmallVector<uint32_t> block_declaration_order;
12316 		SmallVector<uint32_t> literals_to_merge;
12317 
12318 		// If a switch case branches to the default block for some reason, we can just remove that literal from consideration
12319 		// and let the default: block handle it.
12320 		// Section 2.11 of the SPIR-V spec states that for fall-through cases, there is a very strict declaration order which we can take advantage of here.
12321 		// We only need to consider possible fallthrough if order[i] branches to order[i + 1].
12322 		for (auto &c : block.cases)
12323 		{
12324 			if (c.block != block.next_block && c.block != block.default_block)
12325 			{
12326 				if (!case_constructs.count(c.block))
12327 					block_declaration_order.push_back(c.block);
12328 				case_constructs[c.block].push_back(c.value);
12329 			}
12330 			else if (c.block == block.next_block && block.default_block != block.next_block)
12331 			{
12332 				// We might have to flush phi inside specific case labels.
12333 				// If we can piggyback on default:, do so instead.
12334 				literals_to_merge.push_back(c.value);
12335 			}
12336 		}
12337 
12338 		// Empty literal array -> default.
12339 		if (block.default_block != block.next_block)
12340 		{
12341 			auto &default_block = get<SPIRBlock>(block.default_block);
12342 
12343 			// We need to slide in the default block somewhere in this chain
12344 			// if there are fall-through scenarios since the default is declared separately in OpSwitch.
12345 			// Only consider trivial fall-through cases here.
12346 			size_t num_blocks = block_declaration_order.size();
12347 			bool injected_block = false;
12348 
12349 			for (size_t i = 0; i < num_blocks; i++)
12350 			{
12351 				auto &case_block = get<SPIRBlock>(block_declaration_order[i]);
12352 				if (execution_is_direct_branch(case_block, default_block))
12353 				{
12354 					// Fallthrough to default block, we must inject the default block here.
12355 					block_declaration_order.insert(begin(block_declaration_order) + i + 1, block.default_block);
12356 					injected_block = true;
12357 					break;
12358 				}
12359 				else if (execution_is_direct_branch(default_block, case_block))
12360 				{
12361 					// Default case is falling through to another case label, we must inject the default block here.
12362 					block_declaration_order.insert(begin(block_declaration_order) + i, block.default_block);
12363 					injected_block = true;
12364 					break;
12365 				}
12366 			}
12367 
12368 			// Order does not matter.
12369 			if (!injected_block)
12370 				block_declaration_order.push_back(block.default_block);
12371 
12372 			case_constructs[block.default_block] = {};
12373 		}
12374 
12375 		size_t num_blocks = block_declaration_order.size();
12376 
12377 		const auto to_case_label = [](uint32_t literal, bool is_unsigned_case) -> string {
12378 			return is_unsigned_case ? convert_to_string(literal) : convert_to_string(int32_t(literal));
12379 		};
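		// e.g. the literal 0xffffffffu prints as "4294967295" for an unsigned selector, but as "-1" for a signed one.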
12380 
12381 		// We need to deal with a complex scenario for OpPhi. If we have case-fallthrough and Phi in the picture,
12382 		// we need to flush phi nodes outside the switch block in a branch,
12383 		// and skip any Phi handling inside the case label to make fall-through work as expected.
12384 		// This kind of code-gen is super awkward and it's a last resort. Normally we would want to handle this
12385 		// inside the case label if at all possible.
12386 		for (size_t i = 1; i < num_blocks; i++)
12387 		{
12388 			if (flush_phi_required(block.self, block_declaration_order[i]) &&
12389 			    flush_phi_required(block_declaration_order[i - 1], block_declaration_order[i]))
12390 			{
12391 				uint32_t target_block = block_declaration_order[i];
12392 
12393 				// Make sure we flush Phi, it might have been marked to be ignored earlier.
12394 				get<SPIRBlock>(target_block).ignore_phi_from_block = 0;
12395 
12396 				auto &literals = case_constructs[target_block];
12397 
12398 				if (literals.empty())
12399 				{
12400 					// Oh boy, gotta make a complete negative test instead! o.o
12401 					// Find all possible literals that would *not* make us enter the default block.
12402 					// If none of those literals match, we flush Phi ...
12403 					SmallVector<string> conditions;
12404 					for (size_t j = 0; j < num_blocks; j++)
12405 					{
12406 						auto &negative_literals = case_constructs[block_declaration_order[j]];
12407 						for (auto &case_label : negative_literals)
12408 							conditions.push_back(join(to_enclosed_expression(block.condition),
12409 							                          " != ", to_case_label(case_label, unsigned_case)));
12410 					}
12411 
12412 					statement("if (", merge(conditions, " && "), ")");
12413 					begin_scope();
12414 					flush_phi(block.self, target_block);
12415 					end_scope();
12416 				}
12417 				else
12418 				{
12419 					SmallVector<string> conditions;
12420 					conditions.reserve(literals.size());
12421 					for (auto &case_label : literals)
12422 						conditions.push_back(join(to_enclosed_expression(block.condition),
12423 						                          " == ", to_case_label(case_label, unsigned_case)));
12424 					statement("if (", merge(conditions, " || "), ")");
12425 					begin_scope();
12426 					flush_phi(block.self, target_block);
12427 					end_scope();
12428 				}
12429 
12430 				// Mark the block so that we don't flush Phi from header to case label.
12431 				get<SPIRBlock>(target_block).ignore_phi_from_block = block.self;
12432 			}
12433 		}
12434 
12435 		emit_block_hints(block);
12436 		statement("switch (", to_expression(block.condition), ")");
12437 		begin_scope();
12438 
12439 		for (size_t i = 0; i < num_blocks; i++)
12440 		{
12441 			uint32_t target_block = block_declaration_order[i];
12442 			auto &literals = case_constructs[target_block];
12443 
12444 			if (literals.empty())
12445 			{
12446 				// Default case.
12447 				statement("default:");
12448 			}
12449 			else
12450 			{
12451 				for (auto &case_literal : literals)
12452 				{
12453 					// The case label value must be sign-extended properly in SPIR-V, so we can assume 32-bit values here.
12454 					statement("case ", to_case_label(case_literal, unsigned_case), label_suffix, ":");
12455 				}
12456 			}
12457 
12458 			auto &case_block = get<SPIRBlock>(target_block);
12459 			if (backend.support_case_fallthrough && i + 1 < num_blocks &&
12460 			    execution_is_direct_branch(case_block, get<SPIRBlock>(block_declaration_order[i + 1])))
12461 			{
12462 				// We will fall through here, so just terminate the block chain early.
12463 				// We still need to deal with Phi potentially.
12464 				// No need for a stack-like thing here since we only do fall-through when there is a
12465 				// single trivial branch to the fall-through target.
12466 				current_emitting_switch_fallthrough = true;
12467 			}
12468 			else
12469 				current_emitting_switch_fallthrough = false;
12470 
12471 			begin_scope();
12472 			branch(block.self, target_block);
12473 			end_scope();
12474 
12475 			current_emitting_switch_fallthrough = false;
12476 		}
12477 
12478 		// Might still have to flush phi variables if we branch from loop header directly to merge target.
12479 		if (flush_phi_required(block.self, block.next_block))
12480 		{
12481 			if (block.default_block == block.next_block || !literals_to_merge.empty())
12482 			{
12483 				for (auto &case_literal : literals_to_merge)
12484 					statement("case ", to_case_label(case_literal, unsigned_case), label_suffix, ":");
12485 
12486 				if (block.default_block == block.next_block)
12487 					statement("default:");
12488 
12489 				begin_scope();
12490 				flush_phi(block.self, block.next_block);
12491 				statement("break;");
12492 				end_scope();
12493 			}
12494 		}
12495 
12496 		end_scope();
12497 
12498 		if (block.need_ladder_break)
12499 		{
12500 			statement("if (_", block.self, "_ladder_break)");
12501 			begin_scope();
12502 			statement("break;");
12503 			end_scope();
12504 		}
12505 
12506 		current_emitting_switch = old_emitting_switch;
12507 		break;
12508 	}
12509 
12510 	case SPIRBlock::Return:
12511 	{
12512 		for (auto &line : current_function->fixup_hooks_out)
12513 			line();
12514 
12515 		if (processing_entry_point)
12516 			emit_fixup();
12517 
12518 		auto &cfg = get_cfg_for_current_function();
12519 
12520 		if (block.return_value)
12521 		{
12522 			auto &type = expression_type(block.return_value);
12523 			if (!type.array.empty() && !backend.can_return_array)
12524 			{
12525 				// If we cannot return arrays, we will have a special out argument we can write to instead.
12526 				// The backend is responsible for setting this up, and for redirecting the return values as appropriate.
12527 				if (ir.ids[block.return_value].get_type() != TypeUndef)
12528 				{
12529 					emit_array_copy("SPIRV_Cross_return_value", block.return_value, StorageClassFunction,
12530 					                get_backing_variable_storage(block.return_value));
12531 				}
12532 
12533 				if (!cfg.node_terminates_control_flow_in_sub_graph(current_function->entry_block, block.self) ||
12534 				    block.loop_dominator != BlockID(SPIRBlock::NoDominator))
12535 				{
12536 					statement("return;");
12537 				}
12538 			}
12539 			else
12540 			{
12541 				// OpReturnValue can return Undef, so don't emit anything for this case.
12542 				if (ir.ids[block.return_value].get_type() != TypeUndef)
12543 					statement("return ", to_expression(block.return_value), ";");
12544 			}
12545 		}
12546 		else if (!cfg.node_terminates_control_flow_in_sub_graph(current_function->entry_block, block.self) ||
12547 		         block.loop_dominator != BlockID(SPIRBlock::NoDominator))
12548 		{
12549 			// If this block is the very final block and not called from control flow,
12550 			// we do not need an explicit return which looks out of place. Just end the function here.
12551 			// In the very weird case of for(;;) { return; }, executing return is unconditional,
12552 			// but we actually need a return here ...
12553 			statement("return;");
12554 		}
12555 		break;
12556 	}
12557 
12558 	case SPIRBlock::Kill:
12559 		statement(backend.discard_literal, ";");
12560 		break;
12561 
12562 	case SPIRBlock::Unreachable:
12563 		emit_next_block = false;
12564 		break;
12565 
12566 	default:
12567 		SPIRV_CROSS_THROW("Unimplemented block terminator.");
12568 	}
12569 
12570 	if (block.next_block && emit_next_block)
12571 	{
12572 		// If we hit this case, we're dealing with an unconditional branch, which means we will output
12573 		// that block after this. If we had selection merge, we already flushed phi variables.
12574 		if (block.merge != SPIRBlock::MergeSelection)
12575 			flush_phi(block.self, block.next_block);
12576 
12577 		// For switch fallthrough cases, we terminate the chain here, but we still need to handle Phi.
12578 		if (!current_emitting_switch_fallthrough)
12579 		{
12580 			// For merge selects we might have ignored the fact that a merge target
12581 			// could have been a break; or continue;
12582 			// We will need to deal with it here.
12583 			if (is_loop_break(block.next_block))
12584 			{
12585 				// Cannot check for just break, because switch statements will also use break.
12586 				assert(block.merge == SPIRBlock::MergeSelection);
12587 				statement("break;");
12588 			}
12589 			else if (is_continue(block.next_block))
12590 			{
12591 				assert(block.merge == SPIRBlock::MergeSelection);
12592 				branch_to_continue(block.self, block.next_block);
12593 			}
12594 			else if (BlockID(block.self) != block.next_block)
12595 				emit_block_chain(get<SPIRBlock>(block.next_block));
12596 		}
12597 	}
12598 
12599 	if (block.merge == SPIRBlock::MergeLoop)
12600 	{
12601 		if (continue_type == SPIRBlock::DoWhileLoop)
12602 		{
12603 			// Make sure that we run the continue block to get the expressions set, but this
12604 			// should become an empty string.
12605 			// We have no fallbacks if we cannot forward everything to temporaries ...
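			// i.e. the goal is to emit the plain pattern: do { body } while (condition);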
12606 			const auto &continue_block = get<SPIRBlock>(block.continue_block);
12607 			bool positive_test = execution_is_noop(get<SPIRBlock>(continue_block.true_block),
12608 			                                       get<SPIRBlock>(continue_block.loop_dominator));
12609 
12610 			uint32_t current_count = statement_count;
12611 			auto statements = emit_continue_block(block.continue_block, positive_test, !positive_test);
12612 			if (statement_count != current_count)
12613 			{
12614 				// The DoWhile block has side effects, force ComplexLoop pattern next pass.
12615 				get<SPIRBlock>(block.continue_block).complex_continue = true;
12616 				force_recompile();
12617 			}
12618 
12619 			// Might have to invert the do-while test here.
12620 			auto condition = to_expression(continue_block.condition);
12621 			if (!positive_test)
12622 				condition = join("!", enclose_expression(condition));
12623 
12624 			end_scope_decl(join("while (", condition, ")"));
12625 		}
12626 		else
12627 			end_scope();
12628 
12629 		// We cannot break out of two loops at once, so don't check for break; here.
12630 		// Using block.self as the "from" block isn't quite right, but it has the same scope
12631 		// and dominance structure, so it's fine.
12632 		if (is_continue(block.merge_block))
12633 			branch_to_continue(block.self, block.merge_block);
12634 		else
12635 			emit_block_chain(get<SPIRBlock>(block.merge_block));
12636 	}
12637 
12638 	// Forget about control dependent expressions now.
12639 	block.invalidate_expressions.clear();
12640 
12641 	// After we return, we must be out of scope, so if we somehow have to re-emit this function,
12642 	// re-declare variables if necessary.
12643 	assert(rearm_dominated_variables.size() == block.dominated_variables.size());
12644 	for (size_t i = 0; i < block.dominated_variables.size(); i++)
12645 	{
12646 		uint32_t var = block.dominated_variables[i];
12647 		get<SPIRVariable>(var).deferred_declaration = rearm_dominated_variables[i];
12648 	}
12649 
12650 	// Just like for deferred declaration, we need to forget about loop variable enable
12651 	// if our block chain is reinstantiated later.
12652 	for (auto &var_id : block.loop_variables)
12653 		get<SPIRVariable>(var_id).loop_variable_enable = false;
12654 }
12655 
12656 void CompilerGLSL::begin_scope()
12657 {
12658 	statement("{");
12659 	indent++;
12660 }
12661 
12662 void CompilerGLSL::end_scope()
12663 {
12664 	if (!indent)
12665 		SPIRV_CROSS_THROW("Popping empty indent stack.");
12666 	indent--;
12667 	statement("}");
12668 }
12669 
12670 void CompilerGLSL::end_scope(const string &trailer)
12671 {
12672 	if (!indent)
12673 		SPIRV_CROSS_THROW("Popping empty indent stack.");
12674 	indent--;
12675 	statement("}", trailer);
12676 }
12677 
12678 void CompilerGLSL::end_scope_decl()
12679 {
12680 	if (!indent)
12681 		SPIRV_CROSS_THROW("Popping empty indent stack.");
12682 	indent--;
12683 	statement("};");
12684 }
12685 
12686 void CompilerGLSL::end_scope_decl(const string &decl)
12687 {
12688 	if (!indent)
12689 		SPIRV_CROSS_THROW("Popping empty indent stack.");
12690 	indent--;
12691 	statement("} ", decl, ";");
12692 }
12693 
12694 void CompilerGLSL::check_function_call_constraints(const uint32_t *args, uint32_t length)
12695 {
12696 	// If our variable is remapped, and we rely on type-remapping information as
12697 	// well, then we cannot pass the variable as a function parameter.
12698 	// Fixing this is non-trivial without stamping out variants of the same function,
12699 	// so for now warn about this and suggest workarounds instead.
12700 	for (uint32_t i = 0; i < length; i++)
12701 	{
12702 		auto *var = maybe_get<SPIRVariable>(args[i]);
12703 		if (!var || !var->remapped_variable)
12704 			continue;
12705 
12706 		auto &type = get<SPIRType>(var->basetype);
12707 		if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData)
12708 		{
12709 			SPIRV_CROSS_THROW("Tried passing a remapped subpassInput variable to a function. "
12710 			                  "This will not work correctly because type-remapping information is lost. "
12711 			                  "To workaround, please consider not passing the subpass input as a function parameter, "
12712 			                  "or use in/out variables instead which do not need type remapping information.");
12713 		}
12714 	}
12715 }
12716 
12717 const Instruction *CompilerGLSL::get_next_instruction_in_block(const Instruction &instr)
12718 {
12719 	// FIXME: This is kind of hacky. There should be a cleaner way.
12720 	auto offset = uint32_t(&instr - current_emitting_block->ops.data());
12721 	if ((offset + 1) < current_emitting_block->ops.size())
12722 		return &current_emitting_block->ops[offset + 1];
12723 	else
12724 		return nullptr;
12725 }
12726 
12727 uint32_t CompilerGLSL::mask_relevant_memory_semantics(uint32_t semantics)
12728 {
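	// Keep only the memory-class bits; ordering bits such as Acquire/Release are masked away,
	// since callers typically only care about which kinds of memory a barrier needs to cover.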
12729 	return semantics & (MemorySemanticsAtomicCounterMemoryMask | MemorySemanticsImageMemoryMask |
12730 	                    MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask |
12731 	                    MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask);
12732 }
12733 
12734 void CompilerGLSL::emit_array_copy(const string &lhs, uint32_t rhs_id, StorageClass, StorageClass)
12735 {
12736 	statement(lhs, " = ", to_expression(rhs_id), ";");
12737 }
12738 
12739 void CompilerGLSL::unroll_array_from_complex_load(uint32_t target_id, uint32_t source_id, std::string &expr)
12740 {
12741 	if (!backend.force_gl_in_out_block)
12742 		return;
12743 	// This path is only relevant for GL backends.
12744 
12745 	auto *var = maybe_get<SPIRVariable>(source_id);
12746 	if (!var)
12747 		return;
12748 
12749 	if (var->storage != StorageClassInput)
12750 		return;
12751 
12752 	auto &type = get_variable_data_type(*var);
12753 	if (type.array.empty())
12754 		return;
12755 
12756 	auto builtin = BuiltIn(get_decoration(var->self, DecorationBuiltIn));
12757 	bool is_builtin = is_builtin_variable(*var) && (builtin == BuiltInPointSize || builtin == BuiltInPosition);
12758 	bool is_tess = is_tessellation_shader();
12759 
12760 	// Tessellation input arrays are special in that they are unsized, so we cannot directly copy from them.
12761 	// We must unroll the array load.
12762 	// For builtins, we couldn't catch this case normally,
12763 	// because this is resolved in the OpAccessChain in most cases.
12764 	// If we load the entire array, we have no choice but to unroll here.
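	// For example (names and the size N are illustrative), loading gl_Position as a whole array becomes:
	//   vec4 _42_unrolled[N];
	//   for (int i = 0; i < int(N); i++) { _42_unrolled[i] = gl_in[i].gl_Position; }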
12765 	if (is_builtin || is_tess)
12766 	{
12767 		auto new_expr = join("_", target_id, "_unrolled");
12768 		statement(variable_decl(type, new_expr, target_id), ";");
12769 		string array_expr;
12770 		if (type.array_size_literal.back())
12771 		{
12772 			array_expr = convert_to_string(type.array.back());
12773 			if (type.array.back() == 0)
12774 				SPIRV_CROSS_THROW("Cannot unroll an array copy from unsized array.");
12775 		}
12776 		else
12777 			array_expr = to_expression(type.array.back());
12778 
12779 		// The array size might be a specialization constant, so use a for-loop instead.
12780 		statement("for (int i = 0; i < int(", array_expr, "); i++)");
12781 		begin_scope();
12782 		if (is_builtin)
12783 			statement(new_expr, "[i] = gl_in[i].", expr, ";");
12784 		else
12785 			statement(new_expr, "[i] = ", expr, "[i];");
12786 		end_scope();
12787 
12788 		expr = move(new_expr);
12789 	}
12790 }
12791 
12792 void CompilerGLSL::bitcast_from_builtin_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type)
12793 {
12794 	auto *var = maybe_get_backing_variable(source_id);
12795 	if (var)
12796 		source_id = var->self;
12797 
12798 	// Only interested in standalone builtin variables.
12799 	if (!has_decoration(source_id, DecorationBuiltIn))
12800 		return;
12801 
12802 	auto builtin = static_cast<BuiltIn>(get_decoration(source_id, DecorationBuiltIn));
12803 	auto expected_type = expr_type.basetype;
12804 
12805 	// TODO: Fill in for more builtins.
12806 	switch (builtin)
12807 	{
12808 	case BuiltInLayer:
12809 	case BuiltInPrimitiveId:
12810 	case BuiltInViewportIndex:
12811 	case BuiltInInstanceId:
12812 	case BuiltInInstanceIndex:
12813 	case BuiltInVertexId:
12814 	case BuiltInVertexIndex:
12815 	case BuiltInSampleId:
12816 	case BuiltInBaseVertex:
12817 	case BuiltInBaseInstance:
12818 	case BuiltInDrawIndex:
12819 	case BuiltInFragStencilRefEXT:
12820 		expected_type = SPIRType::Int;
12821 		break;
12822 
12823 	case BuiltInGlobalInvocationId:
12824 	case BuiltInLocalInvocationId:
12825 	case BuiltInWorkgroupId:
12826 	case BuiltInLocalInvocationIndex:
12827 	case BuiltInWorkgroupSize:
12828 	case BuiltInNumWorkgroups:
12829 		expected_type = SPIRType::UInt;
12830 		break;
12831 
12832 	default:
12833 		break;
12834 	}
12835 
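	// e.g. gl_VertexIndex is a signed int in GLSL, so loading it into a uint-typed SPIR-V result
	// requires an int -> uint bitcast of the loaded expression.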
12836 	if (expected_type != expr_type.basetype)
12837 		expr = bitcast_expression(expr_type, expected_type, expr);
12838 }
12839 
12840 void CompilerGLSL::bitcast_to_builtin_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type)
12841 {
12842 	// Only interested in standalone builtin variables.
12843 	if (!has_decoration(target_id, DecorationBuiltIn))
12844 		return;
12845 
12846 	auto builtin = static_cast<BuiltIn>(get_decoration(target_id, DecorationBuiltIn));
12847 	auto expected_type = expr_type.basetype;
12848 
12849 	// TODO: Fill in for more builtins.
12850 	switch (builtin)
12851 	{
12852 	case BuiltInLayer:
12853 	case BuiltInPrimitiveId:
12854 	case BuiltInViewportIndex:
12855 	case BuiltInFragStencilRefEXT:
12856 		expected_type = SPIRType::Int;
12857 		break;
12858 
12859 	default:
12860 		break;
12861 	}
12862 
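	// e.g. gl_Layer is a signed int in GLSL, so storing a uint expression to it
	// requires a uint -> int bitcast of the value being stored.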
12863 	if (expected_type != expr_type.basetype)
12864 	{
12865 		auto type = expr_type;
12866 		type.basetype = expected_type;
12867 		expr = bitcast_expression(type, expr_type.basetype, expr);
12868 	}
12869 }
12870 
12871 void CompilerGLSL::convert_non_uniform_expression(const SPIRType &type, std::string &expr)
12872 {
12873 	if (*backend.nonuniform_qualifier == '\0')
12874 		return;
12875 
12876 	// Handle SPV_EXT_descriptor_indexing.
12877 	if (type.basetype == SPIRType::Sampler || type.basetype == SPIRType::SampledImage ||
12878 	    type.basetype == SPIRType::Image)
12879 	{
12880 		// The image/sampler ID must be declared as non-uniform.
12881 		// However, it is not legal GLSL to have
12882 		// nonuniformEXT(samplers[index]), so we must move the nonuniform qualifier
12883 		// to the array indexing, like
12884 		// samplers[nonuniformEXT(index)].
12885 		// While the access chain will generally be nonuniformEXT, it's not necessarily so,
12886 		// so we might have to fix up the loaded expression later.
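		// e.g. "samplers[index]" becomes "samplers[nonuniformEXT(index)]".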
12887 
12888 		auto start_array_index = expr.find_first_of('[');
12889 		auto end_array_index = expr.find_last_of(']');
12890 		// Doesn't really make sense to declare a non-arrayed image with nonuniformEXT, but there's
12891 		// nothing we can do here to express that.
12892 		if (start_array_index == string::npos || end_array_index == string::npos || end_array_index < start_array_index)
12893 			return;
12894 
12895 		start_array_index++;
12896 
12897 		expr = join(expr.substr(0, start_array_index), backend.nonuniform_qualifier, "(",
12898 		            expr.substr(start_array_index, end_array_index - start_array_index), ")",
12899 		            expr.substr(end_array_index, string::npos));
12900 	}
12901 }
12902 
12903 void CompilerGLSL::emit_block_hints(const SPIRBlock &)
12904 {
12905 }
12906 
12907 void CompilerGLSL::preserve_alias_on_reset(uint32_t id)
12908 {
12909 	preserved_aliases[id] = get_name(id);
12910 }
12911 
12912 void CompilerGLSL::reset_name_caches()
12913 {
12914 	for (auto &preserved : preserved_aliases)
12915 		set_name(preserved.first, preserved.second);
12916 
12917 	preserved_aliases.clear();
12918 	resource_names.clear();
12919 	block_input_names.clear();
12920 	block_output_names.clear();
12921 	block_ubo_names.clear();
12922 	block_ssbo_names.clear();
12923 	block_names.clear();
12924 	function_overloads.clear();
12925 }
12926 
12927 void CompilerGLSL::fixup_type_alias()
12928 {
12929 	// Due to how some backends work, the "master" type of type_alias must be a block-like type if it exists.
12930 	// FIXME: Multiple alias types which are both block-like will be awkward; for now, it's best to just drop the type
12931 	// alias if the slave type is a block type.
12932 	ir.for_each_typed_id<SPIRType>([&](uint32_t self, SPIRType &type) {
12933 		if (type.type_alias && type_is_block_like(type))
12934 		{
12935 			// Become the master.
12936 			ir.for_each_typed_id<SPIRType>([&](uint32_t other_id, SPIRType &other_type) {
12937 				if (other_id == type.self)
12938 					return;
12939 
12940 				if (other_type.type_alias == type.type_alias)
12941 					other_type.type_alias = type.self;
12942 			});
12943 
12944 			this->get<SPIRType>(type.type_alias).type_alias = self;
12945 			type.type_alias = 0;
12946 		}
12947 	});
12948 
12949 	ir.for_each_typed_id<SPIRType>([&](uint32_t, SPIRType &type) {
12950 		if (type.type_alias && type_is_block_like(type))
12951 		{
12952 			// This is not allowed, drop the type_alias.
12953 			type.type_alias = 0;
12954 		}
12955 		else if (type.type_alias && !type_is_block_like(this->get<SPIRType>(type.type_alias)))
12956 		{
12957 			// If the alias master is not a block-like type, there is no reason to use type aliasing.
12958 			// This case can happen if two structs are declared with the same name, but they are unrelated.
12959 			// Aliases are only used to deal with aliased types for structs which are used in different buffer types
12960 			// which all create a variant of the same struct with different DecorationOffset values.
12961 			type.type_alias = 0;
12962 		}
12963 	});
12964 }
12965 
12966 void CompilerGLSL::reorder_type_alias()
12967 {
12968 	// Reorder declaration of types so that the master of the type alias is always emitted first.
12969 	// We need this in case a type B depends on type A (A must come before in the vector), but A is an alias of a type ABuffer, which
12970 	// means declaration of A doesn't happen (yet), and order would be B, ABuffer and not ABuffer, B. Fix this up here.
12971 	auto loop_lock = ir.create_loop_hard_lock();
12972 
12973 	auto &type_ids = ir.ids_for_type[TypeType];
12974 	for (auto alias_itr = begin(type_ids); alias_itr != end(type_ids); ++alias_itr)
12975 	{
12976 		auto &type = get<SPIRType>(*alias_itr);
12977 		if (type.type_alias != TypeID(0) &&
12978 		    !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked))
12979 		{
12980 			// We will skip declaring this type, so make sure the type_alias type comes before.
12981 			auto master_itr = find(begin(type_ids), end(type_ids), ID(type.type_alias));
12982 			assert(master_itr != end(type_ids));
12983 
12984 			if (alias_itr < master_itr)
12985 			{
12986 				// Must also swap the type order for the constant-type joined array.
12987 				auto &joined_types = ir.ids_for_constant_or_type;
12988 				auto alt_alias_itr = find(begin(joined_types), end(joined_types), *alias_itr);
12989 				auto alt_master_itr = find(begin(joined_types), end(joined_types), *master_itr);
12990 				assert(alt_alias_itr != end(joined_types));
12991 				assert(alt_master_itr != end(joined_types));
12992 
12993 				swap(*alias_itr, *master_itr);
12994 				swap(*alt_alias_itr, *alt_master_itr);
12995 			}
12996 		}
12997 	}
12998 }
12999 
13000 void CompilerGLSL::emit_line_directive(uint32_t file_id, uint32_t line_literal)
13001 {
13002 	// If we are redirecting statements, ignore the line directive.
13003 	// Common case here is continue blocks.
13004 	if (redirect_statement)
13005 		return;
13006 
13007 	if (options.emit_line_directives)
13008 	{
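		// Emits e.g.: #line 42 "foo.frag" (the string file name form is what requires the GL_GOOGLE extension).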
13009 		require_extension_internal("GL_GOOGLE_cpp_style_line_directive");
13010 		statement_no_indent("#line ", line_literal, " \"", get<SPIRString>(file_id).str, "\"");
13011 	}
13012 }
13013 
13014 void CompilerGLSL::propagate_nonuniform_qualifier(uint32_t id)
13015 {
13016 	// SPIR-V might only tag the very last ID with NonUniformEXT, but for codegen,
13017 	// we need to know NonUniformEXT a little earlier, when the resource is actually loaded.
13018 	// Back-propagate the qualifier based on the expression dependency chain.
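	// e.g. for samplers[nonuniformEXT(index)], the intermediate OpLoad and OpSampledImage results must be
	// treated as NonUniform as well, even if only the final ID was decorated in the input SPIR-V.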
13019 
13020 	if (!has_decoration(id, DecorationNonUniformEXT))
13021 	{
13022 		set_decoration(id, DecorationNonUniformEXT);
13023 		force_recompile();
13024 	}
13025 
13026 	auto *e = maybe_get<SPIRExpression>(id);
13027 	auto *combined = maybe_get<SPIRCombinedImageSampler>(id);
13028 	auto *chain = maybe_get<SPIRAccessChain>(id);
13029 	if (e)
13030 	{
13031 		for (auto &expr : e->expression_dependencies)
13032 			propagate_nonuniform_qualifier(expr);
13033 		for (auto &expr : e->implied_read_expressions)
13034 			propagate_nonuniform_qualifier(expr);
13035 	}
13036 	else if (combined)
13037 	{
13038 		propagate_nonuniform_qualifier(combined->image);
13039 		propagate_nonuniform_qualifier(combined->sampler);
13040 	}
13041 	else if (chain)
13042 	{
13043 		for (auto &expr : chain->implied_read_expressions)
13044 			propagate_nonuniform_qualifier(expr);
13045 	}
13046 }
13047