1 /*
2 * Copyright 2015-2021 Arm Limited
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 /*
18 * At your option, you may choose to accept this material under either:
19 * 1. The Apache License, Version 2.0, found at <http://www.apache.org/licenses/LICENSE-2.0>, or
20 * 2. The MIT License, found at <http://opensource.org/licenses/MIT>.
21 * SPDX-License-Identifier: Apache-2.0 OR MIT.
22 */
23
24 #include "spirv_cross.hpp"
25 #include "GLSL.std.450.h"
26 #include "spirv_cfg.hpp"
27 #include "spirv_common.hpp"
28 #include "spirv_parser.hpp"
29 #include <algorithm>
30 #include <cstring>
31 #include <utility>
32
33 using namespace std;
34 using namespace spv;
35 using namespace SPIRV_CROSS_NAMESPACE;
36
Compiler(vector<uint32_t> ir_)37 Compiler::Compiler(vector<uint32_t> ir_)
38 {
39 Parser parser(move(ir_));
40 parser.parse();
41 set_ir(move(parser.get_parsed_ir()));
42 }
43
Compiler(const uint32_t * ir_,size_t word_count)44 Compiler::Compiler(const uint32_t *ir_, size_t word_count)
45 {
46 Parser parser(ir_, word_count);
47 parser.parse();
48 set_ir(move(parser.get_parsed_ir()));
49 }
50
// Construct from already-parsed IR, copying it.
Compiler::Compiler(const ParsedIR &ir_)
{
	set_ir(ir_);
}
55
// Construct from already-parsed IR, taking ownership of it.
Compiler::Compiler(ParsedIR &&ir_)
{
	set_ir(move(ir_));
}
60
// Takes ownership of pre-parsed IR, then runs post-parse fixups
// (workgroup-size specialization constants, global/aliased variable lists).
void Compiler::set_ir(ParsedIR &&ir_)
{
	ir = move(ir_);
	parse_fixup();
}
66
// Copies pre-parsed IR, then runs the same post-parse fixups as the move overload.
void Compiler::set_ir(const ParsedIR &ir_)
{
	ir = ir_;
	parse_fixup();
}
72
// Base-class compile() is a stub; concrete backends override it to emit
// actual shader source text.
string Compiler::compile()
{
	return "";
}
77
variable_storage_is_aliased(const SPIRVariable & v)78 bool Compiler::variable_storage_is_aliased(const SPIRVariable &v)
79 {
80 auto &type = get<SPIRType>(v.basetype);
81 bool ssbo = v.storage == StorageClassStorageBuffer ||
82 ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
83 bool image = type.basetype == SPIRType::Image;
84 bool counter = type.basetype == SPIRType::AtomicCounter;
85 bool buffer_reference = type.storage == StorageClassPhysicalStorageBufferEXT;
86
87 bool is_restrict;
88 if (ssbo)
89 is_restrict = ir.get_buffer_block_flags(v).get(DecorationRestrict);
90 else
91 is_restrict = has_decoration(v.self, DecorationRestrict);
92
93 return !is_restrict && (ssbo || image || counter || buffer_reference);
94 }
95
// Returns true if executing this block has no observable side effects outside
// the current function: no stores to non-function storage, no atomics,
// barriers, geometry-stream emits, ray-tracing builtins, image writes, or
// calls to impure functions. Used to decide whether calls can be elided/reordered.
bool Compiler::block_is_pure(const SPIRBlock &block)
{
	// This is a global side effect of the function.
	if (block.terminator == SPIRBlock::Kill ||
	    block.terminator == SPIRBlock::TerminateRay ||
	    block.terminator == SPIRBlock::IgnoreIntersection)
		return false;

	for (auto &i : block.ops)
	{
		auto ops = stream(i);
		auto op = static_cast<Op>(i.op);

		switch (op)
		{
		case OpFunctionCall:
		{
			// Purity is transitive through calls.
			uint32_t func = ops[2];
			if (!function_is_pure(get<SPIRFunction>(func)))
				return false;
			break;
		}

		case OpCopyMemory:
		case OpStore:
		{
			// Stores are only pure when the target lives in function-local storage.
			auto &type = expression_type(ops[0]);
			if (type.storage != StorageClassFunction)
				return false;
			break;
		}

		case OpImageWrite:
			return false;

		// Atomics are impure.
		case OpAtomicLoad:
		case OpAtomicStore:
		case OpAtomicExchange:
		case OpAtomicCompareExchange:
		case OpAtomicCompareExchangeWeak:
		case OpAtomicIIncrement:
		case OpAtomicIDecrement:
		case OpAtomicIAdd:
		case OpAtomicISub:
		case OpAtomicSMin:
		case OpAtomicUMin:
		case OpAtomicSMax:
		case OpAtomicUMax:
		case OpAtomicAnd:
		case OpAtomicOr:
		case OpAtomicXor:
			return false;

		// Geometry shader builtins modify global state.
		case OpEndPrimitive:
		case OpEmitStreamVertex:
		case OpEndStreamPrimitive:
		case OpEmitVertex:
			return false;

		// Barriers disallow any reordering, so we should treat blocks with barrier as writing.
		case OpControlBarrier:
		case OpMemoryBarrier:
			return false;

		// Ray tracing builtins are impure.
		case OpReportIntersectionKHR:
		case OpIgnoreIntersectionNV:
		case OpTerminateRayNV:
		case OpTraceNV:
		case OpTraceRayKHR:
		case OpExecuteCallableNV:
		case OpExecuteCallableKHR:
			return false;

		// OpExtInst is potentially impure depending on extension, but GLSL builtins are at least pure.

		case OpDemoteToHelperInvocationEXT:
			// This is a global side effect of the function.
			return false;

		default:
			break;
		}
	}

	return true;
}
185
to_name(uint32_t id,bool allow_alias) const186 string Compiler::to_name(uint32_t id, bool allow_alias) const
187 {
188 if (allow_alias && ir.ids[id].get_type() == TypeType)
189 {
190 // If this type is a simple alias, emit the
191 // name of the original type instead.
192 // We don't want to override the meta alias
193 // as that can be overridden by the reflection APIs after parse.
194 auto &type = get<SPIRType>(id);
195 if (type.type_alias)
196 {
197 // If the alias master has been specially packed, we will have emitted a clean variant as well,
198 // so skip the name aliasing here.
199 if (!has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked))
200 return to_name(type.type_alias);
201 }
202 }
203
204 auto &alias = ir.get_name(id);
205 if (alias.empty())
206 return join("_", id);
207 else
208 return alias;
209 }
210
function_is_pure(const SPIRFunction & func)211 bool Compiler::function_is_pure(const SPIRFunction &func)
212 {
213 for (auto block : func.blocks)
214 {
215 if (!block_is_pure(get<SPIRBlock>(block)))
216 {
217 //fprintf(stderr, "Function %s is impure!\n", to_name(func.self).c_str());
218 return false;
219 }
220 }
221
222 //fprintf(stderr, "Function %s is pure!\n", to_name(func.self).c_str());
223 return true;
224 }
225
// Walks a block and records `id` as a dependee of every non-function-local
// variable the block loads from (recursing through calls), so that later
// writes to those variables invalidate the expression `id`.
void Compiler::register_global_read_dependencies(const SPIRBlock &block, uint32_t id)
{
	for (auto &i : block.ops)
	{
		auto ops = stream(i);
		auto op = static_cast<Op>(i.op);

		switch (op)
		{
		case OpFunctionCall:
		{
			// Reads performed inside a callee count as reads here.
			uint32_t func = ops[2];
			register_global_read_dependencies(get<SPIRFunction>(func), id);
			break;
		}

		case OpLoad:
		case OpImageRead:
		{
			// If we're in a storage class which does not get invalidated, adding dependencies here is no big deal.
			auto *var = maybe_get_backing_variable(ops[2]);
			if (var && var->storage != StorageClassFunction)
			{
				auto &type = get<SPIRType>(var->basetype);

				// InputTargets are immutable.
				// NOTE(review): as written, `basetype != Image` makes the whole
				// condition false for every image type, so all image loads (not
				// only subpass inputs) skip dependency registration — presumably
				// intentional; confirm against upstream intent.
				if (type.basetype != SPIRType::Image && type.image.dim != DimSubpassData)
					var->dependees.push_back(id);
			}
			break;
		}

		default:
			break;
		}
	}
}
263
register_global_read_dependencies(const SPIRFunction & func,uint32_t id)264 void Compiler::register_global_read_dependencies(const SPIRFunction &func, uint32_t id)
265 {
266 for (auto block : func.blocks)
267 register_global_read_dependencies(get<SPIRBlock>(block), id);
268 }
269
maybe_get_backing_variable(uint32_t chain)270 SPIRVariable *Compiler::maybe_get_backing_variable(uint32_t chain)
271 {
272 auto *var = maybe_get<SPIRVariable>(chain);
273 if (!var)
274 {
275 auto *cexpr = maybe_get<SPIRExpression>(chain);
276 if (cexpr)
277 var = maybe_get<SPIRVariable>(cexpr->loaded_from);
278
279 auto *access_chain = maybe_get<SPIRAccessChain>(chain);
280 if (access_chain)
281 var = maybe_get<SPIRVariable>(access_chain->loaded_from);
282 }
283
284 return var;
285 }
286
// Determines the storage class an expression effectively lives in.
// Forwarded access chains / loads keep the backing variable's storage class,
// but an expression lowered to a real temporary has lost its address-space
// qualifiers and must fall back to the type's storage.
StorageClass Compiler::get_expression_effective_storage_class(uint32_t ptr)
{
	auto *var = maybe_get_backing_variable(ptr);

	// If the expression has been lowered to a temporary, we need to use the Generic storage class.
	// We're looking for the effective storage class of a given expression.
	// An access chain or forwarded OpLoads from such access chains
	// will generally have the storage class of the underlying variable, but if the load was not forwarded
	// we have lost any address space qualifiers.
	bool forced_temporary = ir.ids[ptr].get_type() == TypeExpression && !get<SPIRExpression>(ptr).access_chain &&
	                        (forced_temporaries.count(ptr) != 0 || forwarded_temporaries.count(ptr) == 0);

	if (var && !forced_temporary)
	{
		// Normalize SSBOs to StorageBuffer here.
		if (var->storage == StorageClassUniform &&
		    has_decoration(get<SPIRType>(var->basetype).self, DecorationBufferBlock))
			return StorageClassStorageBuffer;
		else
			return var->storage;
	}
	else
		return expression_type(ptr).storage;
}
311
register_read(uint32_t expr,uint32_t chain,bool forwarded)312 void Compiler::register_read(uint32_t expr, uint32_t chain, bool forwarded)
313 {
314 auto &e = get<SPIRExpression>(expr);
315 auto *var = maybe_get_backing_variable(chain);
316
317 if (var)
318 {
319 e.loaded_from = var->self;
320
321 // If the backing variable is immutable, we do not need to depend on the variable.
322 if (forwarded && !is_immutable(var->self))
323 var->dependees.push_back(e.self);
324
325 // If we load from a parameter, make sure we create "inout" if we also write to the parameter.
326 // The default is "in" however, so we never invalidate our compilation by reading.
327 if (var && var->parameter)
328 var->parameter->read_count++;
329 }
330 }
331
register_write(uint32_t chain)332 void Compiler::register_write(uint32_t chain)
333 {
334 auto *var = maybe_get<SPIRVariable>(chain);
335 if (!var)
336 {
337 // If we're storing through an access chain, invalidate the backing variable instead.
338 auto *expr = maybe_get<SPIRExpression>(chain);
339 if (expr && expr->loaded_from)
340 var = maybe_get<SPIRVariable>(expr->loaded_from);
341
342 auto *access_chain = maybe_get<SPIRAccessChain>(chain);
343 if (access_chain && access_chain->loaded_from)
344 var = maybe_get<SPIRVariable>(access_chain->loaded_from);
345 }
346
347 auto &chain_type = expression_type(chain);
348
349 if (var)
350 {
351 bool check_argument_storage_qualifier = true;
352 auto &type = expression_type(chain);
353
354 // If our variable is in a storage class which can alias with other buffers,
355 // invalidate all variables which depend on aliased variables. And if this is a
356 // variable pointer, then invalidate all variables regardless.
357 if (get_variable_data_type(*var).pointer)
358 {
359 flush_all_active_variables();
360
361 if (type.pointer_depth == 1)
362 {
363 // We have a backing variable which is a pointer-to-pointer type.
364 // We are storing some data through a pointer acquired through that variable,
365 // but we are not writing to the value of the variable itself,
366 // i.e., we are not modifying the pointer directly.
367 // If we are storing a non-pointer type (pointer_depth == 1),
368 // we know that we are storing some unrelated data.
369 // A case here would be
370 // void foo(Foo * const *arg) {
371 // Foo *bar = *arg;
372 // bar->unrelated = 42;
373 // }
374 // arg, the argument is constant.
375 check_argument_storage_qualifier = false;
376 }
377 }
378
379 if (type.storage == StorageClassPhysicalStorageBufferEXT || variable_storage_is_aliased(*var))
380 flush_all_aliased_variables();
381 else if (var)
382 flush_dependees(*var);
383
384 // We tried to write to a parameter which is not marked with out qualifier, force a recompile.
385 if (check_argument_storage_qualifier && var->parameter && var->parameter->write_count == 0)
386 {
387 var->parameter->write_count++;
388 force_recompile();
389 }
390 }
391 else if (chain_type.pointer)
392 {
393 // If we stored through a variable pointer, then we don't know which
394 // variable we stored to. So *all* expressions after this point need to
395 // be invalidated.
396 // FIXME: If we can prove that the variable pointer will point to
397 // only certain variables, we can invalidate only those.
398 flush_all_active_variables();
399 }
400
401 // If chain_type.pointer is false, we're not writing to memory backed variables, but temporaries instead.
402 // This can happen in copy_logical_type where we unroll complex reads and writes to temporaries.
403 }
404
flush_dependees(SPIRVariable & var)405 void Compiler::flush_dependees(SPIRVariable &var)
406 {
407 for (auto expr : var.dependees)
408 invalid_expressions.insert(expr);
409 var.dependees.clear();
410 }
411
flush_all_aliased_variables()412 void Compiler::flush_all_aliased_variables()
413 {
414 for (auto aliased : aliased_variables)
415 flush_dependees(get<SPIRVariable>(aliased));
416 }
417
flush_all_atomic_capable_variables()418 void Compiler::flush_all_atomic_capable_variables()
419 {
420 for (auto global : global_variables)
421 flush_dependees(get<SPIRVariable>(global));
422 flush_all_aliased_variables();
423 }
424
flush_control_dependent_expressions(uint32_t block_id)425 void Compiler::flush_control_dependent_expressions(uint32_t block_id)
426 {
427 auto &block = get<SPIRBlock>(block_id);
428 for (auto &expr : block.invalidate_expressions)
429 invalid_expressions.insert(expr);
430 block.invalidate_expressions.clear();
431 }
432
flush_all_active_variables()433 void Compiler::flush_all_active_variables()
434 {
435 // Invalidate all temporaries we read from variables in this block since they were forwarded.
436 // Invalidate all temporaries we read from globals.
437 for (auto &v : current_function->local_variables)
438 flush_dependees(get<SPIRVariable>(v));
439 for (auto &arg : current_function->arguments)
440 flush_dependees(get<SPIRVariable>(arg.id));
441 for (auto global : global_variables)
442 flush_dependees(get<SPIRVariable>(global));
443
444 flush_all_aliased_variables();
445 }
446
// Maps an expression-like ID of any kind (variable, expression, constant,
// spec-op, undef, combined sampler, access chain) to its type ID.
// Throws for IDs which have no meaningful type.
uint32_t Compiler::expression_type_id(uint32_t id) const
{
	switch (ir.ids[id].get_type())
	{
	case TypeVariable:
		return get<SPIRVariable>(id).basetype;

	case TypeExpression:
		return get<SPIRExpression>(id).expression_type;

	case TypeConstant:
		return get<SPIRConstant>(id).constant_type;

	case TypeConstantOp:
		return get<SPIRConstantOp>(id).basetype;

	case TypeUndef:
		return get<SPIRUndef>(id).basetype;

	case TypeCombinedImageSampler:
		return get<SPIRCombinedImageSampler>(id).combined_type;

	case TypeAccessChain:
		return get<SPIRAccessChain>(id).basetype;

	default:
		SPIRV_CROSS_THROW("Cannot resolve expression type.");
	}
}
476
// Convenience wrapper: resolve the expression's type ID and dereference it.
const SPIRType &Compiler::expression_type(uint32_t id) const
{
	return get<SPIRType>(expression_type_id(id));
}
481
expression_is_lvalue(uint32_t id) const482 bool Compiler::expression_is_lvalue(uint32_t id) const
483 {
484 auto &type = expression_type(id);
485 switch (type.basetype)
486 {
487 case SPIRType::SampledImage:
488 case SPIRType::Image:
489 case SPIRType::Sampler:
490 return false;
491
492 default:
493 return true;
494 }
495 }
496
is_immutable(uint32_t id) const497 bool Compiler::is_immutable(uint32_t id) const
498 {
499 if (ir.ids[id].get_type() == TypeVariable)
500 {
501 auto &var = get<SPIRVariable>(id);
502
503 // Anything we load from the UniformConstant address space is guaranteed to be immutable.
504 bool pointer_to_const = var.storage == StorageClassUniformConstant;
505 return pointer_to_const || var.phi_variable || !expression_is_lvalue(id);
506 }
507 else if (ir.ids[id].get_type() == TypeAccessChain)
508 return get<SPIRAccessChain>(id).immutable;
509 else if (ir.ids[id].get_type() == TypeExpression)
510 return get<SPIRExpression>(id).immutable;
511 else if (ir.ids[id].get_type() == TypeConstant || ir.ids[id].get_type() == TypeConstantOp ||
512 ir.ids[id].get_type() == TypeUndef)
513 return true;
514 else
515 return false;
516 }
517
storage_class_is_interface(spv::StorageClass storage)518 static inline bool storage_class_is_interface(spv::StorageClass storage)
519 {
520 switch (storage)
521 {
522 case StorageClassInput:
523 case StorageClassOutput:
524 case StorageClassUniform:
525 case StorageClassUniformConstant:
526 case StorageClassAtomicCounter:
527 case StorageClassPushConstant:
528 case StorageClassStorageBuffer:
529 return true;
530
531 default:
532 return false;
533 }
534 }
535
is_hidden_variable(const SPIRVariable & var,bool include_builtins) const536 bool Compiler::is_hidden_variable(const SPIRVariable &var, bool include_builtins) const
537 {
538 if ((is_builtin_variable(var) && !include_builtins) || var.remapped_variable)
539 return true;
540
541 // Combined image samplers are always considered active as they are "magic" variables.
542 if (find_if(begin(combined_image_samplers), end(combined_image_samplers), [&var](const CombinedImageSampler &samp) {
543 return samp.combined_id == var.self;
544 }) != end(combined_image_samplers))
545 {
546 return false;
547 }
548
549 bool hidden = false;
550 if (check_active_interface_variables && storage_class_is_interface(var.storage))
551 hidden = active_interface_variables.find(var.self) == end(active_interface_variables);
552 return hidden;
553 }
554
is_builtin_type(const SPIRType & type) const555 bool Compiler::is_builtin_type(const SPIRType &type) const
556 {
557 auto *type_meta = ir.find_meta(type.self);
558
559 // We can have builtin structs as well. If one member of a struct is builtin, the struct must also be builtin.
560 if (type_meta)
561 for (auto &m : type_meta->members)
562 if (m.builtin)
563 return true;
564
565 return false;
566 }
567
is_builtin_variable(const SPIRVariable & var) const568 bool Compiler::is_builtin_variable(const SPIRVariable &var) const
569 {
570 auto *m = ir.find_meta(var.self);
571
572 if (var.compat_builtin || (m && m->decoration.builtin))
573 return true;
574 else
575 return is_builtin_type(get<SPIRType>(var.basetype));
576 }
577
is_member_builtin(const SPIRType & type,uint32_t index,BuiltIn * builtin) const578 bool Compiler::is_member_builtin(const SPIRType &type, uint32_t index, BuiltIn *builtin) const
579 {
580 auto *type_meta = ir.find_meta(type.self);
581
582 if (type_meta)
583 {
584 auto &memb = type_meta->members;
585 if (index < memb.size() && memb[index].builtin)
586 {
587 if (builtin)
588 *builtin = memb[index].builtin_type;
589 return true;
590 }
591 }
592
593 return false;
594 }
595
// Scalar: non-struct with a single component and a single column.
bool Compiler::is_scalar(const SPIRType &type) const
{
	return type.basetype != SPIRType::Struct && type.vecsize == 1 && type.columns == 1;
}
600
// Vector: multiple components, single column.
bool Compiler::is_vector(const SPIRType &type) const
{
	return type.vecsize > 1 && type.columns == 1;
}
605
// Matrix: multiple components and multiple columns.
bool Compiler::is_matrix(const SPIRType &type) const
{
	return type.vecsize > 1 && type.columns > 1;
}
610
// Array: any non-empty array-dimension list.
bool Compiler::is_array(const SPIRType &type) const
{
	return !type.array.empty();
}
615
// Reflect all resources, without filtering by an active-variable set.
ShaderResources Compiler::get_shader_resources() const
{
	return get_shader_resources(nullptr);
}
620
// Reflect only the resources whose variables appear in `active_variables`
// (as produced by get_active_interface_variables()).
ShaderResources Compiler::get_shader_resources(const unordered_set<VariableID> &active_variables) const
{
	return get_shader_resources(&active_variables);
}
625
// Opcode visitor which collects every interface variable referenced by the
// traversed instructions into `variables`. Each case locates the operand
// position(s) which may name a variable; length checks reject structurally
// invalid SPIR-V (returning false aborts the traversal).
bool Compiler::InterfaceVariableAccessHandler::handle(Op opcode, const uint32_t *args, uint32_t length)
{
	uint32_t variable = 0;
	switch (opcode)
	{
	// Need this first, otherwise, GCC complains about unhandled switch statements.
	default:
		break;

	case OpFunctionCall:
	{
		// Invalid SPIR-V.
		if (length < 3)
			return false;

		// Every call argument may be an interface variable.
		uint32_t count = length - 3;
		args += 3;
		for (uint32_t i = 0; i < count; i++)
		{
			auto *var = compiler.maybe_get<SPIRVariable>(args[i]);
			if (var && storage_class_is_interface(var->storage))
				variables.insert(args[i]);
		}
		break;
	}

	case OpSelect:
	{
		// Invalid SPIR-V.
		if (length < 5)
			return false;

		// Both select operands (and the condition) may be variables.
		uint32_t count = length - 3;
		args += 3;
		for (uint32_t i = 0; i < count; i++)
		{
			auto *var = compiler.maybe_get<SPIRVariable>(args[i]);
			if (var && storage_class_is_interface(var->storage))
				variables.insert(args[i]);
		}
		break;
	}

	case OpPhi:
	{
		// Invalid SPIR-V.
		if (length < 2)
			return false;

		// Phi operands come in (value, parent-block) pairs; only values matter.
		uint32_t count = length - 2;
		args += 2;
		for (uint32_t i = 0; i < count; i += 2)
		{
			auto *var = compiler.maybe_get<SPIRVariable>(args[i]);
			if (var && storage_class_is_interface(var->storage))
				variables.insert(args[i]);
		}
		break;
	}

	case OpAtomicStore:
	case OpStore:
		// Invalid SPIR-V.
		if (length < 1)
			return false;
		variable = args[0];
		break;

	case OpCopyMemory:
	{
		if (length < 2)
			return false;

		// Both source and destination pointers may be interface variables.
		auto *var = compiler.maybe_get<SPIRVariable>(args[0]);
		if (var && storage_class_is_interface(var->storage))
			variables.insert(args[0]);

		var = compiler.maybe_get<SPIRVariable>(args[1]);
		if (var && storage_class_is_interface(var->storage))
			variables.insert(args[1]);
		break;
	}

	case OpExtInst:
	{
		if (length < 5)
			return false;
		// Only extended instructions which take a pointer to an interface
		// variable (the interpolate-at family) are relevant here.
		auto &extension_set = compiler.get<SPIRExtension>(args[2]);
		switch (extension_set.ext)
		{
		case SPIRExtension::GLSL:
		{
			auto op = static_cast<GLSLstd450>(args[3]);

			switch (op)
			{
			case GLSLstd450InterpolateAtCentroid:
			case GLSLstd450InterpolateAtSample:
			case GLSLstd450InterpolateAtOffset:
			{
				auto *var = compiler.maybe_get<SPIRVariable>(args[4]);
				if (var && storage_class_is_interface(var->storage))
					variables.insert(args[4]);
				break;
			}

			default:
				break;
			}
			break;
		}
		case SPIRExtension::SPV_AMD_shader_explicit_vertex_parameter:
		{
			enum AMDShaderExplicitVertexParameter
			{
				InterpolateAtVertexAMD = 1
			};

			auto op = static_cast<AMDShaderExplicitVertexParameter>(args[3]);

			switch (op)
			{
			case InterpolateAtVertexAMD:
			{
				auto *var = compiler.maybe_get<SPIRVariable>(args[4]);
				if (var && storage_class_is_interface(var->storage))
					variables.insert(args[4]);
				break;
			}

			default:
				break;
			}
			break;
		}
		default:
			break;
		}
		break;
	}

	// All of these read or write through a pointer held in operand word 2.
	case OpAccessChain:
	case OpInBoundsAccessChain:
	case OpPtrAccessChain:
	case OpLoad:
	case OpCopyObject:
	case OpImageTexelPointer:
	case OpAtomicLoad:
	case OpAtomicExchange:
	case OpAtomicCompareExchange:
	case OpAtomicCompareExchangeWeak:
	case OpAtomicIIncrement:
	case OpAtomicIDecrement:
	case OpAtomicIAdd:
	case OpAtomicISub:
	case OpAtomicSMin:
	case OpAtomicUMin:
	case OpAtomicSMax:
	case OpAtomicUMax:
	case OpAtomicAnd:
	case OpAtomicOr:
	case OpAtomicXor:
	case OpArrayLength:
		// Invalid SPIR-V.
		if (length < 3)
			return false;
		variable = args[2];
		break;
	}

	// Shared tail for the single-variable cases above.
	if (variable)
	{
		auto *var = compiler.maybe_get<SPIRVariable>(variable);
		if (var && storage_class_is_interface(var->storage))
			variables.insert(variable);
	}
	return true;
}
804
// Computes the set of interface variables actually used by the default entry
// point, by traversing all reachable opcodes with InterfaceVariableAccessHandler.
// Certain variables are force-included even when never referenced (see below).
unordered_set<VariableID> Compiler::get_active_interface_variables() const
{
	// Traverse the call graph and find all interface variables which are in use.
	unordered_set<VariableID> variables;
	InterfaceVariableAccessHandler handler(*this, variables);
	traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);

	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
		if (var.storage != StorageClassOutput)
			return;
		if (!interface_variable_exists_in_entry_point(var.self))
			return;

		// An output variable which is just declared (but uninitialized) might be read by subsequent stages
		// so we should force-enable these outputs,
		// since compilation will fail if a subsequent stage attempts to read from the variable in question.
		// Also, make sure we preserve output variables which are only initialized, but never accessed by any code.
		if (var.initializer != ID(0) || get_execution_model() != ExecutionModelFragment)
			variables.insert(var.self);
	});

	// If we needed to create one, we'll need it.
	if (dummy_sampler_id)
		variables.insert(dummy_sampler_id);

	return variables;
}
832
set_enabled_interface_variables(std::unordered_set<VariableID> active_variables)833 void Compiler::set_enabled_interface_variables(std::unordered_set<VariableID> active_variables)
834 {
835 active_interface_variables = move(active_variables);
836 check_active_interface_variables = true;
837 }
838
// Core reflection routine: walks every variable in the module and buckets it
// into the appropriate ShaderResources list based on storage class, type and
// decorations. When `active_variables` is non-null, variables outside the set
// are skipped.
ShaderResources Compiler::get_shader_resources(const unordered_set<VariableID> *active_variables) const
{
	ShaderResources res;

	bool ssbo_instance_name = reflection_ssbo_instance_name_is_significant();

	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
		auto &type = this->get<SPIRType>(var.basetype);

		// It is possible for uniform storage classes to be passed as function parameters, so detect
		// that. To detect function parameters, check of StorageClass of variable is function scope.
		if (var.storage == StorageClassFunction || !type.pointer || is_builtin_variable(var))
			return;

		if (active_variables && active_variables->find(var.self) == end(*active_variables))
			return;

		// Input
		if (var.storage == StorageClassInput && interface_variable_exists_in_entry_point(var.self))
		{
			// Block-decorated inputs use the remapped block name; loose inputs use their own name.
			if (has_decoration(type.self, DecorationBlock))
			{
				res.stage_inputs.push_back(
				    { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, false) });
			}
			else
				res.stage_inputs.push_back({ var.self, var.basetype, type.self, get_name(var.self) });
		}
		// Subpass inputs
		else if (var.storage == StorageClassUniformConstant && type.image.dim == DimSubpassData)
		{
			res.subpass_inputs.push_back({ var.self, var.basetype, type.self, get_name(var.self) });
		}
		// Outputs
		else if (var.storage == StorageClassOutput && interface_variable_exists_in_entry_point(var.self))
		{
			if (has_decoration(type.self, DecorationBlock))
			{
				res.stage_outputs.push_back(
				    { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, false) });
			}
			else
				res.stage_outputs.push_back({ var.self, var.basetype, type.self, get_name(var.self) });
		}
		// UBOs
		else if (type.storage == StorageClassUniform && has_decoration(type.self, DecorationBlock))
		{
			res.uniform_buffers.push_back(
			    { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, false) });
		}
		// Old way to declare SSBOs.
		else if (type.storage == StorageClassUniform && has_decoration(type.self, DecorationBufferBlock))
		{
			res.storage_buffers.push_back(
			    { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, ssbo_instance_name) });
		}
		// Modern way to declare SSBOs.
		else if (type.storage == StorageClassStorageBuffer)
		{
			res.storage_buffers.push_back(
			    { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, ssbo_instance_name) });
		}
		// Push constant blocks
		else if (type.storage == StorageClassPushConstant)
		{
			// There can only be one push constant block, but keep the vector in case this restriction is lifted
			// in the future.
			res.push_constant_buffers.push_back({ var.self, var.basetype, type.self, get_name(var.self) });
		}
		// Images (image.sampled == 2 means storage image per SPIR-V)
		else if (type.storage == StorageClassUniformConstant && type.basetype == SPIRType::Image &&
		         type.image.sampled == 2)
		{
			res.storage_images.push_back({ var.self, var.basetype, type.self, get_name(var.self) });
		}
		// Separate images (image.sampled == 1 means sampled image without sampler)
		else if (type.storage == StorageClassUniformConstant && type.basetype == SPIRType::Image &&
		         type.image.sampled == 1)
		{
			res.separate_images.push_back({ var.self, var.basetype, type.self, get_name(var.self) });
		}
		// Separate samplers
		else if (type.storage == StorageClassUniformConstant && type.basetype == SPIRType::Sampler)
		{
			res.separate_samplers.push_back({ var.self, var.basetype, type.self, get_name(var.self) });
		}
		// Textures
		else if (type.storage == StorageClassUniformConstant && type.basetype == SPIRType::SampledImage)
		{
			res.sampled_images.push_back({ var.self, var.basetype, type.self, get_name(var.self) });
		}
		// Atomic counters
		else if (type.storage == StorageClassAtomicCounter)
		{
			res.atomic_counters.push_back({ var.self, var.basetype, type.self, get_name(var.self) });
		}
		// Acceleration structures
		else if (type.storage == StorageClassUniformConstant && type.basetype == SPIRType::AccelerationStructure)
		{
			res.acceleration_structures.push_back({ var.self, var.basetype, type.self, get_name(var.self) });
		}
	});

	return res;
}
944
type_is_block_like(const SPIRType & type) const945 bool Compiler::type_is_block_like(const SPIRType &type) const
946 {
947 if (type.basetype != SPIRType::Struct)
948 return false;
949
950 if (has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock))
951 {
952 return true;
953 }
954
955 // Block-like types may have Offset decorations.
956 for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
957 if (has_member_decoration(type.self, i, DecorationOffset))
958 return true;
959
960 return false;
961 }
962
// Post-parse bookkeeping:
// - Propagates the WorkgroupSize builtin spec-constant into every entry point.
// - Builds the global_variables list (Private/Workgroup/Output storage).
// - Builds the aliased_variables list (see variable_storage_is_aliased()).
void Compiler::parse_fixup()
{
	// Figure out specialization constants for work group sizes.
	for (auto id_ : ir.ids_for_constant_or_variable)
	{
		auto &id = ir.ids[id_];

		if (id.get_type() == TypeConstant)
		{
			auto &c = id.get<SPIRConstant>();
			if (ir.meta[c.self].decoration.builtin && ir.meta[c.self].decoration.builtin_type == BuiltInWorkgroupSize)
			{
				// In current SPIR-V, there can be just one constant like this.
				// All entry points will receive the constant value.
				for (auto &entry : ir.entry_points)
				{
					entry.second.workgroup_size.constant = c.self;
					entry.second.workgroup_size.x = c.scalar(0, 0);
					entry.second.workgroup_size.y = c.scalar(0, 1);
					entry.second.workgroup_size.z = c.scalar(0, 2);
				}
			}
		}
		else if (id.get_type() == TypeVariable)
		{
			auto &var = id.get<SPIRVariable>();
			if (var.storage == StorageClassPrivate || var.storage == StorageClassWorkgroup ||
			    var.storage == StorageClassOutput)
				global_variables.push_back(var.self);
			if (variable_storage_is_aliased(var))
				aliased_variables.push_back(var.self);
		}
	}
}
997
// Ensures `name` is unique against two caches and records it in the primary
// cache, rewriting it in place with a numeric suffix on collision.
// Empty names are left untouched (they get auto-generated names elsewhere).
void Compiler::update_name_cache(unordered_set<string> &cache_primary, const unordered_set<string> &cache_secondary,
                                 string &name)
{
	if (name.empty())
		return;

	// A name collides if it exists in either cache (the secondary check is
	// skipped when both references are the same set).
	const auto find_name = [&](const string &n) -> bool {
		if (cache_primary.find(n) != end(cache_primary))
			return true;

		if (&cache_primary != &cache_secondary)
			if (cache_secondary.find(n) != end(cache_secondary))
				return true;

		return false;
	};

	const auto insert_name = [&](const string &n) { cache_primary.insert(n); };

	if (!find_name(name))
	{
		insert_name(name);
		return;
	}

	uint32_t counter = 0;
	auto tmpname = name;

	bool use_linked_underscore = true;

	if (tmpname == "_")
	{
		// We cannot just append numbers, as we will end up creating internally reserved names.
		// Make it like _0_<counter> instead.
		tmpname += "0";
	}
	else if (tmpname.back() == '_')
	{
		// The last_character is an underscore, so we don't need to link in underscore.
		// This would violate double underscore rules.
		use_linked_underscore = false;
	}

	// If there is a collision (very rare),
	// keep tacking on extra identifier until it's unique.
	do
	{
		counter++;
		name = tmpname + (use_linked_underscore ? "_" : "") + convert_to_string(counter);
	} while (find_name(name));
	insert_name(name);
}
1050
// Convenience overload: primary and secondary caches are the same set.
void Compiler::update_name_cache(unordered_set<string> &cache, string &name)
{
	update_name_cache(cache, cache, name);
}

// Assigns a debug name to an ID; thin wrapper over ParsedIR.
void Compiler::set_name(ID id, const std::string &name)
{
	ir.set_name(id, name);
}

const SPIRType &Compiler::get_type(TypeID id) const
{
	return get<SPIRType>(id);
}

// Returns the type of the variable itself (typically a pointer type),
// not the pointee.
const SPIRType &Compiler::get_type_from_variable(VariableID id) const
{
	return get<SPIRType>(get<SPIRVariable>(id).basetype);
}

// If type_id refers to a pointer type, returns the ID of the type pointed
// to; otherwise returns type_id unchanged. Strips only one pointer level.
uint32_t Compiler::get_pointee_type_id(uint32_t type_id) const
{
	auto *p_type = &get<SPIRType>(type_id);
	if (p_type->pointer)
	{
		assert(p_type->parent_type);
		type_id = p_type->parent_type;
	}
	return type_id;
}

// Dereferences one level of pointer, if any; non-pointer types pass through.
const SPIRType &Compiler::get_pointee_type(const SPIRType &type) const
{
	auto *p_type = &type;
	if (p_type->pointer)
	{
		assert(p_type->parent_type);
		p_type = &get<SPIRType>(p_type->parent_type);
	}
	return *p_type;
}

const SPIRType &Compiler::get_pointee_type(uint32_t type_id) const
{
	return get_pointee_type(get<SPIRType>(type_id));
}
1097
// The "data type" of a variable is its pointee type. Phi variables are not
// pointers, so their basetype is already the data type.
uint32_t Compiler::get_variable_data_type_id(const SPIRVariable &var) const
{
	if (var.phi_variable)
		return var.basetype;
	return get_pointee_type_id(var.basetype);
}

SPIRType &Compiler::get_variable_data_type(const SPIRVariable &var)
{
	return get<SPIRType>(get_variable_data_type_id(var));
}

const SPIRType &Compiler::get_variable_data_type(const SPIRVariable &var) const
{
	return get<SPIRType>(get_variable_data_type_id(var));
}

// Like get_variable_data_type, but additionally strips one level of array.
SPIRType &Compiler::get_variable_element_type(const SPIRVariable &var)
{
	SPIRType *type = &get_variable_data_type(var);
	if (is_array(*type))
		type = &get<SPIRType>(type->parent_type);
	return *type;
}

const SPIRType &Compiler::get_variable_element_type(const SPIRVariable &var) const
{
	const SPIRType *type = &get_variable_data_type(var);
	if (is_array(*type))
		type = &get<SPIRType>(type->parent_type);
	return *type;
}

// True for sampled (not storage) images and combined image-samplers,
// excluding buffer images.
bool Compiler::is_sampled_image_type(const SPIRType &type)
{
	return (type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage) && type.image.sampled == 1 &&
	       type.image.dim != DimBuffer;
}
1136
// The following accessors are thin wrappers forwarding member/decoration
// bookkeeping to ParsedIR, which owns all meta storage.
void Compiler::set_member_decoration_string(TypeID id, uint32_t index, spv::Decoration decoration,
                                            const std::string &argument)
{
	ir.set_member_decoration_string(id, index, decoration, argument);
}

void Compiler::set_member_decoration(TypeID id, uint32_t index, Decoration decoration, uint32_t argument)
{
	ir.set_member_decoration(id, index, decoration, argument);
}

void Compiler::set_member_name(TypeID id, uint32_t index, const std::string &name)
{
	ir.set_member_name(id, index, name);
}

const std::string &Compiler::get_member_name(TypeID id, uint32_t index) const
{
	return ir.get_member_name(id, index);
}

// Qualified aliases are internal backend names (e.g. flattened access chains);
// these poke ir.meta directly rather than going through ParsedIR helpers.
void Compiler::set_qualified_name(uint32_t id, const string &name)
{
	ir.meta[id].decoration.qualified_alias = name;
}

void Compiler::set_member_qualified_name(uint32_t type_id, uint32_t index, const std::string &name)
{
	// Grow the member meta array on demand so out-of-range indices are valid.
	ir.meta[type_id].members.resize(max(ir.meta[type_id].members.size(), size_t(index) + 1));
	ir.meta[type_id].members[index].qualified_alias = name;
}

// Returns the empty string when the member has no meta entry.
const string &Compiler::get_member_qualified_name(TypeID type_id, uint32_t index) const
{
	auto *m = ir.find_meta(type_id);
	if (m && index < m->members.size())
		return m->members[index].qualified_alias;
	else
		return ir.get_empty_string();
}

uint32_t Compiler::get_member_decoration(TypeID id, uint32_t index, Decoration decoration) const
{
	return ir.get_member_decoration(id, index, decoration);
}

const Bitset &Compiler::get_member_decoration_bitset(TypeID id, uint32_t index) const
{
	return ir.get_member_decoration_bitset(id, index);
}

bool Compiler::has_member_decoration(TypeID id, uint32_t index, Decoration decoration) const
{
	return ir.has_member_decoration(id, index, decoration);
}

void Compiler::unset_member_decoration(TypeID id, uint32_t index, Decoration decoration)
{
	ir.unset_member_decoration(id, index, decoration);
}

void Compiler::set_decoration_string(ID id, spv::Decoration decoration, const std::string &argument)
{
	ir.set_decoration_string(id, decoration, argument);
}

void Compiler::set_decoration(ID id, Decoration decoration, uint32_t argument)
{
	ir.set_decoration(id, decoration, argument);
}
1207
// Extended decorations are SPIRV-Cross-internal annotations (not real SPIR-V
// decorations); they are tracked with a flag bitset plus a value array.
void Compiler::set_extended_decoration(uint32_t id, ExtendedDecorations decoration, uint32_t value)
{
	auto &dec = ir.meta[id].decoration;
	dec.extended.flags.set(decoration);
	dec.extended.values[decoration] = value;
}

void Compiler::set_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration,
                                              uint32_t value)
{
	// Grow member meta on demand so out-of-range indices are valid.
	ir.meta[type].members.resize(max(ir.meta[type].members.size(), size_t(index) + 1));
	auto &dec = ir.meta[type].members[index];
	dec.extended.flags.set(decoration);
	dec.extended.values[decoration] = value;
}
1223
get_default_extended_decoration(ExtendedDecorations decoration)1224 static uint32_t get_default_extended_decoration(ExtendedDecorations decoration)
1225 {
1226 switch (decoration)
1227 {
1228 case SPIRVCrossDecorationResourceIndexPrimary:
1229 case SPIRVCrossDecorationResourceIndexSecondary:
1230 case SPIRVCrossDecorationResourceIndexTertiary:
1231 case SPIRVCrossDecorationResourceIndexQuaternary:
1232 case SPIRVCrossDecorationInterfaceMemberIndex:
1233 return ~(0u);
1234
1235 default:
1236 return 0;
1237 }
1238 }
1239
// Returns the decoration's value, or a type-specific default (see
// get_default_extended_decoration) when it was never set. A missing meta
// entry yields 0 regardless of decoration kind.
uint32_t Compiler::get_extended_decoration(uint32_t id, ExtendedDecorations decoration) const
{
	auto *m = ir.find_meta(id);
	if (!m)
		return 0;

	auto &dec = m->decoration;

	if (!dec.extended.flags.get(decoration))
		return get_default_extended_decoration(decoration);

	return dec.extended.values[decoration];
}

uint32_t Compiler::get_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration) const
{
	auto *m = ir.find_meta(type);
	if (!m)
		return 0;

	if (index >= m->members.size())
		return 0;

	auto &dec = m->members[index];
	if (!dec.extended.flags.get(decoration))
		return get_default_extended_decoration(decoration);
	return dec.extended.values[decoration];
}

bool Compiler::has_extended_decoration(uint32_t id, ExtendedDecorations decoration) const
{
	auto *m = ir.find_meta(id);
	if (!m)
		return false;

	auto &dec = m->decoration;
	return dec.extended.flags.get(decoration);
}

bool Compiler::has_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration) const
{
	auto *m = ir.find_meta(type);
	if (!m)
		return false;

	if (index >= m->members.size())
		return false;

	auto &dec = m->members[index];
	return dec.extended.flags.get(decoration);
}

void Compiler::unset_extended_decoration(uint32_t id, ExtendedDecorations decoration)
{
	auto &dec = ir.meta[id].decoration;
	dec.extended.flags.clear(decoration);
	dec.extended.values[decoration] = 0;
}

void Compiler::unset_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration)
{
	// Grow member meta on demand so out-of-range indices are valid.
	ir.meta[type].members.resize(max(ir.meta[type].members.size(), size_t(index) + 1));
	auto &dec = ir.meta[type].members[index];
	dec.extended.flags.clear(decoration);
	dec.extended.values[decoration] = 0;
}
1306
StorageClass Compiler::get_storage_class(VariableID id) const
{
	return get<SPIRVariable>(id).storage;
}

const std::string &Compiler::get_name(ID id) const
{
	return ir.get_name(id);
}

// Deterministic placeholder name for IDs without a debug name.
const std::string Compiler::get_fallback_name(ID id) const
{
	return join("_", id);
}

// Fallback name for block variables; includes the type ID so that two
// unnamed blocks of different types get distinct names.
const std::string Compiler::get_block_fallback_name(VariableID id) const
{
	auto &var = get<SPIRVariable>(id);
	if (get_name(id).empty())
		return join("_", get<SPIRType>(var.basetype).self, "_", id);
	else
		return get_name(id);
}

// Remaining accessors are thin wrappers forwarding to ParsedIR.
const Bitset &Compiler::get_decoration_bitset(ID id) const
{
	return ir.get_decoration_bitset(id);
}

bool Compiler::has_decoration(ID id, Decoration decoration) const
{
	return ir.has_decoration(id, decoration);
}

const string &Compiler::get_decoration_string(ID id, Decoration decoration) const
{
	return ir.get_decoration_string(id, decoration);
}

const string &Compiler::get_member_decoration_string(TypeID id, uint32_t index, Decoration decoration) const
{
	return ir.get_member_decoration_string(id, index, decoration);
}

uint32_t Compiler::get_decoration(ID id, Decoration decoration) const
{
	return ir.get_decoration(id, decoration);
}

void Compiler::unset_decoration(ID id, Decoration decoration)
{
	ir.unset_decoration(id, decoration);
}
1360
get_binary_offset_for_decoration(VariableID id,spv::Decoration decoration,uint32_t & word_offset) const1361 bool Compiler::get_binary_offset_for_decoration(VariableID id, spv::Decoration decoration, uint32_t &word_offset) const
1362 {
1363 auto *m = ir.find_meta(id);
1364 if (!m)
1365 return false;
1366
1367 auto &word_offsets = m->decoration_word_offset;
1368 auto itr = word_offsets.find(decoration);
1369 if (itr == end(word_offsets))
1370 return false;
1371
1372 word_offset = itr->second;
1373 return true;
1374 }
1375
// Determines whether "block" matches the structured loop shape named by
// "method", so the backend can emit a clean for/while loop instead of a
// generic loop with breaks. Select-based methods match a loop header which
// immediately selects between body and break; MergeToDirectForLoop matches
// an empty loop header branching directly into such a selection block.
// Any OpPhi that would need flushing on the break path disqualifies the block.
bool Compiler::block_is_loop_candidate(const SPIRBlock &block, SPIRBlock::Method method) const
{
	// Tried and failed.
	if (block.disable_block_optimization || block.complex_continue)
		return false;

	if (method == SPIRBlock::MergeToSelectForLoop || method == SPIRBlock::MergeToSelectContinueForLoop)
	{
		// Try to detect common for loop pattern
		// which the code backend can use to create cleaner code.
		// for(;;) { if (cond) { some_body; } else { break; } }
		// is the pattern we're looking for.
		const auto *false_block = maybe_get<SPIRBlock>(block.false_block);
		const auto *true_block = maybe_get<SPIRBlock>(block.true_block);
		const auto *merge_block = maybe_get<SPIRBlock>(block.merge_block);

		// "Is merge" also covers reaching the merge block through empty blocks.
		bool false_block_is_merge = block.false_block == block.merge_block ||
		                            (false_block && merge_block && execution_is_noop(*false_block, *merge_block));

		bool true_block_is_merge = block.true_block == block.merge_block ||
		                           (true_block && merge_block && execution_is_noop(*true_block, *merge_block));

		// Either the true path is the body and the false path breaks, or vice versa.
		bool positive_candidate =
		    block.true_block != block.merge_block && block.true_block != block.self && false_block_is_merge;

		bool negative_candidate =
		    block.false_block != block.merge_block && block.false_block != block.self && true_block_is_merge;

		bool ret = block.terminator == SPIRBlock::Select && block.merge == SPIRBlock::MergeLoop &&
		           (positive_candidate || negative_candidate);

		// For the continue variant, the body side must branch straight to the continue block.
		if (ret && positive_candidate && method == SPIRBlock::MergeToSelectContinueForLoop)
			ret = block.true_block == block.continue_block;
		else if (ret && negative_candidate && method == SPIRBlock::MergeToSelectContinueForLoop)
			ret = block.false_block == block.continue_block;

		// If we have OpPhi which depends on branches which came from our own block,
		// we need to flush phi variables in else block instead of a trivial break,
		// so we cannot assume this is a for loop candidate.
		if (ret)
		{
			for (auto &phi : block.phi_variables)
				if (phi.parent == block.self)
					return false;

			auto *merge = maybe_get<SPIRBlock>(block.merge_block);
			if (merge)
				for (auto &phi : merge->phi_variables)
					if (phi.parent == block.self)
						return false;
		}
		return ret;
	}
	else if (method == SPIRBlock::MergeToDirectForLoop)
	{
		// Empty loop header that just sets up merge target
		// and branches to loop body.
		bool ret = block.terminator == SPIRBlock::Direct && block.merge == SPIRBlock::MergeLoop && block.ops.empty();

		if (!ret)
			return false;

		// The child block does the actual selection between body and break.
		auto &child = get<SPIRBlock>(block.next_block);

		const auto *false_block = maybe_get<SPIRBlock>(child.false_block);
		const auto *true_block = maybe_get<SPIRBlock>(child.true_block);
		const auto *merge_block = maybe_get<SPIRBlock>(block.merge_block);

		bool false_block_is_merge = child.false_block == block.merge_block ||
		                            (false_block && merge_block && execution_is_noop(*false_block, *merge_block));

		bool true_block_is_merge = child.true_block == block.merge_block ||
		                           (true_block && merge_block && execution_is_noop(*true_block, *merge_block));

		bool positive_candidate =
		    child.true_block != block.merge_block && child.true_block != block.self && false_block_is_merge;

		bool negative_candidate =
		    child.false_block != block.merge_block && child.false_block != block.self && true_block_is_merge;

		ret = child.terminator == SPIRBlock::Select && child.merge == SPIRBlock::MergeNone &&
		      (positive_candidate || negative_candidate);

		// If we have OpPhi which depends on branches which came from our own block,
		// we need to flush phi variables in else block instead of a trivial break,
		// so we cannot assume this is a for loop candidate.
		if (ret)
		{
			for (auto &phi : block.phi_variables)
				if (phi.parent == block.self || phi.parent == child.self)
					return false;

			for (auto &phi : child.phi_variables)
				if (phi.parent == block.self)
					return false;

			auto *merge = maybe_get<SPIRBlock>(block.merge_block);
			if (merge)
				for (auto &phi : merge->phi_variables)
					if (phi.parent == block.self || phi.parent == child.false_block)
						return false;
		}

		return ret;
	}
	else
		return false;
}
1484
// True if walking from "from" to "to" is branchless AND performs no work:
// no opcodes are executed and no phi variables need flushing along the way.
bool Compiler::execution_is_noop(const SPIRBlock &from, const SPIRBlock &to) const
{
	if (!execution_is_branchless(from, to))
		return false;

	auto *start = &from;
	for (;;)
	{
		if (start->self == to.self)
			return true;

		// Any opcode in an intermediate block means actual work happens.
		if (!start->ops.empty())
			return false;

		auto &next = get<SPIRBlock>(start->next_block);
		// Flushing phi variables does not count as noop.
		for (auto &phi : next.phi_variables)
			if (phi.parent == start->self)
				return false;

		start = &next;
	}
}
1508
execution_is_branchless(const SPIRBlock & from,const SPIRBlock & to) const1509 bool Compiler::execution_is_branchless(const SPIRBlock &from, const SPIRBlock &to) const
1510 {
1511 auto *start = &from;
1512 for (;;)
1513 {
1514 if (start->self == to.self)
1515 return true;
1516
1517 if (start->terminator == SPIRBlock::Direct && start->merge == SPIRBlock::MergeNone)
1518 start = &get<SPIRBlock>(start->next_block);
1519 else
1520 return false;
1521 }
1522 }
1523
// True if "from" falls straight through to "to" in a single direct branch.
bool Compiler::execution_is_direct_branch(const SPIRBlock &from, const SPIRBlock &to) const
{
	return from.terminator == SPIRBlock::Direct && from.merge == SPIRBlock::MergeNone && from.next_block == to.self;
}
1528
// Classifies a loop's continue block so the backend knows which loop shape
// (while / for / do-while) it can emit, falling back to ComplexLoop when no
// clean structured form applies.
SPIRBlock::ContinueBlockType Compiler::continue_block_type(const SPIRBlock &block) const
{
	// The block was deemed too complex during code emit, pick conservative fallback paths.
	if (block.complex_continue)
		return SPIRBlock::ComplexLoop;

	// In older glslang output continue block can be equal to the loop header.
	// In this case, execution is clearly branchless, so just assume a while loop header here.
	if (block.merge == SPIRBlock::MergeLoop)
		return SPIRBlock::WhileLoop;

	if (block.loop_dominator == BlockID(SPIRBlock::NoDominator))
	{
		// Continue block is never reached from CFG.
		return SPIRBlock::ComplexLoop;
	}

	auto &dominator = get<SPIRBlock>(block.loop_dominator);

	// No work back to the header => while; branchless work => for;
	// otherwise check for the do-while selection pattern.
	if (execution_is_noop(block, dominator))
		return SPIRBlock::WhileLoop;
	else if (execution_is_branchless(block, dominator))
		return SPIRBlock::ForLoop;
	else
	{
		const auto *false_block = maybe_get<SPIRBlock>(block.false_block);
		const auto *true_block = maybe_get<SPIRBlock>(block.true_block);
		const auto *merge_block = maybe_get<SPIRBlock>(dominator.merge_block);

		// If we need to flush Phi in this block, we cannot have a DoWhile loop.
		bool flush_phi_to_false = false_block && flush_phi_required(block.self, block.false_block);
		bool flush_phi_to_true = true_block && flush_phi_required(block.self, block.true_block);
		if (flush_phi_to_false || flush_phi_to_true)
			return SPIRBlock::ComplexLoop;

		// One side loops back to the header, the other side exits to the merge.
		bool positive_do_while = block.true_block == dominator.self &&
		                         (block.false_block == dominator.merge_block ||
		                          (false_block && merge_block && execution_is_noop(*false_block, *merge_block)));

		bool negative_do_while = block.false_block == dominator.self &&
		                         (block.true_block == dominator.merge_block ||
		                          (true_block && merge_block && execution_is_noop(*true_block, *merge_block)));

		if (block.merge == SPIRBlock::MergeNone && block.terminator == SPIRBlock::Select &&
		    (positive_do_while || negative_do_while))
		{
			return SPIRBlock::DoWhileLoop;
		}
		else
			return SPIRBlock::ComplexLoop;
	}
}
1581
// Feeds every opcode in "block" to the handler, recursing into called
// functions when the handler allows it. Returns false as soon as the handler
// rejects an opcode or scope, aborting the traversal.
bool Compiler::traverse_all_reachable_opcodes(const SPIRBlock &block, OpcodeHandler &handler) const
{
	handler.set_current_block(block);
	handler.rearm_current_block(block);

	// Ideally, perhaps traverse the CFG instead of all blocks in order to eliminate dead blocks,
	// but this shouldn't be a problem in practice unless the SPIR-V is doing insane things like recursing
	// inside dead blocks ...
	for (auto &i : block.ops)
	{
		auto ops = stream(i);
		auto op = static_cast<Op>(i.op);

		if (!handler.handle(op, ops, i.length))
			return false;

		if (op == OpFunctionCall)
		{
			auto &func = get<SPIRFunction>(ops[2]);
			if (handler.follow_function_call(func))
			{
				if (!handler.begin_function_scope(ops, i.length))
					return false;
				if (!traverse_all_reachable_opcodes(get<SPIRFunction>(ops[2]), handler))
					return false;
				if (!handler.end_function_scope(ops, i.length))
					return false;

				// The recursion changed the handler's notion of current block; restore it.
				handler.rearm_current_block(block);
			}
		}
	}

	return true;
}
1617
traverse_all_reachable_opcodes(const SPIRFunction & func,OpcodeHandler & handler) const1618 bool Compiler::traverse_all_reachable_opcodes(const SPIRFunction &func, OpcodeHandler &handler) const
1619 {
1620 for (auto block : func.blocks)
1621 if (!traverse_all_reachable_opcodes(get<SPIRBlock>(block), handler))
1622 return false;
1623
1624 return true;
1625 }
1626
type_struct_member_offset(const SPIRType & type,uint32_t index) const1627 uint32_t Compiler::type_struct_member_offset(const SPIRType &type, uint32_t index) const
1628 {
1629 auto *type_meta = ir.find_meta(type.self);
1630 if (type_meta)
1631 {
1632 // Decoration must be set in valid SPIR-V, otherwise throw.
1633 auto &dec = type_meta->members[index];
1634 if (dec.decoration_flags.get(DecorationOffset))
1635 return dec.offset;
1636 else
1637 SPIRV_CROSS_THROW("Struct member does not have Offset set.");
1638 }
1639 else
1640 SPIRV_CROSS_THROW("Struct member does not have Offset set.");
1641 }
1642
type_struct_member_array_stride(const SPIRType & type,uint32_t index) const1643 uint32_t Compiler::type_struct_member_array_stride(const SPIRType &type, uint32_t index) const
1644 {
1645 auto *type_meta = ir.find_meta(type.member_types[index]);
1646 if (type_meta)
1647 {
1648 // Decoration must be set in valid SPIR-V, otherwise throw.
1649 // ArrayStride is part of the array type not OpMemberDecorate.
1650 auto &dec = type_meta->decoration;
1651 if (dec.decoration_flags.get(DecorationArrayStride))
1652 return dec.array_stride;
1653 else
1654 SPIRV_CROSS_THROW("Struct member does not have ArrayStride set.");
1655 }
1656 else
1657 SPIRV_CROSS_THROW("Struct member does not have ArrayStride set.");
1658 }
1659
type_struct_member_matrix_stride(const SPIRType & type,uint32_t index) const1660 uint32_t Compiler::type_struct_member_matrix_stride(const SPIRType &type, uint32_t index) const
1661 {
1662 auto *type_meta = ir.find_meta(type.self);
1663 if (type_meta)
1664 {
1665 // Decoration must be set in valid SPIR-V, otherwise throw.
1666 // MatrixStride is part of OpMemberDecorate.
1667 auto &dec = type_meta->members[index];
1668 if (dec.decoration_flags.get(DecorationMatrixStride))
1669 return dec.matrix_stride;
1670 else
1671 SPIRV_CROSS_THROW("Struct member does not have MatrixStride set.");
1672 }
1673 else
1674 SPIRV_CROSS_THROW("Struct member does not have MatrixStride set.");
1675 }
1676
get_declared_struct_size(const SPIRType & type) const1677 size_t Compiler::get_declared_struct_size(const SPIRType &type) const
1678 {
1679 if (type.member_types.empty())
1680 SPIRV_CROSS_THROW("Declared struct in block cannot be empty.");
1681
1682 uint32_t last = uint32_t(type.member_types.size() - 1);
1683 size_t offset = type_struct_member_offset(type, last);
1684 size_t size = get_declared_struct_member_size(type, last);
1685 return offset + size;
1686 }
1687
// Declared struct size where a trailing runtime array (declared with array
// size 0) is assumed to hold array_size elements.
size_t Compiler::get_declared_struct_size_runtime_array(const SPIRType &type, size_t array_size) const
{
	if (type.member_types.empty())
		SPIRV_CROSS_THROW("Declared struct in block cannot be empty.");

	size_t size = get_declared_struct_size(type);
	auto &last_type = get<SPIRType>(type.member_types.back());
	// NOTE(review): this inspects array[0]/array_size_literal[0] rather than
	// .back(); presumably fine for the 1D runtime arrays valid as block tail
	// members — confirm for multi-dimensional tails.
	if (!last_type.array.empty() && last_type.array_size_literal[0] && last_type.array[0] == 0) // Runtime array
		size += array_size * type_struct_member_array_stride(type, uint32_t(type.member_types.size() - 1));

	return size;
}
1700
// Evaluates an OpSpecConstantOp expression tree down to a scalar 32-bit
// value. Only scalar 32-bit int/uint/bool types are supported — this exists
// mainly to resolve spec-constant-sized arrays. Throws on unsupported
// types/opcodes and on division/remainder by zero.
uint32_t Compiler::evaluate_spec_constant_u32(const SPIRConstantOp &spec) const
{
	auto &result_type = get<SPIRType>(spec.basetype);
	if (result_type.basetype != SPIRType::UInt && result_type.basetype != SPIRType::Int &&
	    result_type.basetype != SPIRType::Boolean)
	{
		SPIRV_CROSS_THROW(
		    "Only 32-bit integers and booleans are currently supported when evaluating specialization constants.\n");
	}

	if (!is_scalar(result_type))
		SPIRV_CROSS_THROW("Spec constant evaluation must be a scalar.\n");

	uint32_t value = 0;

	// Resolves an argument ID: plain constants read directly, nested spec
	// constant ops evaluated recursively.
	const auto eval_u32 = [&](uint32_t id) -> uint32_t {
		auto &type = expression_type(id);
		if (type.basetype != SPIRType::UInt && type.basetype != SPIRType::Int && type.basetype != SPIRType::Boolean)
		{
			SPIRV_CROSS_THROW("Only 32-bit integers and booleans are currently supported when evaluating "
			                  "specialization constants.\n");
		}

		if (!is_scalar(type))
			SPIRV_CROSS_THROW("Spec constant evaluation must be a scalar.\n");
		if (const auto *c = this->maybe_get<SPIRConstant>(id))
			return c->scalar();
		else
			return evaluate_spec_constant_u32(this->get<SPIRConstantOp>(id));
	};

// Expands to a case evaluating both arguments and applying a binary operator.
#define binary_spec_op(op, binary_op)                                          \
	case Op##op:                                                               \
		value = eval_u32(spec.arguments[0]) binary_op eval_u32(spec.arguments[1]); \
		break
// Same, but with both operands cast (e.g. to int32_t for signed comparisons).
#define binary_spec_op_cast(op, binary_op, type)                                                     \
	case Op##op:                                                                                     \
		value = uint32_t(type(eval_u32(spec.arguments[0])) binary_op type(eval_u32(spec.arguments[1]))); \
		break

	// Support the basic opcodes which are typically used when computing array sizes.
	switch (spec.opcode)
	{
		binary_spec_op(IAdd, +);
		binary_spec_op(ISub, -);
		binary_spec_op(IMul, *);
		binary_spec_op(BitwiseAnd, &);
		binary_spec_op(BitwiseOr, |);
		binary_spec_op(BitwiseXor, ^);
		binary_spec_op(LogicalAnd, &);
		binary_spec_op(LogicalOr, |);
		binary_spec_op(ShiftLeftLogical, <<);
		binary_spec_op(ShiftRightLogical, >>);
		binary_spec_op_cast(ShiftRightArithmetic, >>, int32_t);
		binary_spec_op(LogicalEqual, ==);
		binary_spec_op(LogicalNotEqual, !=);
		binary_spec_op(IEqual, ==);
		binary_spec_op(INotEqual, !=);
		binary_spec_op(ULessThan, <);
		binary_spec_op(ULessThanEqual, <=);
		binary_spec_op(UGreaterThan, >);
		binary_spec_op(UGreaterThanEqual, >=);
		binary_spec_op_cast(SLessThan, <, int32_t);
		binary_spec_op_cast(SLessThanEqual, <=, int32_t);
		binary_spec_op_cast(SGreaterThan, >, int32_t);
		binary_spec_op_cast(SGreaterThanEqual, >=, int32_t);
#undef binary_spec_op
#undef binary_spec_op_cast

	case OpLogicalNot:
		value = uint32_t(!eval_u32(spec.arguments[0]));
		break;

	case OpNot:
		value = ~eval_u32(spec.arguments[0]);
		break;

	case OpSNegate:
		value = uint32_t(-int32_t(eval_u32(spec.arguments[0])));
		break;

	case OpSelect:
		value = eval_u32(spec.arguments[0]) ? eval_u32(spec.arguments[1]) : eval_u32(spec.arguments[2]);
		break;

	case OpUMod:
	{
		uint32_t a = eval_u32(spec.arguments[0]);
		uint32_t b = eval_u32(spec.arguments[1]);
		if (b == 0)
			SPIRV_CROSS_THROW("Undefined behavior in UMod, b == 0.\n");
		value = a % b;
		break;
	}

	case OpSRem:
	{
		auto a = int32_t(eval_u32(spec.arguments[0]));
		auto b = int32_t(eval_u32(spec.arguments[1]));
		if (b == 0)
			SPIRV_CROSS_THROW("Undefined behavior in SRem, b == 0.\n");
		value = a % b;
		break;
	}

	case OpSMod:
	{
		auto a = int32_t(eval_u32(spec.arguments[0]));
		auto b = int32_t(eval_u32(spec.arguments[1]));
		if (b == 0)
			SPIRV_CROSS_THROW("Undefined behavior in SMod, b == 0.\n");
		auto v = a % b;

		// Makes sure we match the sign of b, not a.
		if ((b < 0 && v > 0) || (b > 0 && v < 0))
			v += b;
		value = v;
		break;
	}

	case OpUDiv:
	{
		uint32_t a = eval_u32(spec.arguments[0]);
		uint32_t b = eval_u32(spec.arguments[1]);
		if (b == 0)
			SPIRV_CROSS_THROW("Undefined behavior in UDiv, b == 0.\n");
		value = a / b;
		break;
	}

	case OpSDiv:
	{
		auto a = int32_t(eval_u32(spec.arguments[0]));
		auto b = int32_t(eval_u32(spec.arguments[1]));
		if (b == 0)
			SPIRV_CROSS_THROW("Undefined behavior in SDiv, b == 0.\n");
		value = a / b;
		break;
	}

	default:
		SPIRV_CROSS_THROW("Unsupported spec constant opcode for evaluation.\n");
	}

	return value;
}
1847
evaluate_constant_u32(uint32_t id) const1848 uint32_t Compiler::evaluate_constant_u32(uint32_t id) const
1849 {
1850 if (const auto *c = maybe_get<SPIRConstant>(id))
1851 return c->scalar();
1852 else
1853 return evaluate_spec_constant_u32(get<SPIRConstantOp>(id));
1854 }
1855
// Computes the declared size in bytes of struct member 'index' of struct_type,
// honoring ArrayStride/MatrixStride decorations. Throws for opaque types whose
// size is not externally observable (images, samplers, atomic counters, bools).
size_t Compiler::get_declared_struct_member_size(const SPIRType &struct_type, uint32_t index) const
{
	if (struct_type.member_types.empty())
		SPIRV_CROSS_THROW("Declared struct in block cannot be empty.");

	auto &flags = get_member_decoration_bitset(struct_type.self, index);
	auto &type = get<SPIRType>(struct_type.member_types[index]);

	switch (type.basetype)
	{
	case SPIRType::Unknown:
	case SPIRType::Void:
	case SPIRType::Boolean: // Bools are purely logical, and cannot be used for externally visible types.
	case SPIRType::AtomicCounter:
	case SPIRType::Image:
	case SPIRType::SampledImage:
	case SPIRType::Sampler:
		SPIRV_CROSS_THROW("Querying size for object with opaque size.");

	default:
		break;
	}

	if (type.pointer && type.storage == StorageClassPhysicalStorageBuffer)
	{
		// Check if this is a top-level pointer type, and not an array of pointers.
		// A top-level physical storage buffer pointer is always 8 bytes (64-bit address).
		if (type.pointer_depth > get<SPIRType>(type.parent_type).pointer_depth)
			return 8;
	}

	if (!type.array.empty())
	{
		// For arrays, we can use ArrayStride to get an easy check.
		// A non-literal array size holds a constant ID which must be evaluated first.
		bool array_size_literal = type.array_size_literal.back();
		uint32_t array_size = array_size_literal ? type.array.back() : evaluate_constant_u32(type.array.back());
		return type_struct_member_array_stride(struct_type, index) * array_size;
	}
	else if (type.basetype == SPIRType::Struct)
	{
		return get_declared_struct_size(type);
	}
	else
	{
		unsigned vecsize = type.vecsize;
		unsigned columns = type.columns;

		// Vectors.
		if (columns == 1)
		{
			size_t component_size = type.width / 8;
			return vecsize * component_size;
		}
		else
		{
			// Matrices: size depends on stride and majorness decorations.
			uint32_t matrix_stride = type_struct_member_matrix_stride(struct_type, index);

			// Per SPIR-V spec, matrices must be tightly packed and aligned up for vec3 accesses.
			if (flags.get(DecorationRowMajor))
				return matrix_stride * vecsize;
			else if (flags.get(DecorationColMajor))
				return matrix_stride * columns;
			else
				SPIRV_CROSS_THROW("Either row-major or column-major must be declared for matrices.");
		}
	}
}
1922
// Records which struct members of the tracked buffer variable 'id' are accessed,
// emitting one { member index, byte offset, byte range } entry per member seen.
// Returning true continues opcode traversal; false aborts on invalid SPIR-V.
bool Compiler::BufferAccessHandler::handle(Op opcode, const uint32_t *args, uint32_t length)
{
	if (opcode != OpAccessChain && opcode != OpInBoundsAccessChain && opcode != OpPtrAccessChain)
		return true;

	bool ptr_chain = (opcode == OpPtrAccessChain);

	// Invalid SPIR-V. (OpPtrAccessChain carries an extra leading element index.)
	if (length < (ptr_chain ? 5u : 4u))
		return false;

	// Only chains rooted in the variable we are tracking are interesting.
	if (args[2] != id)
		return true;

	// Don't bother traversing the entire access chain tree yet.
	// If we access a struct member, assume we access the entire member.
	uint32_t index = compiler.get<SPIRConstant>(args[ptr_chain ? 4 : 3]).scalar();

	// Seen this index already.
	if (seen.find(index) != end(seen))
		return true;
	seen.insert(index);

	auto &type = compiler.expression_type(id);
	uint32_t offset = compiler.type_struct_member_offset(type, index);

	size_t range;
	// If we have another member in the struct, deduce the range by looking at the next member.
	// This is okay since structs in SPIR-V can have padding, but Offset decoration must be
	// monotonically increasing.
	// Of course, this doesn't take into account if the SPIR-V for some reason decided to add
	// very large amounts of padding, but that's not really a big deal.
	if (index + 1 < type.member_types.size())
	{
		range = compiler.type_struct_member_offset(type, index + 1) - offset;
	}
	else
	{
		// No padding, so just deduce it from the size of the member directly.
		range = compiler.get_declared_struct_member_size(type, index);
	}

	ranges.push_back({ index, offset, range });
	return true;
}
1968
get_active_buffer_ranges(VariableID id) const1969 SmallVector<BufferRange> Compiler::get_active_buffer_ranges(VariableID id) const
1970 {
1971 SmallVector<BufferRange> ranges;
1972 BufferAccessHandler handler(*this, ranges, id);
1973 traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);
1974 return ranges;
1975 }
1976
types_are_logically_equivalent(const SPIRType & a,const SPIRType & b) const1977 bool Compiler::types_are_logically_equivalent(const SPIRType &a, const SPIRType &b) const
1978 {
1979 if (a.basetype != b.basetype)
1980 return false;
1981 if (a.width != b.width)
1982 return false;
1983 if (a.vecsize != b.vecsize)
1984 return false;
1985 if (a.columns != b.columns)
1986 return false;
1987 if (a.array.size() != b.array.size())
1988 return false;
1989
1990 size_t array_count = a.array.size();
1991 if (array_count && memcmp(a.array.data(), b.array.data(), array_count * sizeof(uint32_t)) != 0)
1992 return false;
1993
1994 if (a.basetype == SPIRType::Image || a.basetype == SPIRType::SampledImage)
1995 {
1996 if (memcmp(&a.image, &b.image, sizeof(SPIRType::Image)) != 0)
1997 return false;
1998 }
1999
2000 if (a.member_types.size() != b.member_types.size())
2001 return false;
2002
2003 size_t member_types = a.member_types.size();
2004 for (size_t i = 0; i < member_types; i++)
2005 {
2006 if (!types_are_logically_equivalent(get<SPIRType>(a.member_types[i]), get<SPIRType>(b.member_types[i])))
2007 return false;
2008 }
2009
2010 return true;
2011 }
2012
get_execution_mode_bitset() const2013 const Bitset &Compiler::get_execution_mode_bitset() const
2014 {
2015 return get_entry_point().flags;
2016 }
2017
set_execution_mode(ExecutionMode mode,uint32_t arg0,uint32_t arg1,uint32_t arg2)2018 void Compiler::set_execution_mode(ExecutionMode mode, uint32_t arg0, uint32_t arg1, uint32_t arg2)
2019 {
2020 auto &execution = get_entry_point();
2021
2022 execution.flags.set(mode);
2023 switch (mode)
2024 {
2025 case ExecutionModeLocalSize:
2026 execution.workgroup_size.x = arg0;
2027 execution.workgroup_size.y = arg1;
2028 execution.workgroup_size.z = arg2;
2029 break;
2030
2031 case ExecutionModeInvocations:
2032 execution.invocations = arg0;
2033 break;
2034
2035 case ExecutionModeOutputVertices:
2036 execution.output_vertices = arg0;
2037 break;
2038
2039 default:
2040 break;
2041 }
2042 }
2043
unset_execution_mode(ExecutionMode mode)2044 void Compiler::unset_execution_mode(ExecutionMode mode)
2045 {
2046 auto &execution = get_entry_point();
2047 execution.flags.clear(mode);
2048 }
2049
get_work_group_size_specialization_constants(SpecializationConstant & x,SpecializationConstant & y,SpecializationConstant & z) const2050 uint32_t Compiler::get_work_group_size_specialization_constants(SpecializationConstant &x, SpecializationConstant &y,
2051 SpecializationConstant &z) const
2052 {
2053 auto &execution = get_entry_point();
2054 x = { 0, 0 };
2055 y = { 0, 0 };
2056 z = { 0, 0 };
2057
2058 if (execution.workgroup_size.constant != 0)
2059 {
2060 auto &c = get<SPIRConstant>(execution.workgroup_size.constant);
2061
2062 if (c.m.c[0].id[0] != ID(0))
2063 {
2064 x.id = c.m.c[0].id[0];
2065 x.constant_id = get_decoration(c.m.c[0].id[0], DecorationSpecId);
2066 }
2067
2068 if (c.m.c[0].id[1] != ID(0))
2069 {
2070 y.id = c.m.c[0].id[1];
2071 y.constant_id = get_decoration(c.m.c[0].id[1], DecorationSpecId);
2072 }
2073
2074 if (c.m.c[0].id[2] != ID(0))
2075 {
2076 z.id = c.m.c[0].id[2];
2077 z.constant_id = get_decoration(c.m.c[0].id[2], DecorationSpecId);
2078 }
2079 }
2080
2081 return execution.workgroup_size.constant;
2082 }
2083
get_execution_mode_argument(spv::ExecutionMode mode,uint32_t index) const2084 uint32_t Compiler::get_execution_mode_argument(spv::ExecutionMode mode, uint32_t index) const
2085 {
2086 auto &execution = get_entry_point();
2087 switch (mode)
2088 {
2089 case ExecutionModeLocalSize:
2090 switch (index)
2091 {
2092 case 0:
2093 return execution.workgroup_size.x;
2094 case 1:
2095 return execution.workgroup_size.y;
2096 case 2:
2097 return execution.workgroup_size.z;
2098 default:
2099 return 0;
2100 }
2101
2102 case ExecutionModeInvocations:
2103 return execution.invocations;
2104
2105 case ExecutionModeOutputVertices:
2106 return execution.output_vertices;
2107
2108 default:
2109 return 0;
2110 }
2111 }
2112
get_execution_model() const2113 ExecutionModel Compiler::get_execution_model() const
2114 {
2115 auto &execution = get_entry_point();
2116 return execution.model;
2117 }
2118
is_tessellation_shader(ExecutionModel model)2119 bool Compiler::is_tessellation_shader(ExecutionModel model)
2120 {
2121 return model == ExecutionModelTessellationControl || model == ExecutionModelTessellationEvaluation;
2122 }
2123
is_vertex_like_shader() const2124 bool Compiler::is_vertex_like_shader() const
2125 {
2126 auto model = get_execution_model();
2127 return model == ExecutionModelVertex || model == ExecutionModelGeometry ||
2128 model == ExecutionModelTessellationControl || model == ExecutionModelTessellationEvaluation;
2129 }
2130
is_tessellation_shader() const2131 bool Compiler::is_tessellation_shader() const
2132 {
2133 return is_tessellation_shader(get_execution_model());
2134 }
2135
set_remapped_variable_state(VariableID id,bool remap_enable)2136 void Compiler::set_remapped_variable_state(VariableID id, bool remap_enable)
2137 {
2138 get<SPIRVariable>(id).remapped_variable = remap_enable;
2139 }
2140
get_remapped_variable_state(VariableID id) const2141 bool Compiler::get_remapped_variable_state(VariableID id) const
2142 {
2143 return get<SPIRVariable>(id).remapped_variable;
2144 }
2145
set_subpass_input_remapped_components(VariableID id,uint32_t components)2146 void Compiler::set_subpass_input_remapped_components(VariableID id, uint32_t components)
2147 {
2148 get<SPIRVariable>(id).remapped_components = components;
2149 }
2150
get_subpass_input_remapped_components(VariableID id) const2151 uint32_t Compiler::get_subpass_input_remapped_components(VariableID id) const
2152 {
2153 return get<SPIRVariable>(id).remapped_components;
2154 }
2155
add_implied_read_expression(SPIRExpression & e,uint32_t source)2156 void Compiler::add_implied_read_expression(SPIRExpression &e, uint32_t source)
2157 {
2158 auto itr = find(begin(e.implied_read_expressions), end(e.implied_read_expressions), ID(source));
2159 if (itr == end(e.implied_read_expressions))
2160 e.implied_read_expressions.push_back(source);
2161 }
2162
add_implied_read_expression(SPIRAccessChain & e,uint32_t source)2163 void Compiler::add_implied_read_expression(SPIRAccessChain &e, uint32_t source)
2164 {
2165 auto itr = find(begin(e.implied_read_expressions), end(e.implied_read_expressions), ID(source));
2166 if (itr == end(e.implied_read_expressions))
2167 e.implied_read_expressions.push_back(source);
2168 }
2169
// Makes expression 'dst' depend on 'source_expression' and, transitively, on
// everything source depends on, so that invalidating any source invalidates dst.
// Also records phi-variable dependees so phi writes can invalidate dst.
void Compiler::inherit_expression_dependencies(uint32_t dst, uint32_t source_expression)
{
	// Don't inherit any expression dependencies if the expression in dst
	// is not a forwarded temporary.
	if (forwarded_temporaries.find(dst) == end(forwarded_temporaries) ||
	    forced_temporaries.find(dst) != end(forced_temporaries))
	{
		return;
	}

	auto &e = get<SPIRExpression>(dst);
	auto *phi = maybe_get<SPIRVariable>(source_expression);
	if (phi && phi->phi_variable)
	{
		// We have used a phi variable, which can change at the end of the block,
		// so make sure we take a dependency on this phi variable.
		phi->dependees.push_back(dst);
	}

	// Non-expression sources (e.g. plain variables) carry no dependency lists.
	auto *s = maybe_get<SPIRExpression>(source_expression);
	if (!s)
		return;

	auto &e_deps = e.expression_dependencies;
	auto &s_deps = s->expression_dependencies;

	// If we depend on a expression, we also depend on all sub-dependencies from source.
	e_deps.push_back(source_expression);
	e_deps.insert(end(e_deps), begin(s_deps), end(s_deps));

	// Eliminate duplicated dependencies.
	sort(begin(e_deps), end(e_deps));
	e_deps.erase(unique(begin(e_deps), end(e_deps)), end(e_deps));
}
2204
get_entry_points_and_stages() const2205 SmallVector<EntryPoint> Compiler::get_entry_points_and_stages() const
2206 {
2207 SmallVector<EntryPoint> entries;
2208 for (auto &entry : ir.entry_points)
2209 entries.push_back({ entry.second.orig_name, entry.second.model });
2210 return entries;
2211 }
2212
rename_entry_point(const std::string & old_name,const std::string & new_name,spv::ExecutionModel model)2213 void Compiler::rename_entry_point(const std::string &old_name, const std::string &new_name, spv::ExecutionModel model)
2214 {
2215 auto &entry = get_entry_point(old_name, model);
2216 entry.orig_name = new_name;
2217 entry.name = new_name;
2218 }
2219
set_entry_point(const std::string & name,spv::ExecutionModel model)2220 void Compiler::set_entry_point(const std::string &name, spv::ExecutionModel model)
2221 {
2222 auto &entry = get_entry_point(name, model);
2223 ir.default_entry_point = entry.self;
2224 }
2225
get_first_entry_point(const std::string & name)2226 SPIREntryPoint &Compiler::get_first_entry_point(const std::string &name)
2227 {
2228 auto itr = find_if(
2229 begin(ir.entry_points), end(ir.entry_points),
2230 [&](const std::pair<uint32_t, SPIREntryPoint> &entry) -> bool { return entry.second.orig_name == name; });
2231
2232 if (itr == end(ir.entry_points))
2233 SPIRV_CROSS_THROW("Entry point does not exist.");
2234
2235 return itr->second;
2236 }
2237
get_first_entry_point(const std::string & name) const2238 const SPIREntryPoint &Compiler::get_first_entry_point(const std::string &name) const
2239 {
2240 auto itr = find_if(
2241 begin(ir.entry_points), end(ir.entry_points),
2242 [&](const std::pair<uint32_t, SPIREntryPoint> &entry) -> bool { return entry.second.orig_name == name; });
2243
2244 if (itr == end(ir.entry_points))
2245 SPIRV_CROSS_THROW("Entry point does not exist.");
2246
2247 return itr->second;
2248 }
2249
get_entry_point(const std::string & name,ExecutionModel model)2250 SPIREntryPoint &Compiler::get_entry_point(const std::string &name, ExecutionModel model)
2251 {
2252 auto itr = find_if(begin(ir.entry_points), end(ir.entry_points),
2253 [&](const std::pair<uint32_t, SPIREntryPoint> &entry) -> bool {
2254 return entry.second.orig_name == name && entry.second.model == model;
2255 });
2256
2257 if (itr == end(ir.entry_points))
2258 SPIRV_CROSS_THROW("Entry point does not exist.");
2259
2260 return itr->second;
2261 }
2262
get_entry_point(const std::string & name,ExecutionModel model) const2263 const SPIREntryPoint &Compiler::get_entry_point(const std::string &name, ExecutionModel model) const
2264 {
2265 auto itr = find_if(begin(ir.entry_points), end(ir.entry_points),
2266 [&](const std::pair<uint32_t, SPIREntryPoint> &entry) -> bool {
2267 return entry.second.orig_name == name && entry.second.model == model;
2268 });
2269
2270 if (itr == end(ir.entry_points))
2271 SPIRV_CROSS_THROW("Entry point does not exist.");
2272
2273 return itr->second;
2274 }
2275
get_cleansed_entry_point_name(const std::string & name,ExecutionModel model) const2276 const string &Compiler::get_cleansed_entry_point_name(const std::string &name, ExecutionModel model) const
2277 {
2278 return get_entry_point(name, model).name;
2279 }
2280
get_entry_point() const2281 const SPIREntryPoint &Compiler::get_entry_point() const
2282 {
2283 return ir.entry_points.find(ir.default_entry_point)->second;
2284 }
2285
get_entry_point()2286 SPIREntryPoint &Compiler::get_entry_point()
2287 {
2288 return ir.entry_points.find(ir.default_entry_point)->second;
2289 }
2290
interface_variable_exists_in_entry_point(uint32_t id) const2291 bool Compiler::interface_variable_exists_in_entry_point(uint32_t id) const
2292 {
2293 auto &var = get<SPIRVariable>(id);
2294
2295 if (ir.get_spirv_version() < 0x10400)
2296 {
2297 if (var.storage != StorageClassInput && var.storage != StorageClassOutput &&
2298 var.storage != StorageClassUniformConstant)
2299 SPIRV_CROSS_THROW("Only Input, Output variables and Uniform constants are part of a shader linking interface.");
2300
2301 // This is to avoid potential problems with very old glslang versions which did
2302 // not emit input/output interfaces properly.
2303 // We can assume they only had a single entry point, and single entry point
2304 // shaders could easily be assumed to use every interface variable anyways.
2305 if (ir.entry_points.size() <= 1)
2306 return true;
2307 }
2308
2309 // In SPIR-V 1.4 and later, all global resource variables must be present.
2310
2311 auto &execution = get_entry_point();
2312 return find(begin(execution.interface_variables), end(execution.interface_variables), VariableID(id)) !=
2313 end(execution.interface_variables);
2314 }
2315
push_remap_parameters(const SPIRFunction & func,const uint32_t * args,uint32_t length)2316 void Compiler::CombinedImageSamplerHandler::push_remap_parameters(const SPIRFunction &func, const uint32_t *args,
2317 uint32_t length)
2318 {
2319 // If possible, pipe through a remapping table so that parameters know
2320 // which variables they actually bind to in this scope.
2321 unordered_map<uint32_t, uint32_t> remapping;
2322 for (uint32_t i = 0; i < length; i++)
2323 remapping[func.arguments[i].id] = remap_parameter(args[i]);
2324 parameter_remapping.push(move(remapping));
2325 }
2326
// Drops the parameter remapping table for the call scope we just left.
void Compiler::CombinedImageSamplerHandler::pop_remap_parameters()
{
	parameter_remapping.pop();
}
2331
remap_parameter(uint32_t id)2332 uint32_t Compiler::CombinedImageSamplerHandler::remap_parameter(uint32_t id)
2333 {
2334 auto *var = compiler.maybe_get_backing_variable(id);
2335 if (var)
2336 id = var->self;
2337
2338 if (parameter_remapping.empty())
2339 return id;
2340
2341 auto &remapping = parameter_remapping.top();
2342 auto itr = remapping.find(id);
2343 if (itr != end(remapping))
2344 return itr->second;
2345 else
2346 return id;
2347 }
2348
begin_function_scope(const uint32_t * args,uint32_t length)2349 bool Compiler::CombinedImageSamplerHandler::begin_function_scope(const uint32_t *args, uint32_t length)
2350 {
2351 if (length < 3)
2352 return false;
2353
2354 auto &callee = compiler.get<SPIRFunction>(args[2]);
2355 args += 3;
2356 length -= 3;
2357 push_remap_parameters(callee, args, length);
2358 functions.push(&callee);
2359 return true;
2360 }
2361
// Leaving an OpFunctionCall: pops the parameter remapping scope and propagates
// any combined image/sampler parameters the callee required up into the caller,
// translating parameter indices back into concrete IDs in the caller's scope.
bool Compiler::CombinedImageSamplerHandler::end_function_scope(const uint32_t *args, uint32_t length)
{
	if (length < 3)
		return false;

	auto &callee = compiler.get<SPIRFunction>(args[2]);
	args += 3;

	// There are two types of cases we have to handle,
	// a callee might call sampler2D(texture2D, sampler) directly where
	// one or more parameters originate from parameters.
	// Alternatively, we need to provide combined image samplers to our callees,
	// and in this case we need to add those as well.

	pop_remap_parameters();

	// Our callee has now been processed at least once.
	// No point in doing it again.
	callee.do_combined_parameters = false;

	auto &params = functions.top()->combined_parameters;
	functions.pop();
	if (functions.empty())
		return true;

	auto &caller = *functions.top();
	if (caller.do_combined_parameters)
	{
		for (auto &param : params)
		{
			// Non-global parameters store indices into the call's argument list,
			// which we resolve to IDs visible in the caller.
			VariableID image_id = param.global_image ? param.image_id : VariableID(args[param.image_id]);
			VariableID sampler_id = param.global_sampler ? param.sampler_id : VariableID(args[param.sampler_id]);

			auto *i = compiler.maybe_get_backing_variable(image_id);
			auto *s = compiler.maybe_get_backing_variable(sampler_id);
			if (i)
				image_id = i->self;
			if (s)
				sampler_id = s->self;

			register_combined_image_sampler(caller, 0, image_id, sampler_id, param.depth);
		}
	}

	return true;
}
2408
// Records that 'caller' combines image_id with sampler_id. If either one is a
// function parameter (rather than a global), a combined-parameter entry plus a
// shadow argument (with a freshly built sampled-image type and variable) are
// added so the combination can be passed in by the caller's own callers.
void Compiler::CombinedImageSamplerHandler::register_combined_image_sampler(SPIRFunction &caller,
                                                                            VariableID combined_module_id,
                                                                            VariableID image_id, VariableID sampler_id,
                                                                            bool depth)
{
	// We now have a texture ID and a sampler ID which will either be found as a global
	// or a parameter in our own function. If both are global, they will not need a parameter,
	// otherwise, add it to our list.
	SPIRFunction::CombinedImageSamplerParameter param = {
		0u, image_id, sampler_id, true, true, depth,
	};

	auto texture_itr = find_if(begin(caller.arguments), end(caller.arguments),
	                           [image_id](const SPIRFunction::Parameter &p) { return p.id == image_id; });
	auto sampler_itr = find_if(begin(caller.arguments), end(caller.arguments),
	                           [sampler_id](const SPIRFunction::Parameter &p) { return p.id == sampler_id; });

	if (texture_itr != end(caller.arguments))
	{
		// Image is a parameter: store its argument index instead of a global ID.
		param.global_image = false;
		param.image_id = uint32_t(texture_itr - begin(caller.arguments));
	}

	if (sampler_itr != end(caller.arguments))
	{
		// Sampler is a parameter: store its argument index instead of a global ID.
		param.global_sampler = false;
		param.sampler_id = uint32_t(sampler_itr - begin(caller.arguments));
	}

	// Both are globals: nothing needs to be passed through this function.
	if (param.global_image && param.global_sampler)
		return;

	auto itr = find_if(begin(caller.combined_parameters), end(caller.combined_parameters),
	                   [&param](const SPIRFunction::CombinedImageSamplerParameter &p) {
		                   return param.image_id == p.image_id && param.sampler_id == p.sampler_id &&
		                          param.global_image == p.global_image && param.global_sampler == p.global_sampler;
	                   });

	if (itr == end(caller.combined_parameters))
	{
		// Reserve IDs for: sampled image type, pointer type, and the new variable.
		uint32_t id = compiler.ir.increase_bound_by(3);
		auto type_id = id + 0;
		auto ptr_type_id = id + 1;
		auto combined_id = id + 2;
		auto &base = compiler.expression_type(image_id);
		auto &type = compiler.set<SPIRType>(type_id);
		auto &ptr_type = compiler.set<SPIRType>(ptr_type_id);

		type = base;
		type.self = type_id;
		type.basetype = SPIRType::SampledImage;
		type.pointer = false;
		type.storage = StorageClassGeneric;
		type.image.depth = depth;

		ptr_type = type;
		ptr_type.pointer = true;
		ptr_type.storage = StorageClassUniformConstant;
		ptr_type.parent_type = type_id;

		// Build new variable.
		compiler.set<SPIRVariable>(combined_id, ptr_type_id, StorageClassFunction, 0);

		// Inherit RelaxedPrecision.
		// If any of OpSampledImage, underlying image or sampler are marked, inherit the decoration.
		bool relaxed_precision =
		    compiler.has_decoration(sampler_id, DecorationRelaxedPrecision) ||
		    compiler.has_decoration(image_id, DecorationRelaxedPrecision) ||
		    (combined_module_id && compiler.has_decoration(combined_module_id, DecorationRelaxedPrecision));

		if (relaxed_precision)
			compiler.set_decoration(combined_id, DecorationRelaxedPrecision);

		param.id = combined_id;

		compiler.set_name(combined_id,
		                  join("SPIRV_Cross_Combined", compiler.to_name(image_id), compiler.to_name(sampler_id)));

		caller.combined_parameters.push_back(param);
		caller.shadow_arguments.push_back({ ptr_type_id, combined_id, 0u, 0u, true });
	}
}
2491
// Scans opcodes to decide whether a dummy sampler must be created, i.e. whether
// any sampled (non-buffer) separate image is used with fetch/query opcodes,
// which require a combined image sampler when targeting GLSL-like languages.
// Returns false once the answer is known to stop traversal early.
bool Compiler::DummySamplerForCombinedImageHandler::handle(Op opcode, const uint32_t *args, uint32_t length)
{
	if (need_dummy_sampler)
	{
		// No need to traverse further, we know the result.
		return false;
	}

	switch (opcode)
	{
	case OpLoad:
	{
		if (length < 3)
			return false;

		uint32_t result_type = args[0];

		auto &type = compiler.get<SPIRType>(result_type);
		bool separate_image =
		    type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer;

		// If not separate image, don't bother.
		if (!separate_image)
			return true;

		// Track the loaded image as an expression so later opcodes can resolve
		// their backing variable through it.
		uint32_t id = args[1];
		uint32_t ptr = args[2];
		compiler.set<SPIRExpression>(id, "", result_type, true);
		compiler.register_read(id, ptr, true);
		break;
	}

	case OpImageFetch:
	case OpImageQuerySizeLod:
	case OpImageQuerySize:
	case OpImageQueryLevels:
	case OpImageQuerySamples:
	{
		// If we are fetching or querying LOD from a plain OpTypeImage, we must pre-combine with our dummy sampler.
		auto *var = compiler.maybe_get_backing_variable(args[2]);
		if (var)
		{
			auto &type = compiler.get<SPIRType>(var->basetype);
			if (type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer)
				need_dummy_sampler = true;
		}

		break;
	}

	case OpInBoundsAccessChain:
	case OpAccessChain:
	case OpPtrAccessChain:
	{
		if (length < 3)
			return false;

		uint32_t result_type = args[0];
		auto &type = compiler.get<SPIRType>(result_type);
		bool separate_image =
		    type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer;
		if (!separate_image)
			return true;

		uint32_t id = args[1];
		uint32_t ptr = args[2];
		compiler.set<SPIRExpression>(id, "", result_type, true);
		compiler.register_read(id, ptr, true);

		// Other backends might use SPIRAccessChain for this later.
		compiler.ir.ids[id].set_allow_type_rewrite();
		break;
	}

	default:
		break;
	}

	return true;
}
2572
// Tracks where images and samplers are loaded from and, for each distinct
// (image, sampler) pair seen in OpSampledImage (or image fetch/query with the
// dummy sampler), creates a synthesized combined image sampler variable in
// combined_image_samplers. Also propagates combined parameters into callees.
bool Compiler::CombinedImageSamplerHandler::handle(Op opcode, const uint32_t *args, uint32_t length)
{
	// We need to figure out where samplers and images are loaded from, so do only the bare bones compilation we need.
	bool is_fetch = false;

	switch (opcode)
	{
	case OpLoad:
	{
		if (length < 3)
			return false;

		uint32_t result_type = args[0];

		auto &type = compiler.get<SPIRType>(result_type);
		bool separate_image = type.basetype == SPIRType::Image && type.image.sampled == 1;
		bool separate_sampler = type.basetype == SPIRType::Sampler;

		// If not separate image or sampler, don't bother.
		if (!separate_image && !separate_sampler)
			return true;

		// Record the load so OpSampledImage can be traced back to its variables.
		uint32_t id = args[1];
		uint32_t ptr = args[2];
		compiler.set<SPIRExpression>(id, "", result_type, true);
		compiler.register_read(id, ptr, true);
		return true;
	}

	case OpInBoundsAccessChain:
	case OpAccessChain:
	case OpPtrAccessChain:
	{
		if (length < 3)
			return false;

		// Technically, it is possible to have arrays of textures and arrays of samplers and combine them, but this becomes essentially
		// impossible to implement, since we don't know which concrete sampler we are accessing.
		// One potential way is to create a combinatorial explosion where N textures and M samplers are combined into N * M sampler2Ds,
		// but this seems ridiculously complicated for a problem which is easy to work around.
		// Checking access chains like this assumes we don't have samplers or textures inside uniform structs, but this makes no sense.

		uint32_t result_type = args[0];

		auto &type = compiler.get<SPIRType>(result_type);
		bool separate_image = type.basetype == SPIRType::Image && type.image.sampled == 1;
		bool separate_sampler = type.basetype == SPIRType::Sampler;
		if (separate_sampler)
			SPIRV_CROSS_THROW(
			    "Attempting to use arrays or structs of separate samplers. This is not possible to statically "
			    "remap to plain GLSL.");

		if (separate_image)
		{
			uint32_t id = args[1];
			uint32_t ptr = args[2];
			compiler.set<SPIRExpression>(id, "", result_type, true);
			compiler.register_read(id, ptr, true);
		}
		return true;
	}

	case OpImageFetch:
	case OpImageQuerySizeLod:
	case OpImageQuerySize:
	case OpImageQueryLevels:
	case OpImageQuerySamples:
	{
		// If we are fetching from a plain OpTypeImage or querying LOD, we must pre-combine with our dummy sampler.
		auto *var = compiler.maybe_get_backing_variable(args[2]);
		if (!var)
			return true;

		auto &type = compiler.get<SPIRType>(var->basetype);
		if (type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer)
		{
			if (compiler.dummy_sampler_id == 0)
				SPIRV_CROSS_THROW("texelFetch without sampler was found, but no dummy sampler has been created with "
				                  "build_dummy_sampler_for_combined_images().");

			// Do it outside.
			is_fetch = true;
			break;
		}

		return true;
	}

	case OpSampledImage:
		// Do it outside.
		break;

	default:
		return true;
	}

	// Only OpSampledImage / dummy-sampler fetch cases reach this point.
	// Registers sampler2D calls used in case they are parameters so
	// that their callees know which combined image samplers to propagate down the call stack.
	if (!functions.empty())
	{
		auto &callee = *functions.top();
		if (callee.do_combined_parameters)
		{
			uint32_t image_id = args[2];

			auto *image = compiler.maybe_get_backing_variable(image_id);
			if (image)
				image_id = image->self;

			uint32_t sampler_id = is_fetch ? compiler.dummy_sampler_id : args[3];
			auto *sampler = compiler.maybe_get_backing_variable(sampler_id);
			if (sampler)
				sampler_id = sampler->self;

			uint32_t combined_id = args[1];

			auto &combined_type = compiler.get<SPIRType>(args[0]);
			register_combined_image_sampler(callee, combined_id, image_id, sampler_id, combined_type.image.depth);
		}
	}

	// For function calls, we need to remap IDs which are function parameters into global variables.
	// This information is statically known from the current place in the call stack.
	// Function parameters are not necessarily pointers, so if we don't have a backing variable, remapping will know
	// which backing variable the image/sample came from.
	VariableID image_id = remap_parameter(args[2]);
	VariableID sampler_id = is_fetch ? compiler.dummy_sampler_id : remap_parameter(args[3]);

	auto itr = find_if(begin(compiler.combined_image_samplers), end(compiler.combined_image_samplers),
	                   [image_id, sampler_id](const CombinedImageSampler &combined) {
		                   return combined.image_id == image_id && combined.sampler_id == sampler_id;
	                   });

	if (itr == end(compiler.combined_image_samplers))
	{
		uint32_t sampled_type;
		uint32_t combined_module_id;
		if (is_fetch)
		{
			// Have to invent the sampled image type.
			sampled_type = compiler.ir.increase_bound_by(1);
			auto &type = compiler.set<SPIRType>(sampled_type);
			type = compiler.expression_type(args[2]);
			type.self = sampled_type;
			type.basetype = SPIRType::SampledImage;
			type.image.depth = false;
			combined_module_id = 0;
		}
		else
		{
			sampled_type = args[0];
			combined_module_id = args[1];
		}

		auto id = compiler.ir.increase_bound_by(2);
		auto type_id = id + 0;
		auto combined_id = id + 1;

		// Make a new type, pointer to OpTypeSampledImage, so we can make a variable of this type.
		// We will probably have this type lying around, but it doesn't hurt to make duplicates for internal purposes.
		auto &type = compiler.set<SPIRType>(type_id);
		auto &base = compiler.get<SPIRType>(sampled_type);
		type = base;
		type.pointer = true;
		type.storage = StorageClassUniformConstant;
		type.parent_type = type_id;

		// Build new variable.
		compiler.set<SPIRVariable>(combined_id, type_id, StorageClassUniformConstant, 0);

		// Inherit RelaxedPrecision (and potentially other useful flags if deemed relevant).
		// If any of OpSampledImage, underlying image or sampler are marked, inherit the decoration.
		bool relaxed_precision =
		    (sampler_id && compiler.has_decoration(sampler_id, DecorationRelaxedPrecision)) ||
		    (image_id && compiler.has_decoration(image_id, DecorationRelaxedPrecision)) ||
		    (combined_module_id && compiler.has_decoration(combined_module_id, DecorationRelaxedPrecision));

		if (relaxed_precision)
			compiler.set_decoration(combined_id, DecorationRelaxedPrecision);

		// Propagate the array type for the original image as well.
		auto *var = compiler.maybe_get_backing_variable(image_id);
		if (var)
		{
			auto &parent_type = compiler.get<SPIRType>(var->basetype);
			type.array = parent_type.array;
			type.array_size_literal = parent_type.array_size_literal;
		}

		compiler.combined_image_samplers.push_back({ combined_id, image_id, sampler_id });
	}

	return true;
}
2767
build_dummy_sampler_for_combined_images()2768 VariableID Compiler::build_dummy_sampler_for_combined_images()
2769 {
2770 DummySamplerForCombinedImageHandler handler(*this);
2771 traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);
2772 if (handler.need_dummy_sampler)
2773 {
2774 uint32_t offset = ir.increase_bound_by(3);
2775 auto type_id = offset + 0;
2776 auto ptr_type_id = offset + 1;
2777 auto var_id = offset + 2;
2778
2779 SPIRType sampler_type;
2780 auto &sampler = set<SPIRType>(type_id);
2781 sampler.basetype = SPIRType::Sampler;
2782
2783 auto &ptr_sampler = set<SPIRType>(ptr_type_id);
2784 ptr_sampler = sampler;
2785 ptr_sampler.self = type_id;
2786 ptr_sampler.storage = StorageClassUniformConstant;
2787 ptr_sampler.pointer = true;
2788 ptr_sampler.parent_type = type_id;
2789
2790 set<SPIRVariable>(var_id, ptr_type_id, StorageClassUniformConstant, 0);
2791 set_name(var_id, "SPIRV_Cross_DummySampler");
2792 dummy_sampler_id = var_id;
2793 return var_id;
2794 }
2795 else
2796 return 0;
2797 }
2798
build_combined_image_samplers()2799 void Compiler::build_combined_image_samplers()
2800 {
2801 ir.for_each_typed_id<SPIRFunction>([&](uint32_t, SPIRFunction &func) {
2802 func.combined_parameters.clear();
2803 func.shadow_arguments.clear();
2804 func.do_combined_parameters = true;
2805 });
2806
2807 combined_image_samplers.clear();
2808 CombinedImageSamplerHandler handler(*this);
2809 traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);
2810 }
2811
get_specialization_constants() const2812 SmallVector<SpecializationConstant> Compiler::get_specialization_constants() const
2813 {
2814 SmallVector<SpecializationConstant> spec_consts;
2815 ir.for_each_typed_id<SPIRConstant>([&](uint32_t, const SPIRConstant &c) {
2816 if (c.specialization && has_decoration(c.self, DecorationSpecId))
2817 spec_consts.push_back({ c.self, get_decoration(c.self, DecorationSpecId) });
2818 });
2819 return spec_consts;
2820 }
2821
get_constant(ConstantID id)2822 SPIRConstant &Compiler::get_constant(ConstantID id)
2823 {
2824 return get<SPIRConstant>(id);
2825 }
2826
get_constant(ConstantID id) const2827 const SPIRConstant &Compiler::get_constant(ConstantID id) const
2828 {
2829 return get<SPIRConstant>(id);
2830 }
2831
exists_unaccessed_path_to_return(const CFG & cfg,uint32_t block,const unordered_set<uint32_t> & blocks,unordered_set<uint32_t> & visit_cache)2832 static bool exists_unaccessed_path_to_return(const CFG &cfg, uint32_t block, const unordered_set<uint32_t> &blocks,
2833 unordered_set<uint32_t> &visit_cache)
2834 {
2835 // This block accesses the variable.
2836 if (blocks.find(block) != end(blocks))
2837 return false;
2838
2839 // We are at the end of the CFG.
2840 if (cfg.get_succeeding_edges(block).empty())
2841 return true;
2842
2843 // If any of our successors have a path to the end, there exists a path from block.
2844 for (auto &succ : cfg.get_succeeding_edges(block))
2845 {
2846 if (visit_cache.count(succ) == 0)
2847 {
2848 if (exists_unaccessed_path_to_return(cfg, succ, blocks, visit_cache))
2849 return true;
2850 visit_cache.insert(succ);
2851 }
2852 }
2853
2854 return false;
2855 }
2856
analyze_parameter_preservation(SPIRFunction & entry,const CFG & cfg,const unordered_map<uint32_t,unordered_set<uint32_t>> & variable_to_blocks,const unordered_map<uint32_t,unordered_set<uint32_t>> & complete_write_blocks)2857 void Compiler::analyze_parameter_preservation(
2858 SPIRFunction &entry, const CFG &cfg, const unordered_map<uint32_t, unordered_set<uint32_t>> &variable_to_blocks,
2859 const unordered_map<uint32_t, unordered_set<uint32_t>> &complete_write_blocks)
2860 {
2861 for (auto &arg : entry.arguments)
2862 {
2863 // Non-pointers are always inputs.
2864 auto &type = get<SPIRType>(arg.type);
2865 if (!type.pointer)
2866 continue;
2867
2868 // Opaque argument types are always in
2869 bool potential_preserve;
2870 switch (type.basetype)
2871 {
2872 case SPIRType::Sampler:
2873 case SPIRType::Image:
2874 case SPIRType::SampledImage:
2875 case SPIRType::AtomicCounter:
2876 potential_preserve = false;
2877 break;
2878
2879 default:
2880 potential_preserve = true;
2881 break;
2882 }
2883
2884 if (!potential_preserve)
2885 continue;
2886
2887 auto itr = variable_to_blocks.find(arg.id);
2888 if (itr == end(variable_to_blocks))
2889 {
2890 // Variable is never accessed.
2891 continue;
2892 }
2893
2894 // We have accessed a variable, but there was no complete writes to that variable.
2895 // We deduce that we must preserve the argument.
2896 itr = complete_write_blocks.find(arg.id);
2897 if (itr == end(complete_write_blocks))
2898 {
2899 arg.read_count++;
2900 continue;
2901 }
2902
2903 // If there is a path through the CFG where no block completely writes to the variable, the variable will be in an undefined state
2904 // when the function returns. We therefore need to implicitly preserve the variable in case there are writers in the function.
2905 // Major case here is if a function is
2906 // void foo(int &var) { if (cond) var = 10; }
2907 // Using read/write counts, we will think it's just an out variable, but it really needs to be inout,
2908 // because if we don't write anything whatever we put into the function must return back to the caller.
2909 unordered_set<uint32_t> visit_cache;
2910 if (exists_unaccessed_path_to_return(cfg, entry.entry_block, itr->second, visit_cache))
2911 arg.read_count++;
2912 }
2913 }
2914
// Handler which records, per basic block, which variables and temporaries are
// accessed while traversing a single function's opcodes. Only stores references;
// no analysis happens at construction time.
Compiler::AnalyzeVariableScopeAccessHandler::AnalyzeVariableScopeAccessHandler(Compiler &compiler_,
                                                                               SPIRFunction &entry_)
    : compiler(compiler_)
    , entry(entry_)
{
}
2921
bool Compiler::AnalyzeVariableScopeAccessHandler::follow_function_call(const SPIRFunction &)
{
	// Only analyze within this function.
	// Returning false tells the traversal not to descend into callees;
	// each function is analyzed with its own handler instance.
	return false;
}
2927
// Called by the traversal whenever it enters a new basic block. Besides tracking
// the current block, this registers implicit accesses caused by OpPhi in any
// block this one can branch to.
void Compiler::AnalyzeVariableScopeAccessHandler::set_current_block(const SPIRBlock &block)
{
	current_block = &block;

	// If we're branching to a block which uses OpPhi, in GLSL
	// this will be a variable write when we branch,
	// so we need to track access to these variables as well to
	// have a complete picture.
	const auto test_phi = [this, &block](uint32_t to) {
		auto &next = compiler.get<SPIRBlock>(to);
		for (auto &phi : next.phi_variables)
		{
			// Only phi inputs contributed by this block count as writes from here.
			if (phi.parent == block.self)
			{
				accessed_variables_to_block[phi.function_variable].insert(block.self);
				// Phi variables are also accessed in our target branch block.
				accessed_variables_to_block[phi.function_variable].insert(next.self);

				notify_variable_access(phi.local_variable, block.self);
			}
		}
	};

	// Visit every possible branch target of this block's terminator.
	switch (block.terminator)
	{
	case SPIRBlock::Direct:
		// block.condition is 0 for an unconditional branch; notify_variable_access ignores ID 0.
		notify_variable_access(block.condition, block.self);
		test_phi(block.next_block);
		break;

	case SPIRBlock::Select:
		notify_variable_access(block.condition, block.self);
		test_phi(block.true_block);
		test_phi(block.false_block);
		break;

	case SPIRBlock::MultiSelect:
		notify_variable_access(block.condition, block.self);
		for (auto &target : block.cases)
			test_phi(target.block);
		if (block.default_block)
			test_phi(block.default_block);
		break;

	default:
		// Return/Unreachable/etc. have no branch targets to consider.
		break;
	}
}
2976
notify_variable_access(uint32_t id,uint32_t block)2977 void Compiler::AnalyzeVariableScopeAccessHandler::notify_variable_access(uint32_t id, uint32_t block)
2978 {
2979 if (id == 0)
2980 return;
2981
2982 // Access chains used in multiple blocks mean hoisting all the variables used to construct the access chain as not all backends can use pointers.
2983 auto itr = access_chain_children.find(id);
2984 if (itr != end(access_chain_children))
2985 for (auto child_id : itr->second)
2986 notify_variable_access(child_id, block);
2987
2988 if (id_is_phi_variable(id))
2989 accessed_variables_to_block[id].insert(block);
2990 else if (id_is_potential_temporary(id))
2991 accessed_temporaries_to_block[id].insert(block);
2992 }
2993
id_is_phi_variable(uint32_t id) const2994 bool Compiler::AnalyzeVariableScopeAccessHandler::id_is_phi_variable(uint32_t id) const
2995 {
2996 if (id >= compiler.get_current_id_bound())
2997 return false;
2998 auto *var = compiler.maybe_get<SPIRVariable>(id);
2999 return var && var->phi_variable;
3000 }
3001
id_is_potential_temporary(uint32_t id) const3002 bool Compiler::AnalyzeVariableScopeAccessHandler::id_is_potential_temporary(uint32_t id) const
3003 {
3004 if (id >= compiler.get_current_id_bound())
3005 return false;
3006
3007 // Temporaries are not created before we start emitting code.
3008 return compiler.ir.ids[id].empty() || (compiler.ir.ids[id].get_type() == TypeExpression);
3009 }
3010
// Per-instruction callback of the variable-scope analysis. Records, for each
// basic block, which variables/temporaries are accessed and whether writes are
// complete or partial. These maps later drive dominator analysis for hoisting
// temporaries and classifying loop variables. Returning false aborts traversal.
bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint32_t *args, uint32_t length)
{
	// Keep track of the types of temporaries, so we can hoist them out as necessary.
	uint32_t result_type, result_id;
	if (compiler.instruction_to_result_type(result_type, result_id, op, args, length))
		result_id_to_type[result_id] = result_type;

	switch (op)
	{
	case OpStore:
	{
		if (length < 2)
			return false;

		// args[0] is the pointer stored through, args[1] the stored object.
		ID ptr = args[0];
		auto *var = compiler.maybe_get_backing_variable(ptr);

		// If we store through an access chain, we have a partial write.
		if (var)
		{
			accessed_variables_to_block[var->self].insert(current_block->self);
			if (var->self == ptr)
				complete_write_variables_to_block[var->self].insert(current_block->self);
			else
				partial_write_variables_to_block[var->self].insert(current_block->self);
		}

		// args[0] might be an access chain we have to track use of.
		notify_variable_access(args[0], current_block->self);
		// Might try to store a Phi variable here.
		notify_variable_access(args[1], current_block->self);
		break;
	}

	case OpAccessChain:
	case OpInBoundsAccessChain:
	case OpPtrAccessChain:
	{
		if (length < 3)
			return false;

		// Access chains used in multiple blocks mean hoisting all the variables used to construct the access chain as not all backends can use pointers.
		uint32_t ptr = args[2];
		auto *var = compiler.maybe_get<SPIRVariable>(ptr);
		if (var)
		{
			accessed_variables_to_block[var->self].insert(current_block->self);
			access_chain_children[args[1]].insert(var->self);
		}

		// args[2] might be another access chain we have to track use of.
		// Record the base pointer and every index as children of this chain.
		for (uint32_t i = 2; i < length; i++)
		{
			notify_variable_access(args[i], current_block->self);
			access_chain_children[args[1]].insert(args[i]);
		}

		// Also keep track of the access chain pointer itself.
		// In exceptionally rare cases, we can end up with a case where
		// the access chain is generated in the loop body, but is consumed in continue block.
		// This means we need complex loop workarounds, and we must detect this via CFG analysis.
		notify_variable_access(args[1], current_block->self);

		// The result of an access chain is a fixed expression and is not really considered a temporary.
		auto &e = compiler.set<SPIRExpression>(args[1], "", args[0], true);
		auto *backing_variable = compiler.maybe_get_backing_variable(ptr);
		e.loaded_from = backing_variable ? VariableID(backing_variable->self) : VariableID(0);

		// Other backends might use SPIRAccessChain for this later.
		compiler.ir.ids[args[1]].set_allow_type_rewrite();
		access_chain_expressions.insert(args[1]);
		break;
	}

	case OpCopyMemory:
	{
		if (length < 2)
			return false;

		// args[0] is the destination pointer, args[1] the source pointer.
		ID lhs = args[0];
		ID rhs = args[1];
		auto *var = compiler.maybe_get_backing_variable(lhs);

		// If we store through an access chain, we have a partial write.
		if (var)
		{
			accessed_variables_to_block[var->self].insert(current_block->self);
			if (var->self == lhs)
				complete_write_variables_to_block[var->self].insert(current_block->self);
			else
				partial_write_variables_to_block[var->self].insert(current_block->self);
		}

		// args[0:1] might be access chains we have to track use of.
		for (uint32_t i = 0; i < 2; i++)
			notify_variable_access(args[i], current_block->self);

		// The source side only counts as an access, never as a write.
		var = compiler.maybe_get_backing_variable(rhs);
		if (var)
			accessed_variables_to_block[var->self].insert(current_block->self);
		break;
	}

	case OpCopyObject:
	{
		if (length < 3)
			return false;

		auto *var = compiler.maybe_get_backing_variable(args[2]);
		if (var)
			accessed_variables_to_block[var->self].insert(current_block->self);

		// Might be an access chain which we have to keep track of.
		notify_variable_access(args[1], current_block->self);
		// Copying an access chain expression yields another access chain expression.
		if (access_chain_expressions.count(args[2]))
			access_chain_expressions.insert(args[1]);

		// Might try to copy a Phi variable here.
		notify_variable_access(args[2], current_block->self);
		break;
	}

	case OpLoad:
	{
		if (length < 3)
			return false;
		uint32_t ptr = args[2];
		auto *var = compiler.maybe_get_backing_variable(ptr);
		if (var)
			accessed_variables_to_block[var->self].insert(current_block->self);

		// Loaded value is a temporary.
		notify_variable_access(args[1], current_block->self);

		// Might be an access chain we have to track use of.
		notify_variable_access(args[2], current_block->self);
		break;
	}

	case OpFunctionCall:
	{
		if (length < 3)
			return false;

		// Return value may be a temporary.
		if (compiler.get_type(args[0]).basetype != SPIRType::Void)
			notify_variable_access(args[1], current_block->self);

		// Skip result type, result ID and function ID; the rest are call arguments.
		length -= 3;
		args += 3;

		for (uint32_t i = 0; i < length; i++)
		{
			auto *var = compiler.maybe_get_backing_variable(args[i]);
			if (var)
			{
				accessed_variables_to_block[var->self].insert(current_block->self);
				// Assume we can get partial writes to this variable.
				partial_write_variables_to_block[var->self].insert(current_block->self);
			}

			// Cannot easily prove if argument we pass to a function is completely written.
			// Usually, functions write to a dummy variable,
			// which is then copied to in full to the real argument.

			// Might try to copy a Phi variable here.
			notify_variable_access(args[i], current_block->self);
		}
		break;
	}

	case OpExtInst:
	{
		// Operands proper begin at args[4]; args[1] is the result ID.
		for (uint32_t i = 4; i < length; i++)
			notify_variable_access(args[i], current_block->self);
		notify_variable_access(args[1], current_block->self);
		break;
	}

	case OpArrayLength:
	case OpLine:
	case OpNoLine:
		// Uses literals, but cannot be a phi variable or temporary, so ignore.
		break;

	// Atomics shouldn't be able to access function-local variables.
	// Some GLSL builtins access a pointer.

	case OpCompositeInsert:
	case OpVectorShuffle:
		// Specialize for opcode which contains literals.
		for (uint32_t i = 1; i < 4; i++)
			notify_variable_access(args[i], current_block->self);
		break;

	case OpCompositeExtract:
		// Specialize for opcode which contains literals.
		for (uint32_t i = 1; i < 3; i++)
			notify_variable_access(args[i], current_block->self);
		break;

	case OpImageWrite:
		for (uint32_t i = 0; i < length; i++)
		{
			// Argument 3 is a literal.
			if (i != 3)
				notify_variable_access(args[i], current_block->self);
		}
		break;

	case OpImageSampleImplicitLod:
	case OpImageSampleExplicitLod:
	case OpImageSparseSampleImplicitLod:
	case OpImageSparseSampleExplicitLod:
	case OpImageSampleProjImplicitLod:
	case OpImageSampleProjExplicitLod:
	case OpImageSparseSampleProjImplicitLod:
	case OpImageSparseSampleProjExplicitLod:
	case OpImageFetch:
	case OpImageSparseFetch:
	case OpImageRead:
	case OpImageSparseRead:
		for (uint32_t i = 1; i < length; i++)
		{
			// Argument 4 is a literal.
			if (i != 4)
				notify_variable_access(args[i], current_block->self);
		}
		break;

	case OpImageSampleDrefImplicitLod:
	case OpImageSampleDrefExplicitLod:
	case OpImageSparseSampleDrefImplicitLod:
	case OpImageSparseSampleDrefExplicitLod:
	case OpImageSampleProjDrefImplicitLod:
	case OpImageSampleProjDrefExplicitLod:
	case OpImageSparseSampleProjDrefImplicitLod:
	case OpImageSparseSampleProjDrefExplicitLod:
	case OpImageGather:
	case OpImageSparseGather:
	case OpImageDrefGather:
	case OpImageSparseDrefGather:
		for (uint32_t i = 1; i < length; i++)
		{
			// Argument 5 is a literal.
			if (i != 5)
				notify_variable_access(args[i], current_block->self);
		}
		break;

	default:
	{
		// Rather dirty way of figuring out where Phi variables are used.
		// As long as only IDs are used, we can scan through instructions and try to find any evidence that
		// the ID of a variable has been used.
		// There are potential false positives here where a literal is used in-place of an ID,
		// but worst case, it does not affect the correctness of the compile.
		// Exhaustive analysis would be better here, but it's not worth it for now.
		for (uint32_t i = 0; i < length; i++)
			notify_variable_access(args[i], current_block->self);
		break;
	}
	}
	return true;
}
3276
// Handler which checks whether a single candidate variable is only ever written
// once (with a value that may serve as a static expression) and never read or
// access-chained before that write. Only stores references at construction.
Compiler::StaticExpressionAccessHandler::StaticExpressionAccessHandler(Compiler &compiler_, uint32_t variable_id_)
    : compiler(compiler_)
    , variable_id(variable_id_)
{
}
3282
bool Compiler::StaticExpressionAccessHandler::follow_function_call(const SPIRFunction &)
{
	// Analysis is confined to the block range it was started on;
	// do not descend into called functions.
	return false;
}
3287
handle(spv::Op op,const uint32_t * args,uint32_t length)3288 bool Compiler::StaticExpressionAccessHandler::handle(spv::Op op, const uint32_t *args, uint32_t length)
3289 {
3290 switch (op)
3291 {
3292 case OpStore:
3293 if (length < 2)
3294 return false;
3295 if (args[0] == variable_id)
3296 {
3297 static_expression = args[1];
3298 write_count++;
3299 }
3300 break;
3301
3302 case OpLoad:
3303 if (length < 3)
3304 return false;
3305 if (args[2] == variable_id && static_expression == 0) // Tried to read from variable before it was initialized.
3306 return false;
3307 break;
3308
3309 case OpAccessChain:
3310 case OpInBoundsAccessChain:
3311 case OpPtrAccessChain:
3312 if (length < 3)
3313 return false;
3314 if (args[2] == variable_id) // If we try to access chain our candidate variable before we store to it, bail.
3315 return false;
3316 break;
3317
3318 default:
3319 break;
3320 }
3321
3322 return true;
3323 }
3324
// Detects function-local (or, in single-function mode, Private-storage) array
// variables which are effectively constant look-up tables: either initialized
// with a constant and never written, or written exactly once with a constant in
// the dominating block. Such variables are marked statically assigned so
// backends can emit them as constant arrays.
void Compiler::find_function_local_luts(SPIRFunction &entry, const AnalyzeVariableScopeAccessHandler &handler,
                                        bool single_function)
{
	auto &cfg = *function_cfgs.find(entry.self)->second;

	// For each variable which is statically accessed.
	for (auto &accessed_var : handler.accessed_variables_to_block)
	{
		auto &blocks = accessed_var.second;
		auto &var = get<SPIRVariable>(accessed_var.first);
		auto &type = expression_type(accessed_var.first);

		// Only consider function local variables here.
		// If we only have a single function in our CFG, private storage is also fine,
		// since it behaves like a function local variable.
		bool allow_lut = var.storage == StorageClassFunction || (single_function && var.storage == StorageClassPrivate);
		if (!allow_lut)
			continue;

		// We cannot be a phi variable.
		if (var.phi_variable)
			continue;

		// Only consider arrays here.
		if (type.array.empty())
			continue;

		// If the variable has an initializer, make sure it is a constant expression.
		uint32_t static_constant_expression = 0;
		if (var.initializer)
		{
			if (ir.ids[var.initializer].get_type() != TypeConstant)
				continue;
			static_constant_expression = var.initializer;

			// There can be no stores to this variable, we have now proved we have a LUT.
			if (handler.complete_write_variables_to_block.count(var.self) != 0 ||
			    handler.partial_write_variables_to_block.count(var.self) != 0)
				continue;
		}
		else
		{
			// We can have one, and only one write to the variable, and that write needs to be a constant.

			// No partial writes allowed.
			if (handler.partial_write_variables_to_block.count(var.self) != 0)
				continue;

			auto itr = handler.complete_write_variables_to_block.find(var.self);

			// No writes?
			if (itr == end(handler.complete_write_variables_to_block))
				continue;

			// We write to the variable in more than one block.
			auto &write_blocks = itr->second;
			if (write_blocks.size() != 1)
				continue;

			// The write needs to happen in the dominating block.
			DominatorBuilder builder(cfg);
			for (auto &block : blocks)
				builder.add_block(block);
			uint32_t dominator = builder.get_dominator();

			// The complete write happened in a branch or similar, cannot deduce static expression.
			if (write_blocks.count(dominator) == 0)
				continue;

			// Find the static expression for this variable.
			StaticExpressionAccessHandler static_expression_handler(*this, var.self);
			traverse_all_reachable_opcodes(get<SPIRBlock>(dominator), static_expression_handler);

			// We want one, and exactly one write
			if (static_expression_handler.write_count != 1 || static_expression_handler.static_expression == 0)
				continue;

			// Is it a constant expression?
			if (ir.ids[static_expression_handler.static_expression].get_type() != TypeConstant)
				continue;

			// We found a LUT!
			static_constant_expression = static_expression_handler.static_expression;
		}

		// Flag the constant and statically remap the variable so it can be emitted as a LUT.
		get<SPIRConstant>(static_constant_expression).is_used_as_lut = true;
		var.static_expression = static_constant_expression;
		var.statically_assigned = true;
		var.remapped_variable = true;
	}
}
3416
analyze_variable_scope(SPIRFunction & entry,AnalyzeVariableScopeAccessHandler & handler)3417 void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeAccessHandler &handler)
3418 {
3419 // First, we map out all variable access within a function.
3420 // Essentially a map of block -> { variables accessed in the basic block }
3421 traverse_all_reachable_opcodes(entry, handler);
3422
3423 auto &cfg = *function_cfgs.find(entry.self)->second;
3424
3425 // Analyze if there are parameters which need to be implicitly preserved with an "in" qualifier.
3426 analyze_parameter_preservation(entry, cfg, handler.accessed_variables_to_block,
3427 handler.complete_write_variables_to_block);
3428
3429 unordered_map<uint32_t, uint32_t> potential_loop_variables;
3430
3431 // Find the loop dominator block for each block.
3432 for (auto &block_id : entry.blocks)
3433 {
3434 auto &block = get<SPIRBlock>(block_id);
3435
3436 auto itr = ir.continue_block_to_loop_header.find(block_id);
3437 if (itr != end(ir.continue_block_to_loop_header) && itr->second != block_id)
3438 {
3439 // Continue block might be unreachable in the CFG, but we still like to know the loop dominator.
3440 // Edge case is when continue block is also the loop header, don't set the dominator in this case.
3441 block.loop_dominator = itr->second;
3442 }
3443 else
3444 {
3445 uint32_t loop_dominator = cfg.find_loop_dominator(block_id);
3446 if (loop_dominator != block_id)
3447 block.loop_dominator = loop_dominator;
3448 else
3449 block.loop_dominator = SPIRBlock::NoDominator;
3450 }
3451 }
3452
3453 // For each variable which is statically accessed.
3454 for (auto &var : handler.accessed_variables_to_block)
3455 {
3456 // Only deal with variables which are considered local variables in this function.
3457 if (find(begin(entry.local_variables), end(entry.local_variables), VariableID(var.first)) ==
3458 end(entry.local_variables))
3459 continue;
3460
3461 DominatorBuilder builder(cfg);
3462 auto &blocks = var.second;
3463 auto &type = expression_type(var.first);
3464
3465 // Figure out which block is dominating all accesses of those variables.
3466 for (auto &block : blocks)
3467 {
3468 // If we're accessing a variable inside a continue block, this variable might be a loop variable.
3469 // We can only use loop variables with scalars, as we cannot track static expressions for vectors.
3470 if (is_continue(block))
3471 {
3472 // Potentially awkward case to check for.
3473 // We might have a variable inside a loop, which is touched by the continue block,
3474 // but is not actually a loop variable.
3475 // The continue block is dominated by the inner part of the loop, which does not make sense in high-level
3476 // language output because it will be declared before the body,
3477 // so we will have to lift the dominator up to the relevant loop header instead.
3478 builder.add_block(ir.continue_block_to_loop_header[block]);
3479
3480 // Arrays or structs cannot be loop variables.
3481 if (type.vecsize == 1 && type.columns == 1 && type.basetype != SPIRType::Struct && type.array.empty())
3482 {
3483 // The variable is used in multiple continue blocks, this is not a loop
3484 // candidate, signal that by setting block to -1u.
3485 auto &potential = potential_loop_variables[var.first];
3486
3487 if (potential == 0)
3488 potential = block;
3489 else
3490 potential = ~(0u);
3491 }
3492 }
3493 builder.add_block(block);
3494 }
3495
3496 builder.lift_continue_block_dominator();
3497
3498 // Add it to a per-block list of variables.
3499 BlockID dominating_block = builder.get_dominator();
3500
3501 // For variables whose dominating block is inside a loop, there is a risk that these variables
3502 // actually need to be preserved across loop iterations. We can express this by adding
3503 // a "read" access to the loop header.
3504 // In the dominating block, we must see an OpStore or equivalent as the first access of an OpVariable.
3505 // Should that fail, we look for the outermost loop header and tack on an access there.
3506 // Phi nodes cannot have this problem.
3507 if (dominating_block)
3508 {
3509 auto &variable = get<SPIRVariable>(var.first);
3510 if (!variable.phi_variable)
3511 {
3512 auto *block = &get<SPIRBlock>(dominating_block);
3513 bool preserve = may_read_undefined_variable_in_block(*block, var.first);
3514 if (preserve)
3515 {
3516 // Find the outermost loop scope.
3517 while (block->loop_dominator != BlockID(SPIRBlock::NoDominator))
3518 block = &get<SPIRBlock>(block->loop_dominator);
3519
3520 if (block->self != dominating_block)
3521 {
3522 builder.add_block(block->self);
3523 dominating_block = builder.get_dominator();
3524 }
3525 }
3526 }
3527 }
3528
3529 // If all blocks here are dead code, this will be 0, so the variable in question
3530 // will be completely eliminated.
3531 if (dominating_block)
3532 {
3533 auto &block = get<SPIRBlock>(dominating_block);
3534 block.dominated_variables.push_back(var.first);
3535 get<SPIRVariable>(var.first).dominator = dominating_block;
3536 }
3537 }
3538
3539 for (auto &var : handler.accessed_temporaries_to_block)
3540 {
3541 auto itr = handler.result_id_to_type.find(var.first);
3542
3543 if (itr == end(handler.result_id_to_type))
3544 {
3545 // We found a false positive ID being used, ignore.
3546 // This should probably be an assert.
3547 continue;
3548 }
3549
3550 // There is no point in doing domination analysis for opaque types.
3551 auto &type = get<SPIRType>(itr->second);
3552 if (type_is_opaque_value(type))
3553 continue;
3554
3555 DominatorBuilder builder(cfg);
3556 bool force_temporary = false;
3557 bool used_in_header_hoisted_continue_block = false;
3558
3559 // Figure out which block is dominating all accesses of those temporaries.
3560 auto &blocks = var.second;
3561 for (auto &block : blocks)
3562 {
3563 builder.add_block(block);
3564
3565 if (blocks.size() != 1 && is_continue(block))
3566 {
3567 // The risk here is that inner loop can dominate the continue block.
3568 // Any temporary we access in the continue block must be declared before the loop.
3569 // This is moot for complex loops however.
3570 auto &loop_header_block = get<SPIRBlock>(ir.continue_block_to_loop_header[block]);
3571 assert(loop_header_block.merge == SPIRBlock::MergeLoop);
3572 builder.add_block(loop_header_block.self);
3573 used_in_header_hoisted_continue_block = true;
3574 }
3575 }
3576
3577 uint32_t dominating_block = builder.get_dominator();
3578
3579 if (blocks.size() != 1 && is_single_block_loop(dominating_block))
3580 {
3581 // Awkward case, because the loop header is also the continue block,
3582 // so hoisting to loop header does not help.
3583 force_temporary = true;
3584 }
3585
3586 if (dominating_block)
3587 {
3588 // If we touch a variable in the dominating block, this is the expected setup.
3589 // SPIR-V normally mandates this, but we have extra cases for temporary use inside loops.
3590 bool first_use_is_dominator = blocks.count(dominating_block) != 0;
3591
3592 if (!first_use_is_dominator || force_temporary)
3593 {
3594 if (handler.access_chain_expressions.count(var.first))
3595 {
3596 // Exceptionally rare case.
3597 // We cannot declare temporaries of access chains (except on MSL perhaps with pointers).
3598 // Rather than do that, we force the indexing expressions to be declared in the right scope by
3599 // tracking their usage to that end. There is no temporary to hoist.
3600 // However, we still need to observe declaration order of the access chain.
3601
3602 if (used_in_header_hoisted_continue_block)
3603 {
3604 // For this scenario, we used an access chain inside a continue block where we also registered an access to header block.
3605 // This is a problem as we need to declare an access chain properly first with full definition.
3606 // We cannot use temporaries for these expressions,
3607 // so we must make sure the access chain is declared ahead of time.
3608 // Force a complex for loop to deal with this.
3609 // TODO: Out-of-order declaring for loops where continue blocks are emitted last might be another option.
3610 auto &loop_header_block = get<SPIRBlock>(dominating_block);
3611 assert(loop_header_block.merge == SPIRBlock::MergeLoop);
3612 loop_header_block.complex_continue = true;
3613 }
3614 }
3615 else
3616 {
3617 // This should be very rare, but if we try to declare a temporary inside a loop,
3618 // and that temporary is used outside the loop as well (spirv-opt inliner likes this)
3619 // we should actually emit the temporary outside the loop.
3620 hoisted_temporaries.insert(var.first);
3621 forced_temporaries.insert(var.first);
3622
3623 auto &block_temporaries = get<SPIRBlock>(dominating_block).declare_temporary;
3624 block_temporaries.emplace_back(handler.result_id_to_type[var.first], var.first);
3625 }
3626 }
3627 else if (blocks.size() > 1)
3628 {
3629 // Keep track of the temporary as we might have to declare this temporary.
3630 // This can happen if the loop header dominates a temporary, but we have a complex fallback loop.
3631 // In this case, the header is actually inside the for (;;) {} block, and we have problems.
3632 // What we need to do is hoist the temporaries outside the for (;;) {} block in case the header block
3633 // declares the temporary.
3634 auto &block_temporaries = get<SPIRBlock>(dominating_block).potential_declare_temporary;
3635 block_temporaries.emplace_back(handler.result_id_to_type[var.first], var.first);
3636 }
3637 }
3638 }
3639
3640 unordered_set<uint32_t> seen_blocks;
3641
3642 // Now, try to analyze whether or not these variables are actually loop variables.
3643 for (auto &loop_variable : potential_loop_variables)
3644 {
3645 auto &var = get<SPIRVariable>(loop_variable.first);
3646 auto dominator = var.dominator;
3647 BlockID block = loop_variable.second;
3648
3649 // The variable was accessed in multiple continue blocks, ignore.
3650 if (block == BlockID(~(0u)) || block == BlockID(0))
3651 continue;
3652
3653 // Dead code.
3654 if (dominator == ID(0))
3655 continue;
3656
3657 BlockID header = 0;
3658
3659 // Find the loop header for this block if we are a continue block.
3660 {
3661 auto itr = ir.continue_block_to_loop_header.find(block);
3662 if (itr != end(ir.continue_block_to_loop_header))
3663 {
3664 header = itr->second;
3665 }
3666 else if (get<SPIRBlock>(block).continue_block == block)
3667 {
3668 // Also check for self-referential continue block.
3669 header = block;
3670 }
3671 }
3672
3673 assert(header);
3674 auto &header_block = get<SPIRBlock>(header);
3675 auto &blocks = handler.accessed_variables_to_block[loop_variable.first];
3676
3677 // If a loop variable is not used before the loop, it's probably not a loop variable.
3678 bool has_accessed_variable = blocks.count(header) != 0;
3679
3680 // Now, there are two conditions we need to meet for the variable to be a loop variable.
3681 // 1. The dominating block must have a branch-free path to the loop header,
3682 // this way we statically know which expression should be part of the loop variable initializer.
3683
3684 // Walk from the dominator, if there is one straight edge connecting
3685 // dominator and loop header, we statically know the loop initializer.
3686 bool static_loop_init = true;
3687 while (dominator != header)
3688 {
3689 if (blocks.count(dominator) != 0)
3690 has_accessed_variable = true;
3691
3692 auto &succ = cfg.get_succeeding_edges(dominator);
3693 if (succ.size() != 1)
3694 {
3695 static_loop_init = false;
3696 break;
3697 }
3698
3699 auto &pred = cfg.get_preceding_edges(succ.front());
3700 if (pred.size() != 1 || pred.front() != dominator)
3701 {
3702 static_loop_init = false;
3703 break;
3704 }
3705
3706 dominator = succ.front();
3707 }
3708
3709 if (!static_loop_init || !has_accessed_variable)
3710 continue;
3711
3712 // The second condition we need to meet is that no access after the loop
3713 // merge can occur. Walk the CFG to see if we find anything.
3714
3715 seen_blocks.clear();
3716 cfg.walk_from(seen_blocks, header_block.merge_block, [&](uint32_t walk_block) -> bool {
3717 // We found a block which accesses the variable outside the loop.
3718 if (blocks.find(walk_block) != end(blocks))
3719 static_loop_init = false;
3720 return true;
3721 });
3722
3723 if (!static_loop_init)
3724 continue;
3725
3726 // We have a loop variable.
3727 header_block.loop_variables.push_back(loop_variable.first);
3728 // Need to sort here as variables come from an unordered container, and pushing stuff in wrong order
3729 // will break reproducability in regression runs.
3730 sort(begin(header_block.loop_variables), end(header_block.loop_variables));
3731 get<SPIRVariable>(loop_variable.first).loop_variable = true;
3732 }
3733 }
3734
// Conservatively determines whether reading 'var' within 'block' may observe an
// undefined (not-yet-written) value. Returns false only when the block provably
// performs a full write to the variable before any possible read; in every
// ambiguous case it returns true so callers preserve the variable's contents.
bool Compiler::may_read_undefined_variable_in_block(const SPIRBlock &block, uint32_t var)
{
	for (auto &op : block.ops)
	{
		auto *ops = stream(op);
		switch (op.op)
		{
		case OpStore:
		case OpCopyMemory:
			// A full write to the variable. Any read after this point sees defined data,
			// so we can stop scanning and report "no undefined read".
			if (ops[0] == var)
				return false;
			break;

		case OpAccessChain:
		case OpInBoundsAccessChain:
		case OpPtrAccessChain:
			// Access chains are generally used to partially read and write. It's too hard to analyze
			// if all constituents are written fully before continuing, so just assume it's preserved.
			// This is the same as the parameter preservation analysis.
			if (ops[2] == var)
				return true;
			break;

		case OpSelect:
			// Variable pointers.
			// We might read before writing.
			if (ops[3] == var || ops[4] == var)
				return true;
			break;

		case OpPhi:
		{
			// Variable pointers.
			// We might read before writing.
			if (op.length < 2)
				break;

			// OpPhi operands after <result-type> <result-id> come in (value, parent-block) pairs;
			// only the value operands (even offsets) can reference the variable.
			uint32_t count = op.length - 2;
			for (uint32_t i = 0; i < count; i += 2)
				if (ops[i + 2] == var)
					return true;
			break;
		}

		case OpCopyObject:
		case OpLoad:
			// Direct read of the variable before any full write was seen.
			if (ops[2] == var)
				return true;
			break;

		case OpFunctionCall:
		{
			if (op.length < 3)
				break;

			// May read before writing.
			// Call arguments start at operand offset 3 (<result-type> <result-id> <callee>).
			uint32_t count = op.length - 3;
			for (uint32_t i = 0; i < count; i++)
				if (ops[i + 3] == var)
					return true;
			break;
		}

		default:
			break;
		}
	}

	// Not accessed somehow, at least not in a usual fashion.
	// It's likely accessed in a branch, so assume we must preserve.
	return true;
}
3807
get_buffer_block_flags(VariableID id) const3808 Bitset Compiler::get_buffer_block_flags(VariableID id) const
3809 {
3810 return ir.get_buffer_block_flags(get<SPIRVariable>(id));
3811 }
3812
get_common_basic_type(const SPIRType & type,SPIRType::BaseType & base_type)3813 bool Compiler::get_common_basic_type(const SPIRType &type, SPIRType::BaseType &base_type)
3814 {
3815 if (type.basetype == SPIRType::Struct)
3816 {
3817 base_type = SPIRType::Unknown;
3818 for (auto &member_type : type.member_types)
3819 {
3820 SPIRType::BaseType member_base;
3821 if (!get_common_basic_type(get<SPIRType>(member_type), member_base))
3822 return false;
3823
3824 if (base_type == SPIRType::Unknown)
3825 base_type = member_base;
3826 else if (base_type != member_base)
3827 return false;
3828 }
3829 return true;
3830 }
3831 else
3832 {
3833 base_type = type.basetype;
3834 return true;
3835 }
3836 }
3837
handle_builtin(const SPIRType & type,BuiltIn builtin,const Bitset & decoration_flags)3838 void Compiler::ActiveBuiltinHandler::handle_builtin(const SPIRType &type, BuiltIn builtin,
3839 const Bitset &decoration_flags)
3840 {
3841 // If used, we will need to explicitly declare a new array size for these builtins.
3842
3843 if (builtin == BuiltInClipDistance)
3844 {
3845 if (!type.array_size_literal[0])
3846 SPIRV_CROSS_THROW("Array size for ClipDistance must be a literal.");
3847 uint32_t array_size = type.array[0];
3848 if (array_size == 0)
3849 SPIRV_CROSS_THROW("Array size for ClipDistance must not be unsized.");
3850 compiler.clip_distance_count = array_size;
3851 }
3852 else if (builtin == BuiltInCullDistance)
3853 {
3854 if (!type.array_size_literal[0])
3855 SPIRV_CROSS_THROW("Array size for CullDistance must be a literal.");
3856 uint32_t array_size = type.array[0];
3857 if (array_size == 0)
3858 SPIRV_CROSS_THROW("Array size for CullDistance must not be unsized.");
3859 compiler.cull_distance_count = array_size;
3860 }
3861 else if (builtin == BuiltInPosition)
3862 {
3863 if (decoration_flags.get(DecorationInvariant))
3864 compiler.position_invariant = true;
3865 }
3866 }
3867
add_if_builtin(uint32_t id,bool allow_blocks)3868 void Compiler::ActiveBuiltinHandler::add_if_builtin(uint32_t id, bool allow_blocks)
3869 {
3870 // Only handle plain variables here.
3871 // Builtins which are part of a block are handled in AccessChain.
3872 // If allow_blocks is used however, this is to handle initializers of blocks,
3873 // which implies that all members are written to.
3874
3875 auto *var = compiler.maybe_get<SPIRVariable>(id);
3876 auto *m = compiler.ir.find_meta(id);
3877 if (var && m)
3878 {
3879 auto &type = compiler.get<SPIRType>(var->basetype);
3880 auto &decorations = m->decoration;
3881 auto &flags = type.storage == StorageClassInput ?
3882 compiler.active_input_builtins : compiler.active_output_builtins;
3883 if (decorations.builtin)
3884 {
3885 flags.set(decorations.builtin_type);
3886 handle_builtin(type, decorations.builtin_type, decorations.decoration_flags);
3887 }
3888 else if (allow_blocks && compiler.has_decoration(type.self, DecorationBlock))
3889 {
3890 uint32_t member_count = uint32_t(type.member_types.size());
3891 for (uint32_t i = 0; i < member_count; i++)
3892 {
3893 if (compiler.has_member_decoration(type.self, i, DecorationBuiltIn))
3894 {
3895 auto &member_type = compiler.get<SPIRType>(type.member_types[i]);
3896 BuiltIn builtin = BuiltIn(compiler.get_member_decoration(type.self, i, DecorationBuiltIn));
3897 flags.set(builtin);
3898 handle_builtin(member_type, builtin, compiler.get_member_decoration_bitset(type.self, i));
3899 }
3900 }
3901 }
3902 }
3903 }
3904
// Convenience overload: consider plain variables only, ignore block initializers.
void Compiler::ActiveBuiltinHandler::add_if_builtin(uint32_t id)
{
	add_if_builtin(id, false);
}
3909
// Convenience overload: also treat block variables as fully written
// (used for initialized-but-never-accessed output blocks).
void Compiler::ActiveBuiltinHandler::add_if_builtin_or_block(uint32_t id)
{
	add_if_builtin(id, true);
}
3914
// Visits one instruction and records any builtin variables it references.
// The per-opcode offsets below follow the SPIR-V operand layout for each
// instruction; 'length' guards protect against truncated instruction streams.
bool Compiler::ActiveBuiltinHandler::handle(spv::Op opcode, const uint32_t *args, uint32_t length)
{
	switch (opcode)
	{
	case OpStore:
		if (length < 1)
			return false;

		// args[0] is the pointer being written.
		add_if_builtin(args[0]);
		break;

	case OpCopyMemory:
		if (length < 2)
			return false;

		// Both target (args[0]) and source (args[1]) pointers may be builtins.
		add_if_builtin(args[0]);
		add_if_builtin(args[1]);
		break;

	case OpCopyObject:
	case OpLoad:
		if (length < 3)
			return false;

		// args[2] is the pointer/object operand after <result-type> <result-id>.
		add_if_builtin(args[2]);
		break;

	case OpSelect:
		if (length < 5)
			return false;

		// Variable pointers: either selected operand may reference a builtin.
		add_if_builtin(args[3]);
		add_if_builtin(args[4]);
		break;

	case OpPhi:
	{
		if (length < 2)
			return false;

		// OpPhi operands come in (value, parent-block) pairs; only the values matter.
		uint32_t count = length - 2;
		args += 2;
		for (uint32_t i = 0; i < count; i += 2)
			add_if_builtin(args[i]);
		break;
	}

	case OpFunctionCall:
	{
		if (length < 3)
			return false;

		// Call arguments start after <result-type> <result-id> <callee>.
		uint32_t count = length - 3;
		args += 3;
		for (uint32_t i = 0; i < count; i++)
			add_if_builtin(args[i]);
		break;
	}

	case OpAccessChain:
	case OpInBoundsAccessChain:
	case OpPtrAccessChain:
	{
		if (length < 4)
			return false;

		// Only consider global variables, cannot consider variables in functions yet, or other
		// access chains as they have not been created yet.
		auto *var = compiler.maybe_get<SPIRVariable>(args[2]);
		if (!var)
			break;

		// Required if we access chain into builtins like gl_GlobalInvocationID.
		add_if_builtin(args[2]);

		// Start traversing type hierarchy at the proper non-pointer types.
		auto *type = &compiler.get_variable_data_type(*var);

		auto &flags =
		    var->storage == StorageClassInput ? compiler.active_input_builtins : compiler.active_output_builtins;

		// Walk the chain indices, descending the type hierarchy and flagging any
		// builtin struct members we pass through.
		uint32_t count = length - 3;
		args += 3;
		for (uint32_t i = 0; i < count; i++)
		{
			// Pointers: the first index of OpPtrAccessChain dereferences the pointer itself.
			if (opcode == OpPtrAccessChain && i == 0)
			{
				type = &compiler.get<SPIRType>(type->parent_type);
				continue;
			}

			// Arrays: step into the element type, index value is irrelevant here.
			if (!type->array.empty())
			{
				type = &compiler.get<SPIRType>(type->parent_type);
			}
			// Structs: the index must be a constant; check the member's decorations.
			else if (type->basetype == SPIRType::Struct)
			{
				uint32_t index = compiler.get<SPIRConstant>(args[i]).scalar();

				if (index < uint32_t(compiler.ir.meta[type->self].members.size()))
				{
					auto &decorations = compiler.ir.meta[type->self].members[index];
					if (decorations.builtin)
					{
						flags.set(decorations.builtin_type);
						handle_builtin(compiler.get<SPIRType>(type->member_types[index]), decorations.builtin_type,
						               decorations.decoration_flags);
					}
				}

				type = &compiler.get<SPIRType>(type->member_types[index]);
			}
			else
			{
				// No point in traversing further. We won't find any extra builtins.
				break;
			}
		}
		break;
	}

	default:
		break;
	}

	return true;
}
4045
update_active_builtins()4046 void Compiler::update_active_builtins()
4047 {
4048 active_input_builtins.reset();
4049 active_output_builtins.reset();
4050 cull_distance_count = 0;
4051 clip_distance_count = 0;
4052 ActiveBuiltinHandler handler(*this);
4053 traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);
4054
4055 ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
4056 if (var.storage != StorageClassOutput)
4057 return;
4058 if (!interface_variable_exists_in_entry_point(var.self))
4059 return;
4060
4061 // Also, make sure we preserve output variables which are only initialized, but never accessed by any code.
4062 if (var.initializer != ID(0))
4063 handler.add_if_builtin_or_block(var.self);
4064 });
4065 }
4066
4067 // Returns whether this shader uses a builtin of the storage class
has_active_builtin(BuiltIn builtin,StorageClass storage)4068 bool Compiler::has_active_builtin(BuiltIn builtin, StorageClass storage)
4069 {
4070 const Bitset *flags;
4071 switch (storage)
4072 {
4073 case StorageClassInput:
4074 flags = &active_input_builtins;
4075 break;
4076 case StorageClassOutput:
4077 flags = &active_output_builtins;
4078 break;
4079
4080 default:
4081 return false;
4082 }
4083 return flags->get(builtin);
4084 }
4085
analyze_image_and_sampler_usage()4086 void Compiler::analyze_image_and_sampler_usage()
4087 {
4088 CombinedImageSamplerDrefHandler dref_handler(*this);
4089 traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), dref_handler);
4090
4091 CombinedImageSamplerUsageHandler handler(*this, dref_handler.dref_combined_samplers);
4092 traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);
4093
4094 // Need to run this traversal twice. First time, we propagate any comparison sampler usage from leaf functions
4095 // down to main().
4096 // In the second pass, we can propagate up forced depth state coming from main() up into leaf functions.
4097 handler.dependency_hierarchy.clear();
4098 traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);
4099
4100 comparison_ids = move(handler.comparison_ids);
4101 need_subpass_input = handler.need_subpass_input;
4102
4103 // Forward information from separate images and samplers into combined image samplers.
4104 for (auto &combined : combined_image_samplers)
4105 if (comparison_ids.count(combined.sampler_id))
4106 comparison_ids.insert(combined.combined_id);
4107 }
4108
handle(spv::Op opcode,const uint32_t * args,uint32_t)4109 bool Compiler::CombinedImageSamplerDrefHandler::handle(spv::Op opcode, const uint32_t *args, uint32_t)
4110 {
4111 // Mark all sampled images which are used with Dref.
4112 switch (opcode)
4113 {
4114 case OpImageSampleDrefExplicitLod:
4115 case OpImageSampleDrefImplicitLod:
4116 case OpImageSampleProjDrefExplicitLod:
4117 case OpImageSampleProjDrefImplicitLod:
4118 case OpImageSparseSampleProjDrefImplicitLod:
4119 case OpImageSparseSampleDrefImplicitLod:
4120 case OpImageSparseSampleProjDrefExplicitLod:
4121 case OpImageSparseSampleDrefExplicitLod:
4122 case OpImageDrefGather:
4123 case OpImageSparseDrefGather:
4124 dref_combined_samplers.insert(args[2]);
4125 return true;
4126
4127 default:
4128 break;
4129 }
4130
4131 return true;
4132 }
4133
get_cfg_for_current_function() const4134 const CFG &Compiler::get_cfg_for_current_function() const
4135 {
4136 assert(current_function);
4137 return get_cfg_for_function(current_function->self);
4138 }
4139
get_cfg_for_function(uint32_t id) const4140 const CFG &Compiler::get_cfg_for_function(uint32_t id) const
4141 {
4142 auto cfg_itr = function_cfgs.find(id);
4143 assert(cfg_itr != end(function_cfgs));
4144 assert(cfg_itr->second);
4145 return *cfg_itr->second;
4146 }
4147
build_function_control_flow_graphs_and_analyze()4148 void Compiler::build_function_control_flow_graphs_and_analyze()
4149 {
4150 CFGBuilder handler(*this);
4151 handler.function_cfgs[ir.default_entry_point].reset(new CFG(*this, get<SPIRFunction>(ir.default_entry_point)));
4152 traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);
4153 function_cfgs = move(handler.function_cfgs);
4154 bool single_function = function_cfgs.size() <= 1;
4155
4156 for (auto &f : function_cfgs)
4157 {
4158 auto &func = get<SPIRFunction>(f.first);
4159 AnalyzeVariableScopeAccessHandler scope_handler(*this, func);
4160 analyze_variable_scope(func, scope_handler);
4161 find_function_local_luts(func, scope_handler, single_function);
4162
4163 // Check if we can actually use the loop variables we found in analyze_variable_scope.
4164 // To use multiple initializers, we need the same type and qualifiers.
4165 for (auto block : func.blocks)
4166 {
4167 auto &b = get<SPIRBlock>(block);
4168 if (b.loop_variables.size() < 2)
4169 continue;
4170
4171 auto &flags = get_decoration_bitset(b.loop_variables.front());
4172 uint32_t type = get<SPIRVariable>(b.loop_variables.front()).basetype;
4173 bool invalid_initializers = false;
4174 for (auto loop_variable : b.loop_variables)
4175 {
4176 if (flags != get_decoration_bitset(loop_variable) ||
4177 type != get<SPIRVariable>(b.loop_variables.front()).basetype)
4178 {
4179 invalid_initializers = true;
4180 break;
4181 }
4182 }
4183
4184 if (invalid_initializers)
4185 {
4186 for (auto loop_variable : b.loop_variables)
4187 get<SPIRVariable>(loop_variable).loop_variable = false;
4188 b.loop_variables.clear();
4189 }
4190 }
4191 }
4192 }
4193
// Opcode handler which lazily builds a CFG per reachable function.
Compiler::CFGBuilder::CFGBuilder(Compiler &compiler_)
    : compiler(compiler_)
{
}
4198
// Individual instructions are irrelevant for CFG building; just keep traversing.
bool Compiler::CFGBuilder::handle(spv::Op, const uint32_t *, uint32_t)
{
	return true;
}
4203
follow_function_call(const SPIRFunction & func)4204 bool Compiler::CFGBuilder::follow_function_call(const SPIRFunction &func)
4205 {
4206 if (function_cfgs.find(func.self) == end(function_cfgs))
4207 {
4208 function_cfgs[func.self].reset(new CFG(compiler, func));
4209 return true;
4210 }
4211 else
4212 return false;
4213 }
4214
add_dependency(uint32_t dst,uint32_t src)4215 void Compiler::CombinedImageSamplerUsageHandler::add_dependency(uint32_t dst, uint32_t src)
4216 {
4217 dependency_hierarchy[dst].insert(src);
4218 // Propagate up any comparison state if we're loading from one such variable.
4219 if (comparison_ids.count(src))
4220 comparison_ids.insert(dst);
4221 }
4222
begin_function_scope(const uint32_t * args,uint32_t length)4223 bool Compiler::CombinedImageSamplerUsageHandler::begin_function_scope(const uint32_t *args, uint32_t length)
4224 {
4225 if (length < 3)
4226 return false;
4227
4228 auto &func = compiler.get<SPIRFunction>(args[2]);
4229 const auto *arg = &args[3];
4230 length -= 3;
4231
4232 for (uint32_t i = 0; i < length; i++)
4233 {
4234 auto &argument = func.arguments[i];
4235 add_dependency(argument.id, arg[i]);
4236 }
4237
4238 return true;
4239 }
4240
// Tags 'id' and everything it transitively depends on as comparison resources.
void Compiler::CombinedImageSamplerUsageHandler::add_hierarchy_to_comparison_ids(uint32_t id)
{
	// Traverse the variable dependency hierarchy and tag everything in its path with comparison ids.
	// NOTE(review): recursion assumes the dependency graph is acyclic, which holds for
	// SSA-formed SPIR-V dependencies — confirm if dependency sources ever change.
	comparison_ids.insert(id);

	for (auto &dep_id : dependency_hierarchy[id])
		add_hierarchy_to_comparison_ids(dep_id);
}
4249
// Tracks how images/samplers flow through loads, access chains and OpSampledImage,
// propagating depth-comparison state and detecting subpass-input usage.
bool Compiler::CombinedImageSamplerUsageHandler::handle(Op opcode, const uint32_t *args, uint32_t length)
{
	switch (opcode)
	{
	case OpAccessChain:
	case OpInBoundsAccessChain:
	case OpPtrAccessChain:
	case OpLoad:
	{
		if (length < 3)
			return false;

		// Result (args[1]) derives from the base pointer/object (args[2]).
		add_dependency(args[1], args[2]);

		// Ideally defer this to OpImageRead, but then we'd need to track loaded IDs.
		// If we load an image, we're going to use it and there is little harm in declaring an unused gl_FragCoord.
		auto &type = compiler.get<SPIRType>(args[0]);
		if (type.image.dim == DimSubpassData)
			need_subpass_input = true;

		// If we load a SampledImage and it will be used with Dref, propagate the state up.
		if (dref_combined_samplers.count(args[1]) != 0)
			add_hierarchy_to_comparison_ids(args[1]);
		break;
	}

	case OpSampledImage:
	{
		if (length < 4)
			return false;

		uint32_t result_type = args[0];
		uint32_t result_id = args[1];
		auto &type = compiler.get<SPIRType>(result_type);

		// If the underlying resource has been used for comparison then duplicate loads of that resource must be too.
		// This image must be a depth image.
		uint32_t image = args[2];
		uint32_t sampler = args[3];

		if (type.image.depth || dref_combined_samplers.count(result_id) != 0)
		{
			add_hierarchy_to_comparison_ids(image);

			// This sampler must be a SamplerComparisonState, and not a regular SamplerState.
			add_hierarchy_to_comparison_ids(sampler);

			// Mark the OpSampledImage itself as being comparison state.
			comparison_ids.insert(result_id);
		}
		return true;
	}

	default:
		break;
	}

	return true;
}
4309
buffer_is_hlsl_counter_buffer(VariableID id) const4310 bool Compiler::buffer_is_hlsl_counter_buffer(VariableID id) const
4311 {
4312 auto *m = ir.find_meta(id);
4313 return m && m->hlsl_is_magic_counter_buffer;
4314 }
4315
buffer_get_hlsl_counter_buffer(VariableID id,uint32_t & counter_id) const4316 bool Compiler::buffer_get_hlsl_counter_buffer(VariableID id, uint32_t &counter_id) const
4317 {
4318 auto *m = ir.find_meta(id);
4319
4320 // First, check for the proper decoration.
4321 if (m && m->hlsl_magic_counter_buffer != 0)
4322 {
4323 counter_id = m->hlsl_magic_counter_buffer;
4324 return true;
4325 }
4326 else
4327 return false;
4328 }
4329
// Synthesizes a null constant of the given type under ID 'id', recursively
// allocating fresh IDs for array-element and struct-member sub-constants.
void Compiler::make_constant_null(uint32_t id, uint32_t type)
{
	auto &constant_type = get<SPIRType>(type);

	if (constant_type.pointer)
	{
		// Pointer types null-initialize directly; no sub-constants needed.
		auto &constant = set<SPIRConstant>(id, type);
		constant.make_null(constant_type);
	}
	else if (!constant_type.array.empty())
	{
		// Arrays: build one null constant for the element type and replicate it
		// for every element. All elements can safely share the same constant ID.
		assert(constant_type.parent_type);
		uint32_t parent_id = ir.increase_bound_by(1);
		make_constant_null(parent_id, constant_type.parent_type);

		if (!constant_type.array_size_literal.back())
			SPIRV_CROSS_THROW("Array size of OpConstantNull must be a literal.");

		SmallVector<uint32_t> elements(constant_type.array.back());
		for (uint32_t i = 0; i < constant_type.array.back(); i++)
			elements[i] = parent_id;
		set<SPIRConstant>(id, type, elements.data(), uint32_t(elements.size()), false);
	}
	else if (!constant_type.member_types.empty())
	{
		// Structs: allocate one fresh ID per member, then build each member's
		// null constant recursively before assembling the composite.
		uint32_t member_ids = ir.increase_bound_by(uint32_t(constant_type.member_types.size()));
		SmallVector<uint32_t> elements(constant_type.member_types.size());
		for (uint32_t i = 0; i < constant_type.member_types.size(); i++)
		{
			make_constant_null(member_ids + i, constant_type.member_types[i]);
			elements[i] = member_ids + i;
		}
		set<SPIRConstant>(id, type, elements.data(), uint32_t(elements.size()), false);
	}
	else
	{
		// Scalar/vector/matrix: plain null constant.
		auto &constant = set<SPIRConstant>(id, type);
		constant.make_null(constant_type);
	}
}
4370
// Returns all OpCapability declarations recorded while parsing the module.
const SmallVector<spv::Capability> &Compiler::get_declared_capabilities() const
{
	return ir.declared_capabilities;
}
4375
// Returns all OpExtension declarations recorded while parsing the module.
const SmallVector<std::string> &Compiler::get_declared_extensions() const
{
	return ir.declared_extensions;
}
4380
// Public overload: never prefer the instance name as the fallback.
std::string Compiler::get_remapped_declared_block_name(VariableID id) const
{
	return get_remapped_declared_block_name(id, false);
}
4385
get_remapped_declared_block_name(uint32_t id,bool fallback_prefer_instance_name) const4386 std::string Compiler::get_remapped_declared_block_name(uint32_t id, bool fallback_prefer_instance_name) const
4387 {
4388 auto itr = declared_block_names.find(id);
4389 if (itr != end(declared_block_names))
4390 {
4391 return itr->second;
4392 }
4393 else
4394 {
4395 auto &var = get<SPIRVariable>(id);
4396
4397 if (fallback_prefer_instance_name)
4398 {
4399 return to_name(var.self);
4400 }
4401 else
4402 {
4403 auto &type = get<SPIRType>(var.basetype);
4404 auto *type_meta = ir.find_meta(type.self);
4405 auto *block_name = type_meta ? &type_meta->decoration.alias : nullptr;
4406 return (!block_name || block_name->empty()) ? get_block_fallback_name(id) : *block_name;
4407 }
4408 }
4409 }
4410
reflection_ssbo_instance_name_is_significant() const4411 bool Compiler::reflection_ssbo_instance_name_is_significant() const
4412 {
4413 if (ir.source.known)
4414 {
4415 // UAVs from HLSL source tend to be declared in a way where the type is reused
4416 // but the instance name is significant, and that's the name we should report.
4417 // For GLSL, SSBOs each have their own block type as that's how GLSL is written.
4418 return ir.source.hlsl;
4419 }
4420
4421 unordered_set<uint32_t> ssbo_type_ids;
4422 bool aliased_ssbo_types = false;
4423
4424 // If we don't have any OpSource information, we need to perform some shaky heuristics.
4425 ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
4426 auto &type = this->get<SPIRType>(var.basetype);
4427 if (!type.pointer || var.storage == StorageClassFunction)
4428 return;
4429
4430 bool ssbo = var.storage == StorageClassStorageBuffer ||
4431 (var.storage == StorageClassUniform && has_decoration(type.self, DecorationBufferBlock));
4432
4433 if (ssbo)
4434 {
4435 if (ssbo_type_ids.count(type.self))
4436 aliased_ssbo_types = true;
4437 else
4438 ssbo_type_ids.insert(type.self);
4439 }
4440 });
4441
4442 // If the block name is aliased, assume we have HLSL-style UAV declarations.
4443 return aliased_ssbo_types;
4444 }
4445
// Extracts <result-type> and <result-id> from an instruction when it follows
// the common SPIR-V pattern. Returns false for opcodes which have no result,
// or when args[0] does not name a known type.
bool Compiler::instruction_to_result_type(uint32_t &result_type, uint32_t &result_id, spv::Op op, const uint32_t *args,
                                          uint32_t length)
{
	// Most instructions follow the pattern of <result-type> <result-id> <arguments>.
	// There are some exceptions.
	switch (op)
	{
	// These opcodes produce no result; their first operands are not type IDs.
	case OpStore:
	case OpCopyMemory:
	case OpCopyMemorySized:
	case OpImageWrite:
	case OpAtomicStore:
	case OpAtomicFlagClear:
	case OpEmitStreamVertex:
	case OpEndStreamPrimitive:
	case OpControlBarrier:
	case OpMemoryBarrier:
	case OpGroupWaitEvents:
	case OpRetainEvent:
	case OpReleaseEvent:
	case OpSetUserEventStatus:
	case OpCaptureEventProfilingInfo:
	case OpCommitReadPipe:
	case OpCommitWritePipe:
	case OpGroupCommitReadPipe:
	case OpGroupCommitWritePipe:
	case OpLine:
	case OpNoLine:
		return false;

	default:
		// Accept only if args[0] actually resolves to a SPIRType; this filters
		// out result-less opcodes not covered by the list above.
		if (length > 1 && maybe_get<SPIRType>(args[0]) != nullptr)
		{
			result_type = args[0];
			result_id = args[1];
			return true;
		}
		else
			return false;
	}
}
4487
combined_decoration_for_member(const SPIRType & type,uint32_t index) const4488 Bitset Compiler::combined_decoration_for_member(const SPIRType &type, uint32_t index) const
4489 {
4490 Bitset flags;
4491 auto *type_meta = ir.find_meta(type.self);
4492
4493 if (type_meta)
4494 {
4495 auto &members = type_meta->members;
4496 if (index >= members.size())
4497 return flags;
4498 auto &dec = members[index];
4499
4500 flags.merge_or(dec.decoration_flags);
4501
4502 auto &member_type = get<SPIRType>(type.member_types[index]);
4503
4504 // If our member type is a struct, traverse all the child members as well recursively.
4505 auto &member_childs = member_type.member_types;
4506 for (uint32_t i = 0; i < member_childs.size(); i++)
4507 {
4508 auto &child_member_type = get<SPIRType>(member_childs[i]);
4509 if (!child_member_type.pointer)
4510 flags.merge_or(combined_decoration_for_member(member_type, i));
4511 }
4512 }
4513
4514 return flags;
4515 }
4516
// Returns true for image formats which desktop GL supports for image
// load/store but which are not available in OpenGL ES.
bool Compiler::is_desktop_only_format(spv::ImageFormat format)
{
	switch (format)
	{
	// Desktop-only formats
	case ImageFormatR11fG11fB10f:
	case ImageFormatR16f:
	case ImageFormatRgb10A2:
	case ImageFormatR8:
	case ImageFormatRg8:
	case ImageFormatR16:
	case ImageFormatRg16:
	case ImageFormatRgba16:
	case ImageFormatR16Snorm:
	case ImageFormatRg16Snorm:
	case ImageFormatRgba16Snorm:
	case ImageFormatR8Snorm:
	case ImageFormatRg8Snorm:
	case ImageFormatR8ui:
	case ImageFormatRg8ui:
	case ImageFormatR16ui:
	case ImageFormatRgb10a2ui:
	case ImageFormatR8i:
	case ImageFormatRg8i:
	case ImageFormatR16i:
		return true;
	default:
		break;
	}

	return false;
}
4549
image_is_comparison(const SPIRType & type,uint32_t id) const4550 bool Compiler::image_is_comparison(const SPIRType &type, uint32_t id) const
4551 {
4552 return type.image.depth || (comparison_ids.count(id) != 0);
4553 }
4554
type_is_opaque_value(const SPIRType & type) const4555 bool Compiler::type_is_opaque_value(const SPIRType &type) const
4556 {
4557 return !type.pointer && (type.basetype == SPIRType::SampledImage || type.basetype == SPIRType::Image ||
4558 type.basetype == SPIRType::Sampler);
4559 }
4560
// Make these member functions so we can easily break on any force_recompile events.
// Requests another compile() pass; backends set this when they discover state
// mid-compile which invalidates already-emitted output.
void Compiler::force_recompile()
{
	is_force_recompile = true;
}
4566
// Returns whether a recompilation pass has been requested via force_recompile().
bool Compiler::is_forcing_recompilation() const
{
	return is_force_recompile;
}
4571
// Resets the recompilation request before starting a new compile() pass.
void Compiler::clear_force_recompile()
{
	is_force_recompile = false;
}
4576
// Handler which collects non-block PhysicalStorageBuffer pointer types
// (see analyze_non_block_pointer_types).
Compiler::PhysicalStorageBufferPointerHandler::PhysicalStorageBufferPointerHandler(Compiler &compiler_)
    : compiler(compiler_)
{
}
4581
handle(Op op,const uint32_t * args,uint32_t)4582 bool Compiler::PhysicalStorageBufferPointerHandler::handle(Op op, const uint32_t *args, uint32_t)
4583 {
4584 if (op == OpConvertUToPtr || op == OpBitcast)
4585 {
4586 auto &type = compiler.get<SPIRType>(args[0]);
4587 if (type.storage == StorageClassPhysicalStorageBufferEXT && type.pointer && type.pointer_depth == 1)
4588 {
4589 // If we need to cast to a pointer type which is not a block, we might need to synthesize ourselves
4590 // a block type which wraps this POD type.
4591 if (type.basetype != SPIRType::Struct)
4592 types.insert(args[0]);
4593 }
4594 }
4595
4596 return true;
4597 }
4598
analyze_non_block_pointer_types()4599 void Compiler::analyze_non_block_pointer_types()
4600 {
4601 PhysicalStorageBufferPointerHandler handler(*this);
4602 traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);
4603 physical_storage_non_block_pointer_types.reserve(handler.types.size());
4604 for (auto type : handler.types)
4605 physical_storage_non_block_pointer_types.push_back(type);
4606 sort(begin(physical_storage_non_block_pointer_types), end(physical_storage_non_block_pointer_types));
4607 }
4608
handle(Op op,const uint32_t *,uint32_t)4609 bool Compiler::InterlockedResourceAccessPrepassHandler::handle(Op op, const uint32_t *, uint32_t)
4610 {
4611 if (op == OpBeginInvocationInterlockEXT || op == OpEndInvocationInterlockEXT)
4612 {
4613 if (interlock_function_id != 0 && interlock_function_id != call_stack.back())
4614 {
4615 // Most complex case, we have no sensible way of dealing with this
4616 // other than taking the 100% conservative approach, exit early.
4617 split_function_case = true;
4618 return false;
4619 }
4620 else
4621 {
4622 interlock_function_id = call_stack.back();
4623 // If this call is performed inside control flow we have a problem.
4624 auto &cfg = compiler.get_cfg_for_function(interlock_function_id);
4625
4626 uint32_t from_block_id = compiler.get<SPIRFunction>(interlock_function_id).entry_block;
4627 bool outside_control_flow = cfg.node_terminates_control_flow_in_sub_graph(from_block_id, current_block_id);
4628 if (!outside_control_flow)
4629 control_flow_interlock = true;
4630 }
4631 }
4632 return true;
4633 }
4634
// Track the block currently being traversed so handle() can test whether the
// interlock opcode sits inside control flow relative to the function entry.
void Compiler::InterlockedResourceAccessPrepassHandler::rearm_current_block(const SPIRBlock &block)
{
	current_block_id = block.self;
}
4639
begin_function_scope(const uint32_t * args,uint32_t length)4640 bool Compiler::InterlockedResourceAccessPrepassHandler::begin_function_scope(const uint32_t *args, uint32_t length)
4641 {
4642 if (length < 3)
4643 return false;
4644 call_stack.push_back(args[2]);
4645 return true;
4646 }
4647
// Pop the call stack when returning from a called function.
bool Compiler::InterlockedResourceAccessPrepassHandler::end_function_scope(const uint32_t *, uint32_t)
{
	call_stack.pop_back();
	return true;
}
4653
begin_function_scope(const uint32_t * args,uint32_t length)4654 bool Compiler::InterlockedResourceAccessHandler::begin_function_scope(const uint32_t *args, uint32_t length)
4655 {
4656 if (length < 3)
4657 return false;
4658
4659 if (args[2] == interlock_function_id)
4660 call_stack_is_interlocked = true;
4661
4662 call_stack.push_back(args[2]);
4663 return true;
4664 }
4665
end_function_scope(const uint32_t *,uint32_t)4666 bool Compiler::InterlockedResourceAccessHandler::end_function_scope(const uint32_t *, uint32_t)
4667 {
4668 if (call_stack.back() == interlock_function_id)
4669 call_stack_is_interlocked = false;
4670
4671 call_stack.pop_back();
4672 return true;
4673 }
4674
access_potential_resource(uint32_t id)4675 void Compiler::InterlockedResourceAccessHandler::access_potential_resource(uint32_t id)
4676 {
4677 if ((use_critical_section && in_crit_sec) || (control_flow_interlock && call_stack_is_interlocked) ||
4678 split_function_case)
4679 {
4680 compiler.interlocked_resources.insert(id);
4681 }
4682 }
4683
// Main pass of interlock analysis. Performs a bare-bones traversal of all
// opcodes which can read or write buffer/image memory, and records every
// SSBO / UniformConstant resource touched in an interlocked context via
// access_potential_resource(). Expressions are registered minimally
// (set<SPIRExpression> / register_read) so maybe_get_backing_variable() can
// trace pointers back to their root variables in later opcodes.
bool Compiler::InterlockedResourceAccessHandler::handle(Op opcode, const uint32_t *args, uint32_t length)
{
	// Only care about critical section analysis if we have simple case.
	if (use_critical_section)
	{
		if (opcode == OpBeginInvocationInterlockEXT)
		{
			in_crit_sec = true;
			return true;
		}

		if (opcode == OpEndInvocationInterlockEXT)
		{
			// End critical section--nothing more to do.
			return false;
		}
	}

	// We need to figure out where images and buffers are loaded from, so do only the bare bones compilation we need.
	switch (opcode)
	{
	case OpLoad:
	{
		if (length < 3)
			return false;

		uint32_t ptr = args[2];
		auto *var = compiler.maybe_get_backing_variable(ptr);

		// We're only concerned with buffer and image memory here.
		if (!var)
			break;

		switch (var->storage)
		{
		default:
			break;

		case StorageClassUniformConstant:
		{
			// Loading an image/sampler; register the expression so later
			// image/atomic opcodes can find the backing variable.
			uint32_t result_type = args[0];
			uint32_t id = args[1];
			compiler.set<SPIRExpression>(id, "", result_type, true);
			compiler.register_read(id, ptr, true);
			break;
		}

		case StorageClassUniform:
			// Must have BufferBlock; we only care about SSBOs.
			if (!compiler.has_decoration(compiler.get<SPIRType>(var->basetype).self, DecorationBufferBlock))
				break;
			// fallthrough
		case StorageClassStorageBuffer:
			access_potential_resource(var->self);
			break;
		}
		break;
	}

	case OpInBoundsAccessChain:
	case OpAccessChain:
	case OpPtrAccessChain:
	{
		if (length < 3)
			return false;

		uint32_t result_type = args[0];

		// Track chains into buffer/image memory so stores/atomics through the
		// chain can be traced back to the root variable.
		auto &type = compiler.get<SPIRType>(result_type);
		if (type.storage == StorageClassUniform || type.storage == StorageClassUniformConstant ||
		    type.storage == StorageClassStorageBuffer)
		{
			uint32_t id = args[1];
			uint32_t ptr = args[2];
			compiler.set<SPIRExpression>(id, "", result_type, true);
			compiler.register_read(id, ptr, true);
			compiler.ir.ids[id].set_allow_type_rewrite();
		}
		break;
	}

	case OpImageTexelPointer:
	{
		if (length < 3)
			return false;

		// Texel pointers feed atomics; remember which image variable they
		// were derived from.
		uint32_t result_type = args[0];
		uint32_t id = args[1];
		uint32_t ptr = args[2];
		auto &e = compiler.set<SPIRExpression>(id, "", result_type, true);
		auto *var = compiler.maybe_get_backing_variable(ptr);
		if (var)
			e.loaded_from = var->self;
		break;
	}

	case OpStore:
	case OpImageWrite:
	case OpAtomicStore:
	{
		if (length < 1)
			return false;

		// All three opcodes place the written pointer/image in args[0].
		uint32_t ptr = args[0];
		auto *var = compiler.maybe_get_backing_variable(ptr);
		if (var && (var->storage == StorageClassUniform || var->storage == StorageClassUniformConstant ||
		            var->storage == StorageClassStorageBuffer))
		{
			access_potential_resource(var->self);
		}

		break;
	}

	case OpCopyMemory:
	{
		if (length < 2)
			return false;

		uint32_t dst = args[0];
		uint32_t src = args[1];
		auto *dst_var = compiler.maybe_get_backing_variable(dst);
		auto *src_var = compiler.maybe_get_backing_variable(src);

		if (dst_var && (dst_var->storage == StorageClassUniform || dst_var->storage == StorageClassStorageBuffer))
			access_potential_resource(dst_var->self);

		if (src_var)
		{
			if (src_var->storage != StorageClassUniform && src_var->storage != StorageClassStorageBuffer)
				break;

			// Uniform storage only counts when decorated BufferBlock (SSBO).
			if (src_var->storage == StorageClassUniform &&
			    !compiler.has_decoration(compiler.get<SPIRType>(src_var->basetype).self, DecorationBufferBlock))
			{
				break;
			}

			access_potential_resource(src_var->self);
		}

		break;
	}

	case OpImageRead:
	case OpAtomicLoad:
	{
		if (length < 3)
			return false;

		uint32_t ptr = args[2];
		auto *var = compiler.maybe_get_backing_variable(ptr);

		// We're only concerned with buffer and image memory here.
		if (!var)
			break;

		switch (var->storage)
		{
		default:
			break;

		case StorageClassUniform:
			// Must have BufferBlock; we only care about SSBOs.
			if (!compiler.has_decoration(compiler.get<SPIRType>(var->basetype).self, DecorationBufferBlock))
				break;
			// fallthrough
		case StorageClassUniformConstant:
		case StorageClassStorageBuffer:
			access_potential_resource(var->self);
			break;
		}
		break;
	}

	case OpAtomicExchange:
	case OpAtomicCompareExchange:
	case OpAtomicIIncrement:
	case OpAtomicIDecrement:
	case OpAtomicIAdd:
	case OpAtomicISub:
	case OpAtomicSMin:
	case OpAtomicUMin:
	case OpAtomicSMax:
	case OpAtomicUMax:
	case OpAtomicAnd:
	case OpAtomicOr:
	case OpAtomicXor:
	{
		if (length < 3)
			return false;

		// Atomics operate on the pointer in args[2] (after result type/ID).
		uint32_t ptr = args[2];
		auto *var = compiler.maybe_get_backing_variable(ptr);
		if (var && (var->storage == StorageClassUniform || var->storage == StorageClassUniformConstant ||
		            var->storage == StorageClassStorageBuffer))
		{
			access_potential_resource(var->self);
		}

		break;
	}

	default:
		break;
	}

	return true;
}
4893
analyze_interlocked_resource_usage()4894 void Compiler::analyze_interlocked_resource_usage()
4895 {
4896 if (get_execution_model() == ExecutionModelFragment &&
4897 (get_entry_point().flags.get(ExecutionModePixelInterlockOrderedEXT) ||
4898 get_entry_point().flags.get(ExecutionModePixelInterlockUnorderedEXT) ||
4899 get_entry_point().flags.get(ExecutionModeSampleInterlockOrderedEXT) ||
4900 get_entry_point().flags.get(ExecutionModeSampleInterlockUnorderedEXT)))
4901 {
4902 InterlockedResourceAccessPrepassHandler prepass_handler(*this, ir.default_entry_point);
4903 traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), prepass_handler);
4904
4905 InterlockedResourceAccessHandler handler(*this, ir.default_entry_point);
4906 handler.interlock_function_id = prepass_handler.interlock_function_id;
4907 handler.split_function_case = prepass_handler.split_function_case;
4908 handler.control_flow_interlock = prepass_handler.control_flow_interlock;
4909 handler.use_critical_section = !handler.split_function_case && !handler.control_flow_interlock;
4910
4911 traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);
4912
4913 // For GLSL. If we hit any of these cases, we have to fall back to conservative approach.
4914 interlocked_is_complex =
4915 !handler.use_critical_section || handler.interlock_function_id != ir.default_entry_point;
4916 }
4917 }
4918
type_is_array_of_pointers(const SPIRType & type) const4919 bool Compiler::type_is_array_of_pointers(const SPIRType &type) const
4920 {
4921 if (!type.pointer)
4922 return false;
4923
4924 // If parent type has same pointer depth, we must have an array of pointers.
4925 return type.pointer_depth == get<SPIRType>(type.parent_type).pointer_depth;
4926 }
4927
type_is_top_level_physical_pointer(const SPIRType & type) const4928 bool Compiler::type_is_top_level_physical_pointer(const SPIRType &type) const
4929 {
4930 return type.pointer && type.storage == StorageClassPhysicalStorageBuffer &&
4931 type.pointer_depth > get<SPIRType>(type.parent_type).pointer_depth;
4932 }
4933
flush_phi_required(BlockID from,BlockID to) const4934 bool Compiler::flush_phi_required(BlockID from, BlockID to) const
4935 {
4936 auto &child = get<SPIRBlock>(to);
4937 for (auto &phi : child.phi_variables)
4938 if (phi.parent == from)
4939 return true;
4940 return false;
4941 }
4942
// Track that codegen has entered one more nested loop level.
void Compiler::add_loop_level()
{
	current_loop_level++;
}
4947