1 //
2 // Copyright (C) 2018 Google, Inc.
3 //
4 // All rights reserved.
5 //
6 // Redistribution and use in source and binary forms, with or without
7 // modification, are permitted provided that the following conditions
8 // are met:
9 //
10 //    Redistributions of source code must retain the above copyright
11 //    notice, this list of conditions and the following disclaimer.
12 //
13 //    Redistributions in binary form must reproduce the above
14 //    copyright notice, this list of conditions and the following
15 //    disclaimer in the documentation and/or other materials provided
16 //    with the distribution.
17 //
18 //    Neither the name of 3Dlabs Inc. Ltd. nor the names of its
19 //    contributors may be used to endorse or promote products derived
20 //    from this software without specific prior written permission.
21 //
22 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25 // FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
26 // COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
28 // BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29 // LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
30 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
32 // ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33 // POSSIBILITY OF SUCH DAMAGE.
34 
35 //
36 // Post-processing for SPIR-V IR, in internal form, not standard binary form.
37 //
38 
39 #include <cassert>
40 #include <cstdlib>
41 
42 #include <unordered_map>
43 #include <unordered_set>
44 #include <algorithm>
45 
46 #include "SpvBuilder.h"
47 
48 #include "spirv.hpp"
49 #include "GlslangToSpv.h"
50 #include "SpvBuilder.h"
51 namespace spv {
52     #include "GLSL.std.450.h"
53     #include "GLSL.ext.KHR.h"
54     #include "GLSL.ext.EXT.h"
55     #include "GLSL.ext.AMD.h"
56     #include "GLSL.ext.NV.h"
57 }
58 
59 namespace spv {
60 
61 #ifndef GLSLANG_WEB
62 // Hook to visit each operand type and result type of an instruction.
63 // Will be called multiple times for one instruction, once for each typed
64 // operand and the result.
postProcessType(const Instruction & inst,Id typeId)65 void Builder::postProcessType(const Instruction& inst, Id typeId)
66 {
67     // Characterize the type being questioned
68     Id basicTypeOp = getMostBasicTypeClass(typeId);
69     int width = 0;
70     if (basicTypeOp == OpTypeFloat || basicTypeOp == OpTypeInt)
71         width = getScalarTypeWidth(typeId);
72 
73     // Do opcode-specific checks
74     switch (inst.getOpCode()) {
75     case OpLoad:
76     case OpStore:
77         if (basicTypeOp == OpTypeStruct) {
78             if (containsType(typeId, OpTypeInt, 8))
79                 addCapability(CapabilityInt8);
80             if (containsType(typeId, OpTypeInt, 16))
81                 addCapability(CapabilityInt16);
82             if (containsType(typeId, OpTypeFloat, 16))
83                 addCapability(CapabilityFloat16);
84         } else {
85             StorageClass storageClass = getStorageClass(inst.getIdOperand(0));
86             if (width == 8) {
87                 switch (storageClass) {
88                 case StorageClassPhysicalStorageBufferEXT:
89                 case StorageClassUniform:
90                 case StorageClassStorageBuffer:
91                 case StorageClassPushConstant:
92                     break;
93                 default:
94                     addCapability(CapabilityInt8);
95                     break;
96                 }
97             } else if (width == 16) {
98                 switch (storageClass) {
99                 case StorageClassPhysicalStorageBufferEXT:
100                 case StorageClassUniform:
101                 case StorageClassStorageBuffer:
102                 case StorageClassPushConstant:
103                 case StorageClassInput:
104                 case StorageClassOutput:
105                     break;
106                 default:
107                     if (basicTypeOp == OpTypeInt)
108                         addCapability(CapabilityInt16);
109                     if (basicTypeOp == OpTypeFloat)
110                         addCapability(CapabilityFloat16);
111                     break;
112                 }
113             }
114         }
115         break;
116     case OpAccessChain:
117     case OpPtrAccessChain:
118     case OpCopyObject:
119         break;
120     case OpFConvert:
121     case OpSConvert:
122     case OpUConvert:
123         // Look for any 8/16-bit storage capabilities. If there are none, assume that
124         // the convert instruction requires the Float16/Int8/16 capability.
125         if (containsType(typeId, OpTypeFloat, 16) || containsType(typeId, OpTypeInt, 16)) {
126             bool foundStorage = false;
127             for (auto it = capabilities.begin(); it != capabilities.end(); ++it) {
128                 spv::Capability cap = *it;
129                 if (cap == spv::CapabilityStorageInputOutput16 ||
130                     cap == spv::CapabilityStoragePushConstant16 ||
131                     cap == spv::CapabilityStorageUniformBufferBlock16 ||
132                     cap == spv::CapabilityStorageUniform16) {
133                     foundStorage = true;
134                     break;
135                 }
136             }
137             if (!foundStorage) {
138                 if (containsType(typeId, OpTypeFloat, 16))
139                     addCapability(CapabilityFloat16);
140                 if (containsType(typeId, OpTypeInt, 16))
141                     addCapability(CapabilityInt16);
142             }
143         }
144         if (containsType(typeId, OpTypeInt, 8)) {
145             bool foundStorage = false;
146             for (auto it = capabilities.begin(); it != capabilities.end(); ++it) {
147                 spv::Capability cap = *it;
148                 if (cap == spv::CapabilityStoragePushConstant8 ||
149                     cap == spv::CapabilityUniformAndStorageBuffer8BitAccess ||
150                     cap == spv::CapabilityStorageBuffer8BitAccess) {
151                     foundStorage = true;
152                     break;
153                 }
154             }
155             if (!foundStorage) {
156                 addCapability(CapabilityInt8);
157             }
158         }
159         break;
160     case OpExtInst:
161         switch (inst.getImmediateOperand(1)) {
162         case GLSLstd450Frexp:
163         case GLSLstd450FrexpStruct:
164             if (getSpvVersion() < glslang::EShTargetSpv_1_3 && containsType(typeId, OpTypeInt, 16))
165                 addExtension(spv::E_SPV_AMD_gpu_shader_int16);
166             break;
167         case GLSLstd450InterpolateAtCentroid:
168         case GLSLstd450InterpolateAtSample:
169         case GLSLstd450InterpolateAtOffset:
170             if (getSpvVersion() < glslang::EShTargetSpv_1_3 && containsType(typeId, OpTypeFloat, 16))
171                 addExtension(spv::E_SPV_AMD_gpu_shader_half_float);
172             break;
173         default:
174             break;
175         }
176         break;
177     default:
178         if (basicTypeOp == OpTypeFloat && width == 16)
179             addCapability(CapabilityFloat16);
180         if (basicTypeOp == OpTypeInt && width == 16)
181             addCapability(CapabilityInt16);
182         if (basicTypeOp == OpTypeInt && width == 8)
183             addCapability(CapabilityInt8);
184         break;
185     }
186 }
187 
188 // Called for each instruction that resides in a block.
postProcess(Instruction & inst)189 void Builder::postProcess(Instruction& inst)
190 {
191     // Add capabilities based simply on the opcode.
192     switch (inst.getOpCode()) {
193     case OpExtInst:
194         switch (inst.getImmediateOperand(1)) {
195         case GLSLstd450InterpolateAtCentroid:
196         case GLSLstd450InterpolateAtSample:
197         case GLSLstd450InterpolateAtOffset:
198             addCapability(CapabilityInterpolationFunction);
199             break;
200         default:
201             break;
202         }
203         break;
204     case OpDPdxFine:
205     case OpDPdyFine:
206     case OpFwidthFine:
207     case OpDPdxCoarse:
208     case OpDPdyCoarse:
209     case OpFwidthCoarse:
210         addCapability(CapabilityDerivativeControl);
211         break;
212 
213     case OpImageQueryLod:
214     case OpImageQuerySize:
215     case OpImageQuerySizeLod:
216     case OpImageQuerySamples:
217     case OpImageQueryLevels:
218         addCapability(CapabilityImageQuery);
219         break;
220 
221     case OpGroupNonUniformPartitionNV:
222         addExtension(E_SPV_NV_shader_subgroup_partitioned);
223         addCapability(CapabilityGroupNonUniformPartitionedNV);
224         break;
225 
226     case OpLoad:
227     case OpStore:
228         {
229             // For any load/store to a PhysicalStorageBufferEXT, walk the accesschain
230             // index list to compute the misalignment. The pre-existing alignment value
231             // (set via Builder::AccessChain::alignment) only accounts for the base of
232             // the reference type and any scalar component selection in the accesschain,
233             // and this function computes the rest from the SPIR-V Offset decorations.
234             Instruction *accessChain = module.getInstruction(inst.getIdOperand(0));
235             if (accessChain->getOpCode() == OpAccessChain) {
236                 Instruction *base = module.getInstruction(accessChain->getIdOperand(0));
237                 // Get the type of the base of the access chain. It must be a pointer type.
238                 Id typeId = base->getTypeId();
239                 Instruction *type = module.getInstruction(typeId);
240                 assert(type->getOpCode() == OpTypePointer);
241                 if (type->getImmediateOperand(0) != StorageClassPhysicalStorageBufferEXT) {
242                     break;
243                 }
244                 // Get the pointee type.
245                 typeId = type->getIdOperand(1);
246                 type = module.getInstruction(typeId);
247                 // Walk the index list for the access chain. For each index, find any
248                 // misalignment that can apply when accessing the member/element via
249                 // Offset/ArrayStride/MatrixStride decorations, and bitwise OR them all
250                 // together.
251                 int alignment = 0;
252                 for (int i = 1; i < accessChain->getNumOperands(); ++i) {
253                     Instruction *idx = module.getInstruction(accessChain->getIdOperand(i));
254                     if (type->getOpCode() == OpTypeStruct) {
255                         assert(idx->getOpCode() == OpConstant);
256                         unsigned int c = idx->getImmediateOperand(0);
257 
258                         const auto function = [&](const std::unique_ptr<Instruction>& decoration) {
259                             if (decoration.get()->getOpCode() == OpMemberDecorate &&
260                                 decoration.get()->getIdOperand(0) == typeId &&
261                                 decoration.get()->getImmediateOperand(1) == c &&
262                                 (decoration.get()->getImmediateOperand(2) == DecorationOffset ||
263                                  decoration.get()->getImmediateOperand(2) == DecorationMatrixStride)) {
264                                 alignment |= decoration.get()->getImmediateOperand(3);
265                             }
266                         };
267                         std::for_each(decorations.begin(), decorations.end(), function);
268                         // get the next member type
269                         typeId = type->getIdOperand(c);
270                         type = module.getInstruction(typeId);
271                     } else if (type->getOpCode() == OpTypeArray ||
272                                type->getOpCode() == OpTypeRuntimeArray) {
273                         const auto function = [&](const std::unique_ptr<Instruction>& decoration) {
274                             if (decoration.get()->getOpCode() == OpDecorate &&
275                                 decoration.get()->getIdOperand(0) == typeId &&
276                                 decoration.get()->getImmediateOperand(1) == DecorationArrayStride) {
277                                 alignment |= decoration.get()->getImmediateOperand(2);
278                             }
279                         };
280                         std::for_each(decorations.begin(), decorations.end(), function);
281                         // Get the element type
282                         typeId = type->getIdOperand(0);
283                         type = module.getInstruction(typeId);
284                     } else {
285                         // Once we get to any non-aggregate type, we're done.
286                         break;
287                     }
288                 }
289                 assert(inst.getNumOperands() >= 3);
290                 unsigned int memoryAccess = inst.getImmediateOperand((inst.getOpCode() == OpStore) ? 2 : 1);
291                 assert(memoryAccess & MemoryAccessAlignedMask);
292                 static_cast<void>(memoryAccess);
293                 // Compute the index of the alignment operand.
294                 int alignmentIdx = 2;
295                 if (inst.getOpCode() == OpStore)
296                     alignmentIdx++;
297                 // Merge new and old (mis)alignment
298                 alignment |= inst.getImmediateOperand(alignmentIdx);
299                 // Pick the LSB
300                 alignment = alignment & ~(alignment & (alignment-1));
301                 // update the Aligned operand
302                 inst.setImmediateOperand(alignmentIdx, alignment);
303             }
304             break;
305         }
306 
307     default:
308         break;
309     }
310 
311     // Checks based on type
312     if (inst.getTypeId() != NoType)
313         postProcessType(inst, inst.getTypeId());
314     for (int op = 0; op < inst.getNumOperands(); ++op) {
315         if (inst.isIdOperand(op)) {
316             // In blocks, these are always result ids, but we are relying on
317             // getTypeId() to return NoType for things like OpLabel.
318             if (getTypeId(inst.getIdOperand(op)) != NoType)
319                 postProcessType(inst, getTypeId(inst.getIdOperand(op)));
320         }
321     }
322 }
323 #endif
324 
325 // comment in header
postProcessCFG()326 void Builder::postProcessCFG()
327 {
328     // reachableBlocks is the set of blockss reached via control flow, or which are
329     // unreachable continue targert or unreachable merge.
330     std::unordered_set<const Block*> reachableBlocks;
331     std::unordered_map<Block*, Block*> headerForUnreachableContinue;
332     std::unordered_set<Block*> unreachableMerges;
333     std::unordered_set<Id> unreachableDefinitions;
334     // Collect IDs defined in unreachable blocks. For each function, label the
335     // reachable blocks first. Then for each unreachable block, collect the
336     // result IDs of the instructions in it.
337     for (auto fi = module.getFunctions().cbegin(); fi != module.getFunctions().cend(); fi++) {
338         Function* f = *fi;
339         Block* entry = f->getEntryBlock();
340         inReadableOrder(entry,
341             [&reachableBlocks, &unreachableMerges, &headerForUnreachableContinue]
342             (Block* b, ReachReason why, Block* header) {
343                reachableBlocks.insert(b);
344                if (why == ReachDeadContinue) headerForUnreachableContinue[b] = header;
345                if (why == ReachDeadMerge) unreachableMerges.insert(b);
346             });
347         for (auto bi = f->getBlocks().cbegin(); bi != f->getBlocks().cend(); bi++) {
348             Block* b = *bi;
349             if (unreachableMerges.count(b) != 0 || headerForUnreachableContinue.count(b) != 0) {
350                 auto ii = b->getInstructions().cbegin();
351                 ++ii; // Keep potential decorations on the label.
352                 for (; ii != b->getInstructions().cend(); ++ii)
353                     unreachableDefinitions.insert(ii->get()->getResultId());
354             } else if (reachableBlocks.count(b) == 0) {
355                 // The normal case for unreachable code.  All definitions are considered dead.
356                 for (auto ii = b->getInstructions().cbegin(); ii != b->getInstructions().cend(); ++ii)
357                     unreachableDefinitions.insert(ii->get()->getResultId());
358             }
359         }
360     }
361 
362     // Modify unreachable merge blocks and unreachable continue targets.
363     // Delete their contents.
364     for (auto mergeIter = unreachableMerges.begin(); mergeIter != unreachableMerges.end(); ++mergeIter) {
365         (*mergeIter)->rewriteAsCanonicalUnreachableMerge();
366     }
367     for (auto continueIter = headerForUnreachableContinue.begin();
368          continueIter != headerForUnreachableContinue.end();
369          ++continueIter) {
370         Block* continue_target = continueIter->first;
371         Block* header = continueIter->second;
372         continue_target->rewriteAsCanonicalUnreachableContinue(header);
373     }
374 
375     // Remove unneeded decorations, for unreachable instructions
376     decorations.erase(std::remove_if(decorations.begin(), decorations.end(),
377         [&unreachableDefinitions](std::unique_ptr<Instruction>& I) -> bool {
378             Id decoration_id = I.get()->getIdOperand(0);
379             return unreachableDefinitions.count(decoration_id) != 0;
380         }),
381         decorations.end());
382 }
383 
384 #ifndef GLSLANG_WEB
385 // comment in header
postProcessFeatures()386 void Builder::postProcessFeatures() {
387     // Add per-instruction capabilities, extensions, etc.,
388 
389     // Look for any 8/16 bit type in physical storage buffer class, and set the
390     // appropriate capability. This happens in createSpvVariable for other storage
391     // classes, but there isn't always a variable for physical storage buffer.
392     for (int t = 0; t < (int)groupedTypes[OpTypePointer].size(); ++t) {
393         Instruction* type = groupedTypes[OpTypePointer][t];
394         if (type->getImmediateOperand(0) == (unsigned)StorageClassPhysicalStorageBufferEXT) {
395             if (containsType(type->getIdOperand(1), OpTypeInt, 8)) {
396                 addIncorporatedExtension(spv::E_SPV_KHR_8bit_storage, spv::Spv_1_5);
397                 addCapability(spv::CapabilityStorageBuffer8BitAccess);
398             }
399             if (containsType(type->getIdOperand(1), OpTypeInt, 16) ||
400                 containsType(type->getIdOperand(1), OpTypeFloat, 16)) {
401                 addIncorporatedExtension(spv::E_SPV_KHR_16bit_storage, spv::Spv_1_3);
402                 addCapability(spv::CapabilityStorageBuffer16BitAccess);
403             }
404         }
405     }
406 
407     // process all block-contained instructions
408     for (auto fi = module.getFunctions().cbegin(); fi != module.getFunctions().cend(); fi++) {
409         Function* f = *fi;
410         for (auto bi = f->getBlocks().cbegin(); bi != f->getBlocks().cend(); bi++) {
411             Block* b = *bi;
412             for (auto ii = b->getInstructions().cbegin(); ii != b->getInstructions().cend(); ii++)
413                 postProcess(*ii->get());
414 
415             // For all local variables that contain pointers to PhysicalStorageBufferEXT, check whether
416             // there is an existing restrict/aliased decoration. If we don't find one, add Aliased as the
417             // default.
418             for (auto vi = b->getLocalVariables().cbegin(); vi != b->getLocalVariables().cend(); vi++) {
419                 const Instruction& inst = *vi->get();
420                 Id resultId = inst.getResultId();
421                 if (containsPhysicalStorageBufferOrArray(getDerefTypeId(resultId))) {
422                     bool foundDecoration = false;
423                     const auto function = [&](const std::unique_ptr<Instruction>& decoration) {
424                         if (decoration.get()->getIdOperand(0) == resultId &&
425                             decoration.get()->getOpCode() == OpDecorate &&
426                             (decoration.get()->getImmediateOperand(1) == spv::DecorationAliasedPointerEXT ||
427                              decoration.get()->getImmediateOperand(1) == spv::DecorationRestrictPointerEXT)) {
428                             foundDecoration = true;
429                         }
430                     };
431                     std::for_each(decorations.begin(), decorations.end(), function);
432                     if (!foundDecoration) {
433                         addDecoration(resultId, spv::DecorationAliasedPointerEXT);
434                     }
435                 }
436             }
437         }
438     }
439 
440     // If any Vulkan memory model-specific functionality is used, update the
441     // OpMemoryModel to match.
442     if (capabilities.find(spv::CapabilityVulkanMemoryModelKHR) != capabilities.end()) {
443         memoryModel = spv::MemoryModelVulkanKHR;
444         addIncorporatedExtension(spv::E_SPV_KHR_vulkan_memory_model, spv::Spv_1_5);
445     }
446 
447     // Add Aliased decoration if there's more than one Workgroup Block variable.
448     if (capabilities.find(spv::CapabilityWorkgroupMemoryExplicitLayoutKHR) != capabilities.end()) {
449         assert(entryPoints.size() == 1);
450         auto &ep = entryPoints[0];
451 
452         std::vector<Id> workgroup_variables;
453         for (int i = 0; i < (int)ep->getNumOperands(); i++) {
454             if (!ep->isIdOperand(i))
455                 continue;
456 
457             const Id id = ep->getIdOperand(i);
458             const Instruction *instr = module.getInstruction(id);
459             if (instr->getOpCode() != spv::OpVariable)
460                 continue;
461 
462             if (instr->getImmediateOperand(0) == spv::StorageClassWorkgroup)
463                 workgroup_variables.push_back(id);
464         }
465 
466         if (workgroup_variables.size() > 1) {
467             for (size_t i = 0; i < workgroup_variables.size(); i++)
468                 addDecoration(workgroup_variables[i], spv::DecorationAliased);
469         }
470     }
471 }
472 #endif
473 
474 // comment in header
postProcess()475 void Builder::postProcess() {
476   postProcessCFG();
477 #ifndef GLSLANG_WEB
478   postProcessFeatures();
479 #endif
480 }
481 
482 }; // end spv namespace
483