1 // Copyright (c) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "source/fuzz/fact_manager/data_synonym_and_id_equation_facts.h"
16 
17 #include "source/fuzz/fuzzer_util.h"
18 
19 namespace spvtools {
20 namespace fuzz {
21 namespace fact_manager {
22 
operator ()(const Operation & operation) const23 size_t DataSynonymAndIdEquationFacts::OperationHash::operator()(
24     const Operation& operation) const {
25   std::u32string hash;
26   hash.push_back(operation.opcode);
27   for (auto operand : operation.operands) {
28     hash.push_back(static_cast<uint32_t>(DataDescriptorHash()(operand)));
29   }
30   return std::hash<std::u32string>()(hash);
31 }
32 
operator ()(const Operation & first,const Operation & second) const33 bool DataSynonymAndIdEquationFacts::OperationEquals::operator()(
34     const Operation& first, const Operation& second) const {
35   // Equal operations require...
36   //
37   // Equal opcodes.
38   if (first.opcode != second.opcode) {
39     return false;
40   }
41   // Matching operand counts.
42   if (first.operands.size() != second.operands.size()) {
43     return false;
44   }
45   // Equal operands.
46   for (uint32_t i = 0; i < first.operands.size(); i++) {
47     if (!DataDescriptorEquals()(first.operands[i], second.operands[i])) {
48       return false;
49     }
50   }
51   return true;
52 }
53 
DataSynonymAndIdEquationFacts(opt::IRContext * ir_context)54 DataSynonymAndIdEquationFacts::DataSynonymAndIdEquationFacts(
55     opt::IRContext* ir_context)
56     : ir_context_(ir_context) {}
57 
MaybeAddFact(const protobufs::FactDataSynonym & fact,const DeadBlockFacts & dead_block_facts,const IrrelevantValueFacts & irrelevant_value_facts)58 bool DataSynonymAndIdEquationFacts::MaybeAddFact(
59     const protobufs::FactDataSynonym& fact,
60     const DeadBlockFacts& dead_block_facts,
61     const IrrelevantValueFacts& irrelevant_value_facts) {
62   if (irrelevant_value_facts.IdIsIrrelevant(fact.data1().object(),
63                                             dead_block_facts) ||
64       irrelevant_value_facts.IdIsIrrelevant(fact.data2().object(),
65                                             dead_block_facts)) {
66     // Irrelevant ids cannot be synonymous with other ids.
67     return false;
68   }
69 
70   // Add the fact, including all facts relating sub-components of the data
71   // descriptors that are involved.
72   AddDataSynonymFactRecursive(fact.data1(), fact.data2());
73   return true;
74 }
75 
MaybeAddFact(const protobufs::FactIdEquation & fact,const DeadBlockFacts & dead_block_facts,const IrrelevantValueFacts & irrelevant_value_facts)76 bool DataSynonymAndIdEquationFacts::MaybeAddFact(
77     const protobufs::FactIdEquation& fact,
78     const DeadBlockFacts& dead_block_facts,
79     const IrrelevantValueFacts& irrelevant_value_facts) {
80   if (irrelevant_value_facts.IdIsIrrelevant(fact.lhs_id(), dead_block_facts)) {
81     // Irrelevant ids cannot participate in IdEquation facts.
82     return false;
83   }
84 
85   for (auto id : fact.rhs_id()) {
86     if (irrelevant_value_facts.IdIsIrrelevant(id, dead_block_facts)) {
87       // Irrelevant ids cannot participate in IdEquation facts.
88       return false;
89     }
90   }
91 
92   protobufs::DataDescriptor lhs_dd = MakeDataDescriptor(fact.lhs_id(), {});
93 
94   // Register the LHS in the equivalence relation if needed.
95   RegisterDataDescriptor(lhs_dd);
96 
97   // Get equivalence class representatives for all ids used on the RHS of the
98   // equation.
99   std::vector<const protobufs::DataDescriptor*> rhs_dds;
100   for (auto rhs_id : fact.rhs_id()) {
101     // Register a data descriptor based on this id in the equivalence relation
102     // if needed, and then record the equivalence class representative.
103     rhs_dds.push_back(RegisterDataDescriptor(MakeDataDescriptor(rhs_id, {})));
104   }
105 
106   // Now add the fact.
107   AddEquationFactRecursive(lhs_dd, static_cast<SpvOp>(fact.opcode()), rhs_dds);
108   return true;
109 }
110 
111 DataSynonymAndIdEquationFacts::OperationSet
GetEquations(const protobufs::DataDescriptor * lhs) const112 DataSynonymAndIdEquationFacts::GetEquations(
113     const protobufs::DataDescriptor* lhs) const {
114   auto existing = id_equations_.find(lhs);
115   if (existing == id_equations_.end()) {
116     return OperationSet();
117   }
118   return existing->second;
119 }
120 
AddEquationFactRecursive(const protobufs::DataDescriptor & lhs_dd,SpvOp opcode,const std::vector<const protobufs::DataDescriptor * > & rhs_dds)121 void DataSynonymAndIdEquationFacts::AddEquationFactRecursive(
122     const protobufs::DataDescriptor& lhs_dd, SpvOp opcode,
123     const std::vector<const protobufs::DataDescriptor*>& rhs_dds) {
124   assert(synonymous_.Exists(lhs_dd) &&
125          "The LHS must be known to the equivalence relation.");
126   for (auto rhs_dd : rhs_dds) {
127     // Keep release compilers happy.
128     (void)(rhs_dd);
129     assert(synonymous_.Exists(*rhs_dd) &&
130            "The RHS operands must be known to the equivalence relation.");
131   }
132 
133   auto lhs_dd_representative = synonymous_.Find(&lhs_dd);
134 
135   if (id_equations_.count(lhs_dd_representative) == 0) {
136     // We have not seen an equation with this LHS before, so associate the LHS
137     // with an initially empty set.
138     id_equations_.insert({lhs_dd_representative, OperationSet()});
139   }
140 
141   {
142     auto existing_equations = id_equations_.find(lhs_dd_representative);
143     assert(existing_equations != id_equations_.end() &&
144            "A set of operations should be present, even if empty.");
145 
146     Operation new_operation = {opcode, rhs_dds};
147     if (existing_equations->second.count(new_operation)) {
148       // This equation is known, so there is nothing further to be done.
149       return;
150     }
151     // Add the equation to the set of known equations.
152     existing_equations->second.insert(new_operation);
153   }
154 
155   // Now try to work out corollaries implied by the new equation and existing
156   // facts.
157   switch (opcode) {
158     case SpvOpConvertSToF:
159     case SpvOpConvertUToF:
160       ComputeConversionDataSynonymFacts(*rhs_dds[0]);
161       break;
162     case SpvOpBitcast: {
163       assert(DataDescriptorsAreWellFormedAndComparable(lhs_dd, *rhs_dds[0]) &&
164              "Operands of OpBitcast equation fact must have compatible types");
165       if (!synonymous_.IsEquivalent(lhs_dd, *rhs_dds[0])) {
166         AddDataSynonymFactRecursive(lhs_dd, *rhs_dds[0]);
167       }
168     } break;
169     case SpvOpIAdd: {
170       // Equation form: "a = b + c"
171       for (const auto& equation : GetEquations(rhs_dds[0])) {
172         if (equation.opcode == SpvOpISub) {
173           // Equation form: "a = (d - e) + c"
174           if (synonymous_.IsEquivalent(*equation.operands[1], *rhs_dds[1])) {
175             // Equation form: "a = (d - c) + c"
176             // We can thus infer "a = d"
177             AddDataSynonymFactRecursive(lhs_dd, *equation.operands[0]);
178           }
179         }
180       }
181       for (const auto& equation : GetEquations(rhs_dds[1])) {
182         if (equation.opcode == SpvOpISub) {
183           // Equation form: "a = b + (d - e)"
184           if (synonymous_.IsEquivalent(*equation.operands[1], *rhs_dds[0])) {
185             // Equation form: "a = b + (d - b)"
186             // We can thus infer "a = d"
187             AddDataSynonymFactRecursive(lhs_dd, *equation.operands[0]);
188           }
189         }
190       }
191       break;
192     }
193     case SpvOpISub: {
194       // Equation form: "a = b - c"
195       for (const auto& equation : GetEquations(rhs_dds[0])) {
196         if (equation.opcode == SpvOpIAdd) {
197           // Equation form: "a = (d + e) - c"
198           if (synonymous_.IsEquivalent(*equation.operands[0], *rhs_dds[1])) {
199             // Equation form: "a = (c + e) - c"
200             // We can thus infer "a = e"
201             AddDataSynonymFactRecursive(lhs_dd, *equation.operands[1]);
202           }
203           if (synonymous_.IsEquivalent(*equation.operands[1], *rhs_dds[1])) {
204             // Equation form: "a = (d + c) - c"
205             // We can thus infer "a = d"
206             AddDataSynonymFactRecursive(lhs_dd, *equation.operands[0]);
207           }
208         }
209 
210         if (equation.opcode == SpvOpISub) {
211           // Equation form: "a = (d - e) - c"
212           if (synonymous_.IsEquivalent(*equation.operands[0], *rhs_dds[1])) {
213             // Equation form: "a = (c - e) - c"
214             // We can thus infer "a = -e"
215             AddEquationFactRecursive(lhs_dd, SpvOpSNegate,
216                                      {equation.operands[1]});
217           }
218         }
219       }
220 
221       for (const auto& equation : GetEquations(rhs_dds[1])) {
222         if (equation.opcode == SpvOpIAdd) {
223           // Equation form: "a = b - (d + e)"
224           if (synonymous_.IsEquivalent(*equation.operands[0], *rhs_dds[0])) {
225             // Equation form: "a = b - (b + e)"
226             // We can thus infer "a = -e"
227             AddEquationFactRecursive(lhs_dd, SpvOpSNegate,
228                                      {equation.operands[1]});
229           }
230           if (synonymous_.IsEquivalent(*equation.operands[1], *rhs_dds[0])) {
231             // Equation form: "a = b - (d + b)"
232             // We can thus infer "a = -d"
233             AddEquationFactRecursive(lhs_dd, SpvOpSNegate,
234                                      {equation.operands[0]});
235           }
236         }
237         if (equation.opcode == SpvOpISub) {
238           // Equation form: "a = b - (d - e)"
239           if (synonymous_.IsEquivalent(*equation.operands[0], *rhs_dds[0])) {
240             // Equation form: "a = b - (b - e)"
241             // We can thus infer "a = e"
242             AddDataSynonymFactRecursive(lhs_dd, *equation.operands[1]);
243           }
244         }
245       }
246       break;
247     }
248     case SpvOpLogicalNot:
249     case SpvOpSNegate: {
250       // Equation form: "a = !b" or "a = -b"
251       for (const auto& equation : GetEquations(rhs_dds[0])) {
252         if (equation.opcode == opcode) {
253           // Equation form: "a = !!b" or "a = -(-b)"
254           // We can thus infer "a = b"
255           AddDataSynonymFactRecursive(lhs_dd, *equation.operands[0]);
256         }
257       }
258       break;
259     }
260     default:
261       break;
262   }
263 }
264 
AddDataSynonymFactRecursive(const protobufs::DataDescriptor & dd1,const protobufs::DataDescriptor & dd2)265 void DataSynonymAndIdEquationFacts::AddDataSynonymFactRecursive(
266     const protobufs::DataDescriptor& dd1,
267     const protobufs::DataDescriptor& dd2) {
268   assert((!ObjectStillExists(dd1) || !ObjectStillExists(dd2) ||
269           DataDescriptorsAreWellFormedAndComparable(dd1, dd2)) &&
270          "Mismatched data descriptors.");
271 
272   // Record that the data descriptors provided in the fact are equivalent.
273   MakeEquivalent(dd1, dd2);
274   assert(synonymous_.Find(&dd1) == synonymous_.Find(&dd2) &&
275          "|dd1| and |dd2| must have a single representative");
276 
277   // Compute various corollary facts.
278 
279   // |dd1| and |dd2| belong to the same equivalence class so it doesn't matter
280   // which one we use here.
281   ComputeConversionDataSynonymFacts(dd1);
282 
283   ComputeCompositeDataSynonymFacts(dd1, dd2);
284 }
285 
ComputeConversionDataSynonymFacts(const protobufs::DataDescriptor & dd)286 void DataSynonymAndIdEquationFacts::ComputeConversionDataSynonymFacts(
287     const protobufs::DataDescriptor& dd) {
288   assert(synonymous_.Exists(dd) &&
289          "|dd| should've been registered in the equivalence relation");
290 
291   if (!ObjectStillExists(dd)) {
292     // The object is gone from the module, so we cannot proceed.
293     return;
294   }
295 
296   const auto* type =
297       ir_context_->get_type_mgr()->GetType(fuzzerutil::WalkCompositeTypeIndices(
298           ir_context_, fuzzerutil::GetTypeId(ir_context_, dd.object()),
299           dd.index()));
300   assert(type && "Data descriptor has invalid type");
301 
302   if ((type->AsVector() && type->AsVector()->element_type()->AsInteger()) ||
303       type->AsInteger()) {
304     // If there exist equation facts of the form |%a = opcode %representative|
305     // and |%b = opcode %representative| where |opcode| is either OpConvertSToF
306     // or OpConvertUToF, then |a| and |b| are synonymous.
307     std::vector<const protobufs::DataDescriptor*> convert_s_to_f_lhs;
308     std::vector<const protobufs::DataDescriptor*> convert_u_to_f_lhs;
309 
310     for (const auto& fact : id_equations_) {
311       auto equivalence_class = synonymous_.GetEquivalenceClass(*fact.first);
312       auto dd_it =
313           std::find_if(equivalence_class.begin(), equivalence_class.end(),
314                        [this](const protobufs::DataDescriptor* a) {
315                          return ObjectStillExists(*a);
316                        });
317       if (dd_it == equivalence_class.end()) {
318         // Skip |equivalence_class| if it has no valid ids.
319         continue;
320       }
321 
322       for (const auto& equation : fact.second) {
323         if (synonymous_.IsEquivalent(*equation.operands[0], dd)) {
324           if (equation.opcode == SpvOpConvertSToF) {
325             convert_s_to_f_lhs.push_back(*dd_it);
326           } else if (equation.opcode == SpvOpConvertUToF) {
327             convert_u_to_f_lhs.push_back(*dd_it);
328           }
329         }
330       }
331     }
332 
333     // We use pointers in the initializer list here since otherwise we would
334     // copy memory from these vectors.
335     for (const auto* synonyms : {&convert_s_to_f_lhs, &convert_u_to_f_lhs}) {
336       for (const auto* synonym_a : *synonyms) {
337         for (const auto* synonym_b : *synonyms) {
338           // DataDescriptorsAreWellFormedAndComparable will be called in the
339           // AddDataSynonymFactRecursive method.
340           if (!synonymous_.IsEquivalent(*synonym_a, *synonym_b)) {
341             // |synonym_a| and |synonym_b| have compatible types - they are
342             // synonymous.
343             AddDataSynonymFactRecursive(*synonym_a, *synonym_b);
344           }
345         }
346       }
347     }
348   }
349 }
350 
ComputeCompositeDataSynonymFacts(const protobufs::DataDescriptor & dd1,const protobufs::DataDescriptor & dd2)351 void DataSynonymAndIdEquationFacts::ComputeCompositeDataSynonymFacts(
352     const protobufs::DataDescriptor& dd1,
353     const protobufs::DataDescriptor& dd2) {
354   // Check whether this is a synonym about composite objects. If it is,
355   // we can recursively add synonym facts about their associated sub-components.
356 
357   // Get the type of the object referred to by the first data descriptor in the
358   // synonym fact.
359   uint32_t type_id = fuzzerutil::WalkCompositeTypeIndices(
360       ir_context_,
361       ir_context_->get_def_use_mgr()->GetDef(dd1.object())->type_id(),
362       dd1.index());
363   auto type = ir_context_->get_type_mgr()->GetType(type_id);
364   auto type_instruction = ir_context_->get_def_use_mgr()->GetDef(type_id);
365   assert(type != nullptr &&
366          "Invalid data synonym fact: one side has an unknown type.");
367 
368   // Check whether the type is composite, recording the number of elements
369   // associated with the composite if so.
370   uint32_t num_composite_elements;
371   if (type->AsArray()) {
372     num_composite_elements =
373         fuzzerutil::GetArraySize(*type_instruction, ir_context_);
374   } else if (type->AsMatrix()) {
375     num_composite_elements = type->AsMatrix()->element_count();
376   } else if (type->AsStruct()) {
377     num_composite_elements =
378         fuzzerutil::GetNumberOfStructMembers(*type_instruction);
379   } else if (type->AsVector()) {
380     num_composite_elements = type->AsVector()->element_count();
381   } else {
382     // The type is not a composite, so return.
383     return;
384   }
385 
386   // If the fact has the form:
387   //   obj_1[a_1, ..., a_m] == obj_2[b_1, ..., b_n]
388   // then for each composite index i, we add a fact of the form:
389   //   obj_1[a_1, ..., a_m, i] == obj_2[b_1, ..., b_n, i]
390   //
391   // However, to avoid adding a large number of synonym facts e.g. in the case
392   // of arrays, we bound the number of composite elements to which this is
393   // applied.  Nevertheless, we always add a synonym fact for the final
394   // components, as this may be an interesting edge case.
395 
396   // The bound on the number of indices of the composite pair to note as being
397   // synonymous.
398   const uint32_t kCompositeElementBound = 10;
399 
400   for (uint32_t i = 0; i < num_composite_elements;) {
401     std::vector<uint32_t> extended_indices1 =
402         fuzzerutil::RepeatedFieldToVector(dd1.index());
403     extended_indices1.push_back(i);
404     std::vector<uint32_t> extended_indices2 =
405         fuzzerutil::RepeatedFieldToVector(dd2.index());
406     extended_indices2.push_back(i);
407     AddDataSynonymFactRecursive(
408         MakeDataDescriptor(dd1.object(), extended_indices1),
409         MakeDataDescriptor(dd2.object(), extended_indices2));
410 
411     if (i < kCompositeElementBound - 1 || i == num_composite_elements - 1) {
412       // We have not reached the bound yet, or have already skipped ahead to the
413       // last element, so increment the loop counter as standard.
414       i++;
415     } else {
416       // We have reached the bound, so skip ahead to the last element.
417       assert(i == kCompositeElementBound - 1);
418       i = num_composite_elements - 1;
419     }
420   }
421 }
422 
ComputeClosureOfFacts(uint32_t maximum_equivalence_class_size)423 void DataSynonymAndIdEquationFacts::ComputeClosureOfFacts(
424     uint32_t maximum_equivalence_class_size) {
425   // Suppose that obj_1[a_1, ..., a_m] and obj_2[b_1, ..., b_n] are distinct
426   // data descriptors that describe objects of the same composite type, and that
427   // the composite type is comprised of k components.
428   //
429   // For example, if m is a mat4x4 and v a vec4, we might consider:
430   //   m[2]: describes the 2nd column of m, a vec4
431   //   v[]: describes all of v, a vec4
432   //
433   // Suppose that we know, for every 0 <= i < k, that the fact:
434   //   obj_1[a_1, ..., a_m, i] == obj_2[b_1, ..., b_n, i]
435   // holds - i.e. that the children of the two data descriptors are synonymous.
436   //
437   // Then we can conclude that:
438   //   obj_1[a_1, ..., a_m] == obj_2[b_1, ..., b_n]
439   // holds.
440   //
441   // For instance, if we have the facts:
442   //   m[2, 0] == v[0]
443   //   m[2, 1] == v[1]
444   //   m[2, 2] == v[2]
445   //   m[2, 3] == v[3]
446   // then we can conclude that:
447   //   m[2] == v.
448   //
449   // This method repeatedly searches the equivalence relation of data
450   // descriptors, deducing and adding such facts, until a pass over the
451   // relation leads to no further facts being deduced.
452 
453   // The method relies on working with pairs of data descriptors, and in
454   // particular being able to hash and compare such pairs.
455 
456   using DataDescriptorPair =
457       std::pair<protobufs::DataDescriptor, protobufs::DataDescriptor>;
458 
459   struct DataDescriptorPairHash {
460     std::size_t operator()(const DataDescriptorPair& pair) const {
461       return DataDescriptorHash()(&pair.first) ^
462              DataDescriptorHash()(&pair.second);
463     }
464   };
465 
466   struct DataDescriptorPairEquals {
467     bool operator()(const DataDescriptorPair& first,
468                     const DataDescriptorPair& second) const {
469       return (DataDescriptorEquals()(&first.first, &second.first) &&
470               DataDescriptorEquals()(&first.second, &second.second)) ||
471              (DataDescriptorEquals()(&first.first, &second.second) &&
472               DataDescriptorEquals()(&first.second, &second.first));
473     }
474   };
475 
476   // This map records, for a given pair of composite data descriptors of the
477   // same type, all the indices at which the data descriptors are known to be
478   // synonymous.  A pair is a key to this map only if we have observed that
479   // the pair are synonymous at *some* index, but not at *all* indices.
480   // Once we find that a pair of data descriptors are equivalent at all indices
481   // we record the fact that they are synonymous and remove them from the map.
482   //
483   // Using the m and v example from above, initially the pair (m[2], v) would
484   // not be a key to the map.  If we find that m[2, 2] == v[2] holds, we would
485   // add an entry:
486   //   (m[2], v) -> [false, false, true, false]
487   // to record that they are synonymous at index 2.  If we then find that
488   // m[2, 0] == v[0] holds, we would update this entry to:
489   //   (m[2], v) -> [true, false, true, false]
490   // If we then find that m[2, 3] == v[3] holds, we would update this entry to:
491   //   (m[2], v) -> [true, false, true, true]
492   // Finally, if we then find that m[2, 1] == v[1] holds, which would make the
493   // boolean vector true at every index, we would add the fact:
494   //   m[2] == v
495   // to the equivalence relation and remove (m[2], v) from the map.
496   std::unordered_map<DataDescriptorPair, std::vector<bool>,
497                      DataDescriptorPairHash, DataDescriptorPairEquals>
498       candidate_composite_synonyms;
499 
500   // We keep looking for new facts until we perform a complete pass over the
501   // equivalence relation without finding any new facts.
502   while (closure_computation_required_) {
503     // We have not found any new facts yet during this pass; we set this to
504     // 'true' if we do find a new fact.
505     closure_computation_required_ = false;
506 
507     // Consider each class in the equivalence relation.
508     for (auto representative :
509          synonymous_.GetEquivalenceClassRepresentatives()) {
510       auto equivalence_class = synonymous_.GetEquivalenceClass(*representative);
511 
512       if (equivalence_class.size() > maximum_equivalence_class_size) {
513         // This equivalence class is larger than the maximum size we are willing
514         // to consider, so we skip it.  This potentially leads to missed fact
515         // deductions, but avoids excessive runtime for closure computation.
516         continue;
517       }
518 
519       // Consider every data descriptor in the equivalence class.
520       for (auto dd1_it = equivalence_class.begin();
521            dd1_it != equivalence_class.end(); ++dd1_it) {
522         // If this data descriptor has no indices then it does not have the form
523         // obj_1[a_1, ..., a_m, i], so move on.
524         auto dd1 = *dd1_it;
525         if (dd1->index_size() == 0) {
526           continue;
527         }
528 
529         // Consider every other data descriptor later in the equivalence class
530         // (due to symmetry, there is no need to compare with previous data
531         // descriptors).
532         auto dd2_it = dd1_it;
533         for (++dd2_it; dd2_it != equivalence_class.end(); ++dd2_it) {
534           auto dd2 = *dd2_it;
535           // If this data descriptor has no indices then it does not have the
536           // form obj_2[b_1, ..., b_n, i], so move on.
537           if (dd2->index_size() == 0) {
538             continue;
539           }
540 
541           // At this point we know that:
542           // - |dd1| has the form obj_1[a_1, ..., a_m, i]
543           // - |dd2| has the form obj_2[b_1, ..., b_n, j]
544           assert(dd1->index_size() > 0 && dd2->index_size() > 0 &&
545                  "Control should not reach here if either data descriptor has "
546                  "no indices.");
547 
548           // We are only interested if i == j.
549           if (dd1->index(dd1->index_size() - 1) !=
550               dd2->index(dd2->index_size() - 1)) {
551             continue;
552           }
553 
554           const uint32_t common_final_index = dd1->index(dd1->index_size() - 1);
555 
556           // Make data descriptors |dd1_prefix| and |dd2_prefix| for
557           //   obj_1[a_1, ..., a_m]
558           // and
559           //   obj_2[b_1, ..., b_n]
560           // These are the two data descriptors we might be getting closer to
561           // deducing as being synonymous, due to knowing that they are
562           // synonymous when extended by a particular index.
563           protobufs::DataDescriptor dd1_prefix;
564           dd1_prefix.set_object(dd1->object());
565           for (uint32_t i = 0; i < static_cast<uint32_t>(dd1->index_size() - 1);
566                i++) {
567             dd1_prefix.add_index(dd1->index(i));
568           }
569           protobufs::DataDescriptor dd2_prefix;
570           dd2_prefix.set_object(dd2->object());
571           for (uint32_t i = 0; i < static_cast<uint32_t>(dd2->index_size() - 1);
572                i++) {
573             dd2_prefix.add_index(dd2->index(i));
574           }
575           assert(!DataDescriptorEquals()(&dd1_prefix, &dd2_prefix) &&
576                  "By construction these prefixes should be different.");
577 
578           // If we already know that these prefixes are synonymous, move on.
579           if (synonymous_.Exists(dd1_prefix) &&
580               synonymous_.Exists(dd2_prefix) &&
581               synonymous_.IsEquivalent(dd1_prefix, dd2_prefix)) {
582             continue;
583           }
584           if (!ObjectStillExists(*dd1) || !ObjectStillExists(*dd2)) {
585             // The objects are not both available in the module, so we cannot
586             // investigate the types of the associated data descriptors; we need
587             // to move on.
588             continue;
589           }
590           // Get the type of obj_1
591           auto dd1_root_type_id =
592               fuzzerutil::GetTypeId(ir_context_, dd1->object());
593           // Use this type, together with a_1, ..., a_m, to get the type of
594           // obj_1[a_1, ..., a_m].
595           auto dd1_prefix_type = fuzzerutil::WalkCompositeTypeIndices(
596               ir_context_, dd1_root_type_id, dd1_prefix.index());
597 
598           // Similarly, get the type of obj_2 and use it to get the type of
599           // obj_2[b_1, ..., b_n].
600           auto dd2_root_type_id =
601               fuzzerutil::GetTypeId(ir_context_, dd2->object());
602           auto dd2_prefix_type = fuzzerutil::WalkCompositeTypeIndices(
603               ir_context_, dd2_root_type_id, dd2_prefix.index());
604 
605           // If the types of dd1_prefix and dd2_prefix are not the same, they
606           // cannot be synonymous.
607           if (dd1_prefix_type != dd2_prefix_type) {
608             continue;
609           }
610 
611           // At this point, we know we have synonymous data descriptors of the
612           // form:
613           //   obj_1[a_1, ..., a_m, i]
614           //   obj_2[b_1, ..., b_n, i]
615           // with the same last_index i, such that:
616           //   obj_1[a_1, ..., a_m]
617           // and
618           //   obj_2[b_1, ..., b_n]
619           // have the same type.
620 
621           // Work out how many components there are in the (common) commposite
622           // type associated with obj_1[a_1, ..., a_m] and obj_2[b_1, ..., b_n].
623           // This depends on whether the composite type is array, matrix, struct
624           // or vector.
625           uint32_t num_components_in_composite;
626           auto composite_type =
627               ir_context_->get_type_mgr()->GetType(dd1_prefix_type);
628           auto composite_type_instruction =
629               ir_context_->get_def_use_mgr()->GetDef(dd1_prefix_type);
630           if (composite_type->AsArray()) {
631             num_components_in_composite = fuzzerutil::GetArraySize(
632                 *composite_type_instruction, ir_context_);
633             if (num_components_in_composite == 0) {
634               // This indicates that the array has an unknown size, in which
635               // case we cannot be sure we have matched all of its elements with
636               // synonymous elements of another array.
637               continue;
638             }
639           } else if (composite_type->AsMatrix()) {
640             num_components_in_composite =
641                 composite_type->AsMatrix()->element_count();
642           } else if (composite_type->AsStruct()) {
643             num_components_in_composite = fuzzerutil::GetNumberOfStructMembers(
644                 *composite_type_instruction);
645           } else {
646             assert(composite_type->AsVector());
647             num_components_in_composite =
648                 composite_type->AsVector()->element_count();
649           }
650 
651           // We are one step closer to being able to say that |dd1_prefix| and
652           // |dd2_prefix| are synonymous.
653           DataDescriptorPair candidate_composite_synonym(dd1_prefix,
654                                                          dd2_prefix);
655 
656           // We look up what we already know about this pair.
657           auto existing_entry =
658               candidate_composite_synonyms.find(candidate_composite_synonym);
659 
660           if (existing_entry == candidate_composite_synonyms.end()) {
661             // If this is the first time we have seen the pair, we make a vector
662             // of size |num_components_in_composite| that is 'true' at the
663             // common final index associated with |dd1| and |dd2|, and 'false'
664             // everywhere else, and register this vector as being associated
665             // with the pair.
666             std::vector<bool> entry;
667             for (uint32_t i = 0; i < num_components_in_composite; i++) {
668               entry.push_back(i == common_final_index);
669             }
670             candidate_composite_synonyms[candidate_composite_synonym] = entry;
671             existing_entry =
672                 candidate_composite_synonyms.find(candidate_composite_synonym);
673           } else {
674             // We have seen this pair of data descriptors before, and we now
675             // know that they are synonymous at one further index, so we
676             // update the entry to record that.
677             existing_entry->second[common_final_index] = true;
678           }
679           assert(existing_entry != candidate_composite_synonyms.end());
680 
681           // Check whether |dd1_prefix| and |dd2_prefix| are now known to match
682           // at every sub-component.
683           bool all_components_match = true;
684           for (uint32_t i = 0; i < num_components_in_composite; i++) {
685             if (!existing_entry->second[i]) {
686               all_components_match = false;
687               break;
688             }
689           }
690           if (all_components_match) {
691             // The two prefixes match on all sub-components, so we know that
692             // they are synonymous.  We add this fact *non-recursively*, as we
693             // have deduced that |dd1_prefix| and |dd2_prefix| are synonymous
694             // by observing that all their sub-components are already
695             // synonymous.
696             assert(DataDescriptorsAreWellFormedAndComparable(dd1_prefix,
697                                                              dd2_prefix));
698             MakeEquivalent(dd1_prefix, dd2_prefix);
699             // Now that we know this pair of data descriptors are synonymous,
700             // there is no point recording how close they are to being
701             // synonymous.
702             candidate_composite_synonyms.erase(candidate_composite_synonym);
703           }
704         }
705       }
706     }
707   }
708 }
709 
MakeEquivalent(const protobufs::DataDescriptor & dd1,const protobufs::DataDescriptor & dd2)710 void DataSynonymAndIdEquationFacts::MakeEquivalent(
711     const protobufs::DataDescriptor& dd1,
712     const protobufs::DataDescriptor& dd2) {
713   // Register the data descriptors if they are not already known to the
714   // equivalence relation.
715   RegisterDataDescriptor(dd1);
716   RegisterDataDescriptor(dd2);
717 
718   if (synonymous_.IsEquivalent(dd1, dd2)) {
719     // The data descriptors are already known to be equivalent, so there is
720     // nothing to do.
721     return;
722   }
723 
724   // We must make the data descriptors equivalent, and also make sure any
725   // equation facts known about their representatives are merged.
726 
727   // Record the original equivalence class representatives of the data
728   // descriptors.
729   auto dd1_original_representative = synonymous_.Find(&dd1);
730   auto dd2_original_representative = synonymous_.Find(&dd2);
731 
732   // Make the data descriptors equivalent.
733   synonymous_.MakeEquivalent(dd1, dd2);
734   // As we have updated the equivalence relation, we might be able to deduce
735   // more facts by performing a closure computation, so we record that such a
736   // computation is required.
737   closure_computation_required_ = true;
738 
739   // At this point, exactly one of |dd1_original_representative| and
740   // |dd2_original_representative| will be the representative of the combined
741   // equivalence class.  We work out which one of them is still the class
742   // representative and which one is no longer the class representative.
743 
744   auto still_representative = synonymous_.Find(dd1_original_representative) ==
745                                       dd1_original_representative
746                                   ? dd1_original_representative
747                                   : dd2_original_representative;
748   auto no_longer_representative =
749       still_representative == dd1_original_representative
750           ? dd2_original_representative
751           : dd1_original_representative;
752 
753   assert(no_longer_representative != still_representative &&
754          "The current and former representatives cannot be the same.");
755 
756   // We now need to add all equations about |no_longer_representative| to the
757   // set of equations known about |still_representative|.
758 
759   // Get the equations associated with |no_longer_representative|.
760   auto no_longer_representative_id_equations =
761       id_equations_.find(no_longer_representative);
762   if (no_longer_representative_id_equations != id_equations_.end()) {
763     // There are some equations to transfer.  There might not yet be any
764     // equations about |still_representative|; create an empty set of equations
765     // if this is the case.
766     if (!id_equations_.count(still_representative)) {
767       id_equations_.insert({still_representative, OperationSet()});
768     }
769     auto still_representative_id_equations =
770         id_equations_.find(still_representative);
771     assert(still_representative_id_equations != id_equations_.end() &&
772            "At this point there must be a set of equations.");
773     // Add all the equations known about |no_longer_representative| to the set
774     // of equations known about |still_representative|.
775     still_representative_id_equations->second.insert(
776         no_longer_representative_id_equations->second.begin(),
777         no_longer_representative_id_equations->second.end());
778   }
779   // Delete the no longer-relevant equations about |no_longer_representative|.
780   id_equations_.erase(no_longer_representative);
781 }
782 
783 const protobufs::DataDescriptor*
RegisterDataDescriptor(const protobufs::DataDescriptor & dd)784 DataSynonymAndIdEquationFacts::RegisterDataDescriptor(
785     const protobufs::DataDescriptor& dd) {
786   return synonymous_.Exists(dd) ? synonymous_.Find(&dd)
787                                 : synonymous_.Register(dd);
788 }
789 
DataDescriptorsAreWellFormedAndComparable(const protobufs::DataDescriptor & dd1,const protobufs::DataDescriptor & dd2) const790 bool DataSynonymAndIdEquationFacts::DataDescriptorsAreWellFormedAndComparable(
791     const protobufs::DataDescriptor& dd1,
792     const protobufs::DataDescriptor& dd2) const {
793   if (!ObjectStillExists(dd1) || !ObjectStillExists(dd2)) {
794     // We trivially return true if one or other of the objects associated with
795     // the data descriptors is gone.
796     return true;
797   }
798 
799   auto end_type_id_1 = fuzzerutil::WalkCompositeTypeIndices(
800       ir_context_, fuzzerutil::GetTypeId(ir_context_, dd1.object()),
801       dd1.index());
802   auto end_type_id_2 = fuzzerutil::WalkCompositeTypeIndices(
803       ir_context_, fuzzerutil::GetTypeId(ir_context_, dd2.object()),
804       dd2.index());
805   // The end types of the data descriptors must exist.
806   if (end_type_id_1 == 0 || end_type_id_2 == 0) {
807     return false;
808   }
809   // Neither end type is allowed to be void.
810   if (ir_context_->get_def_use_mgr()->GetDef(end_type_id_1)->opcode() ==
811           SpvOpTypeVoid ||
812       ir_context_->get_def_use_mgr()->GetDef(end_type_id_2)->opcode() ==
813           SpvOpTypeVoid) {
814     return false;
815   }
816   // If the end types are the same, the data descriptors are comparable.
817   if (end_type_id_1 == end_type_id_2) {
818     return true;
819   }
820   // Otherwise they are only comparable if they are integer scalars or integer
821   // vectors that differ only in signedness.
822 
823   // Get both types.
824   const auto* type_a = ir_context_->get_type_mgr()->GetType(end_type_id_1);
825   const auto* type_b = ir_context_->get_type_mgr()->GetType(end_type_id_2);
826   assert(type_a && type_b && "Data descriptors have invalid type(s)");
827 
828   // If both types are numerical or vectors of numerical components, then they
829   // are compatible if they have the same number of components and the same bit
830   // count per component.
831 
832   if (type_a->AsVector() && type_b->AsVector()) {
833     const auto* vector_a = type_a->AsVector();
834     const auto* vector_b = type_b->AsVector();
835 
836     if (vector_a->element_count() != vector_b->element_count() ||
837         vector_a->element_type()->AsBool() ||
838         vector_b->element_type()->AsBool()) {
839       // The case where both vectors have boolean elements and the same number
840       // of components is handled by the direct equality check earlier.
841       // You can't have multiple identical boolean vector types.
842       return false;
843     }
844 
845     type_a = vector_a->element_type();
846     type_b = vector_b->element_type();
847   }
848 
849   auto get_bit_count_for_numeric_type =
850       [](const opt::analysis::Type& type) -> uint32_t {
851     if (const auto* integer = type.AsInteger()) {
852       return integer->width();
853     } else if (const auto* floating = type.AsFloat()) {
854       return floating->width();
855     } else {
856       assert(false && "|type| must be a numerical type");
857       return 0;
858     }
859   };
860 
861   // Checks that both |type_a| and |type_b| are either numerical or vectors of
862   // numerical components and have the same number of bits.
863   return (type_a->AsInteger() || type_a->AsFloat()) &&
864          (type_b->AsInteger() || type_b->AsFloat()) &&
865          (get_bit_count_for_numeric_type(*type_a) ==
866           get_bit_count_for_numeric_type(*type_b));
867 }
868 
869 std::vector<const protobufs::DataDescriptor*>
GetSynonymsForId(uint32_t id) const870 DataSynonymAndIdEquationFacts::GetSynonymsForId(uint32_t id) const {
871   return GetSynonymsForDataDescriptor(MakeDataDescriptor(id, {}));
872 }
873 
874 std::vector<const protobufs::DataDescriptor*>
GetSynonymsForDataDescriptor(const protobufs::DataDescriptor & data_descriptor) const875 DataSynonymAndIdEquationFacts::GetSynonymsForDataDescriptor(
876     const protobufs::DataDescriptor& data_descriptor) const {
877   std::vector<const protobufs::DataDescriptor*> result;
878   if (synonymous_.Exists(data_descriptor)) {
879     for (auto dd : synonymous_.GetEquivalenceClass(data_descriptor)) {
880       // There may be data descriptors in the equivalence class whose base
881       // objects have been removed from the module.  We do not expose these
882       // data descriptors to clients of the fact manager.
883       if (ObjectStillExists(*dd)) {
884         result.push_back(dd);
885       }
886     }
887   }
888   return result;
889 }
890 
891 std::vector<uint32_t>
GetIdsForWhichSynonymsAreKnown() const892 DataSynonymAndIdEquationFacts::GetIdsForWhichSynonymsAreKnown() const {
893   std::vector<uint32_t> result;
894   for (auto& data_descriptor : synonymous_.GetAllKnownValues()) {
895     // We skip any data descriptors whose base objects no longer exist in the
896     // module, and we restrict attention to data descriptors for plain ids,
897     // which have no indices.
898     if (ObjectStillExists(*data_descriptor) &&
899         data_descriptor->index().empty()) {
900       result.push_back(data_descriptor->object());
901     }
902   }
903   return result;
904 }
905 
906 std::vector<const protobufs::DataDescriptor*>
GetAllKnownSynonyms() const907 DataSynonymAndIdEquationFacts::GetAllKnownSynonyms() const {
908   std::vector<const protobufs::DataDescriptor*> result;
909   for (const auto* dd : synonymous_.GetAllKnownValues()) {
910     if (ObjectStillExists(*dd)) {
911       result.push_back(dd);
912     }
913   }
914   return result;
915 }
916 
IsSynonymous(const protobufs::DataDescriptor & data_descriptor1,const protobufs::DataDescriptor & data_descriptor2) const917 bool DataSynonymAndIdEquationFacts::IsSynonymous(
918     const protobufs::DataDescriptor& data_descriptor1,
919     const protobufs::DataDescriptor& data_descriptor2) const {
920   return synonymous_.Exists(data_descriptor1) &&
921          synonymous_.Exists(data_descriptor2) &&
922          synonymous_.IsEquivalent(data_descriptor1, data_descriptor2);
923 }
924 
ObjectStillExists(const protobufs::DataDescriptor & dd) const925 bool DataSynonymAndIdEquationFacts::ObjectStillExists(
926     const protobufs::DataDescriptor& dd) const {
927   return ir_context_->get_def_use_mgr()->GetDef(dd.object()) != nullptr;
928 }
929 
930 }  // namespace fact_manager
931 }  // namespace fuzz
932 }  // namespace spvtools
933