1 // Copyright (c) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "source/fuzz/fact_manager/data_synonym_and_id_equation_facts.h"
16
17 #include "source/fuzz/fuzzer_util.h"
18
19 namespace spvtools {
20 namespace fuzz {
21 namespace fact_manager {
22
operator ()(const Operation & operation) const23 size_t DataSynonymAndIdEquationFacts::OperationHash::operator()(
24 const Operation& operation) const {
25 std::u32string hash;
26 hash.push_back(operation.opcode);
27 for (auto operand : operation.operands) {
28 hash.push_back(static_cast<uint32_t>(DataDescriptorHash()(operand)));
29 }
30 return std::hash<std::u32string>()(hash);
31 }
32
operator ()(const Operation & first,const Operation & second) const33 bool DataSynonymAndIdEquationFacts::OperationEquals::operator()(
34 const Operation& first, const Operation& second) const {
35 // Equal operations require...
36 //
37 // Equal opcodes.
38 if (first.opcode != second.opcode) {
39 return false;
40 }
41 // Matching operand counts.
42 if (first.operands.size() != second.operands.size()) {
43 return false;
44 }
45 // Equal operands.
46 for (uint32_t i = 0; i < first.operands.size(); i++) {
47 if (!DataDescriptorEquals()(first.operands[i], second.operands[i])) {
48 return false;
49 }
50 }
51 return true;
52 }
53
DataSynonymAndIdEquationFacts(opt::IRContext * ir_context)54 DataSynonymAndIdEquationFacts::DataSynonymAndIdEquationFacts(
55 opt::IRContext* ir_context)
56 : ir_context_(ir_context) {}
57
MaybeAddFact(const protobufs::FactDataSynonym & fact,const DeadBlockFacts & dead_block_facts,const IrrelevantValueFacts & irrelevant_value_facts)58 bool DataSynonymAndIdEquationFacts::MaybeAddFact(
59 const protobufs::FactDataSynonym& fact,
60 const DeadBlockFacts& dead_block_facts,
61 const IrrelevantValueFacts& irrelevant_value_facts) {
62 if (irrelevant_value_facts.IdIsIrrelevant(fact.data1().object(),
63 dead_block_facts) ||
64 irrelevant_value_facts.IdIsIrrelevant(fact.data2().object(),
65 dead_block_facts)) {
66 // Irrelevant ids cannot be synonymous with other ids.
67 return false;
68 }
69
70 // Add the fact, including all facts relating sub-components of the data
71 // descriptors that are involved.
72 AddDataSynonymFactRecursive(fact.data1(), fact.data2());
73 return true;
74 }
75
MaybeAddFact(const protobufs::FactIdEquation & fact,const DeadBlockFacts & dead_block_facts,const IrrelevantValueFacts & irrelevant_value_facts)76 bool DataSynonymAndIdEquationFacts::MaybeAddFact(
77 const protobufs::FactIdEquation& fact,
78 const DeadBlockFacts& dead_block_facts,
79 const IrrelevantValueFacts& irrelevant_value_facts) {
80 if (irrelevant_value_facts.IdIsIrrelevant(fact.lhs_id(), dead_block_facts)) {
81 // Irrelevant ids cannot participate in IdEquation facts.
82 return false;
83 }
84
85 for (auto id : fact.rhs_id()) {
86 if (irrelevant_value_facts.IdIsIrrelevant(id, dead_block_facts)) {
87 // Irrelevant ids cannot participate in IdEquation facts.
88 return false;
89 }
90 }
91
92 protobufs::DataDescriptor lhs_dd = MakeDataDescriptor(fact.lhs_id(), {});
93
94 // Register the LHS in the equivalence relation if needed.
95 RegisterDataDescriptor(lhs_dd);
96
97 // Get equivalence class representatives for all ids used on the RHS of the
98 // equation.
99 std::vector<const protobufs::DataDescriptor*> rhs_dds;
100 for (auto rhs_id : fact.rhs_id()) {
101 // Register a data descriptor based on this id in the equivalence relation
102 // if needed, and then record the equivalence class representative.
103 rhs_dds.push_back(RegisterDataDescriptor(MakeDataDescriptor(rhs_id, {})));
104 }
105
106 // Now add the fact.
107 AddEquationFactRecursive(lhs_dd, static_cast<SpvOp>(fact.opcode()), rhs_dds);
108 return true;
109 }
110
111 DataSynonymAndIdEquationFacts::OperationSet
GetEquations(const protobufs::DataDescriptor * lhs) const112 DataSynonymAndIdEquationFacts::GetEquations(
113 const protobufs::DataDescriptor* lhs) const {
114 auto existing = id_equations_.find(lhs);
115 if (existing == id_equations_.end()) {
116 return OperationSet();
117 }
118 return existing->second;
119 }
120
AddEquationFactRecursive(const protobufs::DataDescriptor & lhs_dd,SpvOp opcode,const std::vector<const protobufs::DataDescriptor * > & rhs_dds)121 void DataSynonymAndIdEquationFacts::AddEquationFactRecursive(
122 const protobufs::DataDescriptor& lhs_dd, SpvOp opcode,
123 const std::vector<const protobufs::DataDescriptor*>& rhs_dds) {
124 assert(synonymous_.Exists(lhs_dd) &&
125 "The LHS must be known to the equivalence relation.");
126 for (auto rhs_dd : rhs_dds) {
127 // Keep release compilers happy.
128 (void)(rhs_dd);
129 assert(synonymous_.Exists(*rhs_dd) &&
130 "The RHS operands must be known to the equivalence relation.");
131 }
132
133 auto lhs_dd_representative = synonymous_.Find(&lhs_dd);
134
135 if (id_equations_.count(lhs_dd_representative) == 0) {
136 // We have not seen an equation with this LHS before, so associate the LHS
137 // with an initially empty set.
138 id_equations_.insert({lhs_dd_representative, OperationSet()});
139 }
140
141 {
142 auto existing_equations = id_equations_.find(lhs_dd_representative);
143 assert(existing_equations != id_equations_.end() &&
144 "A set of operations should be present, even if empty.");
145
146 Operation new_operation = {opcode, rhs_dds};
147 if (existing_equations->second.count(new_operation)) {
148 // This equation is known, so there is nothing further to be done.
149 return;
150 }
151 // Add the equation to the set of known equations.
152 existing_equations->second.insert(new_operation);
153 }
154
155 // Now try to work out corollaries implied by the new equation and existing
156 // facts.
157 switch (opcode) {
158 case SpvOpConvertSToF:
159 case SpvOpConvertUToF:
160 ComputeConversionDataSynonymFacts(*rhs_dds[0]);
161 break;
162 case SpvOpBitcast: {
163 assert(DataDescriptorsAreWellFormedAndComparable(lhs_dd, *rhs_dds[0]) &&
164 "Operands of OpBitcast equation fact must have compatible types");
165 if (!synonymous_.IsEquivalent(lhs_dd, *rhs_dds[0])) {
166 AddDataSynonymFactRecursive(lhs_dd, *rhs_dds[0]);
167 }
168 } break;
169 case SpvOpIAdd: {
170 // Equation form: "a = b + c"
171 for (const auto& equation : GetEquations(rhs_dds[0])) {
172 if (equation.opcode == SpvOpISub) {
173 // Equation form: "a = (d - e) + c"
174 if (synonymous_.IsEquivalent(*equation.operands[1], *rhs_dds[1])) {
175 // Equation form: "a = (d - c) + c"
176 // We can thus infer "a = d"
177 AddDataSynonymFactRecursive(lhs_dd, *equation.operands[0]);
178 }
179 }
180 }
181 for (const auto& equation : GetEquations(rhs_dds[1])) {
182 if (equation.opcode == SpvOpISub) {
183 // Equation form: "a = b + (d - e)"
184 if (synonymous_.IsEquivalent(*equation.operands[1], *rhs_dds[0])) {
185 // Equation form: "a = b + (d - b)"
186 // We can thus infer "a = d"
187 AddDataSynonymFactRecursive(lhs_dd, *equation.operands[0]);
188 }
189 }
190 }
191 break;
192 }
193 case SpvOpISub: {
194 // Equation form: "a = b - c"
195 for (const auto& equation : GetEquations(rhs_dds[0])) {
196 if (equation.opcode == SpvOpIAdd) {
197 // Equation form: "a = (d + e) - c"
198 if (synonymous_.IsEquivalent(*equation.operands[0], *rhs_dds[1])) {
199 // Equation form: "a = (c + e) - c"
200 // We can thus infer "a = e"
201 AddDataSynonymFactRecursive(lhs_dd, *equation.operands[1]);
202 }
203 if (synonymous_.IsEquivalent(*equation.operands[1], *rhs_dds[1])) {
204 // Equation form: "a = (d + c) - c"
205 // We can thus infer "a = d"
206 AddDataSynonymFactRecursive(lhs_dd, *equation.operands[0]);
207 }
208 }
209
210 if (equation.opcode == SpvOpISub) {
211 // Equation form: "a = (d - e) - c"
212 if (synonymous_.IsEquivalent(*equation.operands[0], *rhs_dds[1])) {
213 // Equation form: "a = (c - e) - c"
214 // We can thus infer "a = -e"
215 AddEquationFactRecursive(lhs_dd, SpvOpSNegate,
216 {equation.operands[1]});
217 }
218 }
219 }
220
221 for (const auto& equation : GetEquations(rhs_dds[1])) {
222 if (equation.opcode == SpvOpIAdd) {
223 // Equation form: "a = b - (d + e)"
224 if (synonymous_.IsEquivalent(*equation.operands[0], *rhs_dds[0])) {
225 // Equation form: "a = b - (b + e)"
226 // We can thus infer "a = -e"
227 AddEquationFactRecursive(lhs_dd, SpvOpSNegate,
228 {equation.operands[1]});
229 }
230 if (synonymous_.IsEquivalent(*equation.operands[1], *rhs_dds[0])) {
231 // Equation form: "a = b - (d + b)"
232 // We can thus infer "a = -d"
233 AddEquationFactRecursive(lhs_dd, SpvOpSNegate,
234 {equation.operands[0]});
235 }
236 }
237 if (equation.opcode == SpvOpISub) {
238 // Equation form: "a = b - (d - e)"
239 if (synonymous_.IsEquivalent(*equation.operands[0], *rhs_dds[0])) {
240 // Equation form: "a = b - (b - e)"
241 // We can thus infer "a = e"
242 AddDataSynonymFactRecursive(lhs_dd, *equation.operands[1]);
243 }
244 }
245 }
246 break;
247 }
248 case SpvOpLogicalNot:
249 case SpvOpSNegate: {
250 // Equation form: "a = !b" or "a = -b"
251 for (const auto& equation : GetEquations(rhs_dds[0])) {
252 if (equation.opcode == opcode) {
253 // Equation form: "a = !!b" or "a = -(-b)"
254 // We can thus infer "a = b"
255 AddDataSynonymFactRecursive(lhs_dd, *equation.operands[0]);
256 }
257 }
258 break;
259 }
260 default:
261 break;
262 }
263 }
264
AddDataSynonymFactRecursive(const protobufs::DataDescriptor & dd1,const protobufs::DataDescriptor & dd2)265 void DataSynonymAndIdEquationFacts::AddDataSynonymFactRecursive(
266 const protobufs::DataDescriptor& dd1,
267 const protobufs::DataDescriptor& dd2) {
268 assert((!ObjectStillExists(dd1) || !ObjectStillExists(dd2) ||
269 DataDescriptorsAreWellFormedAndComparable(dd1, dd2)) &&
270 "Mismatched data descriptors.");
271
272 // Record that the data descriptors provided in the fact are equivalent.
273 MakeEquivalent(dd1, dd2);
274 assert(synonymous_.Find(&dd1) == synonymous_.Find(&dd2) &&
275 "|dd1| and |dd2| must have a single representative");
276
277 // Compute various corollary facts.
278
279 // |dd1| and |dd2| belong to the same equivalence class so it doesn't matter
280 // which one we use here.
281 ComputeConversionDataSynonymFacts(dd1);
282
283 ComputeCompositeDataSynonymFacts(dd1, dd2);
284 }
285
ComputeConversionDataSynonymFacts(const protobufs::DataDescriptor & dd)286 void DataSynonymAndIdEquationFacts::ComputeConversionDataSynonymFacts(
287 const protobufs::DataDescriptor& dd) {
288 assert(synonymous_.Exists(dd) &&
289 "|dd| should've been registered in the equivalence relation");
290
291 if (!ObjectStillExists(dd)) {
292 // The object is gone from the module, so we cannot proceed.
293 return;
294 }
295
296 const auto* type =
297 ir_context_->get_type_mgr()->GetType(fuzzerutil::WalkCompositeTypeIndices(
298 ir_context_, fuzzerutil::GetTypeId(ir_context_, dd.object()),
299 dd.index()));
300 assert(type && "Data descriptor has invalid type");
301
302 if ((type->AsVector() && type->AsVector()->element_type()->AsInteger()) ||
303 type->AsInteger()) {
304 // If there exist equation facts of the form |%a = opcode %representative|
305 // and |%b = opcode %representative| where |opcode| is either OpConvertSToF
306 // or OpConvertUToF, then |a| and |b| are synonymous.
307 std::vector<const protobufs::DataDescriptor*> convert_s_to_f_lhs;
308 std::vector<const protobufs::DataDescriptor*> convert_u_to_f_lhs;
309
310 for (const auto& fact : id_equations_) {
311 auto equivalence_class = synonymous_.GetEquivalenceClass(*fact.first);
312 auto dd_it =
313 std::find_if(equivalence_class.begin(), equivalence_class.end(),
314 [this](const protobufs::DataDescriptor* a) {
315 return ObjectStillExists(*a);
316 });
317 if (dd_it == equivalence_class.end()) {
318 // Skip |equivalence_class| if it has no valid ids.
319 continue;
320 }
321
322 for (const auto& equation : fact.second) {
323 if (synonymous_.IsEquivalent(*equation.operands[0], dd)) {
324 if (equation.opcode == SpvOpConvertSToF) {
325 convert_s_to_f_lhs.push_back(*dd_it);
326 } else if (equation.opcode == SpvOpConvertUToF) {
327 convert_u_to_f_lhs.push_back(*dd_it);
328 }
329 }
330 }
331 }
332
333 // We use pointers in the initializer list here since otherwise we would
334 // copy memory from these vectors.
335 for (const auto* synonyms : {&convert_s_to_f_lhs, &convert_u_to_f_lhs}) {
336 for (const auto* synonym_a : *synonyms) {
337 for (const auto* synonym_b : *synonyms) {
338 // DataDescriptorsAreWellFormedAndComparable will be called in the
339 // AddDataSynonymFactRecursive method.
340 if (!synonymous_.IsEquivalent(*synonym_a, *synonym_b)) {
341 // |synonym_a| and |synonym_b| have compatible types - they are
342 // synonymous.
343 AddDataSynonymFactRecursive(*synonym_a, *synonym_b);
344 }
345 }
346 }
347 }
348 }
349 }
350
ComputeCompositeDataSynonymFacts(const protobufs::DataDescriptor & dd1,const protobufs::DataDescriptor & dd2)351 void DataSynonymAndIdEquationFacts::ComputeCompositeDataSynonymFacts(
352 const protobufs::DataDescriptor& dd1,
353 const protobufs::DataDescriptor& dd2) {
354 // Check whether this is a synonym about composite objects. If it is,
355 // we can recursively add synonym facts about their associated sub-components.
356
357 // Get the type of the object referred to by the first data descriptor in the
358 // synonym fact.
359 uint32_t type_id = fuzzerutil::WalkCompositeTypeIndices(
360 ir_context_,
361 ir_context_->get_def_use_mgr()->GetDef(dd1.object())->type_id(),
362 dd1.index());
363 auto type = ir_context_->get_type_mgr()->GetType(type_id);
364 auto type_instruction = ir_context_->get_def_use_mgr()->GetDef(type_id);
365 assert(type != nullptr &&
366 "Invalid data synonym fact: one side has an unknown type.");
367
368 // Check whether the type is composite, recording the number of elements
369 // associated with the composite if so.
370 uint32_t num_composite_elements;
371 if (type->AsArray()) {
372 num_composite_elements =
373 fuzzerutil::GetArraySize(*type_instruction, ir_context_);
374 } else if (type->AsMatrix()) {
375 num_composite_elements = type->AsMatrix()->element_count();
376 } else if (type->AsStruct()) {
377 num_composite_elements =
378 fuzzerutil::GetNumberOfStructMembers(*type_instruction);
379 } else if (type->AsVector()) {
380 num_composite_elements = type->AsVector()->element_count();
381 } else {
382 // The type is not a composite, so return.
383 return;
384 }
385
386 // If the fact has the form:
387 // obj_1[a_1, ..., a_m] == obj_2[b_1, ..., b_n]
388 // then for each composite index i, we add a fact of the form:
389 // obj_1[a_1, ..., a_m, i] == obj_2[b_1, ..., b_n, i]
390 //
391 // However, to avoid adding a large number of synonym facts e.g. in the case
392 // of arrays, we bound the number of composite elements to which this is
393 // applied. Nevertheless, we always add a synonym fact for the final
394 // components, as this may be an interesting edge case.
395
396 // The bound on the number of indices of the composite pair to note as being
397 // synonymous.
398 const uint32_t kCompositeElementBound = 10;
399
400 for (uint32_t i = 0; i < num_composite_elements;) {
401 std::vector<uint32_t> extended_indices1 =
402 fuzzerutil::RepeatedFieldToVector(dd1.index());
403 extended_indices1.push_back(i);
404 std::vector<uint32_t> extended_indices2 =
405 fuzzerutil::RepeatedFieldToVector(dd2.index());
406 extended_indices2.push_back(i);
407 AddDataSynonymFactRecursive(
408 MakeDataDescriptor(dd1.object(), extended_indices1),
409 MakeDataDescriptor(dd2.object(), extended_indices2));
410
411 if (i < kCompositeElementBound - 1 || i == num_composite_elements - 1) {
412 // We have not reached the bound yet, or have already skipped ahead to the
413 // last element, so increment the loop counter as standard.
414 i++;
415 } else {
416 // We have reached the bound, so skip ahead to the last element.
417 assert(i == kCompositeElementBound - 1);
418 i = num_composite_elements - 1;
419 }
420 }
421 }
422
ComputeClosureOfFacts(uint32_t maximum_equivalence_class_size)423 void DataSynonymAndIdEquationFacts::ComputeClosureOfFacts(
424 uint32_t maximum_equivalence_class_size) {
425 // Suppose that obj_1[a_1, ..., a_m] and obj_2[b_1, ..., b_n] are distinct
426 // data descriptors that describe objects of the same composite type, and that
427 // the composite type is comprised of k components.
428 //
429 // For example, if m is a mat4x4 and v a vec4, we might consider:
430 // m[2]: describes the 2nd column of m, a vec4
431 // v[]: describes all of v, a vec4
432 //
433 // Suppose that we know, for every 0 <= i < k, that the fact:
434 // obj_1[a_1, ..., a_m, i] == obj_2[b_1, ..., b_n, i]
435 // holds - i.e. that the children of the two data descriptors are synonymous.
436 //
437 // Then we can conclude that:
438 // obj_1[a_1, ..., a_m] == obj_2[b_1, ..., b_n]
439 // holds.
440 //
441 // For instance, if we have the facts:
442 // m[2, 0] == v[0]
443 // m[2, 1] == v[1]
444 // m[2, 2] == v[2]
445 // m[2, 3] == v[3]
446 // then we can conclude that:
447 // m[2] == v.
448 //
449 // This method repeatedly searches the equivalence relation of data
450 // descriptors, deducing and adding such facts, until a pass over the
451 // relation leads to no further facts being deduced.
452
453 // The method relies on working with pairs of data descriptors, and in
454 // particular being able to hash and compare such pairs.
455
456 using DataDescriptorPair =
457 std::pair<protobufs::DataDescriptor, protobufs::DataDescriptor>;
458
459 struct DataDescriptorPairHash {
460 std::size_t operator()(const DataDescriptorPair& pair) const {
461 return DataDescriptorHash()(&pair.first) ^
462 DataDescriptorHash()(&pair.second);
463 }
464 };
465
466 struct DataDescriptorPairEquals {
467 bool operator()(const DataDescriptorPair& first,
468 const DataDescriptorPair& second) const {
469 return (DataDescriptorEquals()(&first.first, &second.first) &&
470 DataDescriptorEquals()(&first.second, &second.second)) ||
471 (DataDescriptorEquals()(&first.first, &second.second) &&
472 DataDescriptorEquals()(&first.second, &second.first));
473 }
474 };
475
476 // This map records, for a given pair of composite data descriptors of the
477 // same type, all the indices at which the data descriptors are known to be
478 // synonymous. A pair is a key to this map only if we have observed that
479 // the pair are synonymous at *some* index, but not at *all* indices.
480 // Once we find that a pair of data descriptors are equivalent at all indices
481 // we record the fact that they are synonymous and remove them from the map.
482 //
483 // Using the m and v example from above, initially the pair (m[2], v) would
484 // not be a key to the map. If we find that m[2, 2] == v[2] holds, we would
485 // add an entry:
486 // (m[2], v) -> [false, false, true, false]
487 // to record that they are synonymous at index 2. If we then find that
488 // m[2, 0] == v[0] holds, we would update this entry to:
489 // (m[2], v) -> [true, false, true, false]
490 // If we then find that m[2, 3] == v[3] holds, we would update this entry to:
491 // (m[2], v) -> [true, false, true, true]
492 // Finally, if we then find that m[2, 1] == v[1] holds, which would make the
493 // boolean vector true at every index, we would add the fact:
494 // m[2] == v
495 // to the equivalence relation and remove (m[2], v) from the map.
496 std::unordered_map<DataDescriptorPair, std::vector<bool>,
497 DataDescriptorPairHash, DataDescriptorPairEquals>
498 candidate_composite_synonyms;
499
500 // We keep looking for new facts until we perform a complete pass over the
501 // equivalence relation without finding any new facts.
502 while (closure_computation_required_) {
503 // We have not found any new facts yet during this pass; we set this to
504 // 'true' if we do find a new fact.
505 closure_computation_required_ = false;
506
507 // Consider each class in the equivalence relation.
508 for (auto representative :
509 synonymous_.GetEquivalenceClassRepresentatives()) {
510 auto equivalence_class = synonymous_.GetEquivalenceClass(*representative);
511
512 if (equivalence_class.size() > maximum_equivalence_class_size) {
513 // This equivalence class is larger than the maximum size we are willing
514 // to consider, so we skip it. This potentially leads to missed fact
515 // deductions, but avoids excessive runtime for closure computation.
516 continue;
517 }
518
519 // Consider every data descriptor in the equivalence class.
520 for (auto dd1_it = equivalence_class.begin();
521 dd1_it != equivalence_class.end(); ++dd1_it) {
522 // If this data descriptor has no indices then it does not have the form
523 // obj_1[a_1, ..., a_m, i], so move on.
524 auto dd1 = *dd1_it;
525 if (dd1->index_size() == 0) {
526 continue;
527 }
528
529 // Consider every other data descriptor later in the equivalence class
530 // (due to symmetry, there is no need to compare with previous data
531 // descriptors).
532 auto dd2_it = dd1_it;
533 for (++dd2_it; dd2_it != equivalence_class.end(); ++dd2_it) {
534 auto dd2 = *dd2_it;
535 // If this data descriptor has no indices then it does not have the
536 // form obj_2[b_1, ..., b_n, i], so move on.
537 if (dd2->index_size() == 0) {
538 continue;
539 }
540
541 // At this point we know that:
542 // - |dd1| has the form obj_1[a_1, ..., a_m, i]
543 // - |dd2| has the form obj_2[b_1, ..., b_n, j]
544 assert(dd1->index_size() > 0 && dd2->index_size() > 0 &&
545 "Control should not reach here if either data descriptor has "
546 "no indices.");
547
548 // We are only interested if i == j.
549 if (dd1->index(dd1->index_size() - 1) !=
550 dd2->index(dd2->index_size() - 1)) {
551 continue;
552 }
553
554 const uint32_t common_final_index = dd1->index(dd1->index_size() - 1);
555
556 // Make data descriptors |dd1_prefix| and |dd2_prefix| for
557 // obj_1[a_1, ..., a_m]
558 // and
559 // obj_2[b_1, ..., b_n]
560 // These are the two data descriptors we might be getting closer to
561 // deducing as being synonymous, due to knowing that they are
562 // synonymous when extended by a particular index.
563 protobufs::DataDescriptor dd1_prefix;
564 dd1_prefix.set_object(dd1->object());
565 for (uint32_t i = 0; i < static_cast<uint32_t>(dd1->index_size() - 1);
566 i++) {
567 dd1_prefix.add_index(dd1->index(i));
568 }
569 protobufs::DataDescriptor dd2_prefix;
570 dd2_prefix.set_object(dd2->object());
571 for (uint32_t i = 0; i < static_cast<uint32_t>(dd2->index_size() - 1);
572 i++) {
573 dd2_prefix.add_index(dd2->index(i));
574 }
575 assert(!DataDescriptorEquals()(&dd1_prefix, &dd2_prefix) &&
576 "By construction these prefixes should be different.");
577
578 // If we already know that these prefixes are synonymous, move on.
579 if (synonymous_.Exists(dd1_prefix) &&
580 synonymous_.Exists(dd2_prefix) &&
581 synonymous_.IsEquivalent(dd1_prefix, dd2_prefix)) {
582 continue;
583 }
584 if (!ObjectStillExists(*dd1) || !ObjectStillExists(*dd2)) {
585 // The objects are not both available in the module, so we cannot
586 // investigate the types of the associated data descriptors; we need
587 // to move on.
588 continue;
589 }
590 // Get the type of obj_1
591 auto dd1_root_type_id =
592 fuzzerutil::GetTypeId(ir_context_, dd1->object());
593 // Use this type, together with a_1, ..., a_m, to get the type of
594 // obj_1[a_1, ..., a_m].
595 auto dd1_prefix_type = fuzzerutil::WalkCompositeTypeIndices(
596 ir_context_, dd1_root_type_id, dd1_prefix.index());
597
598 // Similarly, get the type of obj_2 and use it to get the type of
599 // obj_2[b_1, ..., b_n].
600 auto dd2_root_type_id =
601 fuzzerutil::GetTypeId(ir_context_, dd2->object());
602 auto dd2_prefix_type = fuzzerutil::WalkCompositeTypeIndices(
603 ir_context_, dd2_root_type_id, dd2_prefix.index());
604
605 // If the types of dd1_prefix and dd2_prefix are not the same, they
606 // cannot be synonymous.
607 if (dd1_prefix_type != dd2_prefix_type) {
608 continue;
609 }
610
611 // At this point, we know we have synonymous data descriptors of the
612 // form:
613 // obj_1[a_1, ..., a_m, i]
614 // obj_2[b_1, ..., b_n, i]
615 // with the same last_index i, such that:
616 // obj_1[a_1, ..., a_m]
617 // and
618 // obj_2[b_1, ..., b_n]
619 // have the same type.
620
621 // Work out how many components there are in the (common) commposite
622 // type associated with obj_1[a_1, ..., a_m] and obj_2[b_1, ..., b_n].
623 // This depends on whether the composite type is array, matrix, struct
624 // or vector.
625 uint32_t num_components_in_composite;
626 auto composite_type =
627 ir_context_->get_type_mgr()->GetType(dd1_prefix_type);
628 auto composite_type_instruction =
629 ir_context_->get_def_use_mgr()->GetDef(dd1_prefix_type);
630 if (composite_type->AsArray()) {
631 num_components_in_composite = fuzzerutil::GetArraySize(
632 *composite_type_instruction, ir_context_);
633 if (num_components_in_composite == 0) {
634 // This indicates that the array has an unknown size, in which
635 // case we cannot be sure we have matched all of its elements with
636 // synonymous elements of another array.
637 continue;
638 }
639 } else if (composite_type->AsMatrix()) {
640 num_components_in_composite =
641 composite_type->AsMatrix()->element_count();
642 } else if (composite_type->AsStruct()) {
643 num_components_in_composite = fuzzerutil::GetNumberOfStructMembers(
644 *composite_type_instruction);
645 } else {
646 assert(composite_type->AsVector());
647 num_components_in_composite =
648 composite_type->AsVector()->element_count();
649 }
650
651 // We are one step closer to being able to say that |dd1_prefix| and
652 // |dd2_prefix| are synonymous.
653 DataDescriptorPair candidate_composite_synonym(dd1_prefix,
654 dd2_prefix);
655
656 // We look up what we already know about this pair.
657 auto existing_entry =
658 candidate_composite_synonyms.find(candidate_composite_synonym);
659
660 if (existing_entry == candidate_composite_synonyms.end()) {
661 // If this is the first time we have seen the pair, we make a vector
662 // of size |num_components_in_composite| that is 'true' at the
663 // common final index associated with |dd1| and |dd2|, and 'false'
664 // everywhere else, and register this vector as being associated
665 // with the pair.
666 std::vector<bool> entry;
667 for (uint32_t i = 0; i < num_components_in_composite; i++) {
668 entry.push_back(i == common_final_index);
669 }
670 candidate_composite_synonyms[candidate_composite_synonym] = entry;
671 existing_entry =
672 candidate_composite_synonyms.find(candidate_composite_synonym);
673 } else {
674 // We have seen this pair of data descriptors before, and we now
675 // know that they are synonymous at one further index, so we
676 // update the entry to record that.
677 existing_entry->second[common_final_index] = true;
678 }
679 assert(existing_entry != candidate_composite_synonyms.end());
680
681 // Check whether |dd1_prefix| and |dd2_prefix| are now known to match
682 // at every sub-component.
683 bool all_components_match = true;
684 for (uint32_t i = 0; i < num_components_in_composite; i++) {
685 if (!existing_entry->second[i]) {
686 all_components_match = false;
687 break;
688 }
689 }
690 if (all_components_match) {
691 // The two prefixes match on all sub-components, so we know that
692 // they are synonymous. We add this fact *non-recursively*, as we
693 // have deduced that |dd1_prefix| and |dd2_prefix| are synonymous
694 // by observing that all their sub-components are already
695 // synonymous.
696 assert(DataDescriptorsAreWellFormedAndComparable(dd1_prefix,
697 dd2_prefix));
698 MakeEquivalent(dd1_prefix, dd2_prefix);
699 // Now that we know this pair of data descriptors are synonymous,
700 // there is no point recording how close they are to being
701 // synonymous.
702 candidate_composite_synonyms.erase(candidate_composite_synonym);
703 }
704 }
705 }
706 }
707 }
708 }
709
MakeEquivalent(const protobufs::DataDescriptor & dd1,const protobufs::DataDescriptor & dd2)710 void DataSynonymAndIdEquationFacts::MakeEquivalent(
711 const protobufs::DataDescriptor& dd1,
712 const protobufs::DataDescriptor& dd2) {
713 // Register the data descriptors if they are not already known to the
714 // equivalence relation.
715 RegisterDataDescriptor(dd1);
716 RegisterDataDescriptor(dd2);
717
718 if (synonymous_.IsEquivalent(dd1, dd2)) {
719 // The data descriptors are already known to be equivalent, so there is
720 // nothing to do.
721 return;
722 }
723
724 // We must make the data descriptors equivalent, and also make sure any
725 // equation facts known about their representatives are merged.
726
727 // Record the original equivalence class representatives of the data
728 // descriptors.
729 auto dd1_original_representative = synonymous_.Find(&dd1);
730 auto dd2_original_representative = synonymous_.Find(&dd2);
731
732 // Make the data descriptors equivalent.
733 synonymous_.MakeEquivalent(dd1, dd2);
734 // As we have updated the equivalence relation, we might be able to deduce
735 // more facts by performing a closure computation, so we record that such a
736 // computation is required.
737 closure_computation_required_ = true;
738
739 // At this point, exactly one of |dd1_original_representative| and
740 // |dd2_original_representative| will be the representative of the combined
741 // equivalence class. We work out which one of them is still the class
742 // representative and which one is no longer the class representative.
743
744 auto still_representative = synonymous_.Find(dd1_original_representative) ==
745 dd1_original_representative
746 ? dd1_original_representative
747 : dd2_original_representative;
748 auto no_longer_representative =
749 still_representative == dd1_original_representative
750 ? dd2_original_representative
751 : dd1_original_representative;
752
753 assert(no_longer_representative != still_representative &&
754 "The current and former representatives cannot be the same.");
755
756 // We now need to add all equations about |no_longer_representative| to the
757 // set of equations known about |still_representative|.
758
759 // Get the equations associated with |no_longer_representative|.
760 auto no_longer_representative_id_equations =
761 id_equations_.find(no_longer_representative);
762 if (no_longer_representative_id_equations != id_equations_.end()) {
763 // There are some equations to transfer. There might not yet be any
764 // equations about |still_representative|; create an empty set of equations
765 // if this is the case.
766 if (!id_equations_.count(still_representative)) {
767 id_equations_.insert({still_representative, OperationSet()});
768 }
769 auto still_representative_id_equations =
770 id_equations_.find(still_representative);
771 assert(still_representative_id_equations != id_equations_.end() &&
772 "At this point there must be a set of equations.");
773 // Add all the equations known about |no_longer_representative| to the set
774 // of equations known about |still_representative|.
775 still_representative_id_equations->second.insert(
776 no_longer_representative_id_equations->second.begin(),
777 no_longer_representative_id_equations->second.end());
778 }
779 // Delete the no longer-relevant equations about |no_longer_representative|.
780 id_equations_.erase(no_longer_representative);
781 }
782
783 const protobufs::DataDescriptor*
RegisterDataDescriptor(const protobufs::DataDescriptor & dd)784 DataSynonymAndIdEquationFacts::RegisterDataDescriptor(
785 const protobufs::DataDescriptor& dd) {
786 return synonymous_.Exists(dd) ? synonymous_.Find(&dd)
787 : synonymous_.Register(dd);
788 }
789
DataDescriptorsAreWellFormedAndComparable(const protobufs::DataDescriptor & dd1,const protobufs::DataDescriptor & dd2) const790 bool DataSynonymAndIdEquationFacts::DataDescriptorsAreWellFormedAndComparable(
791 const protobufs::DataDescriptor& dd1,
792 const protobufs::DataDescriptor& dd2) const {
793 if (!ObjectStillExists(dd1) || !ObjectStillExists(dd2)) {
794 // We trivially return true if one or other of the objects associated with
795 // the data descriptors is gone.
796 return true;
797 }
798
799 auto end_type_id_1 = fuzzerutil::WalkCompositeTypeIndices(
800 ir_context_, fuzzerutil::GetTypeId(ir_context_, dd1.object()),
801 dd1.index());
802 auto end_type_id_2 = fuzzerutil::WalkCompositeTypeIndices(
803 ir_context_, fuzzerutil::GetTypeId(ir_context_, dd2.object()),
804 dd2.index());
805 // The end types of the data descriptors must exist.
806 if (end_type_id_1 == 0 || end_type_id_2 == 0) {
807 return false;
808 }
809 // Neither end type is allowed to be void.
810 if (ir_context_->get_def_use_mgr()->GetDef(end_type_id_1)->opcode() ==
811 SpvOpTypeVoid ||
812 ir_context_->get_def_use_mgr()->GetDef(end_type_id_2)->opcode() ==
813 SpvOpTypeVoid) {
814 return false;
815 }
816 // If the end types are the same, the data descriptors are comparable.
817 if (end_type_id_1 == end_type_id_2) {
818 return true;
819 }
820 // Otherwise they are only comparable if they are integer scalars or integer
821 // vectors that differ only in signedness.
822
823 // Get both types.
824 const auto* type_a = ir_context_->get_type_mgr()->GetType(end_type_id_1);
825 const auto* type_b = ir_context_->get_type_mgr()->GetType(end_type_id_2);
826 assert(type_a && type_b && "Data descriptors have invalid type(s)");
827
828 // If both types are numerical or vectors of numerical components, then they
829 // are compatible if they have the same number of components and the same bit
830 // count per component.
831
832 if (type_a->AsVector() && type_b->AsVector()) {
833 const auto* vector_a = type_a->AsVector();
834 const auto* vector_b = type_b->AsVector();
835
836 if (vector_a->element_count() != vector_b->element_count() ||
837 vector_a->element_type()->AsBool() ||
838 vector_b->element_type()->AsBool()) {
839 // The case where both vectors have boolean elements and the same number
840 // of components is handled by the direct equality check earlier.
841 // You can't have multiple identical boolean vector types.
842 return false;
843 }
844
845 type_a = vector_a->element_type();
846 type_b = vector_b->element_type();
847 }
848
849 auto get_bit_count_for_numeric_type =
850 [](const opt::analysis::Type& type) -> uint32_t {
851 if (const auto* integer = type.AsInteger()) {
852 return integer->width();
853 } else if (const auto* floating = type.AsFloat()) {
854 return floating->width();
855 } else {
856 assert(false && "|type| must be a numerical type");
857 return 0;
858 }
859 };
860
861 // Checks that both |type_a| and |type_b| are either numerical or vectors of
862 // numerical components and have the same number of bits.
863 return (type_a->AsInteger() || type_a->AsFloat()) &&
864 (type_b->AsInteger() || type_b->AsFloat()) &&
865 (get_bit_count_for_numeric_type(*type_a) ==
866 get_bit_count_for_numeric_type(*type_b));
867 }
868
869 std::vector<const protobufs::DataDescriptor*>
GetSynonymsForId(uint32_t id) const870 DataSynonymAndIdEquationFacts::GetSynonymsForId(uint32_t id) const {
871 return GetSynonymsForDataDescriptor(MakeDataDescriptor(id, {}));
872 }
873
874 std::vector<const protobufs::DataDescriptor*>
GetSynonymsForDataDescriptor(const protobufs::DataDescriptor & data_descriptor) const875 DataSynonymAndIdEquationFacts::GetSynonymsForDataDescriptor(
876 const protobufs::DataDescriptor& data_descriptor) const {
877 std::vector<const protobufs::DataDescriptor*> result;
878 if (synonymous_.Exists(data_descriptor)) {
879 for (auto dd : synonymous_.GetEquivalenceClass(data_descriptor)) {
880 // There may be data descriptors in the equivalence class whose base
881 // objects have been removed from the module. We do not expose these
882 // data descriptors to clients of the fact manager.
883 if (ObjectStillExists(*dd)) {
884 result.push_back(dd);
885 }
886 }
887 }
888 return result;
889 }
890
891 std::vector<uint32_t>
GetIdsForWhichSynonymsAreKnown() const892 DataSynonymAndIdEquationFacts::GetIdsForWhichSynonymsAreKnown() const {
893 std::vector<uint32_t> result;
894 for (auto& data_descriptor : synonymous_.GetAllKnownValues()) {
895 // We skip any data descriptors whose base objects no longer exist in the
896 // module, and we restrict attention to data descriptors for plain ids,
897 // which have no indices.
898 if (ObjectStillExists(*data_descriptor) &&
899 data_descriptor->index().empty()) {
900 result.push_back(data_descriptor->object());
901 }
902 }
903 return result;
904 }
905
906 std::vector<const protobufs::DataDescriptor*>
GetAllKnownSynonyms() const907 DataSynonymAndIdEquationFacts::GetAllKnownSynonyms() const {
908 std::vector<const protobufs::DataDescriptor*> result;
909 for (const auto* dd : synonymous_.GetAllKnownValues()) {
910 if (ObjectStillExists(*dd)) {
911 result.push_back(dd);
912 }
913 }
914 return result;
915 }
916
IsSynonymous(const protobufs::DataDescriptor & data_descriptor1,const protobufs::DataDescriptor & data_descriptor2) const917 bool DataSynonymAndIdEquationFacts::IsSynonymous(
918 const protobufs::DataDescriptor& data_descriptor1,
919 const protobufs::DataDescriptor& data_descriptor2) const {
920 return synonymous_.Exists(data_descriptor1) &&
921 synonymous_.Exists(data_descriptor2) &&
922 synonymous_.IsEquivalent(data_descriptor1, data_descriptor2);
923 }
924
ObjectStillExists(const protobufs::DataDescriptor & dd) const925 bool DataSynonymAndIdEquationFacts::ObjectStillExists(
926 const protobufs::DataDescriptor& dd) const {
927 return ir_context_->get_def_use_mgr()->GetDef(dd.object()) != nullptr;
928 }
929
930 } // namespace fact_manager
931 } // namespace fuzz
932 } // namespace spvtools
933