/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <gtest/gtest.h>

#include "nir.h"
#include "nir_builder.h"

/* This is a macro so you get good line numbers. It checks that the mov
 * recorded for a test id reads the given load's destination with the
 * expected swizzle.
 */
#define EXPECT_INSTR_SWIZZLES(instr, load, expected_swizzle)  \
   EXPECT_EQ((instr)->src[0].src.ssa, &(load)->dest.ssa);     \
   EXPECT_EQ(swizzle(instr, 0), expected_swizzle);

namespace {

class nir_load_store_vectorize_test : public ::testing::Test {
protected:
   nir_load_store_vectorize_test();
   ~nir_load_store_vectorize_test();

   unsigned count_intrinsics(nir_intrinsic_op intrinsic);

   nir_intrinsic_instr *get_intrinsic(nir_intrinsic_op intrinsic,
                                      unsigned index);

   bool run_vectorizer(nir_variable_mode modes, bool cse=false,
                       nir_variable_mode robust_modes = (nir_variable_mode)0);

   nir_ssa_def *get_resource(uint32_t binding, bool ssbo);

   nir_intrinsic_instr *create_indirect_load(nir_variable_mode mode, uint32_t binding, nir_ssa_def *offset,
                                             uint32_t id, unsigned bit_size=32, unsigned components=1,
                                             unsigned access=0);
   void create_indirect_store(nir_variable_mode mode, uint32_t binding, nir_ssa_def *offset,
                              uint32_t id, unsigned bit_size=32, unsigned components=1,
                              unsigned wrmask=0xf, unsigned access=0);

   nir_intrinsic_instr *create_load(nir_variable_mode mode, uint32_t binding, uint32_t offset,
                                    uint32_t id, unsigned bit_size=32, unsigned components=1,
                                    unsigned access=0);
   void create_store(nir_variable_mode mode, uint32_t binding, uint32_t offset,
                     uint32_t id, unsigned bit_size=32, unsigned components=1, unsigned wrmask=0xf,
                     unsigned access=0);

   void create_shared_load(nir_deref_instr *deref, uint32_t id,
                           unsigned bit_size=32, unsigned components=1);
   void create_shared_store(nir_deref_instr *deref, uint32_t id,
                            unsigned bit_size=32, unsigned components=1, unsigned wrmask=0xf);

   bool test_alu(nir_instr *instr, nir_op op);
   bool test_alu_def(nir_instr *instr, unsigned index, nir_ssa_def *def, unsigned swizzle=0);

   static bool mem_vectorize_callback(unsigned align_mul, unsigned align_offset,
                                      unsigned bit_size,
                                      unsigned num_components,
                                      nir_intrinsic_instr *low, nir_intrinsic_instr *high,
                                      void *data);
   static void shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align);

   std::string swizzle(nir_alu_instr *instr, int src);

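   /* `b` points at `_b`, the builder for the test shader. For every created
    * load, `movs`/`loads` map a test id to the nir_mov reading it, so tests
    * can check swizzles after vectorization. `res_map` caches one resource
    * index per binding.
    */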
   nir_builder *b, _b;
   std::map<unsigned, nir_alu_instr*> movs;
   std::map<unsigned, nir_alu_src*> loads;
   std::map<unsigned, nir_ssa_def*> res_map;
};

nir_load_store_vectorize_test::nir_load_store_vectorize_test()
{
   glsl_type_singleton_init_or_ref();

   static const nir_shader_compiler_options options = { };
   _b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, &options, "load store tests");
   b = &_b;
}

nir_load_store_vectorize_test::~nir_load_store_vectorize_test()
{
   if (HasFailure()) {
      printf("\nShader from the failed test:\n\n");
      nir_print_shader(b->shader, stdout);
   }

   ralloc_free(b->shader);

   glsl_type_singleton_decref();
}

std::string
nir_load_store_vectorize_test::swizzle(nir_alu_instr *instr, int src)
{
   std::string swizzle;
   for (unsigned i = 0; i < nir_ssa_alu_instr_src_components(instr, src); i++) {
      swizzle += "xyzw"[instr->src[src].swizzle[i]];
   }

   return swizzle;
}

unsigned
nir_load_store_vectorize_test::count_intrinsics(nir_intrinsic_op intrinsic)
{
   unsigned count = 0;
   nir_foreach_block(block, b->impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;
         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         if (intrin->intrinsic == intrinsic)
            count++;
      }
   }
   return count;
}

nir_intrinsic_instr *
nir_load_store_vectorize_test::get_intrinsic(nir_intrinsic_op intrinsic,
                                             unsigned index)
{
   nir_foreach_block(block, b->impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;
         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         if (intrin->intrinsic == intrinsic) {
            if (index == 0)
               return intrin;
            index--;
         }
      }
   }
   return NULL;
}

bool
nir_load_store_vectorize_test::run_vectorizer(nir_variable_mode modes,
                                              bool cse,
                                              nir_variable_mode robust_modes)
{
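   /* Shared-memory derefs need explicit types/offsets before the vectorizer
    * can reason about them.
    */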
   if (modes & nir_var_mem_shared)
      nir_lower_vars_to_explicit_types(b->shader, nir_var_mem_shared, shared_type_info);

   nir_load_store_vectorize_options opts = { };
   opts.callback = mem_vectorize_callback;
   opts.modes = modes;
   opts.robust_modes = robust_modes;
   bool progress = nir_opt_load_store_vectorize(b->shader, &opts);

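   /* On progress, validate and run cleanup passes so the tests can match
    * swizzles and constants on the vectorized defs directly.
    */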
   if (progress) {
      nir_validate_shader(b->shader, NULL);
      if (cse)
         nir_opt_cse(b->shader);
      nir_copy_prop(b->shader);
      nir_opt_algebraic(b->shader);
      nir_opt_constant_folding(b->shader);
   }
   return progress;
}

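/* Return the vulkan_resource_index def for a binding, creating it on first
 * use and caching it in res_map so repeated loads/stores share one resource.
 */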
nir_ssa_def *
nir_load_store_vectorize_test::get_resource(uint32_t binding, bool ssbo)
{
   if (res_map.count(binding))
      return res_map[binding];

   nir_intrinsic_instr *res = nir_intrinsic_instr_create(
      b->shader, nir_intrinsic_vulkan_resource_index);
   nir_ssa_dest_init(&res->instr, &res->dest, 1, 32, NULL);
   res->num_components = 1;
   res->src[0] = nir_src_for_ssa(nir_imm_zero(b, 1, 32));
   nir_intrinsic_set_desc_type(
      res, ssbo ? 7 /*VK_DESCRIPTOR_TYPE_STORAGE_BUFFER*/ : 6 /*VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER*/);
   nir_intrinsic_set_desc_set(res, 0);
   nir_intrinsic_set_binding(res, binding);
   nir_builder_instr_insert(b, &res->instr);
   res_map[binding] = &res->dest.ssa;
   return &res->dest.ssa;
}

nir_intrinsic_instr *
nir_load_store_vectorize_test::create_indirect_load(
   nir_variable_mode mode, uint32_t binding, nir_ssa_def *offset, uint32_t id,
   unsigned bit_size, unsigned components, unsigned access)
{
   nir_intrinsic_op intrinsic;
   nir_ssa_def *res = NULL;
   switch (mode) {
   case nir_var_mem_ubo:
      intrinsic = nir_intrinsic_load_ubo;
      res = get_resource(binding, false);
      break;
   case nir_var_mem_ssbo:
      intrinsic = nir_intrinsic_load_ssbo;
      res = get_resource(binding, true);
      break;
   case nir_var_mem_push_const:
      intrinsic = nir_intrinsic_load_push_constant;
      break;
   default:
      return NULL;
   }
   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, intrinsic);
   nir_ssa_dest_init(&load->instr, &load->dest, components, bit_size, NULL);
   load->num_components = components;
   if (res) {
      load->src[0] = nir_src_for_ssa(res);
      load->src[1] = nir_src_for_ssa(offset);
   } else {
      load->src[0] = nir_src_for_ssa(offset);
   }
   int byte_size = (bit_size == 1 ? 32 : bit_size) / 8;

   if (mode != nir_var_mem_push_const) {
      nir_intrinsic_set_align(load, byte_size, 0);
      nir_intrinsic_set_access(load, (gl_access_qualifier)access);
   }

   if (nir_intrinsic_has_range_base(load)) {
      uint32_t range = byte_size * components;
      int offset_src = res ? 1 : 0;

      if (nir_src_is_const(load->src[offset_src])) {
         nir_intrinsic_set_range_base(load, nir_src_as_uint(load->src[offset_src]));
         nir_intrinsic_set_range(load, range);
      } else {
         /* Unknown range */
         nir_intrinsic_set_range_base(load, 0);
         nir_intrinsic_set_range(load, ~0);
      }
   }

   nir_builder_instr_insert(b, &load->instr);
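   /* Wrap the load in a mov and remember it, so tests can later check which
    * components of the (possibly vectorized) load this id ends up reading.
    */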
   nir_alu_instr *mov = nir_instr_as_alu(nir_mov(b, &load->dest.ssa)->parent_instr);
   movs[id] = mov;
   loads[id] = &mov->src[0];

   return load;
}

void
nir_load_store_vectorize_test::create_indirect_store(
   nir_variable_mode mode, uint32_t binding, nir_ssa_def *offset, uint32_t id,
   unsigned bit_size, unsigned components, unsigned wrmask, unsigned access)
{
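   /* Component i of store `id` holds the constant (id << 4) | i, so the
    * tests can recognize which store each value came from.
    */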
   nir_const_value values[NIR_MAX_VEC_COMPONENTS];
   for (unsigned i = 0; i < components; i++)
      values[i] = nir_const_value_for_raw_uint((id << 4) | i, bit_size);
   nir_ssa_def *value = nir_build_imm(b, components, bit_size, values);

   nir_intrinsic_op intrinsic;
   nir_ssa_def *res = NULL;
   switch (mode) {
   case nir_var_mem_ssbo:
      intrinsic = nir_intrinsic_store_ssbo;
      res = get_resource(binding, true);
      break;
   case nir_var_mem_shared:
      intrinsic = nir_intrinsic_store_shared;
      break;
   default:
      return;
   }
   nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, intrinsic);
   nir_ssa_dest_init(&store->instr, &store->dest, components, bit_size, NULL);
   store->num_components = components;
   if (res) {
      store->src[0] = nir_src_for_ssa(value);
      store->src[1] = nir_src_for_ssa(res);
      store->src[2] = nir_src_for_ssa(offset);
   } else {
      store->src[0] = nir_src_for_ssa(value);
      store->src[1] = nir_src_for_ssa(offset);
   }
   nir_intrinsic_set_align(store, (bit_size == 1 ? 32 : bit_size) / 8, 0);
   nir_intrinsic_set_access(store, (gl_access_qualifier)access);
   nir_intrinsic_set_write_mask(store, wrmask & ((1 << components) - 1));
   nir_builder_instr_insert(b, &store->instr);
}

nir_intrinsic_instr *
nir_load_store_vectorize_test::create_load(
   nir_variable_mode mode, uint32_t binding, uint32_t offset, uint32_t id,
   unsigned bit_size, unsigned components, unsigned access)
{
   return create_indirect_load(mode, binding, nir_imm_int(b, offset), id, bit_size, components, access);
}

void
nir_load_store_vectorize_test::create_store(
   nir_variable_mode mode, uint32_t binding, uint32_t offset, uint32_t id,
   unsigned bit_size, unsigned components, unsigned wrmask, unsigned access)
{
   create_indirect_store(mode, binding, nir_imm_int(b, offset), id, bit_size, components, wrmask, access);
}

void nir_load_store_vectorize_test::create_shared_load(
   nir_deref_instr *deref, uint32_t id, unsigned bit_size, unsigned components)
{
   nir_ssa_def *load = nir_load_deref(b, deref);
   nir_alu_instr *mov = nir_instr_as_alu(nir_mov(b, load)->parent_instr);
   movs[id] = mov;
   loads[id] = &mov->src[0];
}

void nir_load_store_vectorize_test::create_shared_store(
   nir_deref_instr *deref, uint32_t id,
   unsigned bit_size, unsigned components, unsigned wrmask)
{
   nir_const_value values[NIR_MAX_VEC_COMPONENTS];
   for (unsigned i = 0; i < components; i++)
      values[i] = nir_const_value_for_raw_uint((id << 4) | i, bit_size);
   nir_ssa_def *value = nir_build_imm(b, components, bit_size, values);

   nir_store_deref(b, deref, value, wrmask & ((1 << components) - 1));
}

bool nir_load_store_vectorize_test::test_alu(nir_instr *instr, nir_op op)
{
   return instr->type == nir_instr_type_alu && nir_instr_as_alu(instr)->op == op;
}

bool nir_load_store_vectorize_test::test_alu_def(
   nir_instr *instr, unsigned index, nir_ssa_def *def, unsigned swizzle)
{
   if (instr->type != nir_instr_type_alu)
      return false;

   nir_alu_instr *alu = nir_instr_as_alu(instr);

   if (index >= nir_op_infos[alu->op].num_inputs)
      return false;
   if (alu->src[index].src.ssa != def)
      return false;
   if (alu->src[index].swizzle[0] != swizzle)
      return false;

   return true;
}

bool nir_load_store_vectorize_test::mem_vectorize_callback(
   unsigned align_mul, unsigned align_offset, unsigned bit_size,
   unsigned num_components,
   nir_intrinsic_instr *low, nir_intrinsic_instr *high,
   void *data)
{
   /* Calculate a simple alignment, like how nir_intrinsic_align() does. */
   uint32_t align = align_mul;
   if (align_offset)
      align = 1 << (ffs(align_offset) - 1);
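   /* e.g. align_mul=16 with align_offset=4 gives align=4, which passes the
    * scalar-alignment check below for 32-bit (4-byte) accesses but fails it
    * for 64-bit ones.
    */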

   /* Require scalar alignment and at most 4 components. */
   return align % (bit_size / 8) == 0 &&
          num_components <= 4;
}

void nir_load_store_vectorize_test::shared_type_info(
   const struct glsl_type *type, unsigned *size, unsigned *align)
{
   assert(glsl_type_is_vector_or_scalar(type));

   uint32_t comp_size = glsl_type_is_boolean(type)
      ? 4 : glsl_get_bit_size(type) / 8;
   unsigned length = glsl_get_vector_elements(type);
   *size = comp_size * length;
   *align = comp_size;
}
} // namespace

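/* Each test emits a few loads/stores, runs the vectorizer and then checks the
 * shape of the surviving intrinsics. For UBO/SSBO loads, src[0] is the
 * resource and src[1] the offset; SSBO stores take value, resource, offset;
 * push-constant loads take just the offset.
 */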
TEST_F(nir_load_store_vectorize_test, ubo_load_adjacent)
{
   create_load(nir_var_mem_ubo, 0, 0, 0x1);
   create_load(nir_var_mem_ubo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);
   ASSERT_EQ(nir_intrinsic_range_base(load), 0);
   ASSERT_EQ(nir_intrinsic_range(load), 8);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
}

TEST_F(nir_load_store_vectorize_test, ubo_load_intersecting)
{
   create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 2);
   create_load(nir_var_mem_ubo, 0, 4, 0x2, 32, 2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 3);
   ASSERT_EQ(nir_intrinsic_range_base(load), 0);
   ASSERT_EQ(nir_intrinsic_range(load), 12);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "xy");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "yz");
}

/* Test for a bug in range handling */
TEST_F(nir_load_store_vectorize_test, ubo_load_intersecting_range)
{
   create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 4);
   create_load(nir_var_mem_ubo, 0, 4, 0x2, 32, 1);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 4);
   ASSERT_EQ(nir_intrinsic_range_base(load), 0);
   ASSERT_EQ(nir_intrinsic_range(load), 16);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
   ASSERT_EQ(loads[0x1]->swizzle[1], 1);
   ASSERT_EQ(loads[0x1]->swizzle[2], 2);
   ASSERT_EQ(loads[0x1]->swizzle[3], 3);
   ASSERT_EQ(loads[0x2]->swizzle[0], 1);
}

TEST_F(nir_load_store_vectorize_test, ubo_load_identical)
{
   create_load(nir_var_mem_ubo, 0, 0, 0x1);
   create_load(nir_var_mem_ubo, 0, 0, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 1);
   ASSERT_EQ(nir_intrinsic_range_base(load), 0);
   ASSERT_EQ(nir_intrinsic_range(load), 4);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "x");
}

TEST_F(nir_load_store_vectorize_test, ubo_load_large)
{
   create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 2);
   create_load(nir_var_mem_ubo, 0, 8, 0x2, 32, 3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
}

TEST_F(nir_load_store_vectorize_test, push_const_load_adjacent)
{
   create_load(nir_var_mem_push_const, 0, 0, 0x1);
   create_load(nir_var_mem_push_const, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_push_constant, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);
   ASSERT_EQ(nir_src_as_uint(load->src[0]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
}

TEST_F(nir_load_store_vectorize_test, push_const_load_adjacent_base)
{
   create_load(nir_var_mem_push_const, 0, 0, 0x1);
   nir_intrinsic_set_base(create_load(nir_var_mem_push_const, 0, 0, 0x2), 4);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_push_constant, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);
   ASSERT_EQ(nir_src_as_uint(load->src[0]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   create_load(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_indirect)
{
   nir_ssa_def *index_base = nir_load_local_invocation_index(b);
   create_indirect_load(nir_var_mem_ssbo, 0, index_base, 0x1);
   create_indirect_load(nir_var_mem_ssbo, 0, nir_iadd_imm(b, index_base, 4), 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);
   ASSERT_EQ(load->src[1].ssa, index_base);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_indirect_sub)
{
   nir_ssa_def *index_base = nir_load_local_invocation_index(b);
   nir_ssa_def *index_base_prev = nir_iadd_imm(b, index_base, 0xfffffffc);
   create_indirect_load(nir_var_mem_ssbo, 0, index_base_prev, 0x1);
   create_indirect_load(nir_var_mem_ssbo, 0, index_base, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);
   ASSERT_EQ(load->src[1].ssa, index_base_prev);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_indirect_neg_stride)
{
   nir_ssa_def *inv = nir_load_local_invocation_index(b);
   nir_ssa_def *inv_plus_one = nir_iadd_imm(b, inv, 1);
   nir_ssa_def *index_base = nir_imul_imm(b, inv, 0xfffffffc);
   nir_ssa_def *index_base_prev = nir_imul_imm(b, inv_plus_one, 0xfffffffc);
   create_indirect_load(nir_var_mem_ssbo, 0, index_base_prev, 0x1);
   create_indirect_load(nir_var_mem_ssbo, 0, index_base, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");

   /* nir_opt_algebraic optimizes the imul by -4 into an ineg of an ishl */
   ASSERT_TRUE(test_alu(load->src[1].ssa->parent_instr, nir_op_ineg));
   nir_ssa_def *offset = nir_instr_as_alu(load->src[1].ssa->parent_instr)->src[0].src.ssa;
   ASSERT_TRUE(test_alu(offset->parent_instr, nir_op_ishl));
   nir_alu_instr *shl = nir_instr_as_alu(offset->parent_instr);
   ASSERT_EQ(shl->src[0].src.ssa, inv_plus_one);
   ASSERT_EQ(nir_src_as_uint(shl->src[1].src), 2);
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_identical_store_adjacent)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   create_store(nir_var_mem_ssbo, 0, 4, 0x2);
   create_load(nir_var_mem_ssbo, 0, 0, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 1);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_identical_store_intersecting)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
   create_store(nir_var_mem_ssbo, 0, 4, 0x2);
   create_load(nir_var_mem_ssbo, 0, 0, 0x3, 32, 2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_identical_store_identical)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   create_store(nir_var_mem_ssbo, 0, 0, 0x2);
   create_load(nir_var_mem_ssbo, 0, 0, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_identical_load_identical)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1);
   create_load(nir_var_mem_ssbo, 0, 0, 0x2);
   create_store(nir_var_mem_ssbo, 0, 0, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
}

/* If nir_opt_load_store_vectorize were implemented like many load/store
 * optimization passes (for example, nir_opt_combine_stores and
 * nir_opt_copy_prop_vars) and stopped tracking a load once an aliasing store
 * is encountered, this case wouldn't be optimized.
 * A similar test for derefs is shared_load_adjacent_store_identical. */
TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_store_identical)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   create_store(nir_var_mem_ssbo, 0, 0, 0x2);
   create_load(nir_var_mem_ssbo, 0, 4, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x3], load, "y");
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1);
   create_store(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0x3);
   nir_ssa_def *val = store->src[0].ssa;
   ASSERT_EQ(val->bit_size, 32);
   ASSERT_EQ(val->num_components, 2);
   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
   ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
   ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_intersecting)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
   create_store(nir_var_mem_ssbo, 0, 4, 0x2, 32, 2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0x7);
   nir_ssa_def *val = store->src[0].ssa;
   ASSERT_EQ(val->bit_size, 32);
   ASSERT_EQ(val->num_components, 3);
   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
   ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
   ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20);
   ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x21);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_identical)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1);
   create_store(nir_var_mem_ssbo, 0, 0, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0x1);
   nir_ssa_def *val = store->src[0].ssa;
   ASSERT_EQ(val->bit_size, 32);
   ASSERT_EQ(val->num_components, 1);
   ASSERT_EQ(nir_src_as_uint(store->src[0]), 0x20);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_large)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
   create_store(nir_var_mem_ssbo, 0, 8, 0x2, 32, 3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, ubo_load_adjacent_memory_barrier)
{
   create_load(nir_var_mem_ubo, 0, 0, 0x1);

   nir_scoped_memory_barrier(b, NIR_SCOPE_DEVICE, NIR_MEMORY_ACQ_REL,
                             nir_var_mem_ssbo);

   create_load(nir_var_mem_ubo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_memory_barrier)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);

   nir_scoped_memory_barrier(b, NIR_SCOPE_DEVICE, NIR_MEMORY_ACQ_REL,
                             nir_var_mem_ssbo);

   create_load(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}

/* nir_intrinsic_control_barrier only syncs invocations in a workgroup; it
 * doesn't require that loads/stores complete.
 */
TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_barrier)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   nir_control_barrier(b);
   create_load(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_memory_barrier_shared)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);

   nir_scoped_memory_barrier(b, NIR_SCOPE_WORKGROUP, NIR_MEMORY_ACQ_REL,
                             nir_var_mem_shared);

   create_load(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_discard)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1);
   nir_discard(b);
   create_store(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_demote)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1);
   nir_demote(b);
   create_store(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_discard)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   nir_discard(b);
   create_load(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_demote)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   nir_demote(b);
   create_load(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_8_8_16)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1, 8);
   create_load(nir_var_mem_ssbo, 0, 1, 0x2, 8);
   create_load(nir_var_mem_ssbo, 0, 2, 0x3, 16);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 3);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 8);
   ASSERT_EQ(load->dest.ssa.num_components, 4);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");

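   /* The 16-bit value is now reconstructed from bytes 2 and 3 of the
    * vectorized 8-bit load: u2u16(z) | (u2u16(w) << 8).
    */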
   nir_ssa_def *val = loads[0x3]->src.ssa;
   ASSERT_EQ(val->bit_size, 16);
   ASSERT_EQ(val->num_components, 1);
   ASSERT_TRUE(test_alu(val->parent_instr, nir_op_ior));
   nir_ssa_def *low = nir_instr_as_alu(val->parent_instr)->src[0].src.ssa;
   nir_ssa_def *high = nir_instr_as_alu(val->parent_instr)->src[1].src.ssa;
   ASSERT_TRUE(test_alu(high->parent_instr, nir_op_ishl));
   high = nir_instr_as_alu(high->parent_instr)->src[0].src.ssa;
   ASSERT_TRUE(test_alu(low->parent_instr, nir_op_u2u16));
   ASSERT_TRUE(test_alu(high->parent_instr, nir_op_u2u16));
   ASSERT_TRUE(test_alu_def(low->parent_instr, 0, &load->dest.ssa, 2));
   ASSERT_TRUE(test_alu_def(high->parent_instr, 0, &load->dest.ssa, 3));
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_32_32_64)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
   create_load(nir_var_mem_ssbo, 0, 8, 0x2, 64);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 4);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "xy");

   nir_ssa_def *val = loads[0x2]->src.ssa;
   ASSERT_EQ(val->bit_size, 64);
   ASSERT_EQ(val->num_components, 1);
   ASSERT_TRUE(test_alu(val->parent_instr, nir_op_pack_64_2x32));
   nir_alu_instr *pack = nir_instr_as_alu(val->parent_instr);
   EXPECT_INSTR_SWIZZLES(pack, load, "zw");
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_32_32_64_64)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
   create_load(nir_var_mem_ssbo, 0, 8, 0x2, 64);
   create_load(nir_var_mem_ssbo, 0, 16, 0x3, 64);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 3);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo, true));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 64);
   ASSERT_EQ(load->dest.ssa.num_components, 3);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x3], load, "z");

   nir_ssa_def *val = loads[0x2]->src.ssa;
   ASSERT_EQ(val->bit_size, 64);
   ASSERT_EQ(val->num_components, 1);
   ASSERT_TRUE(test_alu(val->parent_instr, nir_op_mov));
   nir_alu_instr *mov = nir_instr_as_alu(val->parent_instr);
   EXPECT_INSTR_SWIZZLES(mov, load, "y");

   val = loads[0x1]->src.ssa;
   ASSERT_EQ(val->bit_size, 32);
   ASSERT_EQ(val->num_components, 2);
   ASSERT_TRUE(test_alu(val->parent_instr, nir_op_unpack_64_2x32));
   nir_alu_instr *unpack = nir_instr_as_alu(val->parent_instr);
   EXPECT_INSTR_SWIZZLES(unpack, load, "x");
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_intersecting_32_32_64)
{
   create_load(nir_var_mem_ssbo, 0, 4, 0x1, 32, 2);
   create_load(nir_var_mem_ssbo, 0, 8, 0x2, 64);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 3);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 4);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "xy");

   nir_ssa_def *val = loads[0x2]->src.ssa;
   ASSERT_EQ(val->bit_size, 64);
   ASSERT_EQ(val->num_components, 1);
   ASSERT_TRUE(test_alu(val->parent_instr, nir_op_pack_64_2x32));
   nir_alu_instr *pack = nir_instr_as_alu(val->parent_instr);
   EXPECT_INSTR_SWIZZLES(pack, load, "yz");
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_8_8_16)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 8);
   create_store(nir_var_mem_ssbo, 0, 1, 0x2, 8);
   create_store(nir_var_mem_ssbo, 0, 2, 0x3, 16);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 3);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0xf);
   nir_ssa_def *val = store->src[0].ssa;
   ASSERT_EQ(val->bit_size, 8);
   ASSERT_EQ(val->num_components, 4);
   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
   ASSERT_EQ(nir_const_value_as_uint(cv[0], 8), 0x10);
   ASSERT_EQ(nir_const_value_as_uint(cv[1], 8), 0x20);
   ASSERT_EQ(nir_const_value_as_uint(cv[2], 8), 0x30);
   ASSERT_EQ(nir_const_value_as_uint(cv[3], 8), 0x0);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_32_32_64)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
   create_store(nir_var_mem_ssbo, 0, 8, 0x2, 64);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0xf);
   nir_ssa_def *val = store->src[0].ssa;
   ASSERT_EQ(val->bit_size, 32);
   ASSERT_EQ(val->num_components, 4);
   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
   ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
   ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x11);
   ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x20);
   ASSERT_EQ(nir_const_value_as_uint(cv[3], 32), 0x0);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_32_32_64_64)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
   create_store(nir_var_mem_ssbo, 0, 8, 0x2, 64);
   create_store(nir_var_mem_ssbo, 0, 16, 0x3, 64);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 3);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0x7);
   nir_ssa_def *val = store->src[0].ssa;
   ASSERT_EQ(val->bit_size, 64);
   ASSERT_EQ(val->num_components, 3);
   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
   ASSERT_EQ(nir_const_value_as_uint(cv[0], 64), 0x1100000010ull);
   ASSERT_EQ(nir_const_value_as_uint(cv[1], 64), 0x20);
   ASSERT_EQ(nir_const_value_as_uint(cv[2], 64), 0x30);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_intersecting_32_32_64)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
   create_store(nir_var_mem_ssbo, 0, 4, 0x2, 64);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0x7);
   nir_ssa_def *val = store->src[0].ssa;
   ASSERT_EQ(val->bit_size, 32);
   ASSERT_EQ(val->num_components, 3);
   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
   ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
   ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20);
   ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x0);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_32_64)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32);
   create_store(nir_var_mem_ssbo, 0, 4, 0x2, 64, 2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_identical_wrmask)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 4, 1 | 4);
   create_store(nir_var_mem_ssbo, 0, 0, 0x2, 32, 4, 2 | 4 | 8);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0xf);
   nir_ssa_def *val = store->src[0].ssa;
   ASSERT_EQ(val->bit_size, 32);
   ASSERT_EQ(val->num_components, 4);
   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
   ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
   ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x21);
   ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x22);
   ASSERT_EQ(nir_const_value_as_uint(cv[3], 32), 0x23);
}

TEST_F(nir_load_store_vectorize_test, shared_load_adjacent)
{
   nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
   nir_deref_instr *deref = nir_build_deref_var(b, var);

   create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1);
   create_shared_load(nir_build_deref_array_imm(b, deref, 1), 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);

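   /* The vectorized load reads through a cast deref of var[0]. */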
   deref = nir_src_as_deref(load->src[0]);
   ASSERT_EQ(deref->deref_type, nir_deref_type_cast);

   deref = nir_deref_instr_parent(deref);
   ASSERT_EQ(deref->deref_type, nir_deref_type_array);
   ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);

   deref = nir_deref_instr_parent(deref);
   ASSERT_EQ(deref->deref_type, nir_deref_type_var);
   ASSERT_EQ(deref->var, var);

   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
}

TEST_F(nir_load_store_vectorize_test, shared_load_distant_64bit)
{
   nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
   nir_deref_instr *deref = nir_build_deref_var(b, var);
   nir_ssa_dest_init(&deref->instr, &deref->dest, 1, 64, NULL);

   create_shared_load(nir_build_deref_array_imm(b, deref, 0x100000000), 0x1);
   create_shared_load(nir_build_deref_array_imm(b, deref, 0x200000001), 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);

   EXPECT_FALSE(run_vectorizer(nir_var_mem_shared));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
}

TEST_F(nir_load_store_vectorize_test, shared_load_adjacent_indirect)
{
   nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
   nir_deref_instr *deref = nir_build_deref_var(b, var);
   nir_ssa_def *index_base = nir_load_local_invocation_index(b);

   create_shared_load(nir_build_deref_array(b, deref, index_base), 0x1);
   create_shared_load(nir_build_deref_array(b, deref, nir_iadd_imm(b, index_base, 1)), 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);

   deref = nir_src_as_deref(load->src[0]);
   ASSERT_EQ(deref->deref_type, nir_deref_type_cast);

   deref = nir_deref_instr_parent(deref);
   ASSERT_EQ(deref->deref_type, nir_deref_type_array);
   ASSERT_EQ(deref->arr.index.ssa, index_base);

   deref = nir_deref_instr_parent(deref);
   ASSERT_EQ(deref->deref_type, nir_deref_type_var);
   ASSERT_EQ(deref->var, var);

   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
}

TEST_F(nir_load_store_vectorize_test, shared_load_adjacent_indirect_sub)
{
   nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
   nir_deref_instr *deref = nir_build_deref_var(b, var);
   nir_ssa_def *index_base = nir_load_local_invocation_index(b);
   nir_ssa_def *index_base_prev = nir_iadd_imm(b, index_base, 0xffffffff);

   create_shared_load(nir_build_deref_array(b, deref, index_base_prev), 0x1);
   create_shared_load(nir_build_deref_array(b, deref, index_base), 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);

   deref = nir_src_as_deref(load->src[0]);
   ASSERT_EQ(deref->deref_type, nir_deref_type_cast);

   deref = nir_deref_instr_parent(deref);
   ASSERT_EQ(deref->deref_type, nir_deref_type_array);
   ASSERT_EQ(deref->arr.index.ssa, index_base_prev);

   deref = nir_deref_instr_parent(deref);
   ASSERT_EQ(deref->deref_type, nir_deref_type_var);
   ASSERT_EQ(deref->var, var);

   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
}

TEST_F(nir_load_store_vectorize_test, shared_load_struct)
{
   glsl_struct_field fields[2] = {glsl_struct_field(glsl_uint_type(), "field0"),
                                  glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field1")};

   nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_struct_type(fields, 2, "Struct", false), "var");
   nir_deref_instr *deref = nir_build_deref_var(b, var);

   create_shared_load(nir_build_deref_struct(b, deref, 0), 0x1);
   create_shared_load(nir_build_deref_array_imm(b, nir_build_deref_struct(b, deref, 1), 0), 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);

   deref = nir_src_as_deref(load->src[0]);
   ASSERT_EQ(deref->deref_type, nir_deref_type_cast);

   deref = nir_deref_instr_parent(deref);
   ASSERT_EQ(deref->deref_type, nir_deref_type_struct);
   ASSERT_EQ(deref->strct.index, 0);

   deref = nir_deref_instr_parent(deref);
   ASSERT_EQ(deref->deref_type, nir_deref_type_var);
   ASSERT_EQ(deref->var, var);

   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
}

TEST_F(nir_load_store_vectorize_test,shared_load_identical_store_adjacent)1346 TEST_F(nir_load_store_vectorize_test, shared_load_identical_store_adjacent)
1347 {
1348 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1349 nir_deref_instr *deref = nir_build_deref_var(b, var);
1350
1351 create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1);
1352 create_shared_store(nir_build_deref_array_imm(b, deref, 1), 0x2);
1353 create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x3);
1354
1355 nir_validate_shader(b->shader, NULL);
1356 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1357 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
1358
1359 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1360
1361 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1362 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
1363
1364 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1365 ASSERT_EQ(load->dest.ssa.bit_size, 32);
1366 ASSERT_EQ(load->dest.ssa.num_components, 1);
1367
1368 deref = nir_src_as_deref(load->src[0]);
1369 ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1370 ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);
1371
1372 deref = nir_deref_instr_parent(deref);
1373 ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1374 ASSERT_EQ(deref->var, var);
1375
1376 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1377 EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
1378 }
1379
TEST_F(nir_load_store_vectorize_test, shared_load_identical_store_identical)
{
   nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
   nir_deref_instr *deref = nir_build_deref_var(b, var);

   create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1);
   create_shared_store(nir_build_deref_array_imm(b, deref, 0), 0x2);
   create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);

   EXPECT_FALSE(run_vectorizer(nir_var_mem_shared));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
}

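/* The store only overwrites element 0; the load of element 1 doesn't alias it,
 * so the two loads can still be merged into a single vec2 load. */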
TEST_F(nir_load_store_vectorize_test, shared_load_adjacent_store_identical)
{
   nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
   nir_deref_instr *deref = nir_build_deref_var(b, var);

   create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1);
   create_shared_store(nir_build_deref_array_imm(b, deref, 0), 0x2);
   create_shared_load(nir_build_deref_array_imm(b, deref, 1), 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);

   deref = nir_src_as_deref(load->src[0]);
   ASSERT_EQ(deref->deref_type, nir_deref_type_cast);

   deref = nir_deref_instr_parent(deref);
   ASSERT_EQ(deref->deref_type, nir_deref_type_array);
   ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);

   deref = nir_deref_instr_parent(deref);
   ASSERT_EQ(deref->deref_type, nir_deref_type_var);
   ASSERT_EQ(deref->var, var);

   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x3], load, "y");
}

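/* Booleans are loaded as 32-bit integers here, so the two loads vectorize into
 * one vec2 load whose components are converted back with i2b1. */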
TEST_F(nir_load_store_vectorize_test, shared_load_bool)
{
   nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_bool_type(), 4, 0), "var");
   nir_deref_instr *deref = nir_build_deref_var(b, var);

   create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1, 1);
   create_shared_load(nir_build_deref_array_imm(b, deref, 1), 0x2, 1);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);

   deref = nir_src_as_deref(load->src[0]);
   ASSERT_EQ(deref->deref_type, nir_deref_type_cast);

   deref = nir_deref_instr_parent(deref);
   ASSERT_EQ(deref->deref_type, nir_deref_type_array);
   ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);

   deref = nir_deref_instr_parent(deref);
   ASSERT_EQ(deref->deref_type, nir_deref_type_var);
   ASSERT_EQ(deref->var, var);

   ASSERT_TRUE(test_alu(loads[0x1]->src.ssa->parent_instr, nir_op_i2b1));
   ASSERT_TRUE(test_alu(loads[0x2]->src.ssa->parent_instr, nir_op_i2b1));
   ASSERT_TRUE(test_alu_def(loads[0x1]->src.ssa->parent_instr, 0, &load->dest.ssa, 0));
   ASSERT_TRUE(test_alu_def(loads[0x2]->src.ssa->parent_instr, 0, &load->dest.ssa, 1));
}

TEST_F(nir_load_store_vectorize_test, shared_load_bool_mixed)
{
   glsl_struct_field fields[2] = {glsl_struct_field(glsl_bool_type(), "field0"),
                                  glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field1")};

   nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_struct_type(fields, 2, "Struct", false), "var");
   nir_deref_instr *deref = nir_build_deref_var(b, var);

   create_shared_load(nir_build_deref_struct(b, deref, 0), 0x1, 1);
   create_shared_load(nir_build_deref_array_imm(b, nir_build_deref_struct(b, deref, 1), 0), 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);

   deref = nir_src_as_deref(load->src[0]);
   ASSERT_EQ(deref->deref_type, nir_deref_type_cast);

   deref = nir_deref_instr_parent(deref);
   ASSERT_EQ(deref->deref_type, nir_deref_type_struct);
   ASSERT_EQ(deref->strct.index, 0);

   deref = nir_deref_instr_parent(deref);
   ASSERT_EQ(deref->deref_type, nir_deref_type_var);
   ASSERT_EQ(deref->var, var);

   ASSERT_TRUE(test_alu(loads[0x1]->src.ssa->parent_instr, nir_op_i2b1));
   ASSERT_TRUE(test_alu_def(loads[0x1]->src.ssa->parent_instr, 0, &load->dest.ssa, 0));

   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
}

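/* Two adjacent stores should merge into a single vec2 store with write mask
 * 0x3 and the two constants packed into one load_const. */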
TEST_F(nir_load_store_vectorize_test, shared_store_adjacent)
{
   nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
   nir_deref_instr *deref = nir_build_deref_var(b, var);

   create_shared_store(nir_build_deref_array_imm(b, deref, 0), 0x1);
   create_shared_store(nir_build_deref_array_imm(b, deref, 1), 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_deref, 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0x3);
   nir_ssa_def *val = store->src[1].ssa;
   ASSERT_EQ(val->bit_size, 32);
   ASSERT_EQ(val->num_components, 2);
   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
   ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
   ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20);

   deref = nir_src_as_deref(store->src[0]);
   ASSERT_EQ(deref->deref_type, nir_deref_type_cast);

   deref = nir_deref_instr_parent(deref);
   ASSERT_EQ(deref->deref_type, nir_deref_type_array);
   ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);

   deref = nir_deref_instr_parent(deref);
   ASSERT_EQ(deref->deref_type, nir_deref_type_var);
   ASSERT_EQ(deref->var, var);
}

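/* The push_const_load_separate_* tests build loads whose address ranges either
 * don't touch or can't be proven adjacent, so nothing may be vectorized. */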
TEST_F(nir_load_store_vectorize_test, push_const_load_separate_base)
{
   create_load(nir_var_mem_push_const, 0, 0, 0x1);
   nir_intrinsic_set_base(create_load(nir_var_mem_push_const, 0, 4, 0x2), 4);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);

   EXPECT_FALSE(run_vectorizer(nir_var_mem_push_const));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
}

TEST_F(nir_load_store_vectorize_test, push_const_load_separate_direct_direct)
{
   create_load(nir_var_mem_push_const, 0, 0, 0x1);
   create_load(nir_var_mem_push_const, 0, 8, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);

   EXPECT_FALSE(run_vectorizer(nir_var_mem_push_const));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
}

TEST_F(nir_load_store_vectorize_test, push_const_load_separate_direct_indirect)
{
   nir_ssa_def *index_base = nir_load_local_invocation_index(b);
   create_load(nir_var_mem_push_const, 0, 0, 0x1);
   create_indirect_load(nir_var_mem_push_const, 0, index_base, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);

   EXPECT_FALSE(run_vectorizer(nir_var_mem_push_const));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
}

TEST_F(nir_load_store_vectorize_test, push_const_load_separate_indirect_indirect)
{
   nir_ssa_def *index_base = nir_load_local_invocation_index(b);
   create_indirect_load(nir_var_mem_push_const, 0,
                        nir_iadd_imm(b, nir_imul_imm(b, nir_iadd_imm(b, index_base, 2), 16), 32), 0x1);
   create_indirect_load(nir_var_mem_push_const, 0,
                        nir_iadd_imm(b, nir_imul_imm(b, nir_iadd_imm(b, index_base, 3), 16), 32), 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);

   EXPECT_FALSE(run_vectorizer(nir_var_mem_push_const));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
}

TEST_F(nir_load_store_vectorize_test, push_const_load_adjacent_complex_indirect)
{
   nir_ssa_def *index_base = nir_load_local_invocation_index(b);
   // vec4 pc[]; pc[gl_LocalInvocationIndex].w; pc[gl_LocalInvocationIndex+1].x;
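   // The offsets are idx*16 + 12 and (idx + 1)*16 == idx*16 + 16, i.e. 4 bytes
   // apart, so the two loads are adjacent and merge into one vec2 load at the
   // lower offset.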
   nir_ssa_def *low = nir_iadd_imm(b, nir_imul_imm(b, index_base, 16), 12);
   nir_ssa_def *high = nir_imul_imm(b, nir_iadd_imm(b, index_base, 1), 16);
   create_indirect_load(nir_var_mem_push_const, 0, low, 0x1);
   create_indirect_load(nir_var_mem_push_const, 0, high, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_push_constant, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);
   ASSERT_EQ(load->src[0].ssa, low);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
}

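/* The ssbo_alias* tests check that loads are never combined across a store
 * that might write the memory they read. */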
TEST_F(nir_load_store_vectorize_test, ssbo_alias0)
{
   nir_ssa_def *index_base = nir_load_local_invocation_index(b);
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   create_indirect_store(nir_var_mem_ssbo, 0, index_base, 0x2);
   create_load(nir_var_mem_ssbo, 0, 0, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, ssbo_alias1)
{
   nir_ssa_def *load_base = nir_load_global_invocation_index(b, 32);
   nir_ssa_def *store_base = nir_load_local_invocation_index(b);
   create_indirect_load(nir_var_mem_ssbo, 0, load_base, 0x1);
   create_indirect_store(nir_var_mem_ssbo, 0, store_base, 0x2);
   create_indirect_load(nir_var_mem_ssbo, 0, load_base, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, DISABLED_ssbo_alias2)
{
   /* TODO: try to combine these loads */
   nir_ssa_def *index_base = nir_load_local_invocation_index(b);
   nir_ssa_def *offset = nir_iadd_imm(b, nir_imul_imm(b, index_base, 16), 4);
   create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
   create_store(nir_var_mem_ssbo, 0, 0, 0x2);
   create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 1);
   ASSERT_EQ(load->src[1].ssa, offset);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
}

TEST_F(nir_load_store_vectorize_test, ssbo_alias3)
{
   /* These loads can't be combined: if index_base == 268435455, then
    * offset == 268435455*16 + 16 == 2^32, which wraps around to 0 and so may
    * alias the store. They could be combined if nir_alu_instr::no_unsigned_wrap
    * were set on the addition (see DISABLED_ssbo_alias4). */
   nir_ssa_def *index_base = nir_load_local_invocation_index(b);
   nir_ssa_def *offset = nir_iadd_imm(b, nir_imul_imm(b, index_base, 16), 16);
   create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
   create_store(nir_var_mem_ssbo, 0, 0, 0x2);
   create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, DISABLED_ssbo_alias4)
{
   /* TODO: try to combine these loads */
   nir_ssa_def *index_base = nir_load_local_invocation_index(b);
   nir_ssa_def *offset = nir_iadd_imm(b, nir_imul_imm(b, index_base, 16), 16);
   nir_instr_as_alu(offset->parent_instr)->no_unsigned_wrap = true;
   create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
   create_store(nir_var_mem_ssbo, 0, 0, 0x2);
   create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 1);
   ASSERT_EQ(load->src[1].ssa, offset);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
}

TEST_F(nir_load_store_vectorize_test, ssbo_alias5)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   create_store(nir_var_mem_ssbo, 1, 0, 0x2);
   create_load(nir_var_mem_ssbo, 0, 0, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, ssbo_alias6)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 1, ACCESS_RESTRICT);
   create_store(nir_var_mem_ssbo, 1, 0, 0x2, 32, 1, 0xf, ACCESS_RESTRICT);
   create_load(nir_var_mem_ssbo, 0, 0, 0x3, 32, 1, ACCESS_RESTRICT);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 1);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
}

TEST_F(nir_load_store_vectorize_test, DISABLED_shared_alias0)
{
   /* TODO: implement type-based alias analysis so that these loads can be
    * combined. This is made a bit more difficult than simply using
    * nir_compare_derefs() because the vectorizer creates loads/stores with
    * cast derefs. The solution would probably be to keep multiple derefs for
    * an entry (one for each load/store combined into it). */
   glsl_struct_field fields[2] = {glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field0"),
                                  glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field1")};

   nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_struct_type(fields, 2, "Struct", false), "var");
   nir_deref_instr *deref = nir_build_deref_var(b, var);

   nir_ssa_def *index0 = nir_load_local_invocation_index(b);
   nir_ssa_def *index1 = nir_load_global_invocation_index(b, 32);
   nir_deref_instr *load_deref = nir_build_deref_array(b, nir_build_deref_struct(b, deref, 0), index0);

   create_shared_load(load_deref, 0x1);
   create_shared_store(nir_build_deref_array(b, nir_build_deref_struct(b, deref, 1), index1), 0x2);
   create_shared_load(load_deref, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 1);
   ASSERT_EQ(load->src[0].ssa, &load_deref->dest.ssa);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
}

TEST_F(nir_load_store_vectorize_test, shared_alias1)
{
   nir_variable *var0 = nir_variable_create(b->shader, nir_var_mem_shared, glsl_uint_type(), "var0");
   nir_variable *var1 = nir_variable_create(b->shader, nir_var_mem_shared, glsl_uint_type(), "var1");
   nir_deref_instr *load_deref = nir_build_deref_var(b, var0);

   create_shared_load(load_deref, 0x1);
   create_shared_store(nir_build_deref_var(b, var1), 0x2);
   create_shared_load(load_deref, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 1);
   ASSERT_EQ(load->src[0].ssa, &load_deref->dest.ssa);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
}

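/* The offsets below differ by more than 32 bits can represent, so the loads
 * must stay separate; this checks that the 64-bit offset difference isn't
 * truncated to 32 bits, which would make the loads look adjacent. */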
TEST_F(nir_load_store_vectorize_test, ssbo_load_distant_64bit)
{
   create_indirect_load(nir_var_mem_ssbo, 0, nir_imm_int64(b, 0x100000000), 0x1);
   create_indirect_load(nir_var_mem_ssbo, 0, nir_imm_int64(b, 0x200000004), 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_distant_indirect_64bit)
{
   nir_ssa_def *index_base = nir_u2u64(b, nir_load_local_invocation_index(b));
   nir_ssa_def *first = nir_imul_imm(b, index_base, 0x100000000);
   nir_ssa_def *second = nir_imul_imm(b, index_base, 0x200000000);
   create_indirect_load(nir_var_mem_ssbo, 0, first, 0x1);
   create_indirect_load(nir_var_mem_ssbo, 0, second, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}

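/* With robust buffer access, a load at 0xfffffffc and one at 0x0 must not be
 * treated as adjacent through wraparound: combining them would change the
 * out-of-bounds behaviour. */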
TEST_F(nir_load_store_vectorize_test, ssbo_offset_overflow_robust)
{
   create_load(nir_var_mem_ssbo, 0, 0xfffffffc, 0x1);
   create_load(nir_var_mem_ssbo, 0, 0x0, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo, false, nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}

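/* For the robust indirect-stride tests below, the vectorizer has to prove that
 * combining indirect loads can't turn an in-bounds access into an out-of-bounds
 * one (or vice versa), which depends on the stride of the indirect offset. */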
TEST_F(nir_load_store_vectorize_test, ssbo_offset_overflow_robust_indirect_stride1)
{
   nir_ssa_def *offset = nir_load_local_invocation_index(b);
   create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
   create_indirect_load(nir_var_mem_ssbo, 0, nir_iadd_imm(b, offset, 4), 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo, false, nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, ssbo_offset_overflow_robust_indirect_stride8)
{
   nir_ssa_def *offset = nir_load_local_invocation_index(b);
   offset = nir_imul_imm(b, offset, 8);
   create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
   create_indirect_load(nir_var_mem_ssbo, 0, nir_iadd_imm(b, offset, 4), 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo, false, nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
}

TEST_F(nir_load_store_vectorize_test, ssbo_offset_overflow_robust_indirect_stride12)
{
   nir_ssa_def *offset = nir_load_local_invocation_index(b);
   offset = nir_imul_imm(b, offset, 12);
   create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
   nir_ssa_def *offset_4 = nir_iadd_imm(b, offset, 4);
   create_indirect_load(nir_var_mem_ssbo, 0, offset_4, 0x2);
   create_indirect_load(nir_var_mem_ssbo, 0, nir_iadd_imm(b, offset, 8), 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 3);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo, false, nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 1);
   ASSERT_EQ(load->src[1].ssa, offset);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");

   load = get_intrinsic(nir_intrinsic_load_ssbo, 1);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);
   ASSERT_EQ(load->src[1].ssa, offset_4);
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x3], load, "y");
}

TEST_F(nir_load_store_vectorize_test, ssbo_offset_overflow_robust_indirect_stride16)
{
   nir_ssa_def *offset = nir_load_local_invocation_index(b);
   offset = nir_imul_imm(b, offset, 16);
   create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
   create_indirect_load(nir_var_mem_ssbo, 0, nir_iadd_imm(b, offset, 4), 0x2);
   create_indirect_load(nir_var_mem_ssbo, 0, nir_iadd_imm(b, offset, 8), 0x3);
   create_indirect_load(nir_var_mem_ssbo, 0, nir_iadd_imm(b, offset, 12), 0x4);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 4);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo, false, nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
}

TEST_F(nir_load_store_vectorize_test, shared_offset_overflow_robust_indirect_stride12)
{
   nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared,
                                           glsl_array_type(glsl_uint_type(), 4, 0), "var");
   nir_deref_instr *deref = nir_build_deref_var(b, var);

   nir_ssa_def *index = nir_load_local_invocation_index(b);
   index = nir_imul_imm(b, index, 3);
   create_shared_load(nir_build_deref_array(b, deref, index), 0x1);
   create_shared_load(nir_build_deref_array(b, deref, nir_iadd_imm(b, index, 1)), 0x2);
   create_shared_load(nir_build_deref_array(b, deref, nir_iadd_imm(b, index, 2)), 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 3);

   EXPECT_FALSE(run_vectorizer(nir_var_mem_shared, false, nir_var_mem_shared));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 3);
}

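/* The ubo_alignment_* tests check the align_mul/align_offset the vectorizer
 * derives from the offset expression: for offset = x*16 + 4 it should report
 * align_mul == 16 and align_offset == 4. */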
TEST_F(nir_load_store_vectorize_test, ubo_alignment_16_4)
{
   nir_ssa_def *offset = nir_load_local_invocation_index(b);
   offset = nir_imul_imm(b, offset, 16);
   offset = nir_iadd_imm(b, offset, 4);
   nir_intrinsic_instr *load = create_indirect_load(nir_var_mem_ubo, 0, offset,
                                                    0x1);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
   EXPECT_EQ(nir_intrinsic_align_mul(load), 16);
   EXPECT_EQ(nir_intrinsic_align_offset(load), 4);
}

TEST_F(nir_load_store_vectorize_test, ubo_alignment_16_4_swapped)
{
   nir_ssa_def *offset = nir_load_local_invocation_index(b);
   offset = nir_iadd_imm(b, offset, 1);
   offset = nir_imul_imm(b, offset, 16);
   offset = nir_iadd_imm(b, offset, 4);
   nir_intrinsic_instr *load =
      create_indirect_load(nir_var_mem_ubo, 0, offset, 0x1);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
   EXPECT_EQ(nir_intrinsic_align_mul(load), 16);
   EXPECT_EQ(nir_intrinsic_align_offset(load), 4);
}

/* Check offset % mul != 0: the constant 20 is reduced to align_offset
 * 20 % 16 == 4. */
TEST_F(nir_load_store_vectorize_test, ubo_alignment_16_20)
{
   nir_ssa_def *offset = nir_load_local_invocation_index(b);
   offset = nir_imul_imm(b, offset, 16);
   offset = nir_iadd_imm(b, offset, 20);
   nir_intrinsic_instr *load = create_indirect_load(nir_var_mem_ubo, 0, offset,
                                                    0x1);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
   EXPECT_EQ(nir_intrinsic_align_mul(load), 16);
   EXPECT_EQ(nir_intrinsic_align_offset(load), 4);
}

/* Check that we don't upgrade to non-power-of-two alignments: for a stride of
 * 24, align_mul is 8, the largest power of two dividing 24. */
TEST_F(nir_load_store_vectorize_test, ubo_alignment_24_4)
{
   nir_ssa_def *offset = nir_load_local_invocation_index(b);
   offset = nir_imul_imm(b, offset, 24);
   offset = nir_iadd_imm(b, offset, 4);
   nir_intrinsic_instr *load =
      create_indirect_load(nir_var_mem_ubo, 0, offset, 0x1);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
   EXPECT_EQ(nir_intrinsic_align_mul(load), 8);
   EXPECT_EQ(nir_intrinsic_align_offset(load), 4);
}

/* Check that with two indirect terms, align_mul is bounded by the smaller
 * stride (here 16). */
TEST_F(nir_load_store_vectorize_test, ubo_alignment_64_16_8)
{
   nir_ssa_def *x = nir_imul_imm(b, nir_load_local_invocation_index(b), 64);
   nir_ssa_def *y = nir_imul_imm(b, nir_load_instance_id(b), 16);
   nir_ssa_def *offset = nir_iadd(b, x, y);
   offset = nir_iadd_imm(b, offset, 8);
   nir_intrinsic_instr *load =
      create_indirect_load(nir_var_mem_ubo, 0, offset, 0x1);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
   EXPECT_EQ(nir_intrinsic_align_mul(load), 16);
   EXPECT_EQ(nir_intrinsic_align_offset(load), 8);
}

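/* A fully constant offset is known exactly, so align_mul is capped at
 * NIR_ALIGN_MUL_MAX and align_offset is the constant itself. */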
TEST_F(nir_load_store_vectorize_test, ubo_alignment_const_100)
{
   nir_intrinsic_instr *load =
      create_indirect_load(nir_var_mem_ubo, 0, nir_imm_int(b, 100), 0x1);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
   EXPECT_EQ(nir_intrinsic_align_mul(load), NIR_ALIGN_MUL_MAX);
   EXPECT_EQ(nir_intrinsic_align_offset(load), 100);
}
