/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <gtest/gtest.h>

#include "nir.h"
#include "nir_builder.h"

/* This is a macro so you get good line numbers */
#define EXPECT_INSTR_SWIZZLES(instr, load, expected_swizzle)    \
   EXPECT_EQ((instr)->src[0].src.ssa, &(load)->dest.ssa);       \
   EXPECT_EQ(swizzle(instr, 0), expected_swizzle);

namespace {

class nir_load_store_vectorize_test : public ::testing::Test {
protected:
   nir_load_store_vectorize_test();
   ~nir_load_store_vectorize_test();

   unsigned count_intrinsics(nir_intrinsic_op intrinsic);

   nir_intrinsic_instr *get_intrinsic(nir_intrinsic_op intrinsic,
                                      unsigned index);

   bool run_vectorizer(nir_variable_mode modes, bool cse=false,
                       nir_variable_mode robust_modes = (nir_variable_mode)0);

   nir_ssa_def *get_resource(uint32_t binding, bool ssbo);

   nir_intrinsic_instr *create_indirect_load(nir_variable_mode mode, uint32_t binding, nir_ssa_def *offset,
                                             uint32_t id, unsigned bit_size=32, unsigned components=1,
                                             unsigned access=0);
   void create_indirect_store(nir_variable_mode mode, uint32_t binding, nir_ssa_def *offset,
                              uint32_t id, unsigned bit_size=32, unsigned components=1,
                              unsigned wrmask=0xf, unsigned access=0);

   nir_intrinsic_instr *create_load(nir_variable_mode mode, uint32_t binding, uint32_t offset,
                                    uint32_t id, unsigned bit_size=32, unsigned components=1,
                                    unsigned access=0);
   void create_store(nir_variable_mode mode, uint32_t binding, uint32_t offset,
                     uint32_t id, unsigned bit_size=32, unsigned components=1, unsigned wrmask=0xf,
                     unsigned access=0);

   void create_shared_load(nir_deref_instr *deref, uint32_t id,
                           unsigned bit_size=32, unsigned components=1);
   void create_shared_store(nir_deref_instr *deref, uint32_t id,
                            unsigned bit_size=32, unsigned components=1, unsigned wrmask=0xf);

   bool test_alu(nir_instr *instr, nir_op op);
   bool test_alu_def(nir_instr *instr, unsigned index, nir_ssa_def *def, unsigned swizzle=0);

   static bool mem_vectorize_callback(unsigned align_mul, unsigned align_offset,
                                      unsigned bit_size,
                                      unsigned num_components,
                                      nir_intrinsic_instr *low, nir_intrinsic_instr *high,
                                      void *data);
   static void shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align);

   std::string swizzle(nir_alu_instr *instr, int src);

   nir_builder *b, _b;
   std::map<unsigned, nir_alu_instr*> movs;
   std::map<unsigned, nir_alu_src*> loads;
   std::map<unsigned, nir_ssa_def*> res_map;
};

nir_load_store_vectorize_test::nir_load_store_vectorize_test()
{
   glsl_type_singleton_init_or_ref();

   static const nir_shader_compiler_options options = { };
   _b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, &options, "load store tests");
   b = &_b;
}

nir_load_store_vectorize_test::~nir_load_store_vectorize_test()
{
   if (HasFailure()) {
      printf("\nShader from the failed test:\n\n");
      nir_print_shader(b->shader, stdout);
   }

   ralloc_free(b->shader);

   glsl_type_singleton_decref();
}

std::string
nir_load_store_vectorize_test::swizzle(nir_alu_instr *instr, int src)
{
   std::string swizzle;
   for (unsigned i = 0; i < nir_ssa_alu_instr_src_components(instr, src); i++) {
      swizzle += "xyzw"[instr->src[src].swizzle[i]];
   }

   return swizzle;
}

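/* Count the intrinsics with the given opcode across all blocks of the shader. */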
unsigned
nir_load_store_vectorize_test::count_intrinsics(nir_intrinsic_op intrinsic)
{
   unsigned count = 0;
   nir_foreach_block(block, b->impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;
         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         if (intrin->intrinsic == intrinsic)
            count++;
      }
   }
   return count;
}

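/* Return the index-th intrinsic with the given opcode, in program order, or
 * NULL if there are fewer than index+1 of them.
 */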
nir_intrinsic_instr *
nir_load_store_vectorize_test::get_intrinsic(nir_intrinsic_op intrinsic,
                                             unsigned index)
{
   nir_foreach_block(block, b->impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;
         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         if (intrin->intrinsic == intrinsic) {
            if (index == 0)
               return intrin;
            index--;
         }
      }
   }
   return NULL;
}

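/* Run nir_opt_load_store_vectorize() with the test callback on the given
 * modes, then validate and run a few cleanup passes so the tests can match
 * against a simplified shader.
 */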
bool
nir_load_store_vectorize_test::run_vectorizer(nir_variable_mode modes,
                                              bool cse,
                                              nir_variable_mode robust_modes)
{
   if (modes & nir_var_mem_shared)
      nir_lower_vars_to_explicit_types(b->shader, nir_var_mem_shared, shared_type_info);

   nir_load_store_vectorize_options opts = { };
   opts.callback = mem_vectorize_callback;
   opts.modes = modes;
   opts.robust_modes = robust_modes;
   bool progress = nir_opt_load_store_vectorize(b->shader, &opts);

   if (progress) {
      nir_validate_shader(b->shader, NULL);
      if (cse)
         nir_opt_cse(b->shader);
      nir_copy_prop(b->shader);
      nir_opt_algebraic(b->shader);
      nir_opt_constant_folding(b->shader);
   }
   return progress;
}

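/* Create (or reuse a cached) vulkan_resource_index for the given binding,
 * used as the buffer source of UBO/SSBO load/store intrinsics.
 */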
nir_ssa_def *
nir_load_store_vectorize_test::get_resource(uint32_t binding, bool ssbo)
{
   if (res_map.count(binding))
      return res_map[binding];

   nir_intrinsic_instr *res = nir_intrinsic_instr_create(
      b->shader, nir_intrinsic_vulkan_resource_index);
   nir_ssa_dest_init(&res->instr, &res->dest, 1, 32, NULL);
   res->num_components = 1;
   res->src[0] = nir_src_for_ssa(nir_imm_zero(b, 1, 32));
   nir_intrinsic_set_desc_type(
      res, ssbo ? 7/*VK_DESCRIPTOR_TYPE_STORAGE_BUFFER*/ : 6/*VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER*/);
   nir_intrinsic_set_desc_set(res, 0);
   nir_intrinsic_set_binding(res, binding);
   nir_builder_instr_insert(b, &res->instr);
   res_map[binding] = &res->dest.ssa;
   return &res->dest.ssa;
}

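/* Emit a UBO/SSBO/push-constant load with an SSA offset and record a mov of
 * its result under "id" so the tests can later check which components of the
 * vectorized load each original load reads.
 */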
nir_intrinsic_instr *
nir_load_store_vectorize_test::create_indirect_load(
   nir_variable_mode mode, uint32_t binding, nir_ssa_def *offset, uint32_t id,
   unsigned bit_size, unsigned components, unsigned access)
{
   nir_intrinsic_op intrinsic;
   nir_ssa_def *res = NULL;
   switch (mode) {
   case nir_var_mem_ubo:
      intrinsic = nir_intrinsic_load_ubo;
      res = get_resource(binding, false);
      break;
   case nir_var_mem_ssbo:
      intrinsic = nir_intrinsic_load_ssbo;
      res = get_resource(binding, true);
      break;
   case nir_var_mem_push_const:
      intrinsic = nir_intrinsic_load_push_constant;
      break;
   default:
      return NULL;
   }
   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, intrinsic);
   nir_ssa_dest_init(&load->instr, &load->dest, components, bit_size, NULL);
   load->num_components = components;
   if (res) {
      load->src[0] = nir_src_for_ssa(res);
      load->src[1] = nir_src_for_ssa(offset);
   } else {
      load->src[0] = nir_src_for_ssa(offset);
   }
   int byte_size = (bit_size == 1 ? 32 : bit_size) / 8;

   if (mode != nir_var_mem_push_const) {
      nir_intrinsic_set_align(load, byte_size, 0);
      nir_intrinsic_set_access(load, (gl_access_qualifier)access);
   }

   if (nir_intrinsic_has_range_base(load)) {
      uint32_t range = byte_size * components;
      int offset_src = res ? 1 : 0;

      if (nir_src_is_const(load->src[offset_src])) {
         nir_intrinsic_set_range_base(load, nir_src_as_uint(load->src[offset_src]));
         nir_intrinsic_set_range(load, range);
      } else {
         /* Unknown range */
         nir_intrinsic_set_range_base(load, 0);
         nir_intrinsic_set_range(load, ~0);
      }
   }

   nir_builder_instr_insert(b, &load->instr);
   nir_alu_instr *mov = nir_instr_as_alu(nir_mov(b, &load->dest.ssa)->parent_instr);
   movs[id] = mov;
   loads[id] = &mov->src[0];

   return load;
}

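/* Emit an SSBO or shared store of an immediate value derived from "id"
 * (component i gets (id << 4) | i) with the given write mask.
 */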
void
nir_load_store_vectorize_test::create_indirect_store(
   nir_variable_mode mode, uint32_t binding, nir_ssa_def *offset, uint32_t id,
   unsigned bit_size, unsigned components, unsigned wrmask, unsigned access)
{
   nir_const_value values[NIR_MAX_VEC_COMPONENTS];
   for (unsigned i = 0; i < components; i++)
      values[i] = nir_const_value_for_raw_uint((id << 4) | i, bit_size);
   nir_ssa_def *value = nir_build_imm(b, components, bit_size, values);

   nir_intrinsic_op intrinsic;
   nir_ssa_def *res = NULL;
   switch (mode) {
   case nir_var_mem_ssbo:
      intrinsic = nir_intrinsic_store_ssbo;
      res = get_resource(binding, true);
      break;
   case nir_var_mem_shared:
      intrinsic = nir_intrinsic_store_shared;
      break;
   default:
      return;
   }
   nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, intrinsic);
   nir_ssa_dest_init(&store->instr, &store->dest, components, bit_size, NULL);
   store->num_components = components;
   if (res) {
      store->src[0] = nir_src_for_ssa(value);
      store->src[1] = nir_src_for_ssa(res);
      store->src[2] = nir_src_for_ssa(offset);
   } else {
      store->src[0] = nir_src_for_ssa(value);
      store->src[1] = nir_src_for_ssa(offset);
   }
   nir_intrinsic_set_align(store, (bit_size == 1 ? 32 : bit_size) / 8, 0);
   nir_intrinsic_set_access(store, (gl_access_qualifier)access);
   nir_intrinsic_set_write_mask(store, wrmask & ((1 << components) - 1));
   nir_builder_instr_insert(b, &store->instr);
}

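/* Convenience wrappers around the indirect helpers for constant byte offsets. */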
nir_intrinsic_instr *
nir_load_store_vectorize_test::create_load(
   nir_variable_mode mode, uint32_t binding, uint32_t offset, uint32_t id,
   unsigned bit_size, unsigned components, unsigned access)
{
   return create_indirect_load(mode, binding, nir_imm_int(b, offset), id, bit_size, components, access);
}

void
nir_load_store_vectorize_test::create_store(
   nir_variable_mode mode, uint32_t binding, uint32_t offset, uint32_t id,
   unsigned bit_size, unsigned components, unsigned wrmask, unsigned access)
{
   create_indirect_store(mode, binding, nir_imm_int(b, offset), id, bit_size, components, wrmask, access);
}

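/* Load through a deref and record a mov of the result under "id", mirroring
 * create_indirect_load() for the shared-memory tests.
 */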
void nir_load_store_vectorize_test::create_shared_load(
   nir_deref_instr *deref, uint32_t id, unsigned bit_size, unsigned components)
{
   nir_ssa_def *load = nir_load_deref(b, deref);
   nir_alu_instr *mov = nir_instr_as_alu(nir_mov(b, load)->parent_instr);
   movs[id] = mov;
   loads[id] = &mov->src[0];
}

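/* Store an immediate value derived from "id" through a deref, mirroring
 * create_indirect_store() for the shared-memory tests.
 */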
void nir_load_store_vectorize_test::create_shared_store(
   nir_deref_instr *deref, uint32_t id,
   unsigned bit_size, unsigned components, unsigned wrmask)
{
   nir_const_value values[NIR_MAX_VEC_COMPONENTS];
   for (unsigned i = 0; i < components; i++)
      values[i] = nir_const_value_for_raw_uint((id << 4) | i, bit_size);
   nir_ssa_def *value = nir_build_imm(b, components, bit_size, values);

   nir_store_deref(b, deref, value, wrmask & ((1 << components) - 1));
}

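/* Check that "instr" is an ALU instruction with the given opcode. */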
bool nir_load_store_vectorize_test::test_alu(nir_instr *instr, nir_op op)
{
   return instr->type == nir_instr_type_alu && nir_instr_as_alu(instr)->op == op;
}

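/* Check that source "index" of an ALU instruction reads component "swizzle"
 * of "def".
 */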
bool nir_load_store_vectorize_test::test_alu_def(
   nir_instr *instr, unsigned index, nir_ssa_def *def, unsigned swizzle)
{
   if (instr->type != nir_instr_type_alu)
      return false;

   nir_alu_instr *alu = nir_instr_as_alu(instr);

   if (index >= nir_op_infos[alu->op].num_inputs)
      return false;
   if (alu->src[index].src.ssa != def)
      return false;
   if (alu->src[index].swizzle[0] != swizzle)
      return false;

   return true;
}

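/* Callback the vectorizer uses to decide whether two candidate accesses may
 * be combined into one.
 */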
bool nir_load_store_vectorize_test::mem_vectorize_callback(
   unsigned align_mul, unsigned align_offset, unsigned bit_size,
   unsigned num_components,
   nir_intrinsic_instr *low, nir_intrinsic_instr *high,
   void *data)
{
   /* Calculate a simple alignment, like how nir_intrinsic_align() does. */
   uint32_t align = align_mul;
   if (align_offset)
      align = 1 << (ffs(align_offset) - 1);

   /* Require scalar alignment and less than 5 components. */
   return align % (bit_size / 8) == 0 &&
          num_components <= 4;
}

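/* glsl_type size/align callback used when lowering shared-memory variables to
 * explicit offsets.
 */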
void nir_load_store_vectorize_test::shared_type_info(
   const struct glsl_type *type, unsigned *size, unsigned *align)
{
   assert(glsl_type_is_vector_or_scalar(type));

   uint32_t comp_size = glsl_type_is_boolean(type)
      ? 4 : glsl_get_bit_size(type) / 8;
   unsigned length = glsl_get_vector_elements(type);
   *size = comp_size * length;
   *align = comp_size;
}
} // namespace

TEST_F(nir_load_store_vectorize_test, ubo_load_adjacent)
{
   create_load(nir_var_mem_ubo, 0, 0, 0x1);
   create_load(nir_var_mem_ubo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);
   ASSERT_EQ(nir_intrinsic_range_base(load), 0);
   ASSERT_EQ(nir_intrinsic_range(load), 8);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
}

TEST_F(nir_load_store_vectorize_test, ubo_load_intersecting)
{
   create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 2);
   create_load(nir_var_mem_ubo, 0, 4, 0x2, 32, 2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 3);
   ASSERT_EQ(nir_intrinsic_range_base(load), 0);
   ASSERT_EQ(nir_intrinsic_range(load), 12);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "xy");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "yz");
}

/* Test for a bug in range handling */
TEST_F(nir_load_store_vectorize_test, ubo_load_intersecting_range)
{
   create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 4);
   create_load(nir_var_mem_ubo, 0, 4, 0x2, 32, 1);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 4);
   ASSERT_EQ(nir_intrinsic_range_base(load), 0);
   ASSERT_EQ(nir_intrinsic_range(load), 16);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
   ASSERT_EQ(loads[0x1]->swizzle[1], 1);
   ASSERT_EQ(loads[0x1]->swizzle[2], 2);
   ASSERT_EQ(loads[0x1]->swizzle[3], 3);
   ASSERT_EQ(loads[0x2]->swizzle[0], 1);
}

TEST_F(nir_load_store_vectorize_test, ubo_load_identical)
{
   create_load(nir_var_mem_ubo, 0, 0, 0x1);
   create_load(nir_var_mem_ubo, 0, 0, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 1);
   ASSERT_EQ(nir_intrinsic_range_base(load), 0);
   ASSERT_EQ(nir_intrinsic_range(load), 4);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "x");
}

TEST_F(nir_load_store_vectorize_test, ubo_load_large)
{
   create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 2);
   create_load(nir_var_mem_ubo, 0, 8, 0x2, 32, 3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
}

TEST_F(nir_load_store_vectorize_test, push_const_load_adjacent)
{
   create_load(nir_var_mem_push_const, 0, 0, 0x1);
   create_load(nir_var_mem_push_const, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_push_constant, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);
   ASSERT_EQ(nir_src_as_uint(load->src[0]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
}

TEST_F(nir_load_store_vectorize_test, push_const_load_adjacent_base)
{
   create_load(nir_var_mem_push_const, 0, 0, 0x1);
   nir_intrinsic_set_base(create_load(nir_var_mem_push_const, 0, 0, 0x2), 4);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_push_constant, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);
   ASSERT_EQ(nir_src_as_uint(load->src[0]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   create_load(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_indirect)
{
   nir_ssa_def *index_base = nir_load_local_invocation_index(b);
   create_indirect_load(nir_var_mem_ssbo, 0, index_base, 0x1);
   create_indirect_load(nir_var_mem_ssbo, 0, nir_iadd_imm(b, index_base, 4), 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);
   ASSERT_EQ(load->src[1].ssa, index_base);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_indirect_sub)
{
   nir_ssa_def *index_base = nir_load_local_invocation_index(b);
   nir_ssa_def *index_base_prev = nir_iadd_imm(b, index_base, 0xfffffffc);
   create_indirect_load(nir_var_mem_ssbo, 0, index_base_prev, 0x1);
   create_indirect_load(nir_var_mem_ssbo, 0, index_base, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);
   ASSERT_EQ(load->src[1].ssa, index_base_prev);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_indirect_neg_stride)
{
   nir_ssa_def *inv = nir_load_local_invocation_index(b);
   nir_ssa_def *inv_plus_one = nir_iadd_imm(b, inv, 1);
   nir_ssa_def *index_base = nir_imul_imm(b, inv, 0xfffffffc);
   nir_ssa_def *index_base_prev = nir_imul_imm(b, inv_plus_one, 0xfffffffc);
   create_indirect_load(nir_var_mem_ssbo, 0, index_base_prev, 0x1);
   create_indirect_load(nir_var_mem_ssbo, 0, index_base, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");

   /* nir_opt_algebraic optimizes the imul */
   ASSERT_TRUE(test_alu(load->src[1].ssa->parent_instr, nir_op_ineg));
   nir_ssa_def *offset = nir_instr_as_alu(load->src[1].ssa->parent_instr)->src[0].src.ssa;
   ASSERT_TRUE(test_alu(offset->parent_instr, nir_op_ishl));
   nir_alu_instr *shl = nir_instr_as_alu(offset->parent_instr);
   ASSERT_EQ(shl->src[0].src.ssa, inv_plus_one);
   ASSERT_EQ(nir_src_as_uint(shl->src[1].src), 2);
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_identical_store_adjacent)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   create_store(nir_var_mem_ssbo, 0, 4, 0x2);
   create_load(nir_var_mem_ssbo, 0, 0, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 1);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_identical_store_intersecting)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
   create_store(nir_var_mem_ssbo, 0, 4, 0x2);
   create_load(nir_var_mem_ssbo, 0, 0, 0x3, 32, 2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_identical_store_identical)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   create_store(nir_var_mem_ssbo, 0, 0, 0x2);
   create_load(nir_var_mem_ssbo, 0, 0, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_identical_load_identical)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1);
   create_load(nir_var_mem_ssbo, 0, 0, 0x2);
   create_store(nir_var_mem_ssbo, 0, 0, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
}

/* If nir_opt_load_store_vectorize were implemented like many load/store
 * optimization passes are (for example, nir_opt_combine_stores and
 * nir_opt_copy_prop_vars) and stopped tracking a load when an aliasing store
 * is encountered, this case wouldn't be optimized.
 * A similar test for derefs is shared_load_adjacent_store_identical. */
TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_store_identical)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   create_store(nir_var_mem_ssbo, 0, 0, 0x2);
   create_load(nir_var_mem_ssbo, 0, 4, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x3], load, "y");
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1);
   create_store(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0x3);
   nir_ssa_def *val = store->src[0].ssa;
   ASSERT_EQ(val->bit_size, 32);
   ASSERT_EQ(val->num_components, 2);
   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
   ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
   ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_intersecting)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
   create_store(nir_var_mem_ssbo, 0, 4, 0x2, 32, 2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0x7);
   nir_ssa_def *val = store->src[0].ssa;
   ASSERT_EQ(val->bit_size, 32);
   ASSERT_EQ(val->num_components, 3);
   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
   ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
   ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20);
   ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x21);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_identical)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1);
   create_store(nir_var_mem_ssbo, 0, 0, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0x1);
   nir_ssa_def *val = store->src[0].ssa;
   ASSERT_EQ(val->bit_size, 32);
   ASSERT_EQ(val->num_components, 1);
   ASSERT_EQ(nir_src_as_uint(store->src[0]), 0x20);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_large)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
   create_store(nir_var_mem_ssbo, 0, 8, 0x2, 32, 3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, ubo_load_adjacent_memory_barrier)
{
   create_load(nir_var_mem_ubo, 0, 0, 0x1);

   nir_scoped_memory_barrier(b, NIR_SCOPE_DEVICE, NIR_MEMORY_ACQ_REL,
                             nir_var_mem_ssbo);

   create_load(nir_var_mem_ubo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_memory_barrier)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);

   nir_scoped_memory_barrier(b, NIR_SCOPE_DEVICE, NIR_MEMORY_ACQ_REL,
                             nir_var_mem_ssbo);

   create_load(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}

/* nir_intrinsic_control_barrier only syncs invocations in a workgroup; it
 * doesn't require that loads/stores complete.
 */
TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_barrier)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   nir_control_barrier(b);
   create_load(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_memory_barrier_shared)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);

   nir_scoped_memory_barrier(b, NIR_SCOPE_WORKGROUP, NIR_MEMORY_ACQ_REL,
                             nir_var_mem_shared);

   create_load(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_discard)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1);
   nir_discard(b);
   create_store(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_demote)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1);
   nir_demote(b);
   create_store(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_discard)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   nir_discard(b);
   create_load(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_demote)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   nir_demote(b);
   create_load(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_8_8_16)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1, 8);
   create_load(nir_var_mem_ssbo, 0, 1, 0x2, 8);
   create_load(nir_var_mem_ssbo, 0, 2, 0x3, 16);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 3);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 8);
   ASSERT_EQ(load->dest.ssa.num_components, 4);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");

   nir_ssa_def *val = loads[0x3]->src.ssa;
   ASSERT_EQ(val->bit_size, 16);
   ASSERT_EQ(val->num_components, 1);
   ASSERT_TRUE(test_alu(val->parent_instr, nir_op_ior));
   nir_ssa_def *low = nir_instr_as_alu(val->parent_instr)->src[0].src.ssa;
   nir_ssa_def *high = nir_instr_as_alu(val->parent_instr)->src[1].src.ssa;
   ASSERT_TRUE(test_alu(high->parent_instr, nir_op_ishl));
   high = nir_instr_as_alu(high->parent_instr)->src[0].src.ssa;
   ASSERT_TRUE(test_alu(low->parent_instr, nir_op_u2u16));
   ASSERT_TRUE(test_alu(high->parent_instr, nir_op_u2u16));
   ASSERT_TRUE(test_alu_def(low->parent_instr, 0, &load->dest.ssa, 2));
   ASSERT_TRUE(test_alu_def(high->parent_instr, 0, &load->dest.ssa, 3));
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_32_32_64)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
   create_load(nir_var_mem_ssbo, 0, 8, 0x2, 64);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 4);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "xy");

   nir_ssa_def *val = loads[0x2]->src.ssa;
   ASSERT_EQ(val->bit_size, 64);
   ASSERT_EQ(val->num_components, 1);
   ASSERT_TRUE(test_alu(val->parent_instr, nir_op_pack_64_2x32));
   nir_alu_instr *pack = nir_instr_as_alu(val->parent_instr);
   EXPECT_INSTR_SWIZZLES(pack, load, "zw");
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_32_32_64_64)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
   create_load(nir_var_mem_ssbo, 0, 8, 0x2, 64);
   create_load(nir_var_mem_ssbo, 0, 16, 0x3, 64);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 3);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo, true));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 64);
   ASSERT_EQ(load->dest.ssa.num_components, 3);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x3], load, "z");

   nir_ssa_def *val = loads[0x2]->src.ssa;
   ASSERT_EQ(val->bit_size, 64);
   ASSERT_EQ(val->num_components, 1);
   ASSERT_TRUE(test_alu(val->parent_instr, nir_op_mov));
   nir_alu_instr *mov = nir_instr_as_alu(val->parent_instr);
   EXPECT_INSTR_SWIZZLES(mov, load, "y");

   val = loads[0x1]->src.ssa;
   ASSERT_EQ(val->bit_size, 32);
   ASSERT_EQ(val->num_components, 2);
   ASSERT_TRUE(test_alu(val->parent_instr, nir_op_unpack_64_2x32));
   nir_alu_instr *unpack = nir_instr_as_alu(val->parent_instr);
   EXPECT_INSTR_SWIZZLES(unpack, load, "x");
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_intersecting_32_32_64)
{
   create_load(nir_var_mem_ssbo, 0, 4, 0x1, 32, 2);
   create_load(nir_var_mem_ssbo, 0, 8, 0x2, 64);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 3);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 4);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "xy");

   nir_ssa_def *val = loads[0x2]->src.ssa;
   ASSERT_EQ(val->bit_size, 64);
   ASSERT_EQ(val->num_components, 1);
   ASSERT_TRUE(test_alu(val->parent_instr, nir_op_pack_64_2x32));
   nir_alu_instr *pack = nir_instr_as_alu(val->parent_instr);
   EXPECT_INSTR_SWIZZLES(pack, load, "yz");
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_8_8_16)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 8);
   create_store(nir_var_mem_ssbo, 0, 1, 0x2, 8);
   create_store(nir_var_mem_ssbo, 0, 2, 0x3, 16);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 3);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0xf);
   nir_ssa_def *val = store->src[0].ssa;
   ASSERT_EQ(val->bit_size, 8);
   ASSERT_EQ(val->num_components, 4);
   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
   ASSERT_EQ(nir_const_value_as_uint(cv[0], 8), 0x10);
   ASSERT_EQ(nir_const_value_as_uint(cv[1], 8), 0x20);
   ASSERT_EQ(nir_const_value_as_uint(cv[2], 8), 0x30);
   ASSERT_EQ(nir_const_value_as_uint(cv[3], 8), 0x0);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_32_32_64)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
   create_store(nir_var_mem_ssbo, 0, 8, 0x2, 64);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0xf);
   nir_ssa_def *val = store->src[0].ssa;
   ASSERT_EQ(val->bit_size, 32);
   ASSERT_EQ(val->num_components, 4);
   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
   ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
   ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x11);
   ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x20);
   ASSERT_EQ(nir_const_value_as_uint(cv[3], 32), 0x0);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_32_32_64_64)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
   create_store(nir_var_mem_ssbo, 0, 8, 0x2, 64);
   create_store(nir_var_mem_ssbo, 0, 16, 0x3, 64);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 3);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0x7);
   nir_ssa_def *val = store->src[0].ssa;
   ASSERT_EQ(val->bit_size, 64);
   ASSERT_EQ(val->num_components, 3);
   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
   ASSERT_EQ(nir_const_value_as_uint(cv[0], 64), 0x1100000010ull);
   ASSERT_EQ(nir_const_value_as_uint(cv[1], 64), 0x20);
   ASSERT_EQ(nir_const_value_as_uint(cv[2], 64), 0x30);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_intersecting_32_32_64)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
   create_store(nir_var_mem_ssbo, 0, 4, 0x2, 64);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0x7);
   nir_ssa_def *val = store->src[0].ssa;
   ASSERT_EQ(val->bit_size, 32);
   ASSERT_EQ(val->num_components, 3);
   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
   ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
   ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20);
   ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x0);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_32_64)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32);
   create_store(nir_var_mem_ssbo, 0, 4, 0x2, 64, 2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_identical_wrmask)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 4, 1 | 4);
   create_store(nir_var_mem_ssbo, 0, 0, 0x2, 32, 4, 2 | 4 | 8);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0xf);
   nir_ssa_def *val = store->src[0].ssa;
   ASSERT_EQ(val->bit_size, 32);
   ASSERT_EQ(val->num_components, 4);
   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
   ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
   ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x21);
   ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x22);
   ASSERT_EQ(nir_const_value_as_uint(cv[3], 32), 0x23);
}

TEST_F(nir_load_store_vectorize_test, shared_load_adjacent)
{
   nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
   nir_deref_instr *deref = nir_build_deref_var(b, var);

   create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1);
   create_shared_load(nir_build_deref_array_imm(b, deref, 1), 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);

   deref = nir_src_as_deref(load->src[0]);
   ASSERT_EQ(deref->deref_type, nir_deref_type_cast);

   deref = nir_deref_instr_parent(deref);
   ASSERT_EQ(deref->deref_type, nir_deref_type_array);
   ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);

   deref = nir_deref_instr_parent(deref);
   ASSERT_EQ(deref->deref_type, nir_deref_type_var);
   ASSERT_EQ(deref->var, var);

   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
}

TEST_F(nir_load_store_vectorize_test, shared_load_distant_64bit)
{
   nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
   nir_deref_instr *deref = nir_build_deref_var(b, var);
   nir_ssa_dest_init(&deref->instr, &deref->dest, 1, 64, NULL);

   create_shared_load(nir_build_deref_array_imm(b, deref, 0x100000000), 0x1);
   create_shared_load(nir_build_deref_array_imm(b, deref, 0x200000001), 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);

   EXPECT_FALSE(run_vectorizer(nir_var_mem_shared));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
}

TEST_F(nir_load_store_vectorize_test, shared_load_adjacent_indirect)
{
   nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
   nir_deref_instr *deref = nir_build_deref_var(b, var);
   nir_ssa_def *index_base = nir_load_local_invocation_index(b);

   create_shared_load(nir_build_deref_array(b, deref, index_base), 0x1);
   create_shared_load(nir_build_deref_array(b, deref, nir_iadd_imm(b, index_base, 1)), 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);

   deref = nir_src_as_deref(load->src[0]);
   ASSERT_EQ(deref->deref_type, nir_deref_type_cast);

   deref = nir_deref_instr_parent(deref);
   ASSERT_EQ(deref->deref_type, nir_deref_type_array);
   ASSERT_EQ(deref->arr.index.ssa, index_base);

   deref = nir_deref_instr_parent(deref);
   ASSERT_EQ(deref->deref_type, nir_deref_type_var);
   ASSERT_EQ(deref->var, var);

   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
}

TEST_F(nir_load_store_vectorize_test, shared_load_adjacent_indirect_sub)
{
   nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
   nir_deref_instr *deref = nir_build_deref_var(b, var);
   nir_ssa_def *index_base = nir_load_local_invocation_index(b);
   nir_ssa_def *index_base_prev = nir_iadd_imm(b, index_base, 0xffffffff);

   create_shared_load(nir_build_deref_array(b, deref, index_base_prev), 0x1);
   create_shared_load(nir_build_deref_array(b, deref, index_base), 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);

   deref = nir_src_as_deref(load->src[0]);
   ASSERT_EQ(deref->deref_type, nir_deref_type_cast);

   deref = nir_deref_instr_parent(deref);
   ASSERT_EQ(deref->deref_type, nir_deref_type_array);
   ASSERT_EQ(deref->arr.index.ssa, index_base_prev);

   deref = nir_deref_instr_parent(deref);
   ASSERT_EQ(deref->deref_type, nir_deref_type_var);
   ASSERT_EQ(deref->var, var);

   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
}
1308 
TEST_F(nir_load_store_vectorize_test,shared_load_struct)1309 TEST_F(nir_load_store_vectorize_test, shared_load_struct)
1310 {
1311    glsl_struct_field fields[2] = {glsl_struct_field(glsl_uint_type(), "field0"),
1312                                   glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field1")};
1313 
1314    nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_struct_type(fields, 2, "Struct", false), "var");
1315    nir_deref_instr *deref = nir_build_deref_var(b, var);
1316 
1317    create_shared_load(nir_build_deref_struct(b, deref, 0), 0x1);
1318    create_shared_load(nir_build_deref_array_imm(b, nir_build_deref_struct(b, deref, 1), 0), 0x2);
1319 
1320    nir_validate_shader(b->shader, NULL);
1321    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1322 
1323    EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1324 
1325    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1326 
1327    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1328    ASSERT_EQ(load->dest.ssa.bit_size, 32);
1329    ASSERT_EQ(load->dest.ssa.num_components, 2);
1330 
1331    deref = nir_src_as_deref(load->src[0]);
1332    ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1333 
1334    deref = nir_deref_instr_parent(deref);
1335    ASSERT_EQ(deref->deref_type, nir_deref_type_struct);
1336    ASSERT_EQ(deref->strct.index, 0);
1337 
1338    deref = nir_deref_instr_parent(deref);
1339    ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1340    ASSERT_EQ(deref->var, var);
1341 
1342    EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1343    EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
1344 }
1345 
1346 TEST_F(nir_load_store_vectorize_test, shared_load_identical_store_adjacent)
1347 {
1348    nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1349    nir_deref_instr *deref = nir_build_deref_var(b, var);
1350 
1351    create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1);
1352    create_shared_store(nir_build_deref_array_imm(b, deref, 1), 0x2);
1353    create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x3);
1354 
1355    nir_validate_shader(b->shader, NULL);
1356    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1357    ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
1358 
1359    EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1360 
1361    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1362    ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
1363 
1364    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1365    ASSERT_EQ(load->dest.ssa.bit_size, 32);
1366    ASSERT_EQ(load->dest.ssa.num_components, 1);
1367 
1368    deref = nir_src_as_deref(load->src[0]);
1369    ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1370    ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);
1371 
1372    deref = nir_deref_instr_parent(deref);
1373    ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1374    ASSERT_EQ(deref->var, var);
1375 
1376    EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1377    EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
1378 }
1379 
1380 TEST_F(nir_load_store_vectorize_test, shared_load_identical_store_identical)
1381 {
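   /* Both loads read element 0 and a store to element 0 sits between them, so
    * the two loads can observe different values and must not be merged. */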
1382    nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1383    nir_deref_instr *deref = nir_build_deref_var(b, var);
1384 
1385    create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1);
1386    create_shared_store(nir_build_deref_array_imm(b, deref, 0), 0x2);
1387    create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x3);
1388 
1389    nir_validate_shader(b->shader, NULL);
1390    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1391 
1392    EXPECT_FALSE(run_vectorizer(nir_var_mem_shared));
1393 
1394    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1395 }
1396 
1397 TEST_F(nir_load_store_vectorize_test, shared_load_adjacent_store_identical)
1398 {
1399    nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1400    nir_deref_instr *deref = nir_build_deref_var(b, var);
1401 
1402    create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1);
1403    create_shared_store(nir_build_deref_array_imm(b, deref, 0), 0x2);
1404    create_shared_load(nir_build_deref_array_imm(b, deref, 1), 0x3);
1405 
1406    nir_validate_shader(b->shader, NULL);
1407    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1408    ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
1409 
1410    EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1411 
1412    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1413    ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
1414 
1415    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1416    ASSERT_EQ(load->dest.ssa.bit_size, 32);
1417    ASSERT_EQ(load->dest.ssa.num_components, 2);
1418 
1419    deref = nir_src_as_deref(load->src[0]);
1420    ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1421 
1422    deref = nir_deref_instr_parent(deref);
1423    ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1424    ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);
1425 
1426    deref = nir_deref_instr_parent(deref);
1427    ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1428    ASSERT_EQ(deref->var, var);
1429 
1430    EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1431    EXPECT_INSTR_SWIZZLES(movs[0x3], load, "y");
1432 }
1433 
1434 TEST_F(nir_load_store_vectorize_test, shared_load_bool)
1435 {
1436    nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_bool_type(), 4, 0), "var");
1437    nir_deref_instr *deref = nir_build_deref_var(b, var);
1438 
1439    create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1, 1);
1440    create_shared_load(nir_build_deref_array_imm(b, deref, 1), 0x2, 1);
1441 
1442    nir_validate_shader(b->shader, NULL);
1443    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1444 
1445    EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1446 
1447    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1448 
1449    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1450    ASSERT_EQ(load->dest.ssa.bit_size, 32);
1451    ASSERT_EQ(load->dest.ssa.num_components, 2);
1452 
1453    deref = nir_src_as_deref(load->src[0]);
1454    ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1455 
1456    deref = nir_deref_instr_parent(deref);
1457    ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1458    ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);
1459 
1460    deref = nir_deref_instr_parent(deref);
1461    ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1462    ASSERT_EQ(deref->var, var);
1463 
1464    ASSERT_TRUE(test_alu(loads[0x1]->src.ssa->parent_instr, nir_op_i2b1));
1465    ASSERT_TRUE(test_alu(loads[0x2]->src.ssa->parent_instr, nir_op_i2b1));
1466    ASSERT_TRUE(test_alu_def(loads[0x1]->src.ssa->parent_instr, 0, &load->dest.ssa, 0));
1467    ASSERT_TRUE(test_alu_def(loads[0x2]->src.ssa->parent_instr, 0, &load->dest.ssa, 1));
1468 }
1469 
1470 TEST_F(nir_load_store_vectorize_test, shared_load_bool_mixed)
1471 {
1472    glsl_struct_field fields[2] = {glsl_struct_field(glsl_bool_type(), "field0"),
1473                                   glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field1")};
1474 
1475    nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_struct_type(fields, 2, "Struct", false), "var");
1476    nir_deref_instr *deref = nir_build_deref_var(b, var);
1477 
1478    create_shared_load(nir_build_deref_struct(b, deref, 0), 0x1, 1);
1479    create_shared_load(nir_build_deref_array_imm(b, nir_build_deref_struct(b, deref, 1), 0), 0x2);
1480 
1481    nir_validate_shader(b->shader, NULL);
1482    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1483 
1484    EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1485 
1486    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1487 
1488    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1489    ASSERT_EQ(load->dest.ssa.bit_size, 32);
1490    ASSERT_EQ(load->dest.ssa.num_components, 2);
1491 
1492    deref = nir_src_as_deref(load->src[0]);
1493    ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1494 
1495    deref = nir_deref_instr_parent(deref);
1496    ASSERT_EQ(deref->deref_type, nir_deref_type_struct);
1497    ASSERT_EQ(deref->strct.index, 0);
1498 
1499    deref = nir_deref_instr_parent(deref);
1500    ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1501    ASSERT_EQ(deref->var, var);
1502 
1503    ASSERT_TRUE(test_alu(loads[0x1]->src.ssa->parent_instr, nir_op_i2b1));
1504    ASSERT_TRUE(test_alu_def(loads[0x1]->src.ssa->parent_instr, 0, &load->dest.ssa, 0));
1505 
1506    EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
1507 }
1508 
1509 TEST_F(nir_load_store_vectorize_test, shared_store_adjacent)
1510 {
1511    nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1512    nir_deref_instr *deref = nir_build_deref_var(b, var);
1513 
1514    create_shared_store(nir_build_deref_array_imm(b, deref, 0), 0x1);
1515    create_shared_store(nir_build_deref_array_imm(b, deref, 1), 0x2);
1516 
1517    nir_validate_shader(b->shader, NULL);
1518    ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 2);
1519 
1520    EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1521 
1522    ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
1523 
1524    nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_deref, 0);
1525    ASSERT_EQ(nir_intrinsic_write_mask(store), 0x3);
1526    nir_ssa_def *val = store->src[1].ssa;
1527    ASSERT_EQ(val->bit_size, 32);
1528    ASSERT_EQ(val->num_components, 2);
1529    nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
1530    ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
1531    ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20);
1532 
1533    deref = nir_src_as_deref(store->src[0]);
1534    ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1535 
1536    deref = nir_deref_instr_parent(deref);
1537    ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1538    ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);
1539 
1540    deref = nir_deref_instr_parent(deref);
1541    ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1542    ASSERT_EQ(deref->var, var);
1543 }
1544 
1545 TEST_F(nir_load_store_vectorize_test, push_const_load_separate_base)
1546 {
1547    create_load(nir_var_mem_push_const, 0, 0, 0x1);
1548    nir_intrinsic_set_base(create_load(nir_var_mem_push_const, 0, 4, 0x2), 4);
1549 
1550    nir_validate_shader(b->shader, NULL);
1551    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1552 
1553    EXPECT_FALSE(run_vectorizer(nir_var_mem_push_const));
1554 
1555    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1556 }
1557 
1558 TEST_F(nir_load_store_vectorize_test, push_const_load_separate_direct_direct)
1559 {
1560    create_load(nir_var_mem_push_const, 0, 0, 0x1);
1561    create_load(nir_var_mem_push_const, 0, 8, 0x2);
1562 
1563    nir_validate_shader(b->shader, NULL);
1564    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1565 
1566    EXPECT_FALSE(run_vectorizer(nir_var_mem_push_const));
1567 
1568    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1569 }
1570 
1571 TEST_F(nir_load_store_vectorize_test, push_const_load_separate_direct_indirect)
1572 {
1573    nir_ssa_def *index_base = nir_load_local_invocation_index(b);
1574    create_load(nir_var_mem_push_const, 0, 0, 0x1);
1575    create_indirect_load(nir_var_mem_push_const, 0, index_base, 0x2);
1576 
1577    nir_validate_shader(b->shader, NULL);
1578    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1579 
1580    EXPECT_FALSE(run_vectorizer(nir_var_mem_push_const));
1581 
1582    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1583 }
1584 
1585 TEST_F(nir_load_store_vectorize_test, push_const_load_separate_indirect_indirect)
1586 {
1587    nir_ssa_def *index_base = nir_load_local_invocation_index(b);
1588    create_indirect_load(nir_var_mem_push_const, 0,
1589       nir_iadd_imm(b, nir_imul_imm(b, nir_iadd_imm(b, index_base, 2), 16), 32), 0x1);
1590    create_indirect_load(nir_var_mem_push_const, 0,
1591       nir_iadd_imm(b, nir_imul_imm(b, nir_iadd_imm(b, index_base, 3), 16), 32), 0x2);
1592 
1593    nir_validate_shader(b->shader, NULL);
1594    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1595 
1596    EXPECT_FALSE(run_vectorizer(nir_var_mem_push_const));
1597 
1598    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1599 }
1600 
1601 TEST_F(nir_load_store_vectorize_test, push_const_load_adjacent_complex_indirect)
1602 {
1603    nir_ssa_def *index_base = nir_load_local_invocation_index(b);
1604    /* vec4 pc[]; pc[gl_LocalInvocationIndex].w; pc[gl_LocalInvocationIndex+1].x; */
1605    nir_ssa_def *low = nir_iadd_imm(b, nir_imul_imm(b, index_base, 16), 12);
1606    nir_ssa_def *high = nir_imul_imm(b, nir_iadd_imm(b, index_base, 1), 16);
1607    create_indirect_load(nir_var_mem_push_const, 0, low, 0x1);
1608    create_indirect_load(nir_var_mem_push_const, 0, high, 0x2);
1609 
1610    nir_validate_shader(b->shader, NULL);
1611    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1612 
1613    EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const));
1614 
1615    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 1);
1616 
1617    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_push_constant, 0);
1618    ASSERT_EQ(load->dest.ssa.bit_size, 32);
1619    ASSERT_EQ(load->dest.ssa.num_components, 2);
1620    ASSERT_EQ(load->src[0].ssa, low);
1621    EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1622    EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
1623 }
1624 
1625 TEST_F(nir_load_store_vectorize_test, ssbo_alias0)
1626 {
1627    nir_ssa_def *index_base = nir_load_local_invocation_index(b);
1628    create_load(nir_var_mem_ssbo, 0, 0, 0x1);
1629    create_indirect_store(nir_var_mem_ssbo, 0, index_base, 0x2);
1630    create_load(nir_var_mem_ssbo, 0, 0, 0x3);
1631 
1632    nir_validate_shader(b->shader, NULL);
1633    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1634 
1635    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1636 
1637    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1638 }
1639 
1640 TEST_F(nir_load_store_vectorize_test, ssbo_alias1)
1641 {
1642    nir_ssa_def *load_base = nir_load_global_invocation_index(b, 32);
1643    nir_ssa_def *store_base = nir_load_local_invocation_index(b);
1644    create_indirect_load(nir_var_mem_ssbo, 0, load_base, 0x1);
1645    create_indirect_store(nir_var_mem_ssbo, 0, store_base, 0x2);
1646    create_indirect_load(nir_var_mem_ssbo, 0, load_base, 0x3);
1647 
1648    nir_validate_shader(b->shader, NULL);
1649    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1650 
1651    EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));
1652 
1653    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1654 }
1655 
1656 TEST_F(nir_load_store_vectorize_test, DISABLED_ssbo_alias2)
1657 {
1658    /* TODO: try to combine these loads */
1659    nir_ssa_def *index_base = nir_load_local_invocation_index(b);
1660    nir_ssa_def *offset = nir_iadd_imm(b, nir_imul_imm(b, index_base, 16), 4);
1661    create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
1662    create_store(nir_var_mem_ssbo, 0, 0, 0x2);
1663    create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x3);
1664 
1665    nir_validate_shader(b->shader, NULL);
1666    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1667 
1668    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1669 
1670    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
1671 
1672    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
1673    ASSERT_EQ(load->dest.ssa.bit_size, 32);
1674    ASSERT_EQ(load->dest.ssa.num_components, 1);
1675    ASSERT_EQ(load->src[1].ssa, offset);
1676    EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1677    EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
1678 }
1679 
1680 TEST_F(nir_load_store_vectorize_test, ssbo_alias3)
1681 {
1682    /* These loads could be combined if nir_alu_instr::no_unsigned_wrap were set
1683     * on the offset calculation. As it is, they can't be combined: if
1684     * index_base == 268435455, the addition wraps around and offset == 0. */
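   /* Worked example of the wrap-around: with index_base == 268435455
    * (0x0fffffff), offset == 0x0fffffff * 16 + 16 == 0x100000000, which
    * truncates to 0 in 32 bits, so the load can alias the store at offset 0. */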
1685    nir_ssa_def *index_base = nir_load_local_invocation_index(b);
1686    nir_ssa_def *offset = nir_iadd_imm(b, nir_imul_imm(b, index_base, 16), 16);
1687    create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
1688    create_store(nir_var_mem_ssbo, 0, 0, 0x2);
1689    create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x3);
1690 
1691    nir_validate_shader(b->shader, NULL);
1692    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1693 
1694    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1695 
1696    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1697 }
1698 
1699 TEST_F(nir_load_store_vectorize_test, DISABLED_ssbo_alias4)
1700 {
1701    /* TODO: try to combine these loads */
1702    nir_ssa_def *index_base = nir_load_local_invocation_index(b);
1703    nir_ssa_def *offset = nir_iadd_imm(b, nir_imul_imm(b, index_base, 16), 16);
1704    nir_instr_as_alu(offset->parent_instr)->no_unsigned_wrap = true;
1705    create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
1706    create_store(nir_var_mem_ssbo, 0, 0, 0x2);
1707    create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x3);
1708 
1709    nir_validate_shader(b->shader, NULL);
1710    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1711 
1712    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1713 
1714    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
1715 
1716    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
1717    ASSERT_EQ(load->dest.ssa.bit_size, 32);
1718    ASSERT_EQ(load->dest.ssa.num_components, 1);
1719    ASSERT_EQ(load->src[1].ssa, offset);
1720    EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1721    EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
1722 }
1723 
1724 TEST_F(nir_load_store_vectorize_test, ssbo_alias5)
1725 {
1726    create_load(nir_var_mem_ssbo, 0, 0, 0x1);
1727    create_store(nir_var_mem_ssbo, 1, 0, 0x2);
1728    create_load(nir_var_mem_ssbo, 0, 0, 0x3);
1729 
1730    nir_validate_shader(b->shader, NULL);
1731    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1732 
1733    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1734 
1735    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1736 }
1737 
1738 TEST_F(nir_load_store_vectorize_test, ssbo_alias6)
1739 {
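   /* Unlike ssbo_alias5, every access here is ACCESS_RESTRICT, so the store to
    * binding 1 is known not to alias binding 0 and the two identical loads can
    * be merged. */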
1740    create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 1, ACCESS_RESTRICT);
1741    create_store(nir_var_mem_ssbo, 1, 0, 0x2, 32, 1, 0xf, ACCESS_RESTRICT);
1742    create_load(nir_var_mem_ssbo, 0, 0, 0x3, 32, 1, ACCESS_RESTRICT);
1743 
1744    nir_validate_shader(b->shader, NULL);
1745    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1746 
1747    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1748 
1749    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
1750 
1751    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
1752    ASSERT_EQ(load->dest.ssa.bit_size, 32);
1753    ASSERT_EQ(load->dest.ssa.num_components, 1);
1754    ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
1755    EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1756    EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
1757 }
1758 
1759 TEST_F(nir_load_store_vectorize_test, DISABLED_shared_alias0)
1760 {
1761    /* TODO: implement type-based alias analysis so that these loads can be
1762     * combined. This is made a bit more difficult than simply using
1763     * nir_compare_derefs() because the vectorizer creates loads/stores with
1764     * casted derefs. The solution would probably be to keep multiple derefs for
1765     * an entry (one for each load/store combined into it). */
1766    glsl_struct_field fields[2] = {glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field0"),
1767                                   glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field1")};
1768 
1769    nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_struct_type(fields, 2, "Struct", false), "var");
1770    nir_deref_instr *deref = nir_build_deref_var(b, var);
1771 
1772    nir_ssa_def *index0 = nir_load_local_invocation_index(b);
1773    nir_ssa_def *index1 = nir_load_global_invocation_index(b, 32);
1774    nir_deref_instr *load_deref = nir_build_deref_array(b, nir_build_deref_struct(b, deref, 0), index0);
1775 
1776    create_shared_load(load_deref, 0x1);
1777    create_shared_store(nir_build_deref_array(b, nir_build_deref_struct(b, deref, 1), index1), 0x2);
1778    create_shared_load(load_deref, 0x3);
1779 
1780    nir_validate_shader(b->shader, NULL);
1781    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1782 
1783    EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1784 
1785    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1786 
1787    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1788    ASSERT_EQ(load->dest.ssa.bit_size, 32);
1789    ASSERT_EQ(load->dest.ssa.num_components, 1);
1790    ASSERT_EQ(load->src[0].ssa, &load_deref->dest.ssa);
1791    EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1792    EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
1793 }
1794 
1795 TEST_F(nir_load_store_vectorize_test, shared_alias1)
1796 {
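   /* var0 and var1 are distinct shared variables, so the intervening store to
    * var1 cannot alias the loads from var0, and the two identical loads can be
    * merged into one. */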
1797    nir_variable *var0 = nir_variable_create(b->shader, nir_var_mem_shared, glsl_uint_type(), "var0");
1798    nir_variable *var1 = nir_variable_create(b->shader, nir_var_mem_shared, glsl_uint_type(), "var1");
1799    nir_deref_instr *load_deref = nir_build_deref_var(b, var0);
1800 
1801    create_shared_load(load_deref, 0x1);
1802    create_shared_store(nir_build_deref_var(b, var1), 0x2);
1803    create_shared_load(load_deref, 0x3);
1804 
1805    nir_validate_shader(b->shader, NULL);
1806    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1807 
1808    EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1809 
1810    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1811 
1812    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1813    ASSERT_EQ(load->dest.ssa.bit_size, 32);
1814    ASSERT_EQ(load->dest.ssa.num_components, 1);
1815    ASSERT_EQ(load->src[0].ssa, &load_deref->dest.ssa);
1816    EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1817    EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
1818 }
1819 
1820 TEST_F(nir_load_store_vectorize_test, ssbo_load_distant_64bit)
1821 {
1822    create_indirect_load(nir_var_mem_ssbo, 0, nir_imm_int64(b, 0x100000000), 0x1);
1823    create_indirect_load(nir_var_mem_ssbo, 0, nir_imm_int64(b, 0x200000004), 0x2);
1824 
1825    nir_validate_shader(b->shader, NULL);
1826    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1827 
1828    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1829 
1830    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1831 }
1832 
1833 TEST_F(nir_load_store_vectorize_test, ssbo_load_distant_indirect_64bit)
1834 {
1835    nir_ssa_def *index_base = nir_u2u64(b, nir_load_local_invocation_index(b));
1836    nir_ssa_def *first = nir_imul_imm(b, index_base, 0x100000000);
1837    nir_ssa_def *second = nir_imul_imm(b, index_base, 0x200000000);
1838    create_indirect_load(nir_var_mem_ssbo, 0, first, 0x1);
1839    create_indirect_load(nir_var_mem_ssbo, 0, second, 0x2);
1840 
1841    nir_validate_shader(b->shader, NULL);
1842    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1843 
1844    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1845 
1846    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1847 }
1848 
1849 TEST_F(nir_load_store_vectorize_test, ssbo_offset_overflow_robust)
1850 {
1851    create_load(nir_var_mem_ssbo, 0, 0xfffffffc, 0x1);
1852    create_load(nir_var_mem_ssbo, 0, 0x0, 0x2);
1853 
1854    nir_validate_shader(b->shader, NULL);
1855    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1856 
1857    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo, false, nir_var_mem_ssbo));
1858 
1859    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1860 }
1861 
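/* The stride tests below exercise robust-buffer-access handling: roughly, two
 * loads should presumably only be merged when the combined access cannot wrap
 * around or reach bytes that the original, individually bounds-checked loads
 * could not have touched; whether that can happen depends on the stride of the
 * offset computation. */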
1862 TEST_F(nir_load_store_vectorize_test, ssbo_offset_overflow_robust_indirect_stride1)
1863 {
1864    nir_ssa_def *offset = nir_load_local_invocation_index(b);
1865    create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
1866    create_indirect_load(nir_var_mem_ssbo, 0, nir_iadd_imm(b, offset, 4), 0x2);
1867 
1868    nir_validate_shader(b->shader, NULL);
1869    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1870 
1871    EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo, false, nir_var_mem_ssbo));
1872 
1873    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1874 }
1875 
1876 TEST_F(nir_load_store_vectorize_test, ssbo_offset_overflow_robust_indirect_stride8)
1877 {
1878    nir_ssa_def *offset = nir_load_local_invocation_index(b);
1879    offset = nir_imul_imm(b, offset, 8);
1880    create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
1881    create_indirect_load(nir_var_mem_ssbo, 0, nir_iadd_imm(b, offset, 4), 0x2);
1882 
1883    nir_validate_shader(b->shader, NULL);
1884    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1885 
1886    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo, false, nir_var_mem_ssbo));
1887 
1888    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
1889 }
1890 
1891 TEST_F(nir_load_store_vectorize_test, ssbo_offset_overflow_robust_indirect_stride12)
1892 {
1893    nir_ssa_def *offset = nir_load_local_invocation_index(b);
1894    offset = nir_imul_imm(b, offset, 12);
1895    create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
1896    nir_ssa_def *offset_4 = nir_iadd_imm(b, offset, 4);
1897    create_indirect_load(nir_var_mem_ssbo, 0, offset_4, 0x2);
1898    create_indirect_load(nir_var_mem_ssbo, 0, nir_iadd_imm(b, offset, 8), 0x3);
1899 
1900    nir_validate_shader(b->shader, NULL);
1901    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 3);
1902 
1903    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo, false, nir_var_mem_ssbo));
1904 
1905    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1906 
1907    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
1908    ASSERT_EQ(load->dest.ssa.bit_size, 32);
1909    ASSERT_EQ(load->dest.ssa.num_components, 1);
1910    ASSERT_EQ(load->src[1].ssa, offset);
1911    EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1912 
1913    load = get_intrinsic(nir_intrinsic_load_ssbo, 1);
1914    ASSERT_EQ(load->dest.ssa.bit_size, 32);
1915    ASSERT_EQ(load->dest.ssa.num_components, 2);
1916    ASSERT_EQ(load->src[1].ssa, offset_4);
1917    EXPECT_INSTR_SWIZZLES(movs[0x2], load, "x");
1918    EXPECT_INSTR_SWIZZLES(movs[0x3], load, "y");
1919 }
1920 
1921 TEST_F(nir_load_store_vectorize_test, ssbo_offset_overflow_robust_indirect_stride16)
1922 {
1923    nir_ssa_def *offset = nir_load_local_invocation_index(b);
1924    offset = nir_imul_imm(b, offset, 16);
1925    create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
1926    create_indirect_load(nir_var_mem_ssbo, 0, nir_iadd_imm(b, offset, 4), 0x2);
1927    create_indirect_load(nir_var_mem_ssbo, 0, nir_iadd_imm(b, offset, 8), 0x3);
1928    create_indirect_load(nir_var_mem_ssbo, 0, nir_iadd_imm(b, offset, 12), 0x4);
1929 
1930    nir_validate_shader(b->shader, NULL);
1931    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 4);
1932 
1933    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo, false, nir_var_mem_ssbo));
1934 
1935    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
1936 }
1937 
1938 TEST_F(nir_load_store_vectorize_test, shared_offset_overflow_robust_indirect_stride12)
1939 {
1940    nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared,
1941                                            glsl_array_type(glsl_uint_type(), 4, 0), "var");
1942    nir_deref_instr *deref = nir_build_deref_var(b, var);
1943 
1944    nir_ssa_def *index = nir_load_local_invocation_index(b);
1945    index = nir_imul_imm(b, index, 3);
1946    create_shared_load(nir_build_deref_array(b, deref, index), 0x1);
1947    create_shared_load(nir_build_deref_array(b, deref, nir_iadd_imm(b, index, 1)), 0x2);
1948    create_shared_load(nir_build_deref_array(b, deref, nir_iadd_imm(b, index, 2)), 0x3);
1949 
1950    nir_validate_shader(b->shader, NULL);
1951    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 3);
1952 
1953    EXPECT_FALSE(run_vectorizer(nir_var_mem_shared, false, nir_var_mem_shared));
1954 
1955    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 3);
1956 }
1957 
1958 TEST_F(nir_load_store_vectorize_test, ubo_alignment_16_4)
1959 {
1960    nir_ssa_def *offset = nir_load_local_invocation_index(b);
1961    offset = nir_imul_imm(b, offset, 16);
1962    offset = nir_iadd_imm(b, offset, 4);
1963    nir_intrinsic_instr *load = create_indirect_load(nir_var_mem_ubo, 0, offset,
1964                                                     0x1);
1965 
1966    EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
1967    EXPECT_EQ(nir_intrinsic_align_mul(load), 16);
1968    EXPECT_EQ(nir_intrinsic_align_offset(load), 4);
1969 }
1970 
1971 TEST_F(nir_load_store_vectorize_test, ubo_alignment_16_4_swapped)
1972 {
1973    nir_ssa_def *offset = nir_load_local_invocation_index(b);
1974    offset = nir_iadd_imm(b, offset, 1);
1975    offset = nir_imul_imm(b, offset, 16);
1976    offset = nir_iadd_imm(b, offset, 4);
1977    nir_intrinsic_instr *load =
1978       create_indirect_load(nir_var_mem_ubo, 0, offset, 0x1);
1979 
1980    EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
1981    EXPECT_EQ(nir_intrinsic_align_mul(load), 16);
1982    EXPECT_EQ(nir_intrinsic_align_offset(load), 4);
1983 }
1984 
1985 /* Check offset % mul != 0 */
1986 TEST_F(nir_load_store_vectorize_test, ubo_alignment_16_20)
1987 {
1988    nir_ssa_def *offset = nir_load_local_invocation_index(b);
1989    offset = nir_imul_imm(b, offset, 16);
1990    offset = nir_iadd_imm(b, offset, 20);
1991    nir_intrinsic_instr *load = create_indirect_load(nir_var_mem_ubo, 0, offset,
1992                                                     0x1);
1993 
1994    EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
1995    EXPECT_EQ(nir_intrinsic_align_mul(load), 16);
1996    EXPECT_EQ(nir_intrinsic_align_offset(load), 4);
1997 }
1998 
1999 /* Check that we don't upgrade to non-power-of-two alignments. */
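/* With offset == id * 24 + 4, the best power-of-two guarantee from the stride
 * is 8 (24 == 8 * 3), so the expected result is align_mul == 8 with
 * align_offset == 4 rather than a larger, non-power-of-two alignment. */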
2000 TEST_F(nir_load_store_vectorize_test, ubo_alignment_24_4)
2001 {
2002    nir_ssa_def *offset = nir_load_local_invocation_index(b);
2003    offset = nir_imul_imm(b, offset, 24);
2004    offset = nir_iadd_imm(b, offset, 4);
2005    nir_intrinsic_instr *load =
2006       create_indirect_load(nir_var_mem_ubo, 0, offset, 0x1);
2007 
2008    EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
2009    EXPECT_EQ(nir_intrinsic_align_mul(load), 8);
2010    EXPECT_EQ(nir_intrinsic_align_offset(load), 4);
2011 }
2012 
2013 /* Check that we don't upgrade to non-power-of-two alignments. */
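/* Here offset == id * 64 + instance * 16 + 8; both variable terms are
 * multiples of 16, so the guaranteed alignment is align_mul == 16 with
 * align_offset == 8. */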
2014 TEST_F(nir_load_store_vectorize_test, ubo_alignment_64_16_8)
2015 {
2016    nir_ssa_def *x = nir_imul_imm(b, nir_load_local_invocation_index(b), 64);
2017    nir_ssa_def *y = nir_imul_imm(b, nir_load_instance_id(b), 16);
2018    nir_ssa_def *offset = nir_iadd(b, x, y);
2019    offset = nir_iadd_imm(b, offset, 8);
2020    nir_intrinsic_instr *load =
2021       create_indirect_load(nir_var_mem_ubo, 0, offset, 0x1);
2022 
2023    EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
2024    EXPECT_EQ(nir_intrinsic_align_mul(load), 16);
2025    EXPECT_EQ(nir_intrinsic_align_offset(load), 8);
2026 }
2027 
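/* For a fully constant offset the alignment is exact: align_mul saturates to
 * NIR_ALIGN_MUL_MAX and align_offset is the constant itself (100 here). */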
2028 TEST_F(nir_load_store_vectorize_test, ubo_alignment_const_100)
2029 {
2030    nir_intrinsic_instr *load =
2031       create_indirect_load(nir_var_mem_ubo, 0, nir_imm_int(b, 100), 0x1);
2032 
2033    EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
2034    EXPECT_EQ(nir_intrinsic_align_mul(load), NIR_ALIGN_MUL_MAX);
2035    EXPECT_EQ(nir_intrinsic_align_offset(load), 100);
2036 }
2037