1 /*
2  * Copyright © Microsoft Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include <stdio.h>
25 #include <stdint.h>
26 #include <stdexcept>
27 #include <vector>
28 
29 #include <directx/d3d12.h>
30 #include <dxgi1_4.h>
31 #include <gtest/gtest.h>
32 #include <wrl.h>
33 
34 #include "compute_test.h"
35 
36 using std::vector;
37 
TEST_F(ComputeTest,runtime_memcpy)38 TEST_F(ComputeTest, runtime_memcpy)
39 {
40    struct shift { uint8_t val; uint8_t shift; uint16_t ret; };
41    const char *kernel_source =
42    "struct shift { uchar val; uchar shift; ushort ret; };\n\
43    __kernel void main_test(__global struct shift *inout)\n\
44    {\n\
45       uint id = get_global_id(0);\n\
46       uint id2 = id + get_global_id(1);\n\
47       struct shift lc[4] = { { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }};\n\
48       lc[id] = inout[id];\n\
49       inout[id2].ret = (ushort) lc[id2].val << (ushort) lc[id2].shift;\n\
50    }\n";
51 
52    auto inout = ShaderArg<struct shift>({
53          { 0x10, 1, 0xffff },
54          { 0x20, 2, 0xffff },
55          { 0x30, 3, 0xffff },
56          { 0x40, 4, 0xffff },
57       },
58       SHADER_ARG_INOUT);
59    const uint16_t expected[] = { 0x20, 0x80, 0x180, 0x400 };
60    run_shader(kernel_source, inout.size(), 1, 1, inout);
61    for (int i = 0; i < inout.size(); ++i)
62       EXPECT_EQ(inout[i].ret, expected[i]);
63 }
64 
TEST_F(ComputeTest,two_global_arrays)65 TEST_F(ComputeTest, two_global_arrays)
66 {
67    const char *kernel_source =
68    "__kernel void main_test(__global uint *g1, __global uint *g2)\n\
69    {\n\
70        uint idx = get_global_id(0);\n\
71        g1[idx] -= g2[idx];\n\
72    }\n";
73    auto g1 = ShaderArg<uint32_t>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT);
74    auto g2 = ShaderArg<uint32_t>({ 1, 2, 3, 4 }, SHADER_ARG_INPUT);
75    const uint32_t expected[] = {
76       9, 18, 27, 36
77    };
78 
79    run_shader(kernel_source, g1.size(), 1, 1, g1, g2);
80    for (int i = 0; i < g1.size(); ++i)
81       EXPECT_EQ(g1[i], expected[i]);
82 }
83 
84 /* Disabled until saturated conversions from f32->i64 fixed (mesa/mesa#3824) */
TEST_F(ComputeTest,DISABLED_i64tof32)85 TEST_F(ComputeTest, DISABLED_i64tof32)
86 {
87    const char *kernel_source =
88    "__kernel void main_test(__global long *out, __constant long *in)\n\
89    {\n\
90        __local float tmp[12];\n\
91        uint idx = get_global_id(0);\n\
92        tmp[idx] = in[idx];\n\
93        barrier(CLK_LOCAL_MEM_FENCE);\n\
94        out[idx] = tmp[idx + get_global_id(1)];\n\
95    }\n";
96    auto in = ShaderArg<int64_t>({ 0x100000000LL,
97                                   -0x100000000LL,
98                                   0x7fffffffffffffffLL,
99                                   0x4000004000000000LL,
100                                   0x4000003fffffffffLL,
101                                   0x4000004000000001LL,
102                                   -1,
103                                   -0x4000004000000000LL,
104                                   -0x4000003fffffffffLL,
105                                   -0x4000004000000001LL,
106                                   0,
107 				  INT64_MIN },
108                                 SHADER_ARG_INPUT);
109    auto out = ShaderArg<int64_t>(std::vector<int64_t>(12, 0xdeadbeed), SHADER_ARG_OUTPUT);
110    const int64_t expected[] = {
111       0x100000000LL,
112       -0x100000000LL,
113       0x7fffffffffffffffLL,
114       0x4000000000000000LL,
115       0x4000000000000000LL,
116       0x4000008000000000LL,
117       -1,
118       -0x4000000000000000LL,
119       -0x4000000000000000LL,
120       -0x4000008000000000LL,
121       0,
122       INT64_MIN,
123    };
124 
125    run_shader(kernel_source, out.size(), 1, 1, out, in);
126    for (int i = 0; i < out.size(); ++i) {
127       EXPECT_EQ((int64_t)out[i], expected[i]);
128    }
129 }
TEST_F(ComputeTest,two_constant_arrays)130 TEST_F(ComputeTest, two_constant_arrays)
131 {
132    const char *kernel_source =
133    "__kernel void main_test(__constant uint *c1, __global uint *g1, __constant uint *c2)\n\
134    {\n\
135        uint idx = get_global_id(0);\n\
136        g1[idx] -= c1[idx] + c2[idx];\n\
137    }\n";
138    auto g1 = ShaderArg<uint32_t>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT);
139    auto c1 = ShaderArg<uint32_t>({ 1, 2, 3, 4 }, SHADER_ARG_INPUT);
140    auto c2 = ShaderArg<uint32_t>(std::vector<uint32_t>(16384, 5), SHADER_ARG_INPUT);
141    const uint32_t expected[] = {
142       4, 13, 22, 31
143    };
144 
145    run_shader(kernel_source, g1.size(), 1, 1, c1, g1, c2);
146    for (int i = 0; i < g1.size(); ++i)
147       EXPECT_EQ(g1[i], expected[i]);
148 }
149 
TEST_F(ComputeTest,null_constant_ptr)150 TEST_F(ComputeTest, null_constant_ptr)
151 {
152    const char *kernel_source =
153    "__kernel void main_test(__global uint *g1, __constant uint *c1)\n\
154    {\n\
155        __constant uint fallback[] = {2, 3, 4, 5};\n\
156        __constant uint *c = c1 ? c1 : fallback;\n\
157        uint idx = get_global_id(0);\n\
158        g1[idx] -= c[idx];\n\
159    }\n";
160    auto g1 = ShaderArg<uint32_t>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT);
161    auto c1 = ShaderArg<uint32_t>({ 1, 2, 3, 4 }, SHADER_ARG_INPUT);
162    const uint32_t expected1[] = {
163       9, 18, 27, 36
164    };
165 
166    run_shader(kernel_source, g1.size(), 1, 1, g1, c1);
167    for (int i = 0; i < g1.size(); ++i)
168       EXPECT_EQ(g1[i], expected1[i]);
169 
170    const uint32_t expected2[] = {
171       8, 17, 26, 35
172    };
173 
174    g1 = ShaderArg<uint32_t>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT);
175    auto c2 = NullShaderArg();
176    run_shader(kernel_source, g1.size(), 1, 1, g1, c2);
177    for (int i = 0; i < g1.size(); ++i)
178       EXPECT_EQ(g1[i], expected2[i]);
179 }
180 
181 /* This test seems to fail on older versions of WARP. */
TEST_F(ComputeTest,DISABLED_null_global_ptr)182 TEST_F(ComputeTest, DISABLED_null_global_ptr)
183 {
184    const char *kernel_source =
185    "__kernel void main_test(__global uint *g1, __global uint *g2)\n\
186    {\n\
187        __constant uint fallback[] = {2, 3, 4, 5};\n\
188        uint idx = get_global_id(0);\n\
189        g1[idx] -= g2 ? g2[idx] : fallback[idx];\n\
190    }\n";
191    auto g1 = ShaderArg<uint32_t>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT);
192    auto g2 = ShaderArg<uint32_t>({ 1, 2, 3, 4 }, SHADER_ARG_INPUT);
193    const uint32_t expected1[] = {
194       9, 18, 27, 36
195    };
196 
197    run_shader(kernel_source, g1.size(), 1, 1, g1, g2);
198    for (int i = 0; i < g1.size(); ++i)
199       EXPECT_EQ(g1[i], expected1[i]);
200 
201    const uint32_t expected2[] = {
202       8, 17, 26, 35
203    };
204 
205    g1 = ShaderArg<uint32_t>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT);
206    auto g2null = NullShaderArg();
207    run_shader(kernel_source, g1.size(), 1, 1, g1, g2null);
208    for (int i = 0; i < g1.size(); ++i)
209       EXPECT_EQ(g1[i], expected2[i]);
210 }
211 
TEST_F(ComputeTest,ret_constant_ptr)212 TEST_F(ComputeTest, ret_constant_ptr)
213 {
214    struct s { uint64_t ptr; uint32_t val; };
215    const char *kernel_source =
216    "struct s { __constant uint *ptr; uint val; };\n\
217    __kernel void main_test(__global struct s *out, __constant uint *in)\n\
218    {\n\
219        __constant uint foo[] = { 1, 2 };\n\
220        uint idx = get_global_id(0);\n\
221        if (idx == 0)\n\
222           out[idx].ptr = foo;\n\
223        else\n\
224           out[idx].ptr = in;\n\
225        out[idx].val = out[idx].ptr[idx];\n\
226    }\n";
227    auto out = ShaderArg<struct s>(std::vector<struct s>(2, {0xdeadbeefdeadbeef, 0}), SHADER_ARG_OUTPUT);
228    auto in = ShaderArg<uint32_t>({ 3, 4 }, SHADER_ARG_INPUT);
229    const uint32_t expected_val[] = {
230       1, 4
231    };
232    const uint64_t expected_ptr[] = {
233       2ull << 32, 1ull << 32
234    };
235 
236    run_shader(kernel_source, out.size(), 1, 1, out, in);
237    for (int i = 0; i < out.size(); ++i) {
238       EXPECT_EQ(out[i].val, expected_val[i]);
239       EXPECT_EQ(out[i].ptr, expected_ptr[i]);
240    }
241 }
242 
TEST_F(ComputeTest,ret_global_ptr)243 TEST_F(ComputeTest, ret_global_ptr)
244 {
245    struct s { uint64_t ptr; uint32_t val; };
246    const char *kernel_source =
247    "struct s { __global uint *ptr; uint val; };\n\
248    __kernel void main_test(__global struct s *out, __global uint *in1, __global uint *in2)\n\
249    {\n\
250        uint idx = get_global_id(0);\n\
251        out[idx].ptr = idx ? in2 : in1;\n\
252        out[idx].val = out[idx].ptr[idx];\n\
253    }\n";
254    auto out = ShaderArg<struct s>(std::vector<struct s>(2, {0xdeadbeefdeadbeef, 0}), SHADER_ARG_OUTPUT);
255    auto in1 = ShaderArg<uint32_t>({ 1, 2 }, SHADER_ARG_INPUT);
256    auto in2 = ShaderArg<uint32_t>({ 3, 4 }, SHADER_ARG_INPUT);
257    const uint32_t expected_val[] = {
258       1, 4
259    };
260    const uint64_t expected_ptr[] = {
261       1ull << 32, 2ull << 32
262    };
263 
264    run_shader(kernel_source, out.size(), 1, 1, out, in1, in2);
265    for (int i = 0; i < out.size(); ++i) {
266       EXPECT_EQ(out[i].val, expected_val[i]);
267       EXPECT_EQ(out[i].ptr, expected_ptr[i]);
268    }
269 }
270 
TEST_F(ComputeTest,ret_local_ptr)271 TEST_F(ComputeTest, ret_local_ptr)
272 {
273    struct s { uint64_t ptr; };
274    const char *kernel_source =
275    "struct s { __local uint *ptr; };\n\
276    __kernel void main_test(__global struct s *out)\n\
277    {\n\
278        __local uint tmp[2];\n\
279        uint idx = get_global_id(0);\n\
280        tmp[idx] = idx;\n\
281        out[idx].ptr = &tmp[idx];\n\
282    }\n";
283    auto out = ShaderArg<struct s>(std::vector<struct s>(2, { 0xdeadbeefdeadbeef }), SHADER_ARG_OUTPUT);
284    const uint64_t expected_ptr[] = {
285       0, 4,
286    };
287 
288    run_shader(kernel_source, out.size(), 1, 1, out);
289    for (int i = 0; i < out.size(); ++i) {
290       EXPECT_EQ(out[i].ptr, expected_ptr[i]);
291    }
292 }
293 
TEST_F(ComputeTest,ret_private_ptr)294 TEST_F(ComputeTest, ret_private_ptr)
295 {
296    struct s { uint64_t ptr; uint32_t value; };
297    const char *kernel_source =
298    "struct s { __private uint *ptr; uint value; };\n\
299    __kernel void main_test(__global struct s *out)\n\
300    {\n\
301        uint tmp[2] = {1, 2};\n\
302        uint idx = get_global_id(0);\n\
303        out[idx].ptr = &tmp[idx];\n\
304        out[idx].value = *out[idx].ptr;\n\
305    }\n";
306    auto out = ShaderArg<struct s>(std::vector<struct s>(2, { 0xdeadbeefdeadbeef }), SHADER_ARG_OUTPUT);
307    const uint64_t expected_ptr[] = {
308       0, 4,
309    };
310    const uint32_t expected_value[] = {
311       1, 2
312    };
313 
314    run_shader(kernel_source, out.size(), 1, 1, out);
315    for (int i = 0; i < out.size(); ++i) {
316       EXPECT_EQ(out[i].ptr, expected_ptr[i]);
317    }
318 }
319 
TEST_F(ComputeTest,globals_8bit)320 TEST_F(ComputeTest, globals_8bit)
321 {
322    const char *kernel_source =
323    "__kernel void main_test(__global unsigned char *inout)\n\
324    {\n\
325        uint idx = get_global_id(0);\n\
326        inout[idx] = inout[idx] + 1;\n\
327    }\n";
328    auto inout = ShaderArg<uint8_t> ({ 100, 110, 120, 130 }, SHADER_ARG_INOUT);
329    const uint8_t expected[] = {
330       101, 111, 121, 131
331    };
332    run_shader(kernel_source, inout.size(), 1, 1, inout);
333    for (int i = 0; i < inout.size(); ++i)
334       EXPECT_EQ(inout[i], expected[i]);
335 }
336 
TEST_F(ComputeTest,globals_16bit)337 TEST_F(ComputeTest, globals_16bit)
338 {
339    const char *kernel_source =
340    "__kernel void main_test(__global unsigned short *inout)\n\
341    {\n\
342        uint idx = get_global_id(0);\n\
343        inout[idx] = inout[idx] + 1;\n\
344    }\n";
345    auto inout = ShaderArg<uint16_t> ({ 10000, 10010, 10020, 10030 }, SHADER_ARG_INOUT);
346    const uint16_t expected[] = {
347       10001, 10011, 10021, 10031
348    };
349    run_shader(kernel_source, inout.size(), 1, 1, inout);
350    for (int i = 0; i < inout.size(); ++i)
351       EXPECT_EQ(inout[i], expected[i]);
352 }
353 
TEST_F(ComputeTest,DISABLED_globals_64bit)354 TEST_F(ComputeTest, DISABLED_globals_64bit)
355 {
356    /* Test disabled, because we need a fixed version of WARP that hasn't
357       been officially shipped yet */
358 
359    const char *kernel_source =
360    "__kernel void main_test(__global unsigned long *inout)\n\
361    {\n\
362        uint idx = get_global_id(0);\n\
363        inout[idx] = inout[idx] + 1;\n\
364    }\n";
365    uint64_t base = 1ull << 50;
366    auto inout = ShaderArg<uint64_t>({ base, base + 10, base + 20, base + 30 },
367                                     SHADER_ARG_INOUT);
368    const uint64_t expected[] = {
369       base + 1, base + 11, base + 21, base + 31
370    };
371    run_shader(kernel_source, inout.size(), 1, 1, inout);
372    for (int i = 0; i < inout.size(); ++i)
373       EXPECT_EQ(inout[i], expected[i]);
374 }
375 
TEST_F(ComputeTest,built_ins_global_id)376 TEST_F(ComputeTest, built_ins_global_id)
377 {
378    const char *kernel_source =
379    "__kernel void main_test(__global uint *output)\n\
380    {\n\
381        output[get_global_id(0)] = get_global_id(0);\n\
382    }\n";
383    auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
384                                      SHADER_ARG_OUTPUT);
385    const uint32_t expected[] = {
386       0, 1, 2, 3
387    };
388 
389    run_shader(kernel_source, output.size(), 1, 1, output);
390    for (int i = 0; i < output.size(); ++i)
391       EXPECT_EQ(output[i], expected[i]);
392 }
393 
TEST_F(ComputeTest,built_ins_global_id_rmw)394 TEST_F(ComputeTest, built_ins_global_id_rmw)
395 {
396    const char *kernel_source =
397    "__kernel void main_test(__global uint *output)\n\
398    {\n\
399        uint id = get_global_id(0);\n\
400        output[id] = output[id] * (id + 1);\n\
401    }\n";
402    auto inout = ShaderArg<uint32_t>({0x00000001, 0x10000001, 0x00020002, 0x04010203},
403                                     SHADER_ARG_INOUT);
404    const uint32_t expected[] = {
405       0x00000001, 0x20000002, 0x00060006, 0x1004080c
406    };
407    run_shader(kernel_source, inout.size(), 1, 1, inout);
408    for (int i = 0; i < inout.size(); ++i)
409       EXPECT_EQ(inout[i], expected[i]);
410 }
411 
TEST_F(ComputeTest,types_float_basics)412 TEST_F(ComputeTest, types_float_basics)
413 {
414    const char *kernel_source =
415    "__kernel void main_test(__global uint *output)\n\
416    {\n\
417        output[get_global_id(0)] = (uint)((float)get_global_id(0) + 1.5f);\n\
418    }\n";
419    auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
420                                      SHADER_ARG_OUTPUT);
421    const uint32_t expected[] = {
422       1, 2, 3, 4
423    };
424    run_shader(kernel_source, output.size(), 1, 1, output);
425    for (int i = 0; i < output.size(); ++i)
426       EXPECT_EQ(output[i], expected[i]);
427 }
428 
TEST_F(ComputeTest,DISABLED_types_double_basics)429 TEST_F(ComputeTest, DISABLED_types_double_basics)
430 {
431    const char *kernel_source =
432    "__kernel void main_test(__global uint *output)\n\
433    {\n\
434        output[get_global_id(0)] = (uint)((double)get_global_id(0) + 1.5);\n\
435    }\n";
436    auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
437                                      SHADER_ARG_OUTPUT);
438    const uint32_t expected[] = {
439       1, 2, 3, 4
440    };
441    run_shader(kernel_source, output.size(), 1, 1, output);
442    for (int i = 0; i < output.size(); ++i)
443       EXPECT_EQ(output[i], expected[i]);
444 }
445 
TEST_F(ComputeTest,types_short_basics)446 TEST_F(ComputeTest, types_short_basics)
447 {
448    const char *kernel_source =
449    "__kernel void main_test(__global uint *output)\n\
450    {\n\
451        output[get_global_id(0)] = (uint)((short)get_global_id(0) + (short)1);\n\
452    }\n";
453    auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
454                                      SHADER_ARG_OUTPUT);
455    const uint32_t expected[] = {
456       1, 2, 3, 4
457    };
458    run_shader(kernel_source, output.size(), 1, 1, output);
459    for (int i = 0; i < output.size(); ++i)
460       EXPECT_EQ(output[i], expected[i]);
461 }
462 
TEST_F(ComputeTest,types_char_basics)463 TEST_F(ComputeTest, types_char_basics)
464 {
465    const char *kernel_source =
466    "__kernel void main_test(__global uint *output)\n\
467    {\n\
468        output[get_global_id(0)] = (uint)((char)get_global_id(0) + (char)1);\n\
469    }\n";
470    auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
471                                      SHADER_ARG_OUTPUT);
472    const uint32_t expected[] = {
473       1, 2, 3, 4
474    };
475    run_shader(kernel_source, output.size(), 1, 1, output);
476    for (int i = 0; i < output.size(); ++i)
477       EXPECT_EQ(output[i], expected[i]);
478 }
479 
TEST_F(ComputeTest,types_if_statement)480 TEST_F(ComputeTest, types_if_statement)
481 {
482    const char *kernel_source =
483    "__kernel void main_test(__global uint *output)\n\
484    {\n\
485        int idx = get_global_id(0);\n\
486        if (idx > 0)\n\
487            output[idx] = ~idx;\n\
488        else\n\
489            output[0] = 0xff;\n\
490    }\n";
491    auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
492                                      SHADER_ARG_OUTPUT);
493    const uint32_t expected[] = {
494       0xff, ~1u, ~2u, ~3u
495    };
496    run_shader(kernel_source, output.size(), 1, 1, output);
497    for (int i = 0; i < output.size(); ++i)
498       EXPECT_EQ(output[i], expected[i]);
499 }
500 
TEST_F(ComputeTest,types_do_while_loop)501 TEST_F(ComputeTest, types_do_while_loop)
502 {
503    const char *kernel_source =
504    "__kernel void main_test(__global uint *output)\n\
505    {\n\
506        int value = 1;\n\
507        int i = 1, n = get_global_id(0);\n\
508        do {\n\
509           value *= i++;\n\
510        } while (i <= n);\n\
511        output[n] = value;\n\
512    }\n";
513    auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(5, 0xdeadbeef),
514                                      SHADER_ARG_OUTPUT);
515    const uint32_t expected[] = {
516       1, 1, 1*2, 1*2*3, 1*2*3*4
517    };
518    run_shader(kernel_source, output.size(), 1, 1, output);
519    for (int i = 0; i < output.size(); ++i)
520       EXPECT_EQ(output[i], expected[i]);
521 }
522 
TEST_F(ComputeTest,types_for_loop)523 TEST_F(ComputeTest, types_for_loop)
524 {
525    const char *kernel_source =
526    "__kernel void main_test(__global uint *output)\n\
527    {\n\
528        int value = 1;\n\
529        int n = get_global_id(0);\n\
530        for (int i = 1; i <= n; ++i)\n\
531           value *= i;\n\
532        output[n] = value;\n\
533    }\n";
534    auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(5, 0xdeadbeef),
535                                      SHADER_ARG_OUTPUT);
536    const uint32_t expected[] = {
537       1, 1, 1*2, 1*2*3, 1*2*3*4
538    };
539    run_shader(kernel_source, output.size(), 1, 1, output);
540    for (int i = 0; i < output.size(); ++i)
541       EXPECT_EQ(output[i], expected[i]);
542 }
543 
TEST_F(ComputeTest,DISABLED_complex_types_local_array_long)544 TEST_F(ComputeTest, DISABLED_complex_types_local_array_long)
545 {
546    const char *kernel_source =
547    "__kernel void main_test(__global ulong *inout)\n\
548    {\n\
549       ushort tmp[] = {\n\
550          get_global_id(1) + 0x00000000,\n\
551          get_global_id(1) + 0x10000001,\n\
552          get_global_id(1) + 0x20000020,\n\
553          get_global_id(1) + 0x30000300,\n\
554       };\n\
555       uint idx = get_global_id(0);\n\
556       inout[idx] = tmp[idx];\n\
557    }\n";
558    auto inout = ShaderArg<uint64_t>({ 0, 0, 0, 0 }, SHADER_ARG_INOUT);
559    const uint16_t expected[] = {
560       0x00000000, 0x10000001, 0x20000020, 0x30000300,
561    };
562    run_shader(kernel_source, inout.size(), 1, 1, inout);
563    for (int i = 0; i < inout.size(); ++i)
564       EXPECT_EQ(inout[i], expected[i]);
565 }
566 
TEST_F(ComputeTest,complex_types_local_array_short)567 TEST_F(ComputeTest, complex_types_local_array_short)
568 {
569    const char *kernel_source =
570    "__kernel void main_test(__global ushort *inout)\n\
571    {\n\
572       ushort tmp[] = {\n\
573          get_global_id(1) + 0x00,\n\
574          get_global_id(1) + 0x10,\n\
575          get_global_id(1) + 0x20,\n\
576          get_global_id(1) + 0x30,\n\
577       };\n\
578       uint idx = get_global_id(0);\n\
579       inout[idx] = tmp[idx];\n\
580    }\n";
581    auto inout = ShaderArg<uint16_t>({ 0, 0, 0, 0 }, SHADER_ARG_INOUT);
582    const uint16_t expected[] = {
583       0x00, 0x10, 0x20, 0x30,
584    };
585    run_shader(kernel_source, inout.size(), 1, 1, inout);
586    for (int i = 0; i < inout.size(); ++i)
587       EXPECT_EQ(inout[i], expected[i]);
588 }
589 
TEST_F(ComputeTest,complex_types_local_array_struct_vec_float_misaligned)590 TEST_F(ComputeTest, complex_types_local_array_struct_vec_float_misaligned)
591 {
592    const char *kernel_source =
593    "struct has_vecs { uchar c; ushort s; float2 f; };\n\
594    __kernel void main_test(__global uint *inout)\n\
595    {\n\
596       struct has_vecs tmp[] = {\n\
597          { 10 + get_global_id(0), get_global_id(1), { 10.0f, 1.0f } },\n\
598          { 19 + get_global_id(0), get_global_id(1), { 20.0f, 4.0f } },\n\
599          { 28 + get_global_id(0), get_global_id(1), { 30.0f, 9.0f } },\n\
600          { 37 + get_global_id(0), get_global_id(1), { 40.0f, 16.0f } },\n\
601       };\n\
602       uint idx = get_global_id(0);\n\
603       uint mul = (tmp[idx].c + tmp[idx].s) * trunc(tmp[idx].f[0]);\n\
604       inout[idx] = mul + trunc(tmp[idx].f[1]);\n\
605    }\n";
606    auto inout = ShaderArg<uint32_t>({ 0, 0, 0, 0 }, SHADER_ARG_INOUT);
607    const uint16_t expected[] = { 101, 404, 909, 1616 };
608    run_shader(kernel_source, inout.size(), 1, 1, inout);
609    for (int i = 0; i < inout.size(); ++i)
610       EXPECT_EQ(inout[i], expected[i]);
611 }
612 
TEST_F(ComputeTest,complex_types_local_array)613 TEST_F(ComputeTest, complex_types_local_array)
614 {
615    const char *kernel_source =
616    "__kernel void main_test(__global uint *inout)\n\
617    {\n\
618       uint tmp[] = {\n\
619          get_global_id(1) + 0x00,\n\
620          get_global_id(1) + 0x10,\n\
621          get_global_id(1) + 0x20,\n\
622          get_global_id(1) + 0x30,\n\
623       };\n\
624       uint idx = get_global_id(0);\n\
625       inout[idx] = tmp[idx];\n\
626    }\n";
627    auto inout = ShaderArg<uint32_t>({ 0, 0, 0, 0 }, SHADER_ARG_INOUT);
628    const uint32_t expected[] = {
629       0x00, 0x10, 0x20, 0x30,
630    };
631    run_shader(kernel_source, inout.size(), 1, 1, inout);
632    for (int i = 0; i < inout.size(); ++i)
633       EXPECT_EQ(inout[i], expected[i]);
634 }
635 
TEST_F(ComputeTest,complex_types_global_struct_array)636 TEST_F(ComputeTest, complex_types_global_struct_array)
637 {
638    struct two_vals { uint32_t add; uint32_t mul; };
639    const char *kernel_source =
640    "struct two_vals { uint add; uint mul; };\n\
641    __kernel void main_test(__global struct two_vals *in_out)\n\
642    {\n\
643       uint id = get_global_id(0);\n\
644       in_out[id].add = in_out[id].add + id;\n\
645       in_out[id].mul = in_out[id].mul * id;\n\
646    }\n";
647    auto inout = ShaderArg<struct two_vals>({ { 8, 8 }, { 16, 16 }, { 64, 64 }, { 65536, 65536 } },
648                                            SHADER_ARG_INOUT);
649    const struct two_vals expected[] = {
650       { 8 + 0, 8 * 0 },
651       { 16 + 1, 16 * 1 },
652       { 64 + 2, 64 * 2 },
653       { 65536 + 3, 65536 * 3 }
654    };
655    run_shader(kernel_source, inout.size(), 1, 1, inout);
656    for (int i = 0; i < inout.size(); ++i) {
657       EXPECT_EQ(inout[i].add, expected[i].add);
658       EXPECT_EQ(inout[i].mul, expected[i].mul);
659    }
660 }
661 
TEST_F(ComputeTest,complex_types_global_uint2)662 TEST_F(ComputeTest, complex_types_global_uint2)
663 {
664    struct uint2 { uint32_t x; uint32_t y; };
665    const char *kernel_source =
666    "__kernel void main_test(__global uint2 *inout)\n\
667    {\n\
668       uint id = get_global_id(0);\n\
669       inout[id].x = inout[id].x + id;\n\
670       inout[id].y = inout[id].y * id;\n\
671    }\n";
672    auto inout = ShaderArg<struct uint2>({ { 8, 8 }, { 16, 16 }, { 64, 64 }, { 65536, 65536 } },
673                                         SHADER_ARG_INOUT);
674    const struct uint2 expected[] = {
675       { 8 + 0, 8 * 0 },
676       { 16 + 1, 16 * 1 },
677       { 64 + 2, 64 * 2 },
678       { 65536 + 3, 65536 * 3 }
679    };
680    run_shader(kernel_source, inout.size(), 1, 1, inout);
681    for (int i = 0; i < inout.size(); ++i) {
682       EXPECT_EQ(inout[i].x, expected[i].x);
683       EXPECT_EQ(inout[i].y, expected[i].y);
684    }
685 }
686 
TEST_F(ComputeTest,complex_types_global_ushort2)687 TEST_F(ComputeTest, complex_types_global_ushort2)
688 {
689    struct ushort2 { uint16_t x; uint16_t y; };
690    const char *kernel_source =
691    "__kernel void main_test(__global ushort2 *inout)\n\
692    {\n\
693       uint id = get_global_id(0);\n\
694       inout[id].x = inout[id].x + id;\n\
695       inout[id].y = inout[id].y * id;\n\
696    }\n";
697    auto inout = ShaderArg<struct ushort2>({ { 8, 8 }, { 16, 16 }, { 64, 64 },
698                                             { (uint16_t)65536, (uint16_t)65536 } },
699                                           SHADER_ARG_INOUT);
700    const struct ushort2 expected[] = {
701       { 8 + 0, 8 * 0 },
702       { 16 + 1, 16 * 1 },
703       { 64 + 2, 64 * 2 },
704       { (uint16_t)(65536 + 3), (uint16_t)(65536 * 3) }
705    };
706    run_shader(kernel_source, inout.size(), 1, 1, inout);
707    for (int i = 0; i < inout.size(); ++i) {
708       EXPECT_EQ(inout[i].x, expected[i].x);
709       EXPECT_EQ(inout[i].y, expected[i].y);
710    }
711 }
712 
TEST_F(ComputeTest,complex_types_global_uchar3)713 TEST_F(ComputeTest, complex_types_global_uchar3)
714 {
715    struct uchar3 { uint8_t x; uint8_t y; uint8_t z; uint8_t pad; };
716    const char *kernel_source =
717    "__kernel void main_test(__global uchar3 *inout)\n\
718    {\n\
719       uint id = get_global_id(0);\n\
720       inout[id].x = inout[id].x + id;\n\
721       inout[id].y = inout[id].y * id;\n\
722       inout[id].z = inout[id].y + inout[id].x;\n\
723    }\n";
724    auto inout = ShaderArg<struct uchar3>({ { 8, 8, 8 }, { 16, 16, 16 }, { 64, 64, 64 }, { 255, 255, 255 } },
725                                          SHADER_ARG_INOUT);
726    const struct uchar3 expected[] = {
727       { 8 + 0, 8 * 0, (8 + 0) + (8 * 0) },
728       { 16 + 1, 16 * 1, (16 + 1) + (16 * 1) },
729       { 64 + 2, 64 * 2, (64 + 2) + (64 * 2) },
730       { (uint8_t)(255 + 3), (uint8_t)(255 * 3), (uint8_t)((255 + 3) + (255 * 3)) }
731    };
732    run_shader(kernel_source, inout.size(), 1, 1, inout);
733    for (int i = 0; i < inout.size(); ++i) {
734       EXPECT_EQ(inout[i].x, expected[i].x);
735       EXPECT_EQ(inout[i].y, expected[i].y);
736       EXPECT_EQ(inout[i].z, expected[i].z);
737    }
738 }
739 
TEST_F(ComputeTest,complex_types_constant_uchar3)740 TEST_F(ComputeTest, complex_types_constant_uchar3)
741 {
742    struct uchar3 { uint8_t x; uint8_t y; uint8_t z; uint8_t pad; };
743    const char *kernel_source =
744    "__kernel void main_test(__global uchar3 *out, __constant uchar3 *in)\n\
745    {\n\
746       uint id = get_global_id(0);\n\
747       out[id].x = in[id].x + id;\n\
748       out[id].y = in[id].y * id;\n\
749       out[id].z = out[id].y + out[id].x;\n\
750    }\n";
751    auto in = ShaderArg<struct uchar3>({ { 8, 8, 8 }, { 16, 16, 16 }, { 64, 64, 64 }, { 255, 255, 255 } },
752                                       SHADER_ARG_INPUT);
753    auto out = ShaderArg<struct uchar3>(std::vector<struct uchar3>(4, { 0xff, 0xff, 0xff }),
754                                       SHADER_ARG_OUTPUT);
755    const struct uchar3 expected[] = {
756       { 8 + 0, 8 * 0, (8 + 0) + (8 * 0) },
757       { 16 + 1, 16 * 1, (16 + 1) + (16 * 1) },
758       { 64 + 2, 64 * 2, (64 + 2) + (64 * 2) },
759       { (uint8_t)(255 + 3), (uint8_t)(255 * 3), (uint8_t)((255 + 3) + (255 * 3)) }
760    };
761    run_shader(kernel_source, out.size(), 1, 1, out, in);
762    for (int i = 0; i < out.size(); ++i) {
763       EXPECT_EQ(out[i].x, expected[i].x);
764       EXPECT_EQ(out[i].y, expected[i].y);
765       EXPECT_EQ(out[i].z, expected[i].z);
766    }
767 }
768 
TEST_F(ComputeTest,complex_types_global_uint8)769 TEST_F(ComputeTest, complex_types_global_uint8)
770 {
771    struct uint8 {
772       uint32_t s0; uint32_t s1; uint32_t s2; uint32_t s3;
773       uint32_t s4; uint32_t s5; uint32_t s6; uint32_t s7;
774    };
775    const char *kernel_source =
776    "__kernel void main_test(__global uint8 *inout)\n\
777    {\n\
778       uint id = get_global_id(0);\n\
779       inout[id].s01234567 = inout[id].s01234567 * 2;\n\
780    }\n";
781    auto inout = ShaderArg<struct uint8>({ { 1, 2, 3, 4, 5, 6, 7, 8 } },
782                                         SHADER_ARG_INOUT);
783    const struct uint8 expected[] = {
784       { 2, 4, 6, 8, 10, 12, 14, 16 }
785    };
786    run_shader(kernel_source, inout.size(), 1, 1, inout);
787    for (int i = 0; i < inout.size(); ++i) {
788       EXPECT_EQ(inout[i].s0, expected[i].s0);
789       EXPECT_EQ(inout[i].s1, expected[i].s1);
790       EXPECT_EQ(inout[i].s2, expected[i].s2);
791       EXPECT_EQ(inout[i].s3, expected[i].s3);
792       EXPECT_EQ(inout[i].s4, expected[i].s4);
793       EXPECT_EQ(inout[i].s5, expected[i].s5);
794       EXPECT_EQ(inout[i].s6, expected[i].s6);
795       EXPECT_EQ(inout[i].s7, expected[i].s7);
796    }
797 }
798 
TEST_F(ComputeTest,complex_types_local_ulong16)799 TEST_F(ComputeTest, complex_types_local_ulong16)
800 {
801    struct ulong16 {
802       uint64_t values[16];
803    };
804    const char *kernel_source =
805    R"(__kernel void main_test(__global ulong16 *inout)
806    {
807       __local ulong16 local_array[2];
808       uint id = get_global_id(0);
809       local_array[id] = inout[id];
810       barrier(CLK_LOCAL_MEM_FENCE);
811       inout[id] = local_array[0] * 2;
812    })";
813    auto inout = ShaderArg<struct ulong16>({ { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } },
814                                         SHADER_ARG_INOUT);
815    const struct ulong16 expected[] = {
816       { 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 }
817    };
818    run_shader(kernel_source, inout.size(), 1, 1, inout);
819    for (int i = 0; i < inout.size(); ++i) {
820       for (int j = 0; j < 16; ++j) {
821          EXPECT_EQ(inout[i].values[j], expected[i].values[j]);
822       }
823    }
824 }
825 
TEST_F(ComputeTest,complex_types_constant_uint8)826 TEST_F(ComputeTest, complex_types_constant_uint8)
827 {
828    struct uint8 {
829       uint32_t s0; uint32_t s1; uint32_t s2; uint32_t s3;
830       uint32_t s4; uint32_t s5; uint32_t s6; uint32_t s7;
831    };
832    const char *kernel_source =
833    "__kernel void main_test(__global uint8 *out, __constant uint8 *in)\n\
834    {\n\
835       uint id = get_global_id(0);\n\
836       out[id].s01234567 = in[id].s01234567 * 2;\n\
837    }\n";
838    auto in = ShaderArg<struct uint8>({ { 1, 2, 3, 4, 5, 6, 7, 8 } },
839                                      SHADER_ARG_INPUT);
840    auto out = ShaderArg<struct uint8>({ { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff } },
841                                       SHADER_ARG_INOUT);
842    const struct uint8 expected[] = {
843       { 2, 4, 6, 8, 10, 12, 14, 16 }
844    };
845    run_shader(kernel_source, out.size(), 1, 1, out, in);
846    for (int i = 0; i < out.size(); ++i) {
847       EXPECT_EQ(out[i].s0, expected[i].s0);
848       EXPECT_EQ(out[i].s1, expected[i].s1);
849       EXPECT_EQ(out[i].s2, expected[i].s2);
850       EXPECT_EQ(out[i].s3, expected[i].s3);
851       EXPECT_EQ(out[i].s4, expected[i].s4);
852       EXPECT_EQ(out[i].s5, expected[i].s5);
853       EXPECT_EQ(out[i].s6, expected[i].s6);
854       EXPECT_EQ(out[i].s7, expected[i].s7);
855    }
856 }
857 
TEST_F(ComputeTest,DISABLED_complex_types_const_array)858 TEST_F(ComputeTest, DISABLED_complex_types_const_array)
859 {
860    /* DISABLED because current release versions of WARP either return
861     * rubbish from reads or crash: they are not prepared to handle
862     * non-float global constants */
863    const char *kernel_source =
864    "__kernel void main_test(__global uint *output)\n\
865    {\n\
866        const uint foo[] = { 100, 101, 102, 103 };\n\
867        output[get_global_id(0)] = foo[get_global_id(0) % 4];\n\
868    }\n";
869    auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
870                                      SHADER_ARG_OUTPUT);
871    const uint32_t expected[] = {
872       100, 101, 102, 103
873    };
874    run_shader(kernel_source, output.size(), 1, 1, output);
875    for (int i = 0; i < output.size(); ++i)
876       EXPECT_EQ(output[i], expected[i]);
877 }
878 
TEST_F(ComputeTest,mem_access_load_store_ordering)879 TEST_F(ComputeTest, mem_access_load_store_ordering)
880 {
881    const char *kernel_source =
882    "__kernel void main_test(__global uint *output)\n\
883    {\n\
884        uint foo[4];\n\
885        foo[0] = 0x11111111;\n\
886        foo[1] = 0x22222222;\n\
887        foo[2] = 0x44444444;\n\
888        foo[3] = 0x88888888;\n\
889        foo[get_global_id(1)] -= 0x11111111; // foo[0] = 0 \n\
890        foo[0] += get_global_id(0); // foo[0] = tid\n\
891        foo[foo[get_global_id(1)]] = get_global_id(0); // foo[tid] = tid\n\
892        output[get_global_id(0)] = foo[get_global_id(0)]; // output[tid] = tid\n\
893    }\n";
894    auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
895                                      SHADER_ARG_OUTPUT);
896    const uint16_t expected[] = {
897       0, 1, 2, 3
898    };
899    run_shader(kernel_source, output.size(), 1, 1, output);
900    for (int i = 0; i < output.size(); ++i)
901       EXPECT_EQ(output[i], expected[i]);
902 }
903 
TEST_F(ComputeTest,DISABLED_two_const_arrays)904 TEST_F(ComputeTest, DISABLED_two_const_arrays)
905 {
906    /* DISABLED because current release versions of WARP either return
907     * rubbish from reads or crash: they are not prepared to handle
908     * non-float global constants */
909    const char *kernel_source =
910    "__kernel void main_test(__global uint *output)\n\
911    {\n\
912       uint id = get_global_id(0);\n\
913       uint foo[4] = {100, 101, 102, 103};\n\
914       uint bar[4] = {1, 2, 3, 4};\n\
915       output[id] = foo[id] * bar[id];\n\
916    }\n";
917    auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
918                                      SHADER_ARG_OUTPUT);
919    const uint32_t expected[] = {
920       100, 202, 306, 412
921    };
922    run_shader(kernel_source, output.size(), 1, 1, output);
923    for (int i = 0; i < output.size(); ++i)
924       EXPECT_EQ(output[i], expected[i]);
925 }
926 
TEST_F(ComputeTest,imod_pos)927 TEST_F(ComputeTest, imod_pos)
928 {
929    const char *kernel_source =
930    "__kernel void main_test(__global int *inout)\n\
931    {\n\
932        inout[get_global_id(0)] = inout[get_global_id(0)] % 3;\n\
933    }\n";
934    auto inout = ShaderArg<int32_t>({ -4, -3, -2, -1, 0, 1, 2, 3, 4 },
935                                    SHADER_ARG_INOUT);
936    const int32_t expected[] = {
937       -1, 0, -2, -1,  0, 1, 2, 0, 1
938    };
939    run_shader(kernel_source, inout.size(), 1, 1, inout);
940    for (int i = 0; i < inout.size(); ++i)
941       EXPECT_EQ(inout[i], expected[i]);
942 }
943 
TEST_F(ComputeTest,imod_neg)944 TEST_F(ComputeTest, imod_neg)
945 {
946    const char *kernel_source =
947    "__kernel void main_test(__global int *inout)\n\
948    {\n\
949        inout[get_global_id(0)] = inout[get_global_id(0)] % -3;\n\
950    }\n";
951    auto inout = ShaderArg<int32_t>({ -4, -3, -2, -1, 0, 1, 2, 3, 4 },
952                                    SHADER_ARG_INOUT);
953    const int32_t expected[] = {
954       -1, 0, -2, -1,  0, 1, 2, 0, 1
955    };
956    run_shader(kernel_source, inout.size(), 1, 1, inout);
957    for (int i = 0; i < inout.size(); ++i)
958       EXPECT_EQ(inout[i], expected[i]);
959 }
960 
TEST_F(ComputeTest,umod)961 TEST_F(ComputeTest, umod)
962 {
963    const char *kernel_source =
964    "__kernel void main_test(__global uint *inout)\n\
965    {\n\
966        inout[get_global_id(0)] = inout[get_global_id(0)] % 0xfffffffc;\n\
967    }\n";
968    auto inout = ShaderArg<uint32_t>({ 0xfffffffa, 0xfffffffb, 0xfffffffc, 0xfffffffd, 0xfffffffe },
969                                     SHADER_ARG_INOUT);
970    const uint32_t expected[] = {
971       0xfffffffa, 0xfffffffb, 0, 1, 2
972    };
973    run_shader(kernel_source, inout.size(), 1, 1, inout);
974    for (int i = 0; i < inout.size(); ++i)
975       EXPECT_EQ(inout[i], expected[i]);
976 }
977 
TEST_F(ComputeTest,rotate)978 TEST_F(ComputeTest, rotate)
979 {
980    const char *kernel_source =
981    "__kernel void main_test(__global uint *inout)\n\
982    {\n\
983        inout[get_global_id(0)] = rotate(inout[get_global_id(0)], (uint)get_global_id(0) * 4);\n\
984    }\n";
985    auto inout = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
986                                     SHADER_ARG_INOUT);
987    const uint32_t expected[] = {
988       0xdeadbeef, 0xeadbeefd, 0xadbeefde, 0xdbeefdea
989    };
990    run_shader(kernel_source, inout.size(), 1, 1, inout);
991    for (int i = 0; i < inout.size(); ++i)
992       EXPECT_EQ(inout[i], expected[i]);
993 }
994 
TEST_F(ComputeTest,popcount)995 TEST_F(ComputeTest, popcount)
996 {
997    const char *kernel_source =
998    "__kernel void main_test(__global uint *inout)\n\
999    {\n\
1000        inout[get_global_id(0)] = popcount(inout[get_global_id(0)]);\n\
1001    }\n";
1002    auto inout = ShaderArg<uint32_t>({ 0, 0x1, 0x3, 0x101, 0x110011, ~0u },
1003                                     SHADER_ARG_INOUT);
1004    const uint32_t expected[] = {
1005       0, 1, 2, 2, 4, 32
1006    };
1007    run_shader(kernel_source, inout.size(), 1, 1, inout);
1008    for (int i = 0; i < inout.size(); ++i)
1009       EXPECT_EQ(inout[i], expected[i]);
1010 }
1011 
TEST_F(ComputeTest,hadd)1012 TEST_F(ComputeTest, hadd)
1013 {
1014    const char *kernel_source =
1015    "__kernel void main_test(__global uint *inout)\n\
1016    {\n\
1017        inout[get_global_id(0)] = hadd(inout[get_global_id(0)], 1u << 31);\n\
1018    }\n";
1019    auto inout = ShaderArg<uint32_t>({ 0, 1, 2, 3, 0xfffffffc, 0xfffffffd, 0xfffffffe, 0xffffffff },
1020                                     SHADER_ARG_INOUT);
1021    const uint32_t expected[] = {
1022       (1u << 31) >> 1,
1023       ((1u << 31) + 1) >> 1,
1024       ((1u << 31) + 2) >> 1,
1025       ((1u << 31) + 3) >> 1,
1026       ((1ull << 31) + 0xfffffffc) >> 1,
1027       ((1ull << 31) + 0xfffffffd) >> 1,
1028       ((1ull << 31) + 0xfffffffe) >> 1,
1029       ((1ull << 31) + 0xffffffff) >> 1,
1030    };
1031    run_shader(kernel_source, inout.size(), 1, 1, inout);
1032    for (int i = 0; i < inout.size(); ++i)
1033       EXPECT_EQ(inout[i], expected[i]);
1034 }
1035 
TEST_F(ComputeTest,rhadd)1036 TEST_F(ComputeTest, rhadd)
1037 {
1038    const char *kernel_source =
1039    "__kernel void main_test(__global uint *inout)\n\
1040    {\n\
1041        inout[get_global_id(0)] = rhadd(inout[get_global_id(0)], 1u << 31);\n\
1042    }\n";
1043    auto inout = ShaderArg<uint32_t>({ 0, 1, 2, 3, 0xfffffffc, 0xfffffffd, 0xfffffffe, 0xffffffff },
1044                                     SHADER_ARG_INOUT);
1045    const uint32_t expected[] = {
1046       ((1u << 31) + 1) >> 1,
1047       ((1u << 31) + 2) >> 1,
1048       ((1u << 31) + 3) >> 1,
1049       ((1u << 31) + 4) >> 1,
1050       ((1ull << 31) + 0xfffffffd) >> 1,
1051       ((1ull << 31) + 0xfffffffe) >> 1,
1052       ((1ull << 31) + 0xffffffff) >> 1,
1053       ((1ull << 31) + (1ull << 32)) >> 1,
1054    };
1055    run_shader(kernel_source, inout.size(), 1, 1, inout);
1056    for (int i = 0; i < inout.size(); ++i)
1057       EXPECT_EQ(inout[i], expected[i]);
1058 }
1059 
TEST_F(ComputeTest,add_sat)1060 TEST_F(ComputeTest, add_sat)
1061 {
1062    const char *kernel_source =
1063    "__kernel void main_test(__global uint *inout)\n\
1064    {\n\
1065        inout[get_global_id(0)] = add_sat(inout[get_global_id(0)], 2u);\n\
1066    }\n";
1067    auto inout = ShaderArg<uint32_t>({ 0xffffffff - 3, 0xffffffff - 2, 0xffffffff - 1, 0xffffffff },
1068                                     SHADER_ARG_INOUT);
1069    const uint32_t expected[] = {
1070       0xffffffff - 1, 0xffffffff, 0xffffffff, 0xffffffff
1071    };
1072    run_shader(kernel_source, inout.size(), 1, 1, inout);
1073    for (int i = 0; i < inout.size(); ++i)
1074       EXPECT_EQ(inout[i], expected[i]);
1075 }
1076 
TEST_F(ComputeTest,sub_sat)1077 TEST_F(ComputeTest, sub_sat)
1078 {
1079    const char *kernel_source =
1080    "__kernel void main_test(__global uint *inout)\n\
1081    {\n\
1082        inout[get_global_id(0)] = sub_sat(inout[get_global_id(0)], 2u);\n\
1083    }\n";
1084    auto inout = ShaderArg<uint32_t>({ 0, 1, 2, 3 }, SHADER_ARG_INOUT);
1085    const uint32_t expected[] = {
1086       0, 0, 0, 1
1087    };
1088    run_shader(kernel_source, inout.size(), 1, 1, inout);
1089    for (int i = 0; i < inout.size(); ++i)
1090       EXPECT_EQ(inout[i], expected[i]);
1091 }
1092 
TEST_F(ComputeTest,mul_hi)1093 TEST_F(ComputeTest, mul_hi)
1094 {
1095    const char *kernel_source =
1096    "__kernel void main_test(__global uint *inout)\n\
1097    {\n\
1098        inout[get_global_id(0)] = mul_hi(inout[get_global_id(0)], 1u << 31);\n\
1099    }\n";
1100    auto inout = ShaderArg<uint32_t>({ 0, 1, 2, 3, (1u << 31) }, SHADER_ARG_INOUT);
1101    const uint32_t expected[] = {
1102       0, 0, 1, 1, (1u << 30)
1103    };
1104    run_shader(kernel_source, inout.size(), 1, 1, inout);
1105    for (int i = 0; i < inout.size(); ++i)
1106       EXPECT_EQ(inout[i], expected[i]);
1107 }
1108 
TEST_F(ComputeTest,ldexp_x)1109 TEST_F(ComputeTest, ldexp_x)
1110 {
1111    const char *kernel_source =
1112    "__kernel void main_test(__global float *inout)\n\
1113    {\n\
1114        inout[get_global_id(0)] = ldexp(inout[get_global_id(0)], 5);\n\
1115    }\n";
1116    auto inout = ShaderArg<float>({ 0.0f, 0.5f, 1.0f, 2.0f }, SHADER_ARG_INOUT);
1117    const float expected[] = {
1118       ldexp(0.0f, 5), ldexp(0.5f, 5), ldexp(1.0f, 5), ldexp(2.0f, 5)
1119    };
1120    run_shader(kernel_source, inout.size(), 1, 1, inout);
1121    for (int i = 0; i < inout.size(); ++i)
1122       EXPECT_FLOAT_EQ(inout[i], expected[i]);
1123 }
1124 
TEST_F(ComputeTest,ldexp_y)1125 TEST_F(ComputeTest, ldexp_y)
1126 {
1127    const char *kernel_source =
1128    "__kernel void main_test(__global float *inout)\n\
1129    {\n\
1130        inout[get_global_id(0)] = ldexp(inout[get_global_id(0)], get_global_id(0));\n\
1131    }\n";
1132    auto inout = ShaderArg<float>({ 0.25f, 0.5f, 0.75f, 1.0f }, SHADER_ARG_INOUT);
1133    const float expected[] = {
1134       ldexp(0.25f, 0), ldexp(0.5f, 1), ldexp(0.75f, 2), ldexp(1.0f, 3)
1135    };
1136    run_shader(kernel_source, inout.size(), 1, 1, inout);
1137    for (int i = 0; i < inout.size(); ++i)
1138       EXPECT_FLOAT_EQ(inout[i], expected[i]);
1139 }
1140 
TEST_F(ComputeTest,frexp_ret)1141 TEST_F(ComputeTest, frexp_ret)
1142 {
1143    const char *kernel_source =
1144    "__kernel void main_test(__global float *inout)\n\
1145    {\n\
1146        int exp;\n\
1147        inout[get_global_id(0)] = frexp(inout[get_global_id(0)], &exp);\n\
1148    }\n";
1149    auto inout = ShaderArg<float>({ 0.0f, 0.5f, 1.0f, 3.0f }, SHADER_ARG_INOUT);
1150    const float expected[] = {
1151       0.0f, 0.5f, 0.5f, 0.75f
1152    };
1153    run_shader(kernel_source, inout.size(), 1, 1, inout);
1154    for (int i = 0; i < inout.size(); ++i)
1155       EXPECT_FLOAT_EQ(inout[i], expected[i]);
1156 }
1157 
TEST_F(ComputeTest,frexp_exp)1158 TEST_F(ComputeTest, frexp_exp)
1159 {
1160    const char *kernel_source =
1161    "__kernel void main_test(__global float *inout)\n\
1162    {\n\
1163        int exp;\n\
1164        frexp(inout[get_global_id(0)], &exp);\n\
1165        inout[get_global_id(0)] = (float)exp;\n\
1166    }\n";
1167    auto inout = ShaderArg<float>({ 0.0f, 0.5f, 1.0f, 3.0f }, SHADER_ARG_INOUT);
1168    const float expected[] = {
1169       0.0f, 0.0f, 1.0f, 2.0f
1170    };
1171    run_shader(kernel_source, inout.size(), 1, 1, inout);
1172    for (int i = 0; i < inout.size(); ++i)
1173       EXPECT_FLOAT_EQ(inout[i], expected[i]);
1174 }
1175 
TEST_F(ComputeTest,clz)1176 TEST_F(ComputeTest, clz)
1177 {
1178    const char *kernel_source =
1179    "__kernel void main_test(__global uint *inout)\n\
1180    {\n\
1181        inout[get_global_id(0)] = clz(inout[get_global_id(0)]);\n\
1182    }\n";
1183    auto inout = ShaderArg<uint32_t>({ 0, 1, 0xffff,  (1u << 30), (1u << 31) }, SHADER_ARG_INOUT);
1184    const uint32_t expected[] = {
1185       32, 31, 16, 1, 0
1186    };
1187    run_shader(kernel_source, inout.size(), 1, 1, inout);
1188    for (int i = 0; i < inout.size(); ++i)
1189       EXPECT_FLOAT_EQ(inout[i], expected[i]);
1190 }
1191 
TEST_F(ComputeTest,sin)1192 TEST_F(ComputeTest, sin)
1193 {
1194    struct sin_vals { float in; float clc; float native; };
1195    const char *kernel_source =
1196    "struct sin_vals { float in; float clc; float native; };\n\
1197    __kernel void main_test(__global struct sin_vals *inout)\n\
1198    {\n\
1199        inout[get_global_id(0)].clc = sin(inout[get_global_id(0)].in);\n\
1200        inout[get_global_id(0)].native = native_sin(inout[get_global_id(0)].in);\n\
1201    }\n";
1202    const vector<sin_vals> input = {
1203       { 0.0f, 0.0f, 0.0f },
1204       { 1.0f, 0.0f, 0.0f },
1205       { 2.0f, 0.0f, 0.0f },
1206       { 3.0f, 0.0f, 0.0f },
1207    };
1208    auto inout = ShaderArg<sin_vals>(input, SHADER_ARG_INOUT);
1209    const struct sin_vals expected[] = {
1210       { 0.0f, 0.0f,       0.0f       },
1211       { 1.0f, sin(1.0f), sin(1.0f) },
1212       { 2.0f, sin(2.0f), sin(2.0f) },
1213       { 3.0f, sin(3.0f), sin(3.0f) },
1214    };
1215    run_shader(kernel_source, inout.size(), 1, 1, inout);
1216    for (int i = 0; i < inout.size(); ++i) {
1217       EXPECT_FLOAT_EQ(inout[i].in, inout[i].in);
1218       EXPECT_FLOAT_EQ(inout[i].clc, inout[i].clc);
1219       EXPECT_NEAR(inout[i].clc, inout[i].native, 0.008f); // range from DXIL spec
1220    }
1221 }
1222 
TEST_F(ComputeTest,DISABLED_cosh)1223 TEST_F(ComputeTest, DISABLED_cosh)
1224 {
1225    /* Disabled because of WARP failures, where we fetch incorrect results when
1226     * sourcing from non-float ICBs */
1227    const char *kernel_source =
1228    "__kernel void main_test(__global float *inout)\n\
1229    {\n\
1230        inout[get_global_id(0)] = cosh(inout[get_global_id(0)]);\n\
1231    }\n";
1232    auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);
1233    const float expected[] = {
1234       cosh(0.0f), cosh(1.0f), cosh(2.0f), cosh(3.0f)
1235    };
1236    run_shader(kernel_source, inout.size(), 1, 1, inout);
1237    for (int i = 0; i < inout.size(); ++i)
1238       EXPECT_FLOAT_EQ(inout[i], expected[i]);
1239 }
1240 
TEST_F(ComputeTest,exp)1241 TEST_F(ComputeTest, exp)
1242 {
1243    const char *kernel_source =
1244    "__kernel void main_test(__global float *inout)\n\
1245    {\n\
1246        inout[get_global_id(0)] = native_exp(inout[get_global_id(0)]);\n\
1247    }\n";
1248    auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);
1249    const float expected[] = {
1250       exp(0.0f), exp(1.0f), exp(2.0f), exp(3.0f)
1251    };
1252    run_shader(kernel_source, inout.size(), 1, 1, inout);
1253    for (int i = 0; i < inout.size(); ++i)
1254       EXPECT_FLOAT_EQ(inout[i], expected[i]);
1255 }
1256 
TEST_F(ComputeTest,exp10)1257 TEST_F(ComputeTest, exp10)
1258 {
1259    const char *kernel_source =
1260    "__kernel void main_test(__global float *inout)\n\
1261    {\n\
1262        inout[get_global_id(0)] = native_exp10(inout[get_global_id(0)]);\n\
1263    }\n";
1264    auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);
1265    const float expected[] = {
1266       pow(10.0f, 0.0f), pow(10.0f, 1.0f), pow(10.0f, 2.0f), pow(10.0f, 3.0f)
1267    };
1268    run_shader(kernel_source, inout.size(), 1, 1, inout);
1269    for (int i = 0; i < inout.size(); ++i)
1270       EXPECT_FLOAT_EQ(inout[i], expected[i]);
1271 }
1272 
TEST_F(ComputeTest,exp2)1273 TEST_F(ComputeTest, exp2)
1274 {
1275    const char *kernel_source =
1276    "__kernel void main_test(__global float *inout)\n\
1277    {\n\
1278        inout[get_global_id(0)] = native_exp2(inout[get_global_id(0)]);\n\
1279    }\n";
1280    auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);
1281    const float expected[] = {
1282       pow(2.0f, 0.0f), pow(2.0f, 1.0f), pow(2.0f, 2.0f), pow(2.0f, 3.0f)
1283    };
1284    run_shader(kernel_source, inout.size(), 1, 1, inout);
1285    for (int i = 0; i < inout.size(); ++i)
1286       EXPECT_FLOAT_EQ(inout[i], expected[i]);
1287 }
1288 
TEST_F(ComputeTest,log)1289 TEST_F(ComputeTest, log)
1290 {
1291    const char *kernel_source =
1292    "__kernel void main_test(__global float *inout)\n\
1293    {\n\
1294        inout[get_global_id(0)] = native_log(inout[get_global_id(0)]);\n\
1295    }\n";
1296    auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);
1297    const float expected[] = {
1298       log(0.0f), log(1.0f), log(2.0f), log(3.0f)
1299    };
1300    run_shader(kernel_source, inout.size(), 1, 1, inout);
1301    for (int i = 0; i < inout.size(); ++i)
1302       EXPECT_FLOAT_EQ(inout[i], expected[i]);
1303 }
1304 
TEST_F(ComputeTest,log10)1305 TEST_F(ComputeTest, log10)
1306 {
1307    const char *kernel_source =
1308    "__kernel void main_test(__global float *inout)\n\
1309    {\n\
1310        inout[get_global_id(0)] = native_log10(inout[get_global_id(0)]);\n\
1311    }\n";
1312    auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);
1313    const float expected[] = {
1314       log10(0.0f), log10(1.0f), log10(2.0f), log10(3.0f)
1315    };
1316    run_shader(kernel_source, inout.size(), 1, 1, inout);
1317    for (int i = 0; i < inout.size(); ++i)
1318       EXPECT_FLOAT_EQ(inout[i], expected[i]);
1319 }
1320 
TEST_F(ComputeTest,log2)1321 TEST_F(ComputeTest, log2)
1322 {
1323    const char *kernel_source =
1324    "__kernel void main_test(__global float *inout)\n\
1325    {\n\
1326        inout[get_global_id(0)] = native_log2(inout[get_global_id(0)]);\n\
1327    }\n";
1328    auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);
1329    const float expected[] = {
1330       log(0.0f) / log(2), log(1.0f) / log(2), log(2.0f) / log(2), log(3.0f) / log(2)
1331    };
1332    run_shader(kernel_source, inout.size(), 1, 1, inout);
1333    for (int i = 0; i < inout.size(); ++i)
1334       EXPECT_FLOAT_EQ(inout[i], expected[i]);
1335 }
1336 
TEST_F(ComputeTest,rint)1337 TEST_F(ComputeTest, rint)
1338 {
1339    const char *kernel_source =
1340    "__kernel void main_test(__global float *inout)\n\
1341    {\n\
1342       inout[get_global_id(0)] = rint(inout[get_global_id(0)]);\n\
1343    }\n";
1344 
1345    auto inout = ShaderArg<float>({ 0.5f, 1.5f, -0.5f, -1.5f, 1.4f }, SHADER_ARG_INOUT);
1346    const float expected[] = {
1347       0.0f, 2.0f, 0.0f, -2.0f, 1.0f,
1348    };
1349    run_shader(kernel_source, inout.size(), 1, 1, inout);
1350    for (int i = 0; i < inout.size(); ++i)
1351       EXPECT_FLOAT_EQ(inout[i], expected[i]);
1352 }
1353 
TEST_F(ComputeTest,round)1354 TEST_F(ComputeTest, round)
1355 {
1356    const char *kernel_source =
1357    "__kernel void main_test(__global float *inout)\n\
1358    {\n\
1359        inout[get_global_id(0)] = round(inout[get_global_id(0)]);\n\
1360    }\n";
1361    auto inout = ShaderArg<float>({ 0, 0.3f, -0.3f, 0.5f, -0.5f, 1.1f, -1.1f },
1362                                  SHADER_ARG_INOUT);
1363    const float expected[] = {
1364       0.0f, 0.0f, -0.0f, 1.0f, -1.0f, 1.0f, -1.0f
1365    };
1366    run_shader(kernel_source, inout.size(), 1, 1, inout);
1367    for (int i = 0; i < inout.size(); ++i)
1368       EXPECT_FLOAT_EQ(inout[i], expected[i]);
1369 }
1370 
TEST_F(ComputeTest,arg_by_val)1371 TEST_F(ComputeTest, arg_by_val)
1372 {
1373    const char *kernel_source =
1374    "__kernel void main_test(__global float *inout, float mul)\n\
1375    {\n\
1376        inout[get_global_id(0)] = inout[get_global_id(0)] * mul;\n\
1377    }\n";
1378    auto inout = ShaderArg<float>({ 0, 0.3f, -0.3f, 0.5f, -0.5f, 1.1f, -1.1f },
1379                                  SHADER_ARG_INOUT);
1380    auto mul = ShaderArg<float>(10.0f, SHADER_ARG_INPUT);
1381    const float expected[] = {
1382       0.0f, 3.0f, -3.0f, 5.0f, -5.0f, 11.0f, -11.0f
1383    };
1384    run_shader(kernel_source, inout.size(), 1, 1, inout, mul);
1385    for (int i = 0; i < inout.size(); ++i)
1386       EXPECT_FLOAT_EQ(inout[i], expected[i]);
1387 }
1388 
TEST_F(ComputeTest,uint8_by_val)1389 TEST_F(ComputeTest, uint8_by_val)
1390 {
1391    struct uint8 {
1392       uint32_t s0; uint32_t s1; uint32_t s2; uint32_t s3;
1393       uint32_t s4; uint32_t s5; uint32_t s6; uint32_t s7;
1394    };
1395    const char *kernel_source =
1396    "__kernel void main_test(__global uint *out, uint8 val)\n\
1397    {\n\
1398        out[get_global_id(0)] = val.s0 + val.s1 + val.s2 + val.s3 +\n\
1399                                val.s4 + val.s5 + val.s6 + val.s7;\n\
1400    }\n";
1401    auto out = ShaderArg<uint32_t>({ 0 }, SHADER_ARG_OUTPUT);
1402    auto val = ShaderArg<struct uint8>({ {0, 1, 2, 3, 4, 5, 6, 7 }}, SHADER_ARG_INPUT);
1403    const uint32_t expected[] = { 0 + 1 + 2 + 3 + 4 + 5 + 6 + 7 };
1404    run_shader(kernel_source, out.size(), 1, 1, out, val);
1405    for (int i = 0; i < out.size(); ++i)
1406       EXPECT_EQ(out[i], expected[i]);
1407 }
1408 
TEST_F(ComputeTest,link)1409 TEST_F(ComputeTest, link)
1410 {
1411    const char *foo_src =
1412    "float foo(float in)\n\
1413    {\n\
1414        return in * in;\n\
1415    }\n";
1416    const char *kernel_source =
1417    "float foo(float in);\n\
1418    __kernel void main_test(__global float *inout)\n\
1419    {\n\
1420        inout[get_global_id(0)] = foo(inout[get_global_id(0)]);\n\
1421    }\n";
1422    std::vector<const char *> srcs = { foo_src, kernel_source };
1423    auto inout = ShaderArg<float>({ 2.0f }, SHADER_ARG_INOUT);
1424    const float expected[] = {
1425       4.0f,
1426    };
1427    run_shader(srcs, inout.size(), 1, 1, inout);
1428    for (int i = 0; i < inout.size(); ++i)
1429       EXPECT_EQ(inout[i], expected[i]);
1430 }
1431 
TEST_F(ComputeTest,link_library)1432 TEST_F(ComputeTest, link_library)
1433 {
1434    const char *bar_src =
1435    "float bar(float in)\n\
1436    {\n\
1437       return in * 5;\n\
1438    }\n";
1439    const char *foo_src =
1440    "float bar(float in);\n\
1441    float foo(float in)\n\
1442    {\n\
1443        return in * bar(in);\n\
1444    }\n";
1445    const char *kernel_source =
1446    "float foo(float in);\n\
1447    __kernel void main_test(__global float *inout)\n\
1448    {\n\
1449        inout[get_global_id(0)] = foo(inout[get_global_id(0)]);\n\
1450    }\n";
1451    std::vector<Shader> libraries = {
1452       compile({ bar_src, kernel_source }, {}, true),
1453       compile({ foo_src }, {}, true)
1454    };
1455    Shader exe = link(libraries);
1456    auto inout = ShaderArg<float>({ 2.0f }, SHADER_ARG_INOUT);
1457    const float expected[] = {
1458       20.0f,
1459    };
1460    run_shader(exe, { (unsigned)inout.size(), 1, 1 }, inout);
1461    for (int i = 0; i < inout.size(); ++i)
1462       EXPECT_EQ(inout[i], expected[i]);
1463 }
1464 
TEST_F(ComputeTest,localvar)1465 TEST_F(ComputeTest, localvar)
1466 {
1467    const char *kernel_source =
1468    "__kernel __attribute__((reqd_work_group_size(2, 1, 1)))\n\
1469    void main_test(__global float *inout)\n\
1470    {\n\
1471       __local float2 tmp[2];\n\
1472       tmp[get_local_id(0)].x = inout[get_global_id(0)] + 1;\n\
1473       tmp[get_local_id(0)].y = inout[get_global_id(0)] - 1;\n\
1474       barrier(CLK_LOCAL_MEM_FENCE);\n\
1475       inout[get_global_id(0)] = tmp[get_local_id(0) % 2].x * tmp[(get_local_id(0) + 1) % 2].y;\n\
1476    }\n";
1477 
1478    auto inout = ShaderArg<float>({ 2.0f, 4.0f }, SHADER_ARG_INOUT);
1479    const float expected[] = {
1480       9.0f, 5.0f
1481    };
1482    run_shader(kernel_source, inout.size(), 1, 1, inout);
1483    for (int i = 0; i < inout.size(); ++i)
1484       EXPECT_EQ(inout[i], expected[i]);
1485 }
1486 
TEST_F(ComputeTest,localvar_uchar2)1487 TEST_F(ComputeTest, localvar_uchar2)
1488 {
1489    const char *kernel_source =
1490    "__attribute__((reqd_work_group_size(2, 1, 1)))\n\
1491    __kernel void main_test(__global uchar *inout)\n\
1492    {\n\
1493       __local uchar2 tmp[2];\n\
1494       tmp[get_local_id(0)].x = inout[get_global_id(0)] + 1;\n\
1495       tmp[get_local_id(0)].y = inout[get_global_id(0)] - 1;\n\
1496       barrier(CLK_LOCAL_MEM_FENCE);\n\
1497       inout[get_global_id(0)] = tmp[get_local_id(0) % 2].x * tmp[(get_local_id(0) + 1) % 2].y;\n\
1498    }\n";
1499 
1500    auto inout = ShaderArg<uint8_t>({ 2, 4 }, SHADER_ARG_INOUT);
1501    const uint8_t expected[] = { 9, 5 };
1502    run_shader(kernel_source, inout.size(), 1, 1, inout);
1503    for (int i = 0; i < inout.size(); ++i)
1504       EXPECT_EQ(inout[i], expected[i]);
1505 }
1506 
TEST_F(ComputeTest,work_group_size_hint)1507 TEST_F(ComputeTest, work_group_size_hint)
1508 {
1509    const char *kernel_source =
1510    "__attribute__((work_group_size_hint(2, 1, 1)))\n\
1511    __kernel void main_test(__global uint *output)\n\
1512    {\n\
1513        output[get_global_id(0)] = get_local_id(0);\n\
1514    }\n";
1515    auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
1516                                      SHADER_ARG_OUTPUT);
1517    const uint32_t expected[] = {
1518       0, 1, 2, 3
1519    };
1520    run_shader(kernel_source, output.size(), 1, 1, output);
1521    for (int i = 0; i < output.size(); ++i)
1522       EXPECT_EQ(output[i], expected[i]);
1523 }
1524 
TEST_F(ComputeTest,reqd_work_group_size)1525 TEST_F(ComputeTest, reqd_work_group_size)
1526 {
1527    const char *kernel_source =
1528    "__attribute__((reqd_work_group_size(2, 1, 1)))\n\
1529    __kernel void main_test(__global uint *output)\n\
1530    {\n\
1531        output[get_global_id(0)] = get_local_id(0);\n\
1532    }\n";
1533    auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
1534                                      SHADER_ARG_OUTPUT);
1535    const uint32_t expected[] = {
1536       0, 1, 0, 1
1537    };
1538    run_shader(kernel_source, output.size(), 1, 1, output);
1539    for (int i = 0; i < output.size(); ++i)
1540       EXPECT_EQ(output[i], expected[i]);
1541 }
1542 
TEST_F(ComputeTest,image)1543 TEST_F(ComputeTest, image)
1544 {
1545    const char* kernel_source =
1546    "__kernel void main_test(read_only image2d_t input, write_only image2d_t output)\n\
1547    {\n\
1548       int2 coords = (int2)(get_global_id(0), get_global_id(1));\n\
1549       write_imagef(output, coords, read_imagef(input, coords));\n\
1550    }\n";
1551    Shader shader = compile(std::vector<const char*>({ kernel_source }));
1552    validate(shader);
1553 }
1554 
TEST_F(ComputeTest,image_two_reads)1555 TEST_F(ComputeTest, image_two_reads)
1556 {
1557    const char* kernel_source =
1558    "__kernel void main_test(image2d_t image, int is_float, __global float* output)\n\
1559    {\n\
1560       if (is_float)\n\
1561          output[get_global_id(0)] = read_imagef(image, (int2)(0, 0)).x;\n\
1562       else \n\
1563          output[get_global_id(0)] = (float)read_imagei(image, (int2)(0, 0)).x;\n\
1564    }\n";
1565    Shader shader = compile(std::vector<const char*>({ kernel_source }));
1566    validate(shader);
1567 }
1568 
TEST_F(ComputeTest,image_read_write)1569 TEST_F(ComputeTest, image_read_write)
1570 {
1571    const char *kernel_source =
1572    R"(__kernel void main_test(read_write image2d_t image)
1573    {
1574       int2 coords = (int2)(get_global_id(0), get_global_id(1));
1575       write_imagef(image, coords, read_imagef(image, coords) + (float4)(1.0f, 1.0f, 1.0f, 1.0f));
1576    })";
1577    Shader shader = compile(std::vector<const char*>({ kernel_source }), { "-cl-std=cl3.0" });
1578    validate(shader);
1579 }
1580 
TEST_F(ComputeTest,sampler)1581 TEST_F(ComputeTest, sampler)
1582 {
1583    const char* kernel_source =
1584    "__kernel void main_test(image2d_t image, sampler_t sampler, __global float* output)\n\
1585    {\n\
1586       output[get_global_id(0)] = read_imagef(image, sampler, (int2)(0, 0)).x;\n\
1587    }\n";
1588    Shader shader = compile(std::vector<const char*>({ kernel_source }));
1589    validate(shader);
1590 }
1591 
TEST_F(ComputeTest,image_dims)1592 TEST_F(ComputeTest, image_dims)
1593 {
1594    const char* kernel_source =
1595    "__kernel void main_test(image2d_t roimage, write_only image2d_t woimage, __global uint* output)\n\
1596    {\n\
1597       output[get_global_id(0)] = get_image_width(roimage);\n\
1598       output[get_global_id(0) + 1] = get_image_width(woimage);\n\
1599    }\n";
1600    Shader shader = compile(std::vector<const char*>({ kernel_source }));
1601    validate(shader);
1602 }
1603 
TEST_F(ComputeTest,image_format)1604 TEST_F(ComputeTest, image_format)
1605 {
1606    const char* kernel_source =
1607    "__kernel void main_test(image2d_t roimage, write_only image2d_t woimage, __global uint* output)\n\
1608    {\n\
1609       output[get_global_id(0)] = get_image_channel_data_type(roimage);\n\
1610       output[get_global_id(0) + 1] = get_image_channel_order(woimage);\n\
1611    }\n";
1612    Shader shader = compile(std::vector<const char*>({ kernel_source }));
1613    validate(shader);
1614 }
1615 
TEST_F(ComputeTest,image1d_buffer_t)1616 TEST_F(ComputeTest, image1d_buffer_t)
1617 {
1618    const char* kernel_source =
1619    "__kernel void main_test(read_only image1d_buffer_t input, write_only image1d_buffer_t output)\n\
1620    {\n\
1621       write_imageui(output, get_global_id(0), read_imageui(input, get_global_id(0)));\n\
1622    }\n";
1623    Shader shader = compile(std::vector<const char*>({ kernel_source }));
1624    validate(shader);
1625 }
1626 
TEST_F(ComputeTest,local_ptr)1627 TEST_F(ComputeTest, local_ptr)
1628 {
1629    struct uint2 { uint32_t x, y; };
1630    const char *kernel_source =
1631    "__kernel void main_test(__global uint *inout, __local uint2 *tmp)\n\
1632    {\n\
1633       tmp[get_local_id(0)].x = inout[get_global_id(0)] + 1;\n\
1634       tmp[get_local_id(0)].y = inout[get_global_id(0)] - 1;\n\
1635       barrier(CLK_LOCAL_MEM_FENCE);\n\
1636       inout[get_global_id(0)] = tmp[get_local_id(0) % 2].x * tmp[(get_local_id(0) + 1) % 2].y;\n\
1637    }\n";
1638    auto inout = ShaderArg<uint32_t>({ 2, 4 }, SHADER_ARG_INOUT);
1639    auto tmp = ShaderArg<struct uint2>(std::vector<struct uint2>(4096), SHADER_ARG_INPUT);
1640    const uint8_t expected[] = { 9, 5 };
1641    run_shader(kernel_source, inout.size(), 1, 1, inout, tmp);
1642    for (int i = 0; i < inout.size(); ++i)
1643       EXPECT_EQ(inout[i], expected[i]);
1644 }
1645 
TEST_F(ComputeTest,two_local_ptrs)1646 TEST_F(ComputeTest, two_local_ptrs)
1647 {
1648    struct uint2 { uint32_t x, y; };
1649    const char *kernel_source =
1650    "__kernel void main_test(__global uint *inout, __local uint2 *tmp, __local uint *tmp2)\n\
1651    {\n\
1652       tmp[get_local_id(0)].x = inout[get_global_id(0)] + 1;\n\
1653       tmp[get_local_id(0)].y = inout[get_global_id(0)] - 1;\n\
1654       tmp2[get_local_id(0)] = get_global_id(0);\n\
1655       barrier(CLK_LOCAL_MEM_FENCE);\n\
1656       inout[get_global_id(0)] = tmp[get_local_id(0) % 2].x * tmp[(get_local_id(0) + 1) % 2].y + tmp2[get_local_id(0) % 2];\n\
1657    }\n";
1658    auto inout = ShaderArg<uint32_t>({ 2, 4 }, SHADER_ARG_INOUT);
1659    auto tmp = ShaderArg<struct uint2>(std::vector<struct uint2>(1024), SHADER_ARG_INPUT);
1660    auto tmp2 = ShaderArg<uint32_t>(std::vector<uint32_t>(1024), SHADER_ARG_INPUT);
1661    const uint8_t expected[] = { 9, 6 };
1662    run_shader(kernel_source, inout.size(), 1, 1, inout, tmp, tmp2);
1663    for (int i = 0; i < inout.size(); ++i)
1664       EXPECT_EQ(inout[i], expected[i]);
1665 }
1666 
TEST_F(ComputeTest,int8_to_float)1667 TEST_F(ComputeTest, int8_to_float)
1668 {
1669    const char *kernel_source =
1670    "__kernel void main_test(__global char* in, __global float* out)\n\
1671    {\n\
1672       uint pos = get_global_id(0);\n\
1673       out[pos] = in[pos] / 100.0f;\n\
1674    }";
1675    auto in = ShaderArg<char>({ 10, 20, 30, 40 }, SHADER_ARG_INPUT);
1676    auto out = ShaderArg<float>(std::vector<float>(4, std::numeric_limits<float>::infinity()), SHADER_ARG_OUTPUT);
1677    const float expected[] = { 0.1f, 0.2f, 0.3f, 0.4f };
1678    run_shader(kernel_source, in.size(), 1, 1, in, out);
1679    for (int i = 0; i < in.size(); ++i)
1680       EXPECT_FLOAT_EQ(out[i], expected[i]);
1681 }
1682 
TEST_F(ComputeTest,vec_hint_float4)1683 TEST_F(ComputeTest, vec_hint_float4)
1684 {
1685    const char *kernel_source =
1686    "__kernel __attribute__((vec_type_hint(float4))) void main_test(__global float *inout)\n\
1687    {\n\
1688       inout[get_global_id(0)] *= inout[get_global_id(1)];\n\
1689    }";
1690    Shader shader = compile({ kernel_source });
1691    EXPECT_EQ(shader.metadata->kernels[0].vec_hint_size, 4);
1692    EXPECT_EQ(shader.metadata->kernels[0].vec_hint_type, CLC_VEC_HINT_TYPE_FLOAT);
1693 }
1694 
TEST_F(ComputeTest,vec_hint_uchar2)1695 TEST_F(ComputeTest, vec_hint_uchar2)
1696 {
1697    const char *kernel_source =
1698    "__kernel __attribute__((vec_type_hint(uchar2))) void main_test(__global float *inout)\n\
1699    {\n\
1700       inout[get_global_id(0)] *= inout[get_global_id(1)];\n\
1701    }";
1702    Shader shader = compile({ kernel_source });
1703    EXPECT_EQ(shader.metadata->kernels[0].vec_hint_size, 2);
1704    EXPECT_EQ(shader.metadata->kernels[0].vec_hint_type, CLC_VEC_HINT_TYPE_CHAR);
1705 }
1706 
TEST_F(ComputeTest,vec_hint_none)1707 TEST_F(ComputeTest, vec_hint_none)
1708 {
1709    const char *kernel_source =
1710    "__kernel void main_test(__global float *inout)\n\
1711    {\n\
1712       inout[get_global_id(0)] *= inout[get_global_id(1)];\n\
1713    }";
1714    Shader shader = compile({ kernel_source });
1715    EXPECT_EQ(shader.metadata->kernels[0].vec_hint_size, 0);
1716 }
1717 
TEST_F(ComputeTest,DISABLED_debug_layer_failure)1718 TEST_F(ComputeTest, DISABLED_debug_layer_failure)
1719 {
1720    const char *kernel_source =
1721    "__kernel void main_test(__global float *inout, float mul)\n\
1722    {\n\
1723        inout[get_global_id(0)] = inout[get_global_id(0)] * mul;\n\
1724    }\n";
1725    auto inout = ShaderArg<float>({ 0, 0.3f, -0.3f, 0.5f, -0.5f, 1.1f, -1.1f },
1726                                  SHADER_ARG_INOUT);
1727    auto mul = ShaderArg<float>(10.0f, SHADER_ARG_INPUT);
1728    const float expected[] = {
1729       0.0f, 3.0f, -3.0f, 5.0f, -5.0f, 11.0f, -11.0f
1730    };
1731    ComPtr<ID3D12InfoQueue> info_queue;
1732    dev->QueryInterface(info_queue.ReleaseAndGetAddressOf());
1733    if (!info_queue) {
1734       GTEST_SKIP() << "No info queue";
1735       return;
1736    }
1737 
1738    info_queue->AddApplicationMessage(D3D12_MESSAGE_SEVERITY_ERROR, "This should cause the test to fail");
1739    run_shader(kernel_source, inout.size(), 1, 1, inout, mul);
1740    for (int i = 0; i < inout.size(); ++i)
1741       EXPECT_FLOAT_EQ(inout[i], expected[i]);
1742 }
1743 
TEST_F(ComputeTest,compiler_defines)1744 TEST_F(ComputeTest, compiler_defines)
1745 {
1746    const char *kernel_source =
1747       "__kernel void main_test(__global int* out)\n\
1748    {\n\
1749       out[0] = OUT_VAL0;\n\
1750       out[1] = __OPENCL_C_VERSION__;\n\
1751    }";
1752    auto out = ShaderArg<int>(std::vector<int>(2, 0), SHADER_ARG_OUTPUT);
1753    CompileArgs compile_args = { 1, 1, 1 };
1754    compile_args.compiler_command_line = { "-DOUT_VAL0=5", "-cl-std=cl" };
1755    std::vector<RawShaderArg *> raw_args = { &out };
1756    run_shader({ kernel_source }, compile_args, out);
1757    EXPECT_EQ(out[0], 5);
1758    EXPECT_EQ(out[1], 100);
1759 }
1760 
1761 /* There's a bug in WARP turning atomic_add(ptr, x) into
1762  * atomic_add(ptr, x * 4). Works fine on intel HW.
1763  */
TEST_F(ComputeTest,DISABLED_global_atomic_add)1764 TEST_F(ComputeTest, DISABLED_global_atomic_add)
1765 {
1766    const char *kernel_source =
1767    "__kernel void main_test(__global int *inout, __global int *old)\n\
1768    {\n\
1769       old[get_global_id(0)] = atomic_add(inout + get_global_id(0), 3);\n\
1770    }\n";
1771    auto inout = ShaderArg<int32_t>({ 2, 4 }, SHADER_ARG_INOUT);
1772    auto old = ShaderArg<int32_t>(std::vector<int32_t>(2, 0xdeadbeef), SHADER_ARG_OUTPUT);
1773    const int32_t expected_inout[] = { 5, 7 };
1774    const int32_t expected_old[] = { 2, 4 };
1775    run_shader(kernel_source, inout.size(), 1, 1, inout, old);
1776    for (int i = 0; i < inout.size(); ++i) {
1777       EXPECT_EQ(inout[i], expected_inout[i]);
1778       EXPECT_EQ(old[i], expected_old[i]);
1779    }
1780 }
1781 
TEST_F(ComputeTest,global_atomic_imin)1782 TEST_F(ComputeTest, global_atomic_imin)
1783 {
1784    const char *kernel_source =
1785    "__kernel void main_test(__global int *inout, __global int *old)\n\
1786    {\n\
1787       old[get_global_id(0)] = atomic_min(inout + get_global_id(0), 1);\n\
1788    }\n";
1789    auto inout = ShaderArg<int32_t>({ 0, 2, -1 }, SHADER_ARG_INOUT);
1790    auto old = ShaderArg<int32_t>(std::vector<int32_t>(3, 0xdeadbeef), SHADER_ARG_OUTPUT);
1791    const int32_t expected_inout[] = { 0, 1, -1 };
1792    const int32_t expected_old[] = { 0, 2, -1 };
1793    run_shader(kernel_source, inout.size(), 1, 1, inout, old);
1794    for (int i = 0; i < inout.size(); ++i) {
1795       EXPECT_EQ(inout[i], expected_inout[i]);
1796       EXPECT_EQ(old[i], expected_old[i]);
1797    }
1798 }
1799 
TEST_F(ComputeTest,global_atomic_and_or)1800 TEST_F(ComputeTest, global_atomic_and_or)
1801 {
1802    const char *kernel_source =
1803    "__attribute__((reqd_work_group_size(3, 1, 1)))\n\
1804    __kernel void main_test(__global int *inout)\n\
1805    {\n\
1806       atomic_and(inout, ~(1 << get_global_id(0)));\n\
1807       atomic_or(inout, (1 << (get_global_id(0) + 4)));\n\
1808    }\n";
1809    auto inout = ShaderArg<int32_t>(0xf, SHADER_ARG_INOUT);
1810    const int32_t expected[] = { 0x78 };
1811    run_shader(kernel_source, 3, 1, 1, inout);
1812    for (int i = 0; i < inout.size(); ++i)
1813       EXPECT_EQ(inout[i], expected[i]);
1814 }
1815 
TEST_F(ComputeTest,global_atomic_cmpxchg)1816 TEST_F(ComputeTest, global_atomic_cmpxchg)
1817 {
1818    const char *kernel_source =
1819    "__attribute__((reqd_work_group_size(2, 1, 1)))\n\
1820    __kernel void main_test(__global int *inout)\n\
1821    {\n\
1822       while (atomic_cmpxchg(inout, get_global_id(0), get_global_id(0) + 1) != get_global_id(0))\n\
1823          ;\n\
1824    }\n";
1825    auto inout = ShaderArg<int32_t>(0, SHADER_ARG_INOUT);
1826    const int32_t expected_inout[] = { 2 };
1827    run_shader(kernel_source, 2, 1, 1, inout);
1828    for (int i = 0; i < inout.size(); ++i)
1829       EXPECT_EQ(inout[i], expected_inout[i]);
1830 }
1831 
TEST_F(ComputeTest,local_atomic_and_or)1832 TEST_F(ComputeTest, local_atomic_and_or)
1833 {
1834    const char *kernel_source =
1835    "__attribute__((reqd_work_group_size(2, 1, 1)))\n\
1836    __kernel void main_test(__global ushort *inout)\n\
1837    {\n\
1838       __local ushort tmp;\n\
1839       atomic_and(&tmp, ~(0xff << (get_global_id(0) * 8)));\n\
1840       atomic_or(&tmp, inout[get_global_id(0)] << (get_global_id(0) * 8));\n\
1841       barrier(CLK_LOCAL_MEM_FENCE);\n\
1842       inout[get_global_id(0)] = tmp;\n\
1843    }\n";
1844    auto inout = ShaderArg<uint16_t>({ 2, 4 }, SHADER_ARG_INOUT);
1845    const uint16_t expected[] = { 0x402, 0x402 };
1846    run_shader(kernel_source, inout.size(), 1, 1, inout);
1847    for (int i = 0; i < inout.size(); ++i)
1848       EXPECT_EQ(inout[i], expected[i]);
1849 }
1850 
TEST_F(ComputeTest,local_atomic_cmpxchg)1851 TEST_F(ComputeTest, local_atomic_cmpxchg)
1852 {
1853    const char *kernel_source =
1854    "__attribute__((reqd_work_group_size(2, 1, 1)))\n\
1855    __kernel void main_test(__global int *out)\n\
1856    {\n\
1857       __local uint tmp;\n\
1858       tmp = 0;\n\
1859       barrier(CLK_LOCAL_MEM_FENCE);\n\
1860       while (atomic_cmpxchg(&tmp, get_global_id(0), get_global_id(0) + 1) != get_global_id(0))\n\
1861          ;\n\
1862       barrier(CLK_LOCAL_MEM_FENCE);\n\
1863       out[0] = tmp;\n\
1864    }\n";
1865 
1866    auto out = ShaderArg<uint32_t>(0xdeadbeef, SHADER_ARG_OUTPUT);
1867    const uint16_t expected[] = { 2 };
1868    run_shader(kernel_source, 2, 1, 1, out);
1869    for (int i = 0; i < out.size(); ++i)
1870       EXPECT_EQ(out[i], expected[i]);
1871 }
1872 
TEST_F(ComputeTest,constant_sampler)1873 TEST_F(ComputeTest, constant_sampler)
1874 {
1875    const char* kernel_source =
1876    "__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_LINEAR;\n\
1877    __kernel void main_test(read_only image2d_t input, write_only image2d_t output)\n\
1878    {\n\
1879       int2 coordsi = (int2)(get_global_id(0), get_global_id(1));\n\
1880       float2 coordsf = (float2)((float)coordsi.x / get_image_width(input), (float)coordsi.y / get_image_height(input));\n\
1881       write_imagef(output, coordsi, \n\
1882          read_imagef(input, sampler, coordsf) + \n\
1883          read_imagef(input, sampler, coordsf + (float2)(0.1, 0.1)));\n\
1884    }\n";
1885    Shader shader = compile(std::vector<const char*>({ kernel_source }));
1886    validate(shader);
1887    EXPECT_EQ(shader.dxil->metadata.num_const_samplers, 1);
1888 }
1889 
TEST_F(ComputeTest,hi)1890 TEST_F(ComputeTest, hi)
1891 {
1892    const char *kernel_source = R"(
1893    __kernel void main_test(__global char3 *srcA, __global char2 *dst)
1894    {
1895        int  tid = get_global_id(0);
1896 
1897        char2 tmp = srcA[tid].hi;
1898        dst[tid] = tmp;
1899    })";
1900    Shader shader = compile(std::vector<const char*>({ kernel_source }));
1901    validate(shader);
1902 }
1903 
TEST_F(ComputeTest,system_values)1904 TEST_F(ComputeTest, system_values)
1905 {
1906    const char *kernel_source =
1907    "__kernel void main_test(__global uint* outputs)\n\
1908    {\n\
1909       outputs[0] = get_work_dim();\n\
1910       outputs[1] = get_global_size(0);\n\
1911       outputs[2] = get_local_size(0);\n\
1912       outputs[3] = get_num_groups(0);\n\
1913       outputs[4] = get_group_id(0);\n\
1914       outputs[5] = get_global_offset(0);\n\
1915       outputs[6] = get_global_id(0);\n\
1916    }\n";
1917    auto out = ShaderArg<uint32_t>(std::vector<uint32_t>(6, 0xdeadbeef), SHADER_ARG_OUTPUT);
1918    const uint16_t expected[] = { 3, 1, 1, 1, 0, 0, 0, };
1919    CompileArgs args = { 1, 1, 1 };
1920    Shader shader = compile({ kernel_source });
1921    run_shader(shader, args, out);
1922    for (int i = 0; i < out.size(); ++i)
1923       EXPECT_EQ(out[i], expected[i]);
1924 
1925    args.work_props.work_dim = 2;
1926    args.work_props.global_offset_x = 100;
1927    args.work_props.group_id_offset_x = 2;
1928    args.work_props.group_count_total_x = 5;
1929    const uint32_t expected_withoffsets[] = { 2, 5, 1, 5, 2, 100, 102 };
1930    run_shader(shader, args, out);
1931    for (int i = 0; i < out.size(); ++i)
1932       EXPECT_EQ(out[i], expected_withoffsets[i]);
1933 }
1934 
TEST_F(ComputeTest,convert_round_sat)1935 TEST_F(ComputeTest, convert_round_sat)
1936 {
1937    const char *kernel_source =
1938    "__kernel void main_test(__global float *f, __global uchar *u)\n\
1939    {\n\
1940        uint idx = get_global_id(0);\n\
1941        u[idx] = convert_uchar_sat_rtp(f[idx]);\n\
1942    }\n";
1943    auto f = ShaderArg<float>({ -1.0f, 1.1f, 20.0f, 255.5f }, SHADER_ARG_INPUT);
1944    auto u = ShaderArg<uint8_t>({ 255, 0, 0, 0 }, SHADER_ARG_OUTPUT);
1945    const uint8_t expected[] = {
1946       0, 2, 20, 255
1947    };
1948 
1949    run_shader(kernel_source, f.size(), 1, 1, f, u);
1950    for (int i = 0; i < u.size(); ++i)
1951       EXPECT_EQ(u[i], expected[i]);
1952 }
1953 
TEST_F(ComputeTest,convert_round_sat_vec)1954 TEST_F(ComputeTest, convert_round_sat_vec)
1955 {
1956    const char *kernel_source =
1957    "__kernel void main_test(__global float16 *f, __global uchar16 *u)\n\
1958    {\n\
1959        uint idx = get_global_id(0);\n\
1960        u[idx] = convert_uchar16_sat_rtp(f[idx]);\n\
1961    }\n";
1962    auto f = ShaderArg<float>({
1963       -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f,
1964       -0.5f, 1.9f, 20.0f, 254.5f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f,
1965        0.0f, 1.3f, 20.0f, 255.1f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f,
1966       -0.0f, 1.5555f, 20.0f, 254.9f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f,
1967    }, SHADER_ARG_INPUT);
1968    auto u = ShaderArg<uint8_t>({
1969       255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0,
1970       255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0,
1971       255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0,
1972       255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0,
1973    }, SHADER_ARG_OUTPUT);
1974    const uint8_t expected[] = {
1975       0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255,
1976       0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255,
1977       0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255,
1978       0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255,
1979    };
1980 
1981    run_shader(kernel_source, 4, 1, 1, f, u);
1982    for (int i = 0; i < u.size(); ++i)
1983       EXPECT_EQ(u[i], expected[i]);
1984 }
1985 
TEST_F(ComputeTest,convert_char2_uchar2)1986 TEST_F(ComputeTest, convert_char2_uchar2)
1987 {
1988    const char *kernel_source =
1989    "__kernel void main_test( __global char2 *src, __global uchar2 *dest )\n\
1990    {\n\
1991       size_t i = get_global_id(0);\n\
1992       dest[i] = convert_uchar2_sat( src[i] );\n\
1993    }\n";
1994 
1995    auto c = ShaderArg<int8_t>({ -127, -4, 0, 4, 126, 127, 16, 32 }, SHADER_ARG_INPUT);
1996    auto u = ShaderArg<uint8_t>({ 99, 99, 99, 99, 99, 99, 99, 99 }, SHADER_ARG_OUTPUT);
1997    const uint8_t expected[] = { 0, 0, 0, 4, 126, 127, 16, 32 };
1998    run_shader(kernel_source, 4, 1, 1, c, u);
1999    for (int i = 0; i < u.size(); i++)
2000       EXPECT_EQ(u[i], expected[i]);
2001 }
2002 
TEST_F(ComputeTest,async_copy)2003 TEST_F(ComputeTest, async_copy)
2004 {
2005    const char *kernel_source = R"(
2006    __kernel void main_test( const __global char *src, __global char *dst, __local char *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem )
2007    {
2008     int i;
2009     for(i=0; i<copiesPerWorkItem; i++)
2010         localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ] = (char)(char)0;
2011        barrier( CLK_LOCAL_MEM_FENCE );
2012        event_t event;
2013        event = async_work_group_copy( (__local char*)localBuffer, (__global const char*)(src+copiesPerWorkgroup*get_group_id(0)), (size_t)copiesPerWorkgroup, 0 );
2014        wait_group_events( 1, &event );
2015     for(i=0; i<copiesPerWorkItem; i++)
2016      dst[ get_global_id( 0 )*copiesPerWorkItem+i ] = localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ];
2017    })";
2018    Shader shader = compile({ kernel_source });
2019    validate(shader);
2020 }
2021 
TEST_F(ComputeTest,packed_struct_global)2022 TEST_F(ComputeTest, packed_struct_global)
2023 {
2024 #pragma pack(push, 1)
2025    struct s { uint8_t uc; uint64_t ul; uint16_t us; };
2026 #pragma pack(pop)
2027 
2028    const char *kernel_source =
2029    "struct __attribute__((packed)) s {uchar uc; ulong ul; ushort us; };\n\
2030    __kernel void main_test(__global struct s *inout, global uint *size)\n\
2031    {\n\
2032        uint idx = get_global_id(0);\n\
2033        inout[idx].uc = idx + 1;\n\
2034        inout[idx].ul = ((ulong)(idx + 1 + 0xfbfcfdfe) << 32) | 0x12345678;\n\
2035        inout[idx].us = ((ulong)(idx + 1 + 0xa0) << 8) | 0x12;\n\
2036        *size = sizeof(struct s);\n\
2037    }\n";
2038    auto inout = ShaderArg<struct s>({0, 0, 0}, SHADER_ARG_OUTPUT);
2039    auto size = ShaderArg<uint32_t>(0, SHADER_ARG_OUTPUT);
2040    const struct s expected[] = {
2041       { 1, 0xfbfcfdff12345678, 0xa112 }
2042    };
2043 
2044    run_shader(kernel_source, inout.size(), 1, 1, inout, size);
2045    for (int i = 0; i < inout.size(); ++i) {
2046       EXPECT_EQ(inout[i].uc, expected[i].uc);
2047       EXPECT_EQ(inout[i].ul, expected[i].ul);
2048       EXPECT_EQ(inout[i].us, expected[i].us);
2049    }
2050    EXPECT_EQ(size, sizeof(struct s));
2051 }
2052 
TEST_F(ComputeTest,packed_struct_arg)2053 TEST_F(ComputeTest, packed_struct_arg)
2054 {
2055 #pragma pack(push, 1)
2056    struct s { uint8_t uc; uint64_t ul; uint16_t us; };
2057 #pragma pack(pop)
2058 
2059    const char *kernel_source =
2060    "struct __attribute__((packed)) s {uchar uc; ulong ul; ushort us; };\n\
2061    __kernel void main_test(__global struct s *out, struct s in)\n\
2062    {\n\
2063        uint idx = get_global_id(0);\n\
2064        out[idx].uc = in.uc + 0x12;\n\
2065        out[idx].ul = in.ul + 0x123456789abcdef;\n\
2066        out[idx].us = in.us + 0x1234;\n\
2067    }\n";
2068    auto out = ShaderArg<struct s>({0, 0, 0}, SHADER_ARG_OUTPUT);
2069    auto in = ShaderArg<struct s>({1, 2, 3}, SHADER_ARG_INPUT);
2070    const struct s expected[] = {
2071       { 0x12 + 1, 0x123456789abcdef + 2, 0x1234 + 3 }
2072    };
2073 
2074    run_shader(kernel_source, out.size(), 1, 1, out, in);
2075    for (int i = 0; i < out.size(); ++i) {
2076       EXPECT_EQ(out[i].uc, expected[i].uc);
2077       EXPECT_EQ(out[i].ul, expected[i].ul);
2078       EXPECT_EQ(out[i].us, expected[i].us);
2079    }
2080 }
2081 
TEST_F(ComputeTest,packed_struct_local)2082 TEST_F(ComputeTest, packed_struct_local)
2083 {
2084 #pragma pack(push, 1)
2085    struct s { uint8_t uc; uint64_t ul; uint16_t us; };
2086 #pragma pack(pop)
2087 
2088    const char *kernel_source =
2089    "struct __attribute__((packed)) s {uchar uc; ulong ul; ushort us; };\n\
2090    __kernel void main_test(__global struct s *out, __constant struct s *in)\n\
2091    {\n\
2092        uint idx = get_global_id(0);\n\
2093        __local struct s tmp[2];\n\
2094        tmp[get_local_id(0)] = in[idx];\n\
2095        barrier(CLK_LOCAL_MEM_FENCE);\n\
2096        out[idx] = tmp[(get_local_id(0) + 1) % 2];\n\
2097    }\n";
2098    auto out = ShaderArg<struct s>({{0, 0, 0}, {0, 0, 0}}, SHADER_ARG_OUTPUT);
2099    auto in = ShaderArg<struct s>({{1, 2, 3}, {0x12, 0x123456789abcdef, 0x1234} }, SHADER_ARG_INPUT);
2100    const struct s expected[] = {
2101       { 0x12, 0x123456789abcdef, 0x1234 },
2102       { 1, 2, 3 },
2103    };
2104 
2105    run_shader(kernel_source, out.size(), 1, 1, out, in);
2106    for (int i = 0; i < out.size(); ++i) {
2107       EXPECT_EQ(out[i].uc, expected[i].uc);
2108       EXPECT_EQ(out[i].ul, expected[i].ul);
2109       EXPECT_EQ(out[i].us, expected[i].us);
2110    }
2111 }
2112 
2113 /* DISABLED because current release versions of WARP either return
2114  * rubbish from reads or crash: they are not prepared to handle
2115  * non-float global constants */
TEST_F(ComputeTest,DISABLED_packed_struct_const)2116 TEST_F(ComputeTest, DISABLED_packed_struct_const)
2117 {
2118 #pragma pack(push, 1)
2119    struct s { uint8_t uc; uint64_t ul; uint16_t us; };
2120 #pragma pack(pop)
2121 
2122    const char *kernel_source =
2123    "struct __attribute__((packed)) s {uchar uc; ulong ul; ushort us; };\n\
2124    __kernel void main_test(__global struct s *out, struct s in)\n\
2125    {\n\
2126        __constant struct s base[] = {\n\
2127           {0x12, 0x123456789abcdef, 0x1234},\n\
2128           {0x11, 0x123456789abcdee, 0x1233},\n\
2129        };\n\
2130        uint idx = get_global_id(0);\n\
2131        out[idx].uc = base[idx % 2].uc + in.uc;\n\
2132        out[idx].ul = base[idx % 2].ul + in.ul;\n\
2133        out[idx].us = base[idx % 2].us + in.us;\n\
2134    }\n";
2135    auto out = ShaderArg<struct s>(std::vector<struct s>(2, {0, 0, 0}), SHADER_ARG_OUTPUT);
2136    auto in = ShaderArg<struct s>({1, 2, 3}, SHADER_ARG_INPUT);
2137    const struct s expected[] = {
2138       { 0x12 + 1, 0x123456789abcdef + 2, 0x1234 + 3 },
2139       { 0x11 + 1, 0x123456789abcdee + 2, 0x1233 + 3 },
2140    };
2141 
2142    run_shader(kernel_source, out.size(), 1, 1, out, in);
2143    for (int i = 0; i < out.size(); ++i) {
2144       EXPECT_EQ(out[i].uc, expected[i].uc);
2145       EXPECT_EQ(out[i].ul, expected[i].ul);
2146       EXPECT_EQ(out[i].us, expected[i].us);
2147    }
2148 }
2149 
TEST_F(ComputeTest,DISABLED_printf)2150 TEST_F(ComputeTest, DISABLED_printf)
2151 {
2152    const char *kernel_source = R"(
2153    __kernel void main_test(__global float *src, __global uint *dest)
2154    {
2155       __constant char *format_str = "%s: %f";
2156       __constant char *str_val = "Test";
2157       *dest = printf(format_str, str_val, src[0]);
2158    })";
2159 
2160    auto src = ShaderArg<float>({ 1.0f }, SHADER_ARG_INPUT);
2161    auto dest = ShaderArg<uint32_t>({ 0xdeadbeef }, SHADER_ARG_OUTPUT);
2162    run_shader(kernel_source, 1, 1, 1, src, dest);
2163    EXPECT_EQ(dest[0], 0);
2164 }
2165 
TEST_F(ComputeTest,vload_half)2166 TEST_F(ComputeTest, vload_half)
2167 {
2168    const char *kernel_source = R"(
2169    __kernel void main_test(__global half *src, __global float4 *dest)
2170    {
2171       int offset = get_global_id(0);
2172       dest[offset] = vload_half4(offset, src);
2173    })";
2174    auto src = ShaderArg<uint16_t>({ 0x3c00, 0x4000, 0x4200, 0x4400,
2175                                     0x4500, 0x4600, 0x4700, 0x4800 }, SHADER_ARG_INPUT);
2176    auto dest = ShaderArg<float>({ FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX,
2177                                   FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX }, SHADER_ARG_OUTPUT);
2178    run_shader(kernel_source, 2, 1, 1, src, dest);
2179    for (unsigned i = 0; i < 8; ++i)
2180       EXPECT_FLOAT_EQ(dest[i], (float)(i + 1));
2181 }
2182 
TEST_F(ComputeTest,vstore_half)2183 TEST_F(ComputeTest, vstore_half)
2184 {
2185    const char *kernel_source = R"(
2186    __kernel void main_test(__global half *dst, __global float4 *src)
2187    {
2188       int offset = get_global_id(0);
2189       vstore_half4(src[offset], offset, dst);
2190    })";
2191    auto dest = ShaderArg<uint16_t>({0xdead, 0xdead, 0xdead, 0xdead,
2192                                    0xdead, 0xdead, 0xdead, 0xdead}, SHADER_ARG_OUTPUT);
2193    auto src = ShaderArg<float>({ 1.0, 2.0, 3.0, 4.0,
2194                                   5.0, 6.0, 7.0, 8.0 }, SHADER_ARG_INPUT);
2195    run_shader(kernel_source, 2, 1, 1, dest, src);
2196    const uint16_t expected[] = { 0x3c00, 0x4000, 0x4200, 0x4400,
2197                                  0x4500, 0x4600, 0x4700, 0x4800 };
2198    for (unsigned i = 0; i < 8; ++i)
2199       EXPECT_EQ(dest[i], expected[i]);
2200 }
2201 
TEST_F(ComputeTest,inline_function)2202 TEST_F(ComputeTest, inline_function)
2203 {
2204    const char *kernel_source = R"(
2205    inline float helper(float foo)
2206    {
2207       return foo * 2;
2208    }
2209 
2210    __kernel void main_test(__global float *dst, __global float *src)
2211    {
2212       *dst = helper(*src);
2213    })";
2214    auto dest = ShaderArg<float>({ NAN }, SHADER_ARG_OUTPUT);
2215    auto src = ShaderArg<float>({ 1.0f }, SHADER_ARG_INPUT);
2216    run_shader(kernel_source, 1, 1, 1, dest, src);
2217    EXPECT_EQ(dest[0], 2.0f);
2218 }
2219 
TEST_F(ComputeTest,unused_arg)2220 TEST_F(ComputeTest, unused_arg)
2221 {
2222    const char *kernel_source = R"(
2223    __kernel void main_test(__global int *dst, __global int *unused, __global int *src)
2224    {
2225       int i = get_global_id(0);
2226       dst[i] = src[i];
2227    })";
2228    auto dest = ShaderArg<int>({ -1, -1, -1, -1 }, SHADER_ARG_OUTPUT);
2229    auto src = ShaderArg<int>({ 1, 2, 3, 4 }, SHADER_ARG_INPUT);
2230    auto unused = ShaderArg<int>({ -1, -1, -1, -1 }, SHADER_ARG_INPUT);
2231    run_shader(kernel_source, 4, 1, 1, dest, unused, src);
2232    for (int i = 0; i < 4; ++i)
2233       EXPECT_EQ(dest[i], i + 1);
2234 }
2235 
TEST_F(ComputeTest,spec_constant)2236 TEST_F(ComputeTest, spec_constant)
2237 {
2238    const char *spirv_asm = R"(
2239                OpCapability Addresses
2240                OpCapability Kernel
2241                OpCapability Int64
2242           %1 = OpExtInstImport "OpenCL.std"
2243                OpMemoryModel Physical64 OpenCL
2244                OpEntryPoint Kernel %2 "main_test" %__spirv_BuiltInGlobalInvocationId
2245           %4 = OpString "kernel_arg_type.main_test.uint*,"
2246                OpSource OpenCL_C 102000
2247                OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId"
2248                OpName %output "output"
2249                OpName %entry "entry"
2250                OpName %output_addr "output.addr"
2251                OpName %id "id"
2252                OpName %call "call"
2253                OpName %conv "conv"
2254                OpName %idxprom "idxprom"
2255                OpName %arrayidx "arrayidx"
2256                OpName %add "add"
2257                OpName %mul "mul"
2258                OpName %idxprom1 "idxprom1"
2259                OpName %arrayidx2 "arrayidx2"
2260                OpDecorate %__spirv_BuiltInGlobalInvocationId BuiltIn GlobalInvocationId
2261                OpDecorate %__spirv_BuiltInGlobalInvocationId Constant
2262                OpDecorate %id Alignment 4
2263                OpDecorate %output_addr Alignment 8
2264                OpDecorate %uint_1 SpecId 1
2265       %ulong = OpTypeInt 64 0
2266        %uint = OpTypeInt 32 0
2267      %uint_1 = OpSpecConstant %uint 1
2268     %v3ulong = OpTypeVector %ulong 3
2269 %_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
2270        %void = OpTypeVoid
2271 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
2272          %24 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint
2273 %_ptr_Function__ptr_CrossWorkgroup_uint = OpTypePointer Function %_ptr_CrossWorkgroup_uint
2274 %_ptr_Function_uint = OpTypePointer Function %uint
2275 %__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_Input_v3ulong Input
2276           %2 = OpFunction %void DontInline %24
2277      %output = OpFunctionParameter %_ptr_CrossWorkgroup_uint
2278       %entry = OpLabel
2279 %output_addr = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uint Function
2280          %id = OpVariable %_ptr_Function_uint Function
2281                OpStore %output_addr %output Aligned 8
2282          %27 = OpLoad %v3ulong %__spirv_BuiltInGlobalInvocationId Aligned 32
2283        %call = OpCompositeExtract %ulong %27 0
2284        %conv = OpUConvert %uint %call
2285                OpStore %id %conv Aligned 4
2286          %28 = OpLoad %_ptr_CrossWorkgroup_uint %output_addr Aligned 8
2287          %29 = OpLoad %uint %id Aligned 4
2288     %idxprom = OpUConvert %ulong %29
2289    %arrayidx = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uint %28 %idxprom
2290          %30 = OpLoad %uint %arrayidx Aligned 4
2291          %31 = OpLoad %uint %id Aligned 4
2292         %add = OpIAdd %uint %31 %uint_1
2293         %mul = OpIMul %uint %30 %add
2294          %32 = OpLoad %_ptr_CrossWorkgroup_uint %output_addr Aligned 8
2295          %33 = OpLoad %uint %id Aligned 4
2296    %idxprom1 = OpUConvert %ulong %33
2297   %arrayidx2 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uint %32 %idxprom1
2298                OpStore %arrayidx2 %mul Aligned 4
2299                OpReturn
2300                OpFunctionEnd)";
2301    Shader shader = assemble(spirv_asm);
2302    Shader spec_shader = specialize(shader, 1, 5);
2303 
2304    auto inout = ShaderArg<uint32_t>({ 0x00000001, 0x10000001, 0x00020002, 0x04010203 },
2305       SHADER_ARG_INOUT);
2306    const uint32_t expected[] = {
2307       0x00000005, 0x60000006, 0x000e000e, 0x20081018
2308    };
2309    CompileArgs args = { inout.size(), 1, 1 };
2310    run_shader(spec_shader, args, inout);
2311    for (int i = 0; i < inout.size(); ++i)
2312       EXPECT_EQ(inout[i], expected[i]);
2313 }
2314