1 /*
2 * Copyright © Microsoft Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <stdio.h>
25 #include <stdint.h>
26 #include <stdexcept>
27 #include <vector>
28
29 #include <directx/d3d12.h>
30 #include <dxgi1_4.h>
31 #include <gtest/gtest.h>
32 #include <wrl.h>
33
34 #include "compute_test.h"
35
36 using std::vector;
37
TEST_F(ComputeTest,runtime_memcpy)38 TEST_F(ComputeTest, runtime_memcpy)
39 {
40 struct shift { uint8_t val; uint8_t shift; uint16_t ret; };
41 const char *kernel_source =
42 "struct shift { uchar val; uchar shift; ushort ret; };\n\
43 __kernel void main_test(__global struct shift *inout)\n\
44 {\n\
45 uint id = get_global_id(0);\n\
46 uint id2 = id + get_global_id(1);\n\
47 struct shift lc[4] = { { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }};\n\
48 lc[id] = inout[id];\n\
49 inout[id2].ret = (ushort) lc[id2].val << (ushort) lc[id2].shift;\n\
50 }\n";
51
52 auto inout = ShaderArg<struct shift>({
53 { 0x10, 1, 0xffff },
54 { 0x20, 2, 0xffff },
55 { 0x30, 3, 0xffff },
56 { 0x40, 4, 0xffff },
57 },
58 SHADER_ARG_INOUT);
59 const uint16_t expected[] = { 0x20, 0x80, 0x180, 0x400 };
60 run_shader(kernel_source, inout.size(), 1, 1, inout);
61 for (int i = 0; i < inout.size(); ++i)
62 EXPECT_EQ(inout[i].ret, expected[i]);
63 }
64
TEST_F(ComputeTest,two_global_arrays)65 TEST_F(ComputeTest, two_global_arrays)
66 {
67 const char *kernel_source =
68 "__kernel void main_test(__global uint *g1, __global uint *g2)\n\
69 {\n\
70 uint idx = get_global_id(0);\n\
71 g1[idx] -= g2[idx];\n\
72 }\n";
73 auto g1 = ShaderArg<uint32_t>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT);
74 auto g2 = ShaderArg<uint32_t>({ 1, 2, 3, 4 }, SHADER_ARG_INPUT);
75 const uint32_t expected[] = {
76 9, 18, 27, 36
77 };
78
79 run_shader(kernel_source, g1.size(), 1, 1, g1, g2);
80 for (int i = 0; i < g1.size(); ++i)
81 EXPECT_EQ(g1[i], expected[i]);
82 }
83
84 /* Disabled until saturated conversions from f32->i64 fixed (mesa/mesa#3824) */
TEST_F(ComputeTest,DISABLED_i64tof32)85 TEST_F(ComputeTest, DISABLED_i64tof32)
86 {
87 const char *kernel_source =
88 "__kernel void main_test(__global long *out, __constant long *in)\n\
89 {\n\
90 __local float tmp[12];\n\
91 uint idx = get_global_id(0);\n\
92 tmp[idx] = in[idx];\n\
93 barrier(CLK_LOCAL_MEM_FENCE);\n\
94 out[idx] = tmp[idx + get_global_id(1)];\n\
95 }\n";
96 auto in = ShaderArg<int64_t>({ 0x100000000LL,
97 -0x100000000LL,
98 0x7fffffffffffffffLL,
99 0x4000004000000000LL,
100 0x4000003fffffffffLL,
101 0x4000004000000001LL,
102 -1,
103 -0x4000004000000000LL,
104 -0x4000003fffffffffLL,
105 -0x4000004000000001LL,
106 0,
107 INT64_MIN },
108 SHADER_ARG_INPUT);
109 auto out = ShaderArg<int64_t>(std::vector<int64_t>(12, 0xdeadbeed), SHADER_ARG_OUTPUT);
110 const int64_t expected[] = {
111 0x100000000LL,
112 -0x100000000LL,
113 0x7fffffffffffffffLL,
114 0x4000000000000000LL,
115 0x4000000000000000LL,
116 0x4000008000000000LL,
117 -1,
118 -0x4000000000000000LL,
119 -0x4000000000000000LL,
120 -0x4000008000000000LL,
121 0,
122 INT64_MIN,
123 };
124
125 run_shader(kernel_source, out.size(), 1, 1, out, in);
126 for (int i = 0; i < out.size(); ++i) {
127 EXPECT_EQ((int64_t)out[i], expected[i]);
128 }
129 }
TEST_F(ComputeTest,two_constant_arrays)130 TEST_F(ComputeTest, two_constant_arrays)
131 {
132 const char *kernel_source =
133 "__kernel void main_test(__constant uint *c1, __global uint *g1, __constant uint *c2)\n\
134 {\n\
135 uint idx = get_global_id(0);\n\
136 g1[idx] -= c1[idx] + c2[idx];\n\
137 }\n";
138 auto g1 = ShaderArg<uint32_t>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT);
139 auto c1 = ShaderArg<uint32_t>({ 1, 2, 3, 4 }, SHADER_ARG_INPUT);
140 auto c2 = ShaderArg<uint32_t>(std::vector<uint32_t>(16384, 5), SHADER_ARG_INPUT);
141 const uint32_t expected[] = {
142 4, 13, 22, 31
143 };
144
145 run_shader(kernel_source, g1.size(), 1, 1, c1, g1, c2);
146 for (int i = 0; i < g1.size(); ++i)
147 EXPECT_EQ(g1[i], expected[i]);
148 }
149
TEST_F(ComputeTest,null_constant_ptr)150 TEST_F(ComputeTest, null_constant_ptr)
151 {
152 const char *kernel_source =
153 "__kernel void main_test(__global uint *g1, __constant uint *c1)\n\
154 {\n\
155 __constant uint fallback[] = {2, 3, 4, 5};\n\
156 __constant uint *c = c1 ? c1 : fallback;\n\
157 uint idx = get_global_id(0);\n\
158 g1[idx] -= c[idx];\n\
159 }\n";
160 auto g1 = ShaderArg<uint32_t>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT);
161 auto c1 = ShaderArg<uint32_t>({ 1, 2, 3, 4 }, SHADER_ARG_INPUT);
162 const uint32_t expected1[] = {
163 9, 18, 27, 36
164 };
165
166 run_shader(kernel_source, g1.size(), 1, 1, g1, c1);
167 for (int i = 0; i < g1.size(); ++i)
168 EXPECT_EQ(g1[i], expected1[i]);
169
170 const uint32_t expected2[] = {
171 8, 17, 26, 35
172 };
173
174 g1 = ShaderArg<uint32_t>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT);
175 auto c2 = NullShaderArg();
176 run_shader(kernel_source, g1.size(), 1, 1, g1, c2);
177 for (int i = 0; i < g1.size(); ++i)
178 EXPECT_EQ(g1[i], expected2[i]);
179 }
180
181 /* This test seems to fail on older versions of WARP. */
TEST_F(ComputeTest,DISABLED_null_global_ptr)182 TEST_F(ComputeTest, DISABLED_null_global_ptr)
183 {
184 const char *kernel_source =
185 "__kernel void main_test(__global uint *g1, __global uint *g2)\n\
186 {\n\
187 __constant uint fallback[] = {2, 3, 4, 5};\n\
188 uint idx = get_global_id(0);\n\
189 g1[idx] -= g2 ? g2[idx] : fallback[idx];\n\
190 }\n";
191 auto g1 = ShaderArg<uint32_t>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT);
192 auto g2 = ShaderArg<uint32_t>({ 1, 2, 3, 4 }, SHADER_ARG_INPUT);
193 const uint32_t expected1[] = {
194 9, 18, 27, 36
195 };
196
197 run_shader(kernel_source, g1.size(), 1, 1, g1, g2);
198 for (int i = 0; i < g1.size(); ++i)
199 EXPECT_EQ(g1[i], expected1[i]);
200
201 const uint32_t expected2[] = {
202 8, 17, 26, 35
203 };
204
205 g1 = ShaderArg<uint32_t>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT);
206 auto g2null = NullShaderArg();
207 run_shader(kernel_source, g1.size(), 1, 1, g1, g2null);
208 for (int i = 0; i < g1.size(); ++i)
209 EXPECT_EQ(g1[i], expected2[i]);
210 }
211
TEST_F(ComputeTest,ret_constant_ptr)212 TEST_F(ComputeTest, ret_constant_ptr)
213 {
214 struct s { uint64_t ptr; uint32_t val; };
215 const char *kernel_source =
216 "struct s { __constant uint *ptr; uint val; };\n\
217 __kernel void main_test(__global struct s *out, __constant uint *in)\n\
218 {\n\
219 __constant uint foo[] = { 1, 2 };\n\
220 uint idx = get_global_id(0);\n\
221 if (idx == 0)\n\
222 out[idx].ptr = foo;\n\
223 else\n\
224 out[idx].ptr = in;\n\
225 out[idx].val = out[idx].ptr[idx];\n\
226 }\n";
227 auto out = ShaderArg<struct s>(std::vector<struct s>(2, {0xdeadbeefdeadbeef, 0}), SHADER_ARG_OUTPUT);
228 auto in = ShaderArg<uint32_t>({ 3, 4 }, SHADER_ARG_INPUT);
229 const uint32_t expected_val[] = {
230 1, 4
231 };
232 const uint64_t expected_ptr[] = {
233 2ull << 32, 1ull << 32
234 };
235
236 run_shader(kernel_source, out.size(), 1, 1, out, in);
237 for (int i = 0; i < out.size(); ++i) {
238 EXPECT_EQ(out[i].val, expected_val[i]);
239 EXPECT_EQ(out[i].ptr, expected_ptr[i]);
240 }
241 }
242
TEST_F(ComputeTest,ret_global_ptr)243 TEST_F(ComputeTest, ret_global_ptr)
244 {
245 struct s { uint64_t ptr; uint32_t val; };
246 const char *kernel_source =
247 "struct s { __global uint *ptr; uint val; };\n\
248 __kernel void main_test(__global struct s *out, __global uint *in1, __global uint *in2)\n\
249 {\n\
250 uint idx = get_global_id(0);\n\
251 out[idx].ptr = idx ? in2 : in1;\n\
252 out[idx].val = out[idx].ptr[idx];\n\
253 }\n";
254 auto out = ShaderArg<struct s>(std::vector<struct s>(2, {0xdeadbeefdeadbeef, 0}), SHADER_ARG_OUTPUT);
255 auto in1 = ShaderArg<uint32_t>({ 1, 2 }, SHADER_ARG_INPUT);
256 auto in2 = ShaderArg<uint32_t>({ 3, 4 }, SHADER_ARG_INPUT);
257 const uint32_t expected_val[] = {
258 1, 4
259 };
260 const uint64_t expected_ptr[] = {
261 1ull << 32, 2ull << 32
262 };
263
264 run_shader(kernel_source, out.size(), 1, 1, out, in1, in2);
265 for (int i = 0; i < out.size(); ++i) {
266 EXPECT_EQ(out[i].val, expected_val[i]);
267 EXPECT_EQ(out[i].ptr, expected_ptr[i]);
268 }
269 }
270
TEST_F(ComputeTest,ret_local_ptr)271 TEST_F(ComputeTest, ret_local_ptr)
272 {
273 struct s { uint64_t ptr; };
274 const char *kernel_source =
275 "struct s { __local uint *ptr; };\n\
276 __kernel void main_test(__global struct s *out)\n\
277 {\n\
278 __local uint tmp[2];\n\
279 uint idx = get_global_id(0);\n\
280 tmp[idx] = idx;\n\
281 out[idx].ptr = &tmp[idx];\n\
282 }\n";
283 auto out = ShaderArg<struct s>(std::vector<struct s>(2, { 0xdeadbeefdeadbeef }), SHADER_ARG_OUTPUT);
284 const uint64_t expected_ptr[] = {
285 0, 4,
286 };
287
288 run_shader(kernel_source, out.size(), 1, 1, out);
289 for (int i = 0; i < out.size(); ++i) {
290 EXPECT_EQ(out[i].ptr, expected_ptr[i]);
291 }
292 }
293
TEST_F(ComputeTest,ret_private_ptr)294 TEST_F(ComputeTest, ret_private_ptr)
295 {
296 struct s { uint64_t ptr; uint32_t value; };
297 const char *kernel_source =
298 "struct s { __private uint *ptr; uint value; };\n\
299 __kernel void main_test(__global struct s *out)\n\
300 {\n\
301 uint tmp[2] = {1, 2};\n\
302 uint idx = get_global_id(0);\n\
303 out[idx].ptr = &tmp[idx];\n\
304 out[idx].value = *out[idx].ptr;\n\
305 }\n";
306 auto out = ShaderArg<struct s>(std::vector<struct s>(2, { 0xdeadbeefdeadbeef }), SHADER_ARG_OUTPUT);
307 const uint64_t expected_ptr[] = {
308 0, 4,
309 };
310 const uint32_t expected_value[] = {
311 1, 2
312 };
313
314 run_shader(kernel_source, out.size(), 1, 1, out);
315 for (int i = 0; i < out.size(); ++i) {
316 EXPECT_EQ(out[i].ptr, expected_ptr[i]);
317 }
318 }
319
TEST_F(ComputeTest,globals_8bit)320 TEST_F(ComputeTest, globals_8bit)
321 {
322 const char *kernel_source =
323 "__kernel void main_test(__global unsigned char *inout)\n\
324 {\n\
325 uint idx = get_global_id(0);\n\
326 inout[idx] = inout[idx] + 1;\n\
327 }\n";
328 auto inout = ShaderArg<uint8_t> ({ 100, 110, 120, 130 }, SHADER_ARG_INOUT);
329 const uint8_t expected[] = {
330 101, 111, 121, 131
331 };
332 run_shader(kernel_source, inout.size(), 1, 1, inout);
333 for (int i = 0; i < inout.size(); ++i)
334 EXPECT_EQ(inout[i], expected[i]);
335 }
336
TEST_F(ComputeTest,globals_16bit)337 TEST_F(ComputeTest, globals_16bit)
338 {
339 const char *kernel_source =
340 "__kernel void main_test(__global unsigned short *inout)\n\
341 {\n\
342 uint idx = get_global_id(0);\n\
343 inout[idx] = inout[idx] + 1;\n\
344 }\n";
345 auto inout = ShaderArg<uint16_t> ({ 10000, 10010, 10020, 10030 }, SHADER_ARG_INOUT);
346 const uint16_t expected[] = {
347 10001, 10011, 10021, 10031
348 };
349 run_shader(kernel_source, inout.size(), 1, 1, inout);
350 for (int i = 0; i < inout.size(); ++i)
351 EXPECT_EQ(inout[i], expected[i]);
352 }
353
TEST_F(ComputeTest,DISABLED_globals_64bit)354 TEST_F(ComputeTest, DISABLED_globals_64bit)
355 {
356 /* Test disabled, because we need a fixed version of WARP that hasn't
357 been officially shipped yet */
358
359 const char *kernel_source =
360 "__kernel void main_test(__global unsigned long *inout)\n\
361 {\n\
362 uint idx = get_global_id(0);\n\
363 inout[idx] = inout[idx] + 1;\n\
364 }\n";
365 uint64_t base = 1ull << 50;
366 auto inout = ShaderArg<uint64_t>({ base, base + 10, base + 20, base + 30 },
367 SHADER_ARG_INOUT);
368 const uint64_t expected[] = {
369 base + 1, base + 11, base + 21, base + 31
370 };
371 run_shader(kernel_source, inout.size(), 1, 1, inout);
372 for (int i = 0; i < inout.size(); ++i)
373 EXPECT_EQ(inout[i], expected[i]);
374 }
375
TEST_F(ComputeTest,built_ins_global_id)376 TEST_F(ComputeTest, built_ins_global_id)
377 {
378 const char *kernel_source =
379 "__kernel void main_test(__global uint *output)\n\
380 {\n\
381 output[get_global_id(0)] = get_global_id(0);\n\
382 }\n";
383 auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
384 SHADER_ARG_OUTPUT);
385 const uint32_t expected[] = {
386 0, 1, 2, 3
387 };
388
389 run_shader(kernel_source, output.size(), 1, 1, output);
390 for (int i = 0; i < output.size(); ++i)
391 EXPECT_EQ(output[i], expected[i]);
392 }
393
TEST_F(ComputeTest,built_ins_global_id_rmw)394 TEST_F(ComputeTest, built_ins_global_id_rmw)
395 {
396 const char *kernel_source =
397 "__kernel void main_test(__global uint *output)\n\
398 {\n\
399 uint id = get_global_id(0);\n\
400 output[id] = output[id] * (id + 1);\n\
401 }\n";
402 auto inout = ShaderArg<uint32_t>({0x00000001, 0x10000001, 0x00020002, 0x04010203},
403 SHADER_ARG_INOUT);
404 const uint32_t expected[] = {
405 0x00000001, 0x20000002, 0x00060006, 0x1004080c
406 };
407 run_shader(kernel_source, inout.size(), 1, 1, inout);
408 for (int i = 0; i < inout.size(); ++i)
409 EXPECT_EQ(inout[i], expected[i]);
410 }
411
TEST_F(ComputeTest,types_float_basics)412 TEST_F(ComputeTest, types_float_basics)
413 {
414 const char *kernel_source =
415 "__kernel void main_test(__global uint *output)\n\
416 {\n\
417 output[get_global_id(0)] = (uint)((float)get_global_id(0) + 1.5f);\n\
418 }\n";
419 auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
420 SHADER_ARG_OUTPUT);
421 const uint32_t expected[] = {
422 1, 2, 3, 4
423 };
424 run_shader(kernel_source, output.size(), 1, 1, output);
425 for (int i = 0; i < output.size(); ++i)
426 EXPECT_EQ(output[i], expected[i]);
427 }
428
TEST_F(ComputeTest,DISABLED_types_double_basics)429 TEST_F(ComputeTest, DISABLED_types_double_basics)
430 {
431 const char *kernel_source =
432 "__kernel void main_test(__global uint *output)\n\
433 {\n\
434 output[get_global_id(0)] = (uint)((double)get_global_id(0) + 1.5);\n\
435 }\n";
436 auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
437 SHADER_ARG_OUTPUT);
438 const uint32_t expected[] = {
439 1, 2, 3, 4
440 };
441 run_shader(kernel_source, output.size(), 1, 1, output);
442 for (int i = 0; i < output.size(); ++i)
443 EXPECT_EQ(output[i], expected[i]);
444 }
445
TEST_F(ComputeTest,types_short_basics)446 TEST_F(ComputeTest, types_short_basics)
447 {
448 const char *kernel_source =
449 "__kernel void main_test(__global uint *output)\n\
450 {\n\
451 output[get_global_id(0)] = (uint)((short)get_global_id(0) + (short)1);\n\
452 }\n";
453 auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
454 SHADER_ARG_OUTPUT);
455 const uint32_t expected[] = {
456 1, 2, 3, 4
457 };
458 run_shader(kernel_source, output.size(), 1, 1, output);
459 for (int i = 0; i < output.size(); ++i)
460 EXPECT_EQ(output[i], expected[i]);
461 }
462
TEST_F(ComputeTest,types_char_basics)463 TEST_F(ComputeTest, types_char_basics)
464 {
465 const char *kernel_source =
466 "__kernel void main_test(__global uint *output)\n\
467 {\n\
468 output[get_global_id(0)] = (uint)((char)get_global_id(0) + (char)1);\n\
469 }\n";
470 auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
471 SHADER_ARG_OUTPUT);
472 const uint32_t expected[] = {
473 1, 2, 3, 4
474 };
475 run_shader(kernel_source, output.size(), 1, 1, output);
476 for (int i = 0; i < output.size(); ++i)
477 EXPECT_EQ(output[i], expected[i]);
478 }
479
TEST_F(ComputeTest,types_if_statement)480 TEST_F(ComputeTest, types_if_statement)
481 {
482 const char *kernel_source =
483 "__kernel void main_test(__global uint *output)\n\
484 {\n\
485 int idx = get_global_id(0);\n\
486 if (idx > 0)\n\
487 output[idx] = ~idx;\n\
488 else\n\
489 output[0] = 0xff;\n\
490 }\n";
491 auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
492 SHADER_ARG_OUTPUT);
493 const uint32_t expected[] = {
494 0xff, ~1u, ~2u, ~3u
495 };
496 run_shader(kernel_source, output.size(), 1, 1, output);
497 for (int i = 0; i < output.size(); ++i)
498 EXPECT_EQ(output[i], expected[i]);
499 }
500
TEST_F(ComputeTest,types_do_while_loop)501 TEST_F(ComputeTest, types_do_while_loop)
502 {
503 const char *kernel_source =
504 "__kernel void main_test(__global uint *output)\n\
505 {\n\
506 int value = 1;\n\
507 int i = 1, n = get_global_id(0);\n\
508 do {\n\
509 value *= i++;\n\
510 } while (i <= n);\n\
511 output[n] = value;\n\
512 }\n";
513 auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(5, 0xdeadbeef),
514 SHADER_ARG_OUTPUT);
515 const uint32_t expected[] = {
516 1, 1, 1*2, 1*2*3, 1*2*3*4
517 };
518 run_shader(kernel_source, output.size(), 1, 1, output);
519 for (int i = 0; i < output.size(); ++i)
520 EXPECT_EQ(output[i], expected[i]);
521 }
522
TEST_F(ComputeTest,types_for_loop)523 TEST_F(ComputeTest, types_for_loop)
524 {
525 const char *kernel_source =
526 "__kernel void main_test(__global uint *output)\n\
527 {\n\
528 int value = 1;\n\
529 int n = get_global_id(0);\n\
530 for (int i = 1; i <= n; ++i)\n\
531 value *= i;\n\
532 output[n] = value;\n\
533 }\n";
534 auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(5, 0xdeadbeef),
535 SHADER_ARG_OUTPUT);
536 const uint32_t expected[] = {
537 1, 1, 1*2, 1*2*3, 1*2*3*4
538 };
539 run_shader(kernel_source, output.size(), 1, 1, output);
540 for (int i = 0; i < output.size(); ++i)
541 EXPECT_EQ(output[i], expected[i]);
542 }
543
TEST_F(ComputeTest,DISABLED_complex_types_local_array_long)544 TEST_F(ComputeTest, DISABLED_complex_types_local_array_long)
545 {
546 const char *kernel_source =
547 "__kernel void main_test(__global ulong *inout)\n\
548 {\n\
549 ushort tmp[] = {\n\
550 get_global_id(1) + 0x00000000,\n\
551 get_global_id(1) + 0x10000001,\n\
552 get_global_id(1) + 0x20000020,\n\
553 get_global_id(1) + 0x30000300,\n\
554 };\n\
555 uint idx = get_global_id(0);\n\
556 inout[idx] = tmp[idx];\n\
557 }\n";
558 auto inout = ShaderArg<uint64_t>({ 0, 0, 0, 0 }, SHADER_ARG_INOUT);
559 const uint16_t expected[] = {
560 0x00000000, 0x10000001, 0x20000020, 0x30000300,
561 };
562 run_shader(kernel_source, inout.size(), 1, 1, inout);
563 for (int i = 0; i < inout.size(); ++i)
564 EXPECT_EQ(inout[i], expected[i]);
565 }
566
TEST_F(ComputeTest,complex_types_local_array_short)567 TEST_F(ComputeTest, complex_types_local_array_short)
568 {
569 const char *kernel_source =
570 "__kernel void main_test(__global ushort *inout)\n\
571 {\n\
572 ushort tmp[] = {\n\
573 get_global_id(1) + 0x00,\n\
574 get_global_id(1) + 0x10,\n\
575 get_global_id(1) + 0x20,\n\
576 get_global_id(1) + 0x30,\n\
577 };\n\
578 uint idx = get_global_id(0);\n\
579 inout[idx] = tmp[idx];\n\
580 }\n";
581 auto inout = ShaderArg<uint16_t>({ 0, 0, 0, 0 }, SHADER_ARG_INOUT);
582 const uint16_t expected[] = {
583 0x00, 0x10, 0x20, 0x30,
584 };
585 run_shader(kernel_source, inout.size(), 1, 1, inout);
586 for (int i = 0; i < inout.size(); ++i)
587 EXPECT_EQ(inout[i], expected[i]);
588 }
589
TEST_F(ComputeTest,complex_types_local_array_struct_vec_float_misaligned)590 TEST_F(ComputeTest, complex_types_local_array_struct_vec_float_misaligned)
591 {
592 const char *kernel_source =
593 "struct has_vecs { uchar c; ushort s; float2 f; };\n\
594 __kernel void main_test(__global uint *inout)\n\
595 {\n\
596 struct has_vecs tmp[] = {\n\
597 { 10 + get_global_id(0), get_global_id(1), { 10.0f, 1.0f } },\n\
598 { 19 + get_global_id(0), get_global_id(1), { 20.0f, 4.0f } },\n\
599 { 28 + get_global_id(0), get_global_id(1), { 30.0f, 9.0f } },\n\
600 { 37 + get_global_id(0), get_global_id(1), { 40.0f, 16.0f } },\n\
601 };\n\
602 uint idx = get_global_id(0);\n\
603 uint mul = (tmp[idx].c + tmp[idx].s) * trunc(tmp[idx].f[0]);\n\
604 inout[idx] = mul + trunc(tmp[idx].f[1]);\n\
605 }\n";
606 auto inout = ShaderArg<uint32_t>({ 0, 0, 0, 0 }, SHADER_ARG_INOUT);
607 const uint16_t expected[] = { 101, 404, 909, 1616 };
608 run_shader(kernel_source, inout.size(), 1, 1, inout);
609 for (int i = 0; i < inout.size(); ++i)
610 EXPECT_EQ(inout[i], expected[i]);
611 }
612
TEST_F(ComputeTest,complex_types_local_array)613 TEST_F(ComputeTest, complex_types_local_array)
614 {
615 const char *kernel_source =
616 "__kernel void main_test(__global uint *inout)\n\
617 {\n\
618 uint tmp[] = {\n\
619 get_global_id(1) + 0x00,\n\
620 get_global_id(1) + 0x10,\n\
621 get_global_id(1) + 0x20,\n\
622 get_global_id(1) + 0x30,\n\
623 };\n\
624 uint idx = get_global_id(0);\n\
625 inout[idx] = tmp[idx];\n\
626 }\n";
627 auto inout = ShaderArg<uint32_t>({ 0, 0, 0, 0 }, SHADER_ARG_INOUT);
628 const uint32_t expected[] = {
629 0x00, 0x10, 0x20, 0x30,
630 };
631 run_shader(kernel_source, inout.size(), 1, 1, inout);
632 for (int i = 0; i < inout.size(); ++i)
633 EXPECT_EQ(inout[i], expected[i]);
634 }
635
TEST_F(ComputeTest,complex_types_global_struct_array)636 TEST_F(ComputeTest, complex_types_global_struct_array)
637 {
638 struct two_vals { uint32_t add; uint32_t mul; };
639 const char *kernel_source =
640 "struct two_vals { uint add; uint mul; };\n\
641 __kernel void main_test(__global struct two_vals *in_out)\n\
642 {\n\
643 uint id = get_global_id(0);\n\
644 in_out[id].add = in_out[id].add + id;\n\
645 in_out[id].mul = in_out[id].mul * id;\n\
646 }\n";
647 auto inout = ShaderArg<struct two_vals>({ { 8, 8 }, { 16, 16 }, { 64, 64 }, { 65536, 65536 } },
648 SHADER_ARG_INOUT);
649 const struct two_vals expected[] = {
650 { 8 + 0, 8 * 0 },
651 { 16 + 1, 16 * 1 },
652 { 64 + 2, 64 * 2 },
653 { 65536 + 3, 65536 * 3 }
654 };
655 run_shader(kernel_source, inout.size(), 1, 1, inout);
656 for (int i = 0; i < inout.size(); ++i) {
657 EXPECT_EQ(inout[i].add, expected[i].add);
658 EXPECT_EQ(inout[i].mul, expected[i].mul);
659 }
660 }
661
TEST_F(ComputeTest,complex_types_global_uint2)662 TEST_F(ComputeTest, complex_types_global_uint2)
663 {
664 struct uint2 { uint32_t x; uint32_t y; };
665 const char *kernel_source =
666 "__kernel void main_test(__global uint2 *inout)\n\
667 {\n\
668 uint id = get_global_id(0);\n\
669 inout[id].x = inout[id].x + id;\n\
670 inout[id].y = inout[id].y * id;\n\
671 }\n";
672 auto inout = ShaderArg<struct uint2>({ { 8, 8 }, { 16, 16 }, { 64, 64 }, { 65536, 65536 } },
673 SHADER_ARG_INOUT);
674 const struct uint2 expected[] = {
675 { 8 + 0, 8 * 0 },
676 { 16 + 1, 16 * 1 },
677 { 64 + 2, 64 * 2 },
678 { 65536 + 3, 65536 * 3 }
679 };
680 run_shader(kernel_source, inout.size(), 1, 1, inout);
681 for (int i = 0; i < inout.size(); ++i) {
682 EXPECT_EQ(inout[i].x, expected[i].x);
683 EXPECT_EQ(inout[i].y, expected[i].y);
684 }
685 }
686
TEST_F(ComputeTest,complex_types_global_ushort2)687 TEST_F(ComputeTest, complex_types_global_ushort2)
688 {
689 struct ushort2 { uint16_t x; uint16_t y; };
690 const char *kernel_source =
691 "__kernel void main_test(__global ushort2 *inout)\n\
692 {\n\
693 uint id = get_global_id(0);\n\
694 inout[id].x = inout[id].x + id;\n\
695 inout[id].y = inout[id].y * id;\n\
696 }\n";
697 auto inout = ShaderArg<struct ushort2>({ { 8, 8 }, { 16, 16 }, { 64, 64 },
698 { (uint16_t)65536, (uint16_t)65536 } },
699 SHADER_ARG_INOUT);
700 const struct ushort2 expected[] = {
701 { 8 + 0, 8 * 0 },
702 { 16 + 1, 16 * 1 },
703 { 64 + 2, 64 * 2 },
704 { (uint16_t)(65536 + 3), (uint16_t)(65536 * 3) }
705 };
706 run_shader(kernel_source, inout.size(), 1, 1, inout);
707 for (int i = 0; i < inout.size(); ++i) {
708 EXPECT_EQ(inout[i].x, expected[i].x);
709 EXPECT_EQ(inout[i].y, expected[i].y);
710 }
711 }
712
TEST_F(ComputeTest,complex_types_global_uchar3)713 TEST_F(ComputeTest, complex_types_global_uchar3)
714 {
715 struct uchar3 { uint8_t x; uint8_t y; uint8_t z; uint8_t pad; };
716 const char *kernel_source =
717 "__kernel void main_test(__global uchar3 *inout)\n\
718 {\n\
719 uint id = get_global_id(0);\n\
720 inout[id].x = inout[id].x + id;\n\
721 inout[id].y = inout[id].y * id;\n\
722 inout[id].z = inout[id].y + inout[id].x;\n\
723 }\n";
724 auto inout = ShaderArg<struct uchar3>({ { 8, 8, 8 }, { 16, 16, 16 }, { 64, 64, 64 }, { 255, 255, 255 } },
725 SHADER_ARG_INOUT);
726 const struct uchar3 expected[] = {
727 { 8 + 0, 8 * 0, (8 + 0) + (8 * 0) },
728 { 16 + 1, 16 * 1, (16 + 1) + (16 * 1) },
729 { 64 + 2, 64 * 2, (64 + 2) + (64 * 2) },
730 { (uint8_t)(255 + 3), (uint8_t)(255 * 3), (uint8_t)((255 + 3) + (255 * 3)) }
731 };
732 run_shader(kernel_source, inout.size(), 1, 1, inout);
733 for (int i = 0; i < inout.size(); ++i) {
734 EXPECT_EQ(inout[i].x, expected[i].x);
735 EXPECT_EQ(inout[i].y, expected[i].y);
736 EXPECT_EQ(inout[i].z, expected[i].z);
737 }
738 }
739
TEST_F(ComputeTest,complex_types_constant_uchar3)740 TEST_F(ComputeTest, complex_types_constant_uchar3)
741 {
742 struct uchar3 { uint8_t x; uint8_t y; uint8_t z; uint8_t pad; };
743 const char *kernel_source =
744 "__kernel void main_test(__global uchar3 *out, __constant uchar3 *in)\n\
745 {\n\
746 uint id = get_global_id(0);\n\
747 out[id].x = in[id].x + id;\n\
748 out[id].y = in[id].y * id;\n\
749 out[id].z = out[id].y + out[id].x;\n\
750 }\n";
751 auto in = ShaderArg<struct uchar3>({ { 8, 8, 8 }, { 16, 16, 16 }, { 64, 64, 64 }, { 255, 255, 255 } },
752 SHADER_ARG_INPUT);
753 auto out = ShaderArg<struct uchar3>(std::vector<struct uchar3>(4, { 0xff, 0xff, 0xff }),
754 SHADER_ARG_OUTPUT);
755 const struct uchar3 expected[] = {
756 { 8 + 0, 8 * 0, (8 + 0) + (8 * 0) },
757 { 16 + 1, 16 * 1, (16 + 1) + (16 * 1) },
758 { 64 + 2, 64 * 2, (64 + 2) + (64 * 2) },
759 { (uint8_t)(255 + 3), (uint8_t)(255 * 3), (uint8_t)((255 + 3) + (255 * 3)) }
760 };
761 run_shader(kernel_source, out.size(), 1, 1, out, in);
762 for (int i = 0; i < out.size(); ++i) {
763 EXPECT_EQ(out[i].x, expected[i].x);
764 EXPECT_EQ(out[i].y, expected[i].y);
765 EXPECT_EQ(out[i].z, expected[i].z);
766 }
767 }
768
TEST_F(ComputeTest,complex_types_global_uint8)769 TEST_F(ComputeTest, complex_types_global_uint8)
770 {
771 struct uint8 {
772 uint32_t s0; uint32_t s1; uint32_t s2; uint32_t s3;
773 uint32_t s4; uint32_t s5; uint32_t s6; uint32_t s7;
774 };
775 const char *kernel_source =
776 "__kernel void main_test(__global uint8 *inout)\n\
777 {\n\
778 uint id = get_global_id(0);\n\
779 inout[id].s01234567 = inout[id].s01234567 * 2;\n\
780 }\n";
781 auto inout = ShaderArg<struct uint8>({ { 1, 2, 3, 4, 5, 6, 7, 8 } },
782 SHADER_ARG_INOUT);
783 const struct uint8 expected[] = {
784 { 2, 4, 6, 8, 10, 12, 14, 16 }
785 };
786 run_shader(kernel_source, inout.size(), 1, 1, inout);
787 for (int i = 0; i < inout.size(); ++i) {
788 EXPECT_EQ(inout[i].s0, expected[i].s0);
789 EXPECT_EQ(inout[i].s1, expected[i].s1);
790 EXPECT_EQ(inout[i].s2, expected[i].s2);
791 EXPECT_EQ(inout[i].s3, expected[i].s3);
792 EXPECT_EQ(inout[i].s4, expected[i].s4);
793 EXPECT_EQ(inout[i].s5, expected[i].s5);
794 EXPECT_EQ(inout[i].s6, expected[i].s6);
795 EXPECT_EQ(inout[i].s7, expected[i].s7);
796 }
797 }
798
TEST_F(ComputeTest,complex_types_local_ulong16)799 TEST_F(ComputeTest, complex_types_local_ulong16)
800 {
801 struct ulong16 {
802 uint64_t values[16];
803 };
804 const char *kernel_source =
805 R"(__kernel void main_test(__global ulong16 *inout)
806 {
807 __local ulong16 local_array[2];
808 uint id = get_global_id(0);
809 local_array[id] = inout[id];
810 barrier(CLK_LOCAL_MEM_FENCE);
811 inout[id] = local_array[0] * 2;
812 })";
813 auto inout = ShaderArg<struct ulong16>({ { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } },
814 SHADER_ARG_INOUT);
815 const struct ulong16 expected[] = {
816 { 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 }
817 };
818 run_shader(kernel_source, inout.size(), 1, 1, inout);
819 for (int i = 0; i < inout.size(); ++i) {
820 for (int j = 0; j < 16; ++j) {
821 EXPECT_EQ(inout[i].values[j], expected[i].values[j]);
822 }
823 }
824 }
825
TEST_F(ComputeTest,complex_types_constant_uint8)826 TEST_F(ComputeTest, complex_types_constant_uint8)
827 {
828 struct uint8 {
829 uint32_t s0; uint32_t s1; uint32_t s2; uint32_t s3;
830 uint32_t s4; uint32_t s5; uint32_t s6; uint32_t s7;
831 };
832 const char *kernel_source =
833 "__kernel void main_test(__global uint8 *out, __constant uint8 *in)\n\
834 {\n\
835 uint id = get_global_id(0);\n\
836 out[id].s01234567 = in[id].s01234567 * 2;\n\
837 }\n";
838 auto in = ShaderArg<struct uint8>({ { 1, 2, 3, 4, 5, 6, 7, 8 } },
839 SHADER_ARG_INPUT);
840 auto out = ShaderArg<struct uint8>({ { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff } },
841 SHADER_ARG_INOUT);
842 const struct uint8 expected[] = {
843 { 2, 4, 6, 8, 10, 12, 14, 16 }
844 };
845 run_shader(kernel_source, out.size(), 1, 1, out, in);
846 for (int i = 0; i < out.size(); ++i) {
847 EXPECT_EQ(out[i].s0, expected[i].s0);
848 EXPECT_EQ(out[i].s1, expected[i].s1);
849 EXPECT_EQ(out[i].s2, expected[i].s2);
850 EXPECT_EQ(out[i].s3, expected[i].s3);
851 EXPECT_EQ(out[i].s4, expected[i].s4);
852 EXPECT_EQ(out[i].s5, expected[i].s5);
853 EXPECT_EQ(out[i].s6, expected[i].s6);
854 EXPECT_EQ(out[i].s7, expected[i].s7);
855 }
856 }
857
TEST_F(ComputeTest,DISABLED_complex_types_const_array)858 TEST_F(ComputeTest, DISABLED_complex_types_const_array)
859 {
860 /* DISABLED because current release versions of WARP either return
861 * rubbish from reads or crash: they are not prepared to handle
862 * non-float global constants */
863 const char *kernel_source =
864 "__kernel void main_test(__global uint *output)\n\
865 {\n\
866 const uint foo[] = { 100, 101, 102, 103 };\n\
867 output[get_global_id(0)] = foo[get_global_id(0) % 4];\n\
868 }\n";
869 auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
870 SHADER_ARG_OUTPUT);
871 const uint32_t expected[] = {
872 100, 101, 102, 103
873 };
874 run_shader(kernel_source, output.size(), 1, 1, output);
875 for (int i = 0; i < output.size(); ++i)
876 EXPECT_EQ(output[i], expected[i]);
877 }
878
TEST_F(ComputeTest,mem_access_load_store_ordering)879 TEST_F(ComputeTest, mem_access_load_store_ordering)
880 {
881 const char *kernel_source =
882 "__kernel void main_test(__global uint *output)\n\
883 {\n\
884 uint foo[4];\n\
885 foo[0] = 0x11111111;\n\
886 foo[1] = 0x22222222;\n\
887 foo[2] = 0x44444444;\n\
888 foo[3] = 0x88888888;\n\
889 foo[get_global_id(1)] -= 0x11111111; // foo[0] = 0 \n\
890 foo[0] += get_global_id(0); // foo[0] = tid\n\
891 foo[foo[get_global_id(1)]] = get_global_id(0); // foo[tid] = tid\n\
892 output[get_global_id(0)] = foo[get_global_id(0)]; // output[tid] = tid\n\
893 }\n";
894 auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
895 SHADER_ARG_OUTPUT);
896 const uint16_t expected[] = {
897 0, 1, 2, 3
898 };
899 run_shader(kernel_source, output.size(), 1, 1, output);
900 for (int i = 0; i < output.size(); ++i)
901 EXPECT_EQ(output[i], expected[i]);
902 }
903
TEST_F(ComputeTest,DISABLED_two_const_arrays)904 TEST_F(ComputeTest, DISABLED_two_const_arrays)
905 {
906 /* DISABLED because current release versions of WARP either return
907 * rubbish from reads or crash: they are not prepared to handle
908 * non-float global constants */
909 const char *kernel_source =
910 "__kernel void main_test(__global uint *output)\n\
911 {\n\
912 uint id = get_global_id(0);\n\
913 uint foo[4] = {100, 101, 102, 103};\n\
914 uint bar[4] = {1, 2, 3, 4};\n\
915 output[id] = foo[id] * bar[id];\n\
916 }\n";
917 auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
918 SHADER_ARG_OUTPUT);
919 const uint32_t expected[] = {
920 100, 202, 306, 412
921 };
922 run_shader(kernel_source, output.size(), 1, 1, output);
923 for (int i = 0; i < output.size(); ++i)
924 EXPECT_EQ(output[i], expected[i]);
925 }
926
TEST_F(ComputeTest,imod_pos)927 TEST_F(ComputeTest, imod_pos)
928 {
929 const char *kernel_source =
930 "__kernel void main_test(__global int *inout)\n\
931 {\n\
932 inout[get_global_id(0)] = inout[get_global_id(0)] % 3;\n\
933 }\n";
934 auto inout = ShaderArg<int32_t>({ -4, -3, -2, -1, 0, 1, 2, 3, 4 },
935 SHADER_ARG_INOUT);
936 const int32_t expected[] = {
937 -1, 0, -2, -1, 0, 1, 2, 0, 1
938 };
939 run_shader(kernel_source, inout.size(), 1, 1, inout);
940 for (int i = 0; i < inout.size(); ++i)
941 EXPECT_EQ(inout[i], expected[i]);
942 }
943
TEST_F(ComputeTest,imod_neg)944 TEST_F(ComputeTest, imod_neg)
945 {
946 const char *kernel_source =
947 "__kernel void main_test(__global int *inout)\n\
948 {\n\
949 inout[get_global_id(0)] = inout[get_global_id(0)] % -3;\n\
950 }\n";
951 auto inout = ShaderArg<int32_t>({ -4, -3, -2, -1, 0, 1, 2, 3, 4 },
952 SHADER_ARG_INOUT);
953 const int32_t expected[] = {
954 -1, 0, -2, -1, 0, 1, 2, 0, 1
955 };
956 run_shader(kernel_source, inout.size(), 1, 1, inout);
957 for (int i = 0; i < inout.size(); ++i)
958 EXPECT_EQ(inout[i], expected[i]);
959 }
960
TEST_F(ComputeTest,umod)961 TEST_F(ComputeTest, umod)
962 {
963 const char *kernel_source =
964 "__kernel void main_test(__global uint *inout)\n\
965 {\n\
966 inout[get_global_id(0)] = inout[get_global_id(0)] % 0xfffffffc;\n\
967 }\n";
968 auto inout = ShaderArg<uint32_t>({ 0xfffffffa, 0xfffffffb, 0xfffffffc, 0xfffffffd, 0xfffffffe },
969 SHADER_ARG_INOUT);
970 const uint32_t expected[] = {
971 0xfffffffa, 0xfffffffb, 0, 1, 2
972 };
973 run_shader(kernel_source, inout.size(), 1, 1, inout);
974 for (int i = 0; i < inout.size(); ++i)
975 EXPECT_EQ(inout[i], expected[i]);
976 }
977
TEST_F(ComputeTest,rotate)978 TEST_F(ComputeTest, rotate)
979 {
980 const char *kernel_source =
981 "__kernel void main_test(__global uint *inout)\n\
982 {\n\
983 inout[get_global_id(0)] = rotate(inout[get_global_id(0)], (uint)get_global_id(0) * 4);\n\
984 }\n";
985 auto inout = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
986 SHADER_ARG_INOUT);
987 const uint32_t expected[] = {
988 0xdeadbeef, 0xeadbeefd, 0xadbeefde, 0xdbeefdea
989 };
990 run_shader(kernel_source, inout.size(), 1, 1, inout);
991 for (int i = 0; i < inout.size(); ++i)
992 EXPECT_EQ(inout[i], expected[i]);
993 }
994
TEST_F(ComputeTest,popcount)995 TEST_F(ComputeTest, popcount)
996 {
997 const char *kernel_source =
998 "__kernel void main_test(__global uint *inout)\n\
999 {\n\
1000 inout[get_global_id(0)] = popcount(inout[get_global_id(0)]);\n\
1001 }\n";
1002 auto inout = ShaderArg<uint32_t>({ 0, 0x1, 0x3, 0x101, 0x110011, ~0u },
1003 SHADER_ARG_INOUT);
1004 const uint32_t expected[] = {
1005 0, 1, 2, 2, 4, 32
1006 };
1007 run_shader(kernel_source, inout.size(), 1, 1, inout);
1008 for (int i = 0; i < inout.size(); ++i)
1009 EXPECT_EQ(inout[i], expected[i]);
1010 }
1011
TEST_F(ComputeTest,hadd)1012 TEST_F(ComputeTest, hadd)
1013 {
1014 const char *kernel_source =
1015 "__kernel void main_test(__global uint *inout)\n\
1016 {\n\
1017 inout[get_global_id(0)] = hadd(inout[get_global_id(0)], 1u << 31);\n\
1018 }\n";
1019 auto inout = ShaderArg<uint32_t>({ 0, 1, 2, 3, 0xfffffffc, 0xfffffffd, 0xfffffffe, 0xffffffff },
1020 SHADER_ARG_INOUT);
1021 const uint32_t expected[] = {
1022 (1u << 31) >> 1,
1023 ((1u << 31) + 1) >> 1,
1024 ((1u << 31) + 2) >> 1,
1025 ((1u << 31) + 3) >> 1,
1026 ((1ull << 31) + 0xfffffffc) >> 1,
1027 ((1ull << 31) + 0xfffffffd) >> 1,
1028 ((1ull << 31) + 0xfffffffe) >> 1,
1029 ((1ull << 31) + 0xffffffff) >> 1,
1030 };
1031 run_shader(kernel_source, inout.size(), 1, 1, inout);
1032 for (int i = 0; i < inout.size(); ++i)
1033 EXPECT_EQ(inout[i], expected[i]);
1034 }
1035
TEST_F(ComputeTest,rhadd)1036 TEST_F(ComputeTest, rhadd)
1037 {
1038 const char *kernel_source =
1039 "__kernel void main_test(__global uint *inout)\n\
1040 {\n\
1041 inout[get_global_id(0)] = rhadd(inout[get_global_id(0)], 1u << 31);\n\
1042 }\n";
1043 auto inout = ShaderArg<uint32_t>({ 0, 1, 2, 3, 0xfffffffc, 0xfffffffd, 0xfffffffe, 0xffffffff },
1044 SHADER_ARG_INOUT);
1045 const uint32_t expected[] = {
1046 ((1u << 31) + 1) >> 1,
1047 ((1u << 31) + 2) >> 1,
1048 ((1u << 31) + 3) >> 1,
1049 ((1u << 31) + 4) >> 1,
1050 ((1ull << 31) + 0xfffffffd) >> 1,
1051 ((1ull << 31) + 0xfffffffe) >> 1,
1052 ((1ull << 31) + 0xffffffff) >> 1,
1053 ((1ull << 31) + (1ull << 32)) >> 1,
1054 };
1055 run_shader(kernel_source, inout.size(), 1, 1, inout);
1056 for (int i = 0; i < inout.size(); ++i)
1057 EXPECT_EQ(inout[i], expected[i]);
1058 }
1059
TEST_F(ComputeTest,add_sat)1060 TEST_F(ComputeTest, add_sat)
1061 {
1062 const char *kernel_source =
1063 "__kernel void main_test(__global uint *inout)\n\
1064 {\n\
1065 inout[get_global_id(0)] = add_sat(inout[get_global_id(0)], 2u);\n\
1066 }\n";
1067 auto inout = ShaderArg<uint32_t>({ 0xffffffff - 3, 0xffffffff - 2, 0xffffffff - 1, 0xffffffff },
1068 SHADER_ARG_INOUT);
1069 const uint32_t expected[] = {
1070 0xffffffff - 1, 0xffffffff, 0xffffffff, 0xffffffff
1071 };
1072 run_shader(kernel_source, inout.size(), 1, 1, inout);
1073 for (int i = 0; i < inout.size(); ++i)
1074 EXPECT_EQ(inout[i], expected[i]);
1075 }
1076
TEST_F(ComputeTest,sub_sat)1077 TEST_F(ComputeTest, sub_sat)
1078 {
1079 const char *kernel_source =
1080 "__kernel void main_test(__global uint *inout)\n\
1081 {\n\
1082 inout[get_global_id(0)] = sub_sat(inout[get_global_id(0)], 2u);\n\
1083 }\n";
1084 auto inout = ShaderArg<uint32_t>({ 0, 1, 2, 3 }, SHADER_ARG_INOUT);
1085 const uint32_t expected[] = {
1086 0, 0, 0, 1
1087 };
1088 run_shader(kernel_source, inout.size(), 1, 1, inout);
1089 for (int i = 0; i < inout.size(); ++i)
1090 EXPECT_EQ(inout[i], expected[i]);
1091 }
1092
TEST_F(ComputeTest,mul_hi)1093 TEST_F(ComputeTest, mul_hi)
1094 {
1095 const char *kernel_source =
1096 "__kernel void main_test(__global uint *inout)\n\
1097 {\n\
1098 inout[get_global_id(0)] = mul_hi(inout[get_global_id(0)], 1u << 31);\n\
1099 }\n";
1100 auto inout = ShaderArg<uint32_t>({ 0, 1, 2, 3, (1u << 31) }, SHADER_ARG_INOUT);
1101 const uint32_t expected[] = {
1102 0, 0, 1, 1, (1u << 30)
1103 };
1104 run_shader(kernel_source, inout.size(), 1, 1, inout);
1105 for (int i = 0; i < inout.size(); ++i)
1106 EXPECT_EQ(inout[i], expected[i]);
1107 }
1108
TEST_F(ComputeTest,ldexp_x)1109 TEST_F(ComputeTest, ldexp_x)
1110 {
1111 const char *kernel_source =
1112 "__kernel void main_test(__global float *inout)\n\
1113 {\n\
1114 inout[get_global_id(0)] = ldexp(inout[get_global_id(0)], 5);\n\
1115 }\n";
1116 auto inout = ShaderArg<float>({ 0.0f, 0.5f, 1.0f, 2.0f }, SHADER_ARG_INOUT);
1117 const float expected[] = {
1118 ldexp(0.0f, 5), ldexp(0.5f, 5), ldexp(1.0f, 5), ldexp(2.0f, 5)
1119 };
1120 run_shader(kernel_source, inout.size(), 1, 1, inout);
1121 for (int i = 0; i < inout.size(); ++i)
1122 EXPECT_FLOAT_EQ(inout[i], expected[i]);
1123 }
1124
TEST_F(ComputeTest,ldexp_y)1125 TEST_F(ComputeTest, ldexp_y)
1126 {
1127 const char *kernel_source =
1128 "__kernel void main_test(__global float *inout)\n\
1129 {\n\
1130 inout[get_global_id(0)] = ldexp(inout[get_global_id(0)], get_global_id(0));\n\
1131 }\n";
1132 auto inout = ShaderArg<float>({ 0.25f, 0.5f, 0.75f, 1.0f }, SHADER_ARG_INOUT);
1133 const float expected[] = {
1134 ldexp(0.25f, 0), ldexp(0.5f, 1), ldexp(0.75f, 2), ldexp(1.0f, 3)
1135 };
1136 run_shader(kernel_source, inout.size(), 1, 1, inout);
1137 for (int i = 0; i < inout.size(); ++i)
1138 EXPECT_FLOAT_EQ(inout[i], expected[i]);
1139 }
1140
TEST_F(ComputeTest,frexp_ret)1141 TEST_F(ComputeTest, frexp_ret)
1142 {
1143 const char *kernel_source =
1144 "__kernel void main_test(__global float *inout)\n\
1145 {\n\
1146 int exp;\n\
1147 inout[get_global_id(0)] = frexp(inout[get_global_id(0)], &exp);\n\
1148 }\n";
1149 auto inout = ShaderArg<float>({ 0.0f, 0.5f, 1.0f, 3.0f }, SHADER_ARG_INOUT);
1150 const float expected[] = {
1151 0.0f, 0.5f, 0.5f, 0.75f
1152 };
1153 run_shader(kernel_source, inout.size(), 1, 1, inout);
1154 for (int i = 0; i < inout.size(); ++i)
1155 EXPECT_FLOAT_EQ(inout[i], expected[i]);
1156 }
1157
TEST_F(ComputeTest,frexp_exp)1158 TEST_F(ComputeTest, frexp_exp)
1159 {
1160 const char *kernel_source =
1161 "__kernel void main_test(__global float *inout)\n\
1162 {\n\
1163 int exp;\n\
1164 frexp(inout[get_global_id(0)], &exp);\n\
1165 inout[get_global_id(0)] = (float)exp;\n\
1166 }\n";
1167 auto inout = ShaderArg<float>({ 0.0f, 0.5f, 1.0f, 3.0f }, SHADER_ARG_INOUT);
1168 const float expected[] = {
1169 0.0f, 0.0f, 1.0f, 2.0f
1170 };
1171 run_shader(kernel_source, inout.size(), 1, 1, inout);
1172 for (int i = 0; i < inout.size(); ++i)
1173 EXPECT_FLOAT_EQ(inout[i], expected[i]);
1174 }
1175
TEST_F(ComputeTest,clz)1176 TEST_F(ComputeTest, clz)
1177 {
1178 const char *kernel_source =
1179 "__kernel void main_test(__global uint *inout)\n\
1180 {\n\
1181 inout[get_global_id(0)] = clz(inout[get_global_id(0)]);\n\
1182 }\n";
1183 auto inout = ShaderArg<uint32_t>({ 0, 1, 0xffff, (1u << 30), (1u << 31) }, SHADER_ARG_INOUT);
1184 const uint32_t expected[] = {
1185 32, 31, 16, 1, 0
1186 };
1187 run_shader(kernel_source, inout.size(), 1, 1, inout);
1188 for (int i = 0; i < inout.size(); ++i)
1189 EXPECT_FLOAT_EQ(inout[i], expected[i]);
1190 }
1191
TEST_F(ComputeTest,sin)1192 TEST_F(ComputeTest, sin)
1193 {
1194 struct sin_vals { float in; float clc; float native; };
1195 const char *kernel_source =
1196 "struct sin_vals { float in; float clc; float native; };\n\
1197 __kernel void main_test(__global struct sin_vals *inout)\n\
1198 {\n\
1199 inout[get_global_id(0)].clc = sin(inout[get_global_id(0)].in);\n\
1200 inout[get_global_id(0)].native = native_sin(inout[get_global_id(0)].in);\n\
1201 }\n";
1202 const vector<sin_vals> input = {
1203 { 0.0f, 0.0f, 0.0f },
1204 { 1.0f, 0.0f, 0.0f },
1205 { 2.0f, 0.0f, 0.0f },
1206 { 3.0f, 0.0f, 0.0f },
1207 };
1208 auto inout = ShaderArg<sin_vals>(input, SHADER_ARG_INOUT);
1209 const struct sin_vals expected[] = {
1210 { 0.0f, 0.0f, 0.0f },
1211 { 1.0f, sin(1.0f), sin(1.0f) },
1212 { 2.0f, sin(2.0f), sin(2.0f) },
1213 { 3.0f, sin(3.0f), sin(3.0f) },
1214 };
1215 run_shader(kernel_source, inout.size(), 1, 1, inout);
1216 for (int i = 0; i < inout.size(); ++i) {
1217 EXPECT_FLOAT_EQ(inout[i].in, inout[i].in);
1218 EXPECT_FLOAT_EQ(inout[i].clc, inout[i].clc);
1219 EXPECT_NEAR(inout[i].clc, inout[i].native, 0.008f); // range from DXIL spec
1220 }
1221 }
1222
TEST_F(ComputeTest,DISABLED_cosh)1223 TEST_F(ComputeTest, DISABLED_cosh)
1224 {
1225 /* Disabled because of WARP failures, where we fetch incorrect results when
1226 * sourcing from non-float ICBs */
1227 const char *kernel_source =
1228 "__kernel void main_test(__global float *inout)\n\
1229 {\n\
1230 inout[get_global_id(0)] = cosh(inout[get_global_id(0)]);\n\
1231 }\n";
1232 auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);
1233 const float expected[] = {
1234 cosh(0.0f), cosh(1.0f), cosh(2.0f), cosh(3.0f)
1235 };
1236 run_shader(kernel_source, inout.size(), 1, 1, inout);
1237 for (int i = 0; i < inout.size(); ++i)
1238 EXPECT_FLOAT_EQ(inout[i], expected[i]);
1239 }
1240
TEST_F(ComputeTest,exp)1241 TEST_F(ComputeTest, exp)
1242 {
1243 const char *kernel_source =
1244 "__kernel void main_test(__global float *inout)\n\
1245 {\n\
1246 inout[get_global_id(0)] = native_exp(inout[get_global_id(0)]);\n\
1247 }\n";
1248 auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);
1249 const float expected[] = {
1250 exp(0.0f), exp(1.0f), exp(2.0f), exp(3.0f)
1251 };
1252 run_shader(kernel_source, inout.size(), 1, 1, inout);
1253 for (int i = 0; i < inout.size(); ++i)
1254 EXPECT_FLOAT_EQ(inout[i], expected[i]);
1255 }
1256
TEST_F(ComputeTest,exp10)1257 TEST_F(ComputeTest, exp10)
1258 {
1259 const char *kernel_source =
1260 "__kernel void main_test(__global float *inout)\n\
1261 {\n\
1262 inout[get_global_id(0)] = native_exp10(inout[get_global_id(0)]);\n\
1263 }\n";
1264 auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);
1265 const float expected[] = {
1266 pow(10.0f, 0.0f), pow(10.0f, 1.0f), pow(10.0f, 2.0f), pow(10.0f, 3.0f)
1267 };
1268 run_shader(kernel_source, inout.size(), 1, 1, inout);
1269 for (int i = 0; i < inout.size(); ++i)
1270 EXPECT_FLOAT_EQ(inout[i], expected[i]);
1271 }
1272
TEST_F(ComputeTest,exp2)1273 TEST_F(ComputeTest, exp2)
1274 {
1275 const char *kernel_source =
1276 "__kernel void main_test(__global float *inout)\n\
1277 {\n\
1278 inout[get_global_id(0)] = native_exp2(inout[get_global_id(0)]);\n\
1279 }\n";
1280 auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);
1281 const float expected[] = {
1282 pow(2.0f, 0.0f), pow(2.0f, 1.0f), pow(2.0f, 2.0f), pow(2.0f, 3.0f)
1283 };
1284 run_shader(kernel_source, inout.size(), 1, 1, inout);
1285 for (int i = 0; i < inout.size(); ++i)
1286 EXPECT_FLOAT_EQ(inout[i], expected[i]);
1287 }
1288
TEST_F(ComputeTest,log)1289 TEST_F(ComputeTest, log)
1290 {
1291 const char *kernel_source =
1292 "__kernel void main_test(__global float *inout)\n\
1293 {\n\
1294 inout[get_global_id(0)] = native_log(inout[get_global_id(0)]);\n\
1295 }\n";
1296 auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);
1297 const float expected[] = {
1298 log(0.0f), log(1.0f), log(2.0f), log(3.0f)
1299 };
1300 run_shader(kernel_source, inout.size(), 1, 1, inout);
1301 for (int i = 0; i < inout.size(); ++i)
1302 EXPECT_FLOAT_EQ(inout[i], expected[i]);
1303 }
1304
TEST_F(ComputeTest,log10)1305 TEST_F(ComputeTest, log10)
1306 {
1307 const char *kernel_source =
1308 "__kernel void main_test(__global float *inout)\n\
1309 {\n\
1310 inout[get_global_id(0)] = native_log10(inout[get_global_id(0)]);\n\
1311 }\n";
1312 auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);
1313 const float expected[] = {
1314 log10(0.0f), log10(1.0f), log10(2.0f), log10(3.0f)
1315 };
1316 run_shader(kernel_source, inout.size(), 1, 1, inout);
1317 for (int i = 0; i < inout.size(); ++i)
1318 EXPECT_FLOAT_EQ(inout[i], expected[i]);
1319 }
1320
TEST_F(ComputeTest,log2)1321 TEST_F(ComputeTest, log2)
1322 {
1323 const char *kernel_source =
1324 "__kernel void main_test(__global float *inout)\n\
1325 {\n\
1326 inout[get_global_id(0)] = native_log2(inout[get_global_id(0)]);\n\
1327 }\n";
1328 auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);
1329 const float expected[] = {
1330 log(0.0f) / log(2), log(1.0f) / log(2), log(2.0f) / log(2), log(3.0f) / log(2)
1331 };
1332 run_shader(kernel_source, inout.size(), 1, 1, inout);
1333 for (int i = 0; i < inout.size(); ++i)
1334 EXPECT_FLOAT_EQ(inout[i], expected[i]);
1335 }
1336
TEST_F(ComputeTest,rint)1337 TEST_F(ComputeTest, rint)
1338 {
1339 const char *kernel_source =
1340 "__kernel void main_test(__global float *inout)\n\
1341 {\n\
1342 inout[get_global_id(0)] = rint(inout[get_global_id(0)]);\n\
1343 }\n";
1344
1345 auto inout = ShaderArg<float>({ 0.5f, 1.5f, -0.5f, -1.5f, 1.4f }, SHADER_ARG_INOUT);
1346 const float expected[] = {
1347 0.0f, 2.0f, 0.0f, -2.0f, 1.0f,
1348 };
1349 run_shader(kernel_source, inout.size(), 1, 1, inout);
1350 for (int i = 0; i < inout.size(); ++i)
1351 EXPECT_FLOAT_EQ(inout[i], expected[i]);
1352 }
1353
TEST_F(ComputeTest,round)1354 TEST_F(ComputeTest, round)
1355 {
1356 const char *kernel_source =
1357 "__kernel void main_test(__global float *inout)\n\
1358 {\n\
1359 inout[get_global_id(0)] = round(inout[get_global_id(0)]);\n\
1360 }\n";
1361 auto inout = ShaderArg<float>({ 0, 0.3f, -0.3f, 0.5f, -0.5f, 1.1f, -1.1f },
1362 SHADER_ARG_INOUT);
1363 const float expected[] = {
1364 0.0f, 0.0f, -0.0f, 1.0f, -1.0f, 1.0f, -1.0f
1365 };
1366 run_shader(kernel_source, inout.size(), 1, 1, inout);
1367 for (int i = 0; i < inout.size(); ++i)
1368 EXPECT_FLOAT_EQ(inout[i], expected[i]);
1369 }
1370
TEST_F(ComputeTest,arg_by_val)1371 TEST_F(ComputeTest, arg_by_val)
1372 {
1373 const char *kernel_source =
1374 "__kernel void main_test(__global float *inout, float mul)\n\
1375 {\n\
1376 inout[get_global_id(0)] = inout[get_global_id(0)] * mul;\n\
1377 }\n";
1378 auto inout = ShaderArg<float>({ 0, 0.3f, -0.3f, 0.5f, -0.5f, 1.1f, -1.1f },
1379 SHADER_ARG_INOUT);
1380 auto mul = ShaderArg<float>(10.0f, SHADER_ARG_INPUT);
1381 const float expected[] = {
1382 0.0f, 3.0f, -3.0f, 5.0f, -5.0f, 11.0f, -11.0f
1383 };
1384 run_shader(kernel_source, inout.size(), 1, 1, inout, mul);
1385 for (int i = 0; i < inout.size(); ++i)
1386 EXPECT_FLOAT_EQ(inout[i], expected[i]);
1387 }
1388
TEST_F(ComputeTest,uint8_by_val)1389 TEST_F(ComputeTest, uint8_by_val)
1390 {
1391 struct uint8 {
1392 uint32_t s0; uint32_t s1; uint32_t s2; uint32_t s3;
1393 uint32_t s4; uint32_t s5; uint32_t s6; uint32_t s7;
1394 };
1395 const char *kernel_source =
1396 "__kernel void main_test(__global uint *out, uint8 val)\n\
1397 {\n\
1398 out[get_global_id(0)] = val.s0 + val.s1 + val.s2 + val.s3 +\n\
1399 val.s4 + val.s5 + val.s6 + val.s7;\n\
1400 }\n";
1401 auto out = ShaderArg<uint32_t>({ 0 }, SHADER_ARG_OUTPUT);
1402 auto val = ShaderArg<struct uint8>({ {0, 1, 2, 3, 4, 5, 6, 7 }}, SHADER_ARG_INPUT);
1403 const uint32_t expected[] = { 0 + 1 + 2 + 3 + 4 + 5 + 6 + 7 };
1404 run_shader(kernel_source, out.size(), 1, 1, out, val);
1405 for (int i = 0; i < out.size(); ++i)
1406 EXPECT_EQ(out[i], expected[i]);
1407 }
1408
TEST_F(ComputeTest,link)1409 TEST_F(ComputeTest, link)
1410 {
1411 const char *foo_src =
1412 "float foo(float in)\n\
1413 {\n\
1414 return in * in;\n\
1415 }\n";
1416 const char *kernel_source =
1417 "float foo(float in);\n\
1418 __kernel void main_test(__global float *inout)\n\
1419 {\n\
1420 inout[get_global_id(0)] = foo(inout[get_global_id(0)]);\n\
1421 }\n";
1422 std::vector<const char *> srcs = { foo_src, kernel_source };
1423 auto inout = ShaderArg<float>({ 2.0f }, SHADER_ARG_INOUT);
1424 const float expected[] = {
1425 4.0f,
1426 };
1427 run_shader(srcs, inout.size(), 1, 1, inout);
1428 for (int i = 0; i < inout.size(); ++i)
1429 EXPECT_EQ(inout[i], expected[i]);
1430 }
1431
TEST_F(ComputeTest,link_library)1432 TEST_F(ComputeTest, link_library)
1433 {
1434 const char *bar_src =
1435 "float bar(float in)\n\
1436 {\n\
1437 return in * 5;\n\
1438 }\n";
1439 const char *foo_src =
1440 "float bar(float in);\n\
1441 float foo(float in)\n\
1442 {\n\
1443 return in * bar(in);\n\
1444 }\n";
1445 const char *kernel_source =
1446 "float foo(float in);\n\
1447 __kernel void main_test(__global float *inout)\n\
1448 {\n\
1449 inout[get_global_id(0)] = foo(inout[get_global_id(0)]);\n\
1450 }\n";
1451 std::vector<Shader> libraries = {
1452 compile({ bar_src, kernel_source }, {}, true),
1453 compile({ foo_src }, {}, true)
1454 };
1455 Shader exe = link(libraries);
1456 auto inout = ShaderArg<float>({ 2.0f }, SHADER_ARG_INOUT);
1457 const float expected[] = {
1458 20.0f,
1459 };
1460 run_shader(exe, { (unsigned)inout.size(), 1, 1 }, inout);
1461 for (int i = 0; i < inout.size(); ++i)
1462 EXPECT_EQ(inout[i], expected[i]);
1463 }
1464
TEST_F(ComputeTest,localvar)1465 TEST_F(ComputeTest, localvar)
1466 {
1467 const char *kernel_source =
1468 "__kernel __attribute__((reqd_work_group_size(2, 1, 1)))\n\
1469 void main_test(__global float *inout)\n\
1470 {\n\
1471 __local float2 tmp[2];\n\
1472 tmp[get_local_id(0)].x = inout[get_global_id(0)] + 1;\n\
1473 tmp[get_local_id(0)].y = inout[get_global_id(0)] - 1;\n\
1474 barrier(CLK_LOCAL_MEM_FENCE);\n\
1475 inout[get_global_id(0)] = tmp[get_local_id(0) % 2].x * tmp[(get_local_id(0) + 1) % 2].y;\n\
1476 }\n";
1477
1478 auto inout = ShaderArg<float>({ 2.0f, 4.0f }, SHADER_ARG_INOUT);
1479 const float expected[] = {
1480 9.0f, 5.0f
1481 };
1482 run_shader(kernel_source, inout.size(), 1, 1, inout);
1483 for (int i = 0; i < inout.size(); ++i)
1484 EXPECT_EQ(inout[i], expected[i]);
1485 }
1486
TEST_F(ComputeTest,localvar_uchar2)1487 TEST_F(ComputeTest, localvar_uchar2)
1488 {
1489 const char *kernel_source =
1490 "__attribute__((reqd_work_group_size(2, 1, 1)))\n\
1491 __kernel void main_test(__global uchar *inout)\n\
1492 {\n\
1493 __local uchar2 tmp[2];\n\
1494 tmp[get_local_id(0)].x = inout[get_global_id(0)] + 1;\n\
1495 tmp[get_local_id(0)].y = inout[get_global_id(0)] - 1;\n\
1496 barrier(CLK_LOCAL_MEM_FENCE);\n\
1497 inout[get_global_id(0)] = tmp[get_local_id(0) % 2].x * tmp[(get_local_id(0) + 1) % 2].y;\n\
1498 }\n";
1499
1500 auto inout = ShaderArg<uint8_t>({ 2, 4 }, SHADER_ARG_INOUT);
1501 const uint8_t expected[] = { 9, 5 };
1502 run_shader(kernel_source, inout.size(), 1, 1, inout);
1503 for (int i = 0; i < inout.size(); ++i)
1504 EXPECT_EQ(inout[i], expected[i]);
1505 }
1506
TEST_F(ComputeTest,work_group_size_hint)1507 TEST_F(ComputeTest, work_group_size_hint)
1508 {
1509 const char *kernel_source =
1510 "__attribute__((work_group_size_hint(2, 1, 1)))\n\
1511 __kernel void main_test(__global uint *output)\n\
1512 {\n\
1513 output[get_global_id(0)] = get_local_id(0);\n\
1514 }\n";
1515 auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
1516 SHADER_ARG_OUTPUT);
1517 const uint32_t expected[] = {
1518 0, 1, 2, 3
1519 };
1520 run_shader(kernel_source, output.size(), 1, 1, output);
1521 for (int i = 0; i < output.size(); ++i)
1522 EXPECT_EQ(output[i], expected[i]);
1523 }
1524
TEST_F(ComputeTest,reqd_work_group_size)1525 TEST_F(ComputeTest, reqd_work_group_size)
1526 {
1527 const char *kernel_source =
1528 "__attribute__((reqd_work_group_size(2, 1, 1)))\n\
1529 __kernel void main_test(__global uint *output)\n\
1530 {\n\
1531 output[get_global_id(0)] = get_local_id(0);\n\
1532 }\n";
1533 auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
1534 SHADER_ARG_OUTPUT);
1535 const uint32_t expected[] = {
1536 0, 1, 0, 1
1537 };
1538 run_shader(kernel_source, output.size(), 1, 1, output);
1539 for (int i = 0; i < output.size(); ++i)
1540 EXPECT_EQ(output[i], expected[i]);
1541 }
1542
TEST_F(ComputeTest,image)1543 TEST_F(ComputeTest, image)
1544 {
1545 const char* kernel_source =
1546 "__kernel void main_test(read_only image2d_t input, write_only image2d_t output)\n\
1547 {\n\
1548 int2 coords = (int2)(get_global_id(0), get_global_id(1));\n\
1549 write_imagef(output, coords, read_imagef(input, coords));\n\
1550 }\n";
1551 Shader shader = compile(std::vector<const char*>({ kernel_source }));
1552 validate(shader);
1553 }
1554
TEST_F(ComputeTest,image_two_reads)1555 TEST_F(ComputeTest, image_two_reads)
1556 {
1557 const char* kernel_source =
1558 "__kernel void main_test(image2d_t image, int is_float, __global float* output)\n\
1559 {\n\
1560 if (is_float)\n\
1561 output[get_global_id(0)] = read_imagef(image, (int2)(0, 0)).x;\n\
1562 else \n\
1563 output[get_global_id(0)] = (float)read_imagei(image, (int2)(0, 0)).x;\n\
1564 }\n";
1565 Shader shader = compile(std::vector<const char*>({ kernel_source }));
1566 validate(shader);
1567 }
1568
TEST_F(ComputeTest,image_read_write)1569 TEST_F(ComputeTest, image_read_write)
1570 {
1571 const char *kernel_source =
1572 R"(__kernel void main_test(read_write image2d_t image)
1573 {
1574 int2 coords = (int2)(get_global_id(0), get_global_id(1));
1575 write_imagef(image, coords, read_imagef(image, coords) + (float4)(1.0f, 1.0f, 1.0f, 1.0f));
1576 })";
1577 Shader shader = compile(std::vector<const char*>({ kernel_source }), { "-cl-std=cl3.0" });
1578 validate(shader);
1579 }
1580
TEST_F(ComputeTest,sampler)1581 TEST_F(ComputeTest, sampler)
1582 {
1583 const char* kernel_source =
1584 "__kernel void main_test(image2d_t image, sampler_t sampler, __global float* output)\n\
1585 {\n\
1586 output[get_global_id(0)] = read_imagef(image, sampler, (int2)(0, 0)).x;\n\
1587 }\n";
1588 Shader shader = compile(std::vector<const char*>({ kernel_source }));
1589 validate(shader);
1590 }
1591
TEST_F(ComputeTest,image_dims)1592 TEST_F(ComputeTest, image_dims)
1593 {
1594 const char* kernel_source =
1595 "__kernel void main_test(image2d_t roimage, write_only image2d_t woimage, __global uint* output)\n\
1596 {\n\
1597 output[get_global_id(0)] = get_image_width(roimage);\n\
1598 output[get_global_id(0) + 1] = get_image_width(woimage);\n\
1599 }\n";
1600 Shader shader = compile(std::vector<const char*>({ kernel_source }));
1601 validate(shader);
1602 }
1603
TEST_F(ComputeTest,image_format)1604 TEST_F(ComputeTest, image_format)
1605 {
1606 const char* kernel_source =
1607 "__kernel void main_test(image2d_t roimage, write_only image2d_t woimage, __global uint* output)\n\
1608 {\n\
1609 output[get_global_id(0)] = get_image_channel_data_type(roimage);\n\
1610 output[get_global_id(0) + 1] = get_image_channel_order(woimage);\n\
1611 }\n";
1612 Shader shader = compile(std::vector<const char*>({ kernel_source }));
1613 validate(shader);
1614 }
1615
TEST_F(ComputeTest,image1d_buffer_t)1616 TEST_F(ComputeTest, image1d_buffer_t)
1617 {
1618 const char* kernel_source =
1619 "__kernel void main_test(read_only image1d_buffer_t input, write_only image1d_buffer_t output)\n\
1620 {\n\
1621 write_imageui(output, get_global_id(0), read_imageui(input, get_global_id(0)));\n\
1622 }\n";
1623 Shader shader = compile(std::vector<const char*>({ kernel_source }));
1624 validate(shader);
1625 }
1626
TEST_F(ComputeTest,local_ptr)1627 TEST_F(ComputeTest, local_ptr)
1628 {
1629 struct uint2 { uint32_t x, y; };
1630 const char *kernel_source =
1631 "__kernel void main_test(__global uint *inout, __local uint2 *tmp)\n\
1632 {\n\
1633 tmp[get_local_id(0)].x = inout[get_global_id(0)] + 1;\n\
1634 tmp[get_local_id(0)].y = inout[get_global_id(0)] - 1;\n\
1635 barrier(CLK_LOCAL_MEM_FENCE);\n\
1636 inout[get_global_id(0)] = tmp[get_local_id(0) % 2].x * tmp[(get_local_id(0) + 1) % 2].y;\n\
1637 }\n";
1638 auto inout = ShaderArg<uint32_t>({ 2, 4 }, SHADER_ARG_INOUT);
1639 auto tmp = ShaderArg<struct uint2>(std::vector<struct uint2>(4096), SHADER_ARG_INPUT);
1640 const uint8_t expected[] = { 9, 5 };
1641 run_shader(kernel_source, inout.size(), 1, 1, inout, tmp);
1642 for (int i = 0; i < inout.size(); ++i)
1643 EXPECT_EQ(inout[i], expected[i]);
1644 }
1645
TEST_F(ComputeTest,two_local_ptrs)1646 TEST_F(ComputeTest, two_local_ptrs)
1647 {
1648 struct uint2 { uint32_t x, y; };
1649 const char *kernel_source =
1650 "__kernel void main_test(__global uint *inout, __local uint2 *tmp, __local uint *tmp2)\n\
1651 {\n\
1652 tmp[get_local_id(0)].x = inout[get_global_id(0)] + 1;\n\
1653 tmp[get_local_id(0)].y = inout[get_global_id(0)] - 1;\n\
1654 tmp2[get_local_id(0)] = get_global_id(0);\n\
1655 barrier(CLK_LOCAL_MEM_FENCE);\n\
1656 inout[get_global_id(0)] = tmp[get_local_id(0) % 2].x * tmp[(get_local_id(0) + 1) % 2].y + tmp2[get_local_id(0) % 2];\n\
1657 }\n";
1658 auto inout = ShaderArg<uint32_t>({ 2, 4 }, SHADER_ARG_INOUT);
1659 auto tmp = ShaderArg<struct uint2>(std::vector<struct uint2>(1024), SHADER_ARG_INPUT);
1660 auto tmp2 = ShaderArg<uint32_t>(std::vector<uint32_t>(1024), SHADER_ARG_INPUT);
1661 const uint8_t expected[] = { 9, 6 };
1662 run_shader(kernel_source, inout.size(), 1, 1, inout, tmp, tmp2);
1663 for (int i = 0; i < inout.size(); ++i)
1664 EXPECT_EQ(inout[i], expected[i]);
1665 }
1666
TEST_F(ComputeTest,int8_to_float)1667 TEST_F(ComputeTest, int8_to_float)
1668 {
1669 const char *kernel_source =
1670 "__kernel void main_test(__global char* in, __global float* out)\n\
1671 {\n\
1672 uint pos = get_global_id(0);\n\
1673 out[pos] = in[pos] / 100.0f;\n\
1674 }";
1675 auto in = ShaderArg<char>({ 10, 20, 30, 40 }, SHADER_ARG_INPUT);
1676 auto out = ShaderArg<float>(std::vector<float>(4, std::numeric_limits<float>::infinity()), SHADER_ARG_OUTPUT);
1677 const float expected[] = { 0.1f, 0.2f, 0.3f, 0.4f };
1678 run_shader(kernel_source, in.size(), 1, 1, in, out);
1679 for (int i = 0; i < in.size(); ++i)
1680 EXPECT_FLOAT_EQ(out[i], expected[i]);
1681 }
1682
TEST_F(ComputeTest,vec_hint_float4)1683 TEST_F(ComputeTest, vec_hint_float4)
1684 {
1685 const char *kernel_source =
1686 "__kernel __attribute__((vec_type_hint(float4))) void main_test(__global float *inout)\n\
1687 {\n\
1688 inout[get_global_id(0)] *= inout[get_global_id(1)];\n\
1689 }";
1690 Shader shader = compile({ kernel_source });
1691 EXPECT_EQ(shader.metadata->kernels[0].vec_hint_size, 4);
1692 EXPECT_EQ(shader.metadata->kernels[0].vec_hint_type, CLC_VEC_HINT_TYPE_FLOAT);
1693 }
1694
TEST_F(ComputeTest,vec_hint_uchar2)1695 TEST_F(ComputeTest, vec_hint_uchar2)
1696 {
1697 const char *kernel_source =
1698 "__kernel __attribute__((vec_type_hint(uchar2))) void main_test(__global float *inout)\n\
1699 {\n\
1700 inout[get_global_id(0)] *= inout[get_global_id(1)];\n\
1701 }";
1702 Shader shader = compile({ kernel_source });
1703 EXPECT_EQ(shader.metadata->kernels[0].vec_hint_size, 2);
1704 EXPECT_EQ(shader.metadata->kernels[0].vec_hint_type, CLC_VEC_HINT_TYPE_CHAR);
1705 }
1706
TEST_F(ComputeTest,vec_hint_none)1707 TEST_F(ComputeTest, vec_hint_none)
1708 {
1709 const char *kernel_source =
1710 "__kernel void main_test(__global float *inout)\n\
1711 {\n\
1712 inout[get_global_id(0)] *= inout[get_global_id(1)];\n\
1713 }";
1714 Shader shader = compile({ kernel_source });
1715 EXPECT_EQ(shader.metadata->kernels[0].vec_hint_size, 0);
1716 }
1717
TEST_F(ComputeTest,DISABLED_debug_layer_failure)1718 TEST_F(ComputeTest, DISABLED_debug_layer_failure)
1719 {
1720 const char *kernel_source =
1721 "__kernel void main_test(__global float *inout, float mul)\n\
1722 {\n\
1723 inout[get_global_id(0)] = inout[get_global_id(0)] * mul;\n\
1724 }\n";
1725 auto inout = ShaderArg<float>({ 0, 0.3f, -0.3f, 0.5f, -0.5f, 1.1f, -1.1f },
1726 SHADER_ARG_INOUT);
1727 auto mul = ShaderArg<float>(10.0f, SHADER_ARG_INPUT);
1728 const float expected[] = {
1729 0.0f, 3.0f, -3.0f, 5.0f, -5.0f, 11.0f, -11.0f
1730 };
1731 ComPtr<ID3D12InfoQueue> info_queue;
1732 dev->QueryInterface(info_queue.ReleaseAndGetAddressOf());
1733 if (!info_queue) {
1734 GTEST_SKIP() << "No info queue";
1735 return;
1736 }
1737
1738 info_queue->AddApplicationMessage(D3D12_MESSAGE_SEVERITY_ERROR, "This should cause the test to fail");
1739 run_shader(kernel_source, inout.size(), 1, 1, inout, mul);
1740 for (int i = 0; i < inout.size(); ++i)
1741 EXPECT_FLOAT_EQ(inout[i], expected[i]);
1742 }
1743
TEST_F(ComputeTest,compiler_defines)1744 TEST_F(ComputeTest, compiler_defines)
1745 {
1746 const char *kernel_source =
1747 "__kernel void main_test(__global int* out)\n\
1748 {\n\
1749 out[0] = OUT_VAL0;\n\
1750 out[1] = __OPENCL_C_VERSION__;\n\
1751 }";
1752 auto out = ShaderArg<int>(std::vector<int>(2, 0), SHADER_ARG_OUTPUT);
1753 CompileArgs compile_args = { 1, 1, 1 };
1754 compile_args.compiler_command_line = { "-DOUT_VAL0=5", "-cl-std=cl" };
1755 std::vector<RawShaderArg *> raw_args = { &out };
1756 run_shader({ kernel_source }, compile_args, out);
1757 EXPECT_EQ(out[0], 5);
1758 EXPECT_EQ(out[1], 100);
1759 }
1760
1761 /* There's a bug in WARP turning atomic_add(ptr, x) into
1762 * atomic_add(ptr, x * 4). Works fine on intel HW.
1763 */
TEST_F(ComputeTest,DISABLED_global_atomic_add)1764 TEST_F(ComputeTest, DISABLED_global_atomic_add)
1765 {
1766 const char *kernel_source =
1767 "__kernel void main_test(__global int *inout, __global int *old)\n\
1768 {\n\
1769 old[get_global_id(0)] = atomic_add(inout + get_global_id(0), 3);\n\
1770 }\n";
1771 auto inout = ShaderArg<int32_t>({ 2, 4 }, SHADER_ARG_INOUT);
1772 auto old = ShaderArg<int32_t>(std::vector<int32_t>(2, 0xdeadbeef), SHADER_ARG_OUTPUT);
1773 const int32_t expected_inout[] = { 5, 7 };
1774 const int32_t expected_old[] = { 2, 4 };
1775 run_shader(kernel_source, inout.size(), 1, 1, inout, old);
1776 for (int i = 0; i < inout.size(); ++i) {
1777 EXPECT_EQ(inout[i], expected_inout[i]);
1778 EXPECT_EQ(old[i], expected_old[i]);
1779 }
1780 }
1781
TEST_F(ComputeTest,global_atomic_imin)1782 TEST_F(ComputeTest, global_atomic_imin)
1783 {
1784 const char *kernel_source =
1785 "__kernel void main_test(__global int *inout, __global int *old)\n\
1786 {\n\
1787 old[get_global_id(0)] = atomic_min(inout + get_global_id(0), 1);\n\
1788 }\n";
1789 auto inout = ShaderArg<int32_t>({ 0, 2, -1 }, SHADER_ARG_INOUT);
1790 auto old = ShaderArg<int32_t>(std::vector<int32_t>(3, 0xdeadbeef), SHADER_ARG_OUTPUT);
1791 const int32_t expected_inout[] = { 0, 1, -1 };
1792 const int32_t expected_old[] = { 0, 2, -1 };
1793 run_shader(kernel_source, inout.size(), 1, 1, inout, old);
1794 for (int i = 0; i < inout.size(); ++i) {
1795 EXPECT_EQ(inout[i], expected_inout[i]);
1796 EXPECT_EQ(old[i], expected_old[i]);
1797 }
1798 }
1799
TEST_F(ComputeTest,global_atomic_and_or)1800 TEST_F(ComputeTest, global_atomic_and_or)
1801 {
1802 const char *kernel_source =
1803 "__attribute__((reqd_work_group_size(3, 1, 1)))\n\
1804 __kernel void main_test(__global int *inout)\n\
1805 {\n\
1806 atomic_and(inout, ~(1 << get_global_id(0)));\n\
1807 atomic_or(inout, (1 << (get_global_id(0) + 4)));\n\
1808 }\n";
1809 auto inout = ShaderArg<int32_t>(0xf, SHADER_ARG_INOUT);
1810 const int32_t expected[] = { 0x78 };
1811 run_shader(kernel_source, 3, 1, 1, inout);
1812 for (int i = 0; i < inout.size(); ++i)
1813 EXPECT_EQ(inout[i], expected[i]);
1814 }
1815
TEST_F(ComputeTest,global_atomic_cmpxchg)1816 TEST_F(ComputeTest, global_atomic_cmpxchg)
1817 {
1818 const char *kernel_source =
1819 "__attribute__((reqd_work_group_size(2, 1, 1)))\n\
1820 __kernel void main_test(__global int *inout)\n\
1821 {\n\
1822 while (atomic_cmpxchg(inout, get_global_id(0), get_global_id(0) + 1) != get_global_id(0))\n\
1823 ;\n\
1824 }\n";
1825 auto inout = ShaderArg<int32_t>(0, SHADER_ARG_INOUT);
1826 const int32_t expected_inout[] = { 2 };
1827 run_shader(kernel_source, 2, 1, 1, inout);
1828 for (int i = 0; i < inout.size(); ++i)
1829 EXPECT_EQ(inout[i], expected_inout[i]);
1830 }
1831
TEST_F(ComputeTest,local_atomic_and_or)1832 TEST_F(ComputeTest, local_atomic_and_or)
1833 {
1834 const char *kernel_source =
1835 "__attribute__((reqd_work_group_size(2, 1, 1)))\n\
1836 __kernel void main_test(__global ushort *inout)\n\
1837 {\n\
1838 __local ushort tmp;\n\
1839 atomic_and(&tmp, ~(0xff << (get_global_id(0) * 8)));\n\
1840 atomic_or(&tmp, inout[get_global_id(0)] << (get_global_id(0) * 8));\n\
1841 barrier(CLK_LOCAL_MEM_FENCE);\n\
1842 inout[get_global_id(0)] = tmp;\n\
1843 }\n";
1844 auto inout = ShaderArg<uint16_t>({ 2, 4 }, SHADER_ARG_INOUT);
1845 const uint16_t expected[] = { 0x402, 0x402 };
1846 run_shader(kernel_source, inout.size(), 1, 1, inout);
1847 for (int i = 0; i < inout.size(); ++i)
1848 EXPECT_EQ(inout[i], expected[i]);
1849 }
1850
TEST_F(ComputeTest,local_atomic_cmpxchg)1851 TEST_F(ComputeTest, local_atomic_cmpxchg)
1852 {
1853 const char *kernel_source =
1854 "__attribute__((reqd_work_group_size(2, 1, 1)))\n\
1855 __kernel void main_test(__global int *out)\n\
1856 {\n\
1857 __local uint tmp;\n\
1858 tmp = 0;\n\
1859 barrier(CLK_LOCAL_MEM_FENCE);\n\
1860 while (atomic_cmpxchg(&tmp, get_global_id(0), get_global_id(0) + 1) != get_global_id(0))\n\
1861 ;\n\
1862 barrier(CLK_LOCAL_MEM_FENCE);\n\
1863 out[0] = tmp;\n\
1864 }\n";
1865
1866 auto out = ShaderArg<uint32_t>(0xdeadbeef, SHADER_ARG_OUTPUT);
1867 const uint16_t expected[] = { 2 };
1868 run_shader(kernel_source, 2, 1, 1, out);
1869 for (int i = 0; i < out.size(); ++i)
1870 EXPECT_EQ(out[i], expected[i]);
1871 }
1872
TEST_F(ComputeTest,constant_sampler)1873 TEST_F(ComputeTest, constant_sampler)
1874 {
1875 const char* kernel_source =
1876 "__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_LINEAR;\n\
1877 __kernel void main_test(read_only image2d_t input, write_only image2d_t output)\n\
1878 {\n\
1879 int2 coordsi = (int2)(get_global_id(0), get_global_id(1));\n\
1880 float2 coordsf = (float2)((float)coordsi.x / get_image_width(input), (float)coordsi.y / get_image_height(input));\n\
1881 write_imagef(output, coordsi, \n\
1882 read_imagef(input, sampler, coordsf) + \n\
1883 read_imagef(input, sampler, coordsf + (float2)(0.1, 0.1)));\n\
1884 }\n";
1885 Shader shader = compile(std::vector<const char*>({ kernel_source }));
1886 validate(shader);
1887 EXPECT_EQ(shader.dxil->metadata.num_const_samplers, 1);
1888 }
1889
TEST_F(ComputeTest,hi)1890 TEST_F(ComputeTest, hi)
1891 {
1892 const char *kernel_source = R"(
1893 __kernel void main_test(__global char3 *srcA, __global char2 *dst)
1894 {
1895 int tid = get_global_id(0);
1896
1897 char2 tmp = srcA[tid].hi;
1898 dst[tid] = tmp;
1899 })";
1900 Shader shader = compile(std::vector<const char*>({ kernel_source }));
1901 validate(shader);
1902 }
1903
TEST_F(ComputeTest,system_values)1904 TEST_F(ComputeTest, system_values)
1905 {
1906 const char *kernel_source =
1907 "__kernel void main_test(__global uint* outputs)\n\
1908 {\n\
1909 outputs[0] = get_work_dim();\n\
1910 outputs[1] = get_global_size(0);\n\
1911 outputs[2] = get_local_size(0);\n\
1912 outputs[3] = get_num_groups(0);\n\
1913 outputs[4] = get_group_id(0);\n\
1914 outputs[5] = get_global_offset(0);\n\
1915 outputs[6] = get_global_id(0);\n\
1916 }\n";
1917 auto out = ShaderArg<uint32_t>(std::vector<uint32_t>(6, 0xdeadbeef), SHADER_ARG_OUTPUT);
1918 const uint16_t expected[] = { 3, 1, 1, 1, 0, 0, 0, };
1919 CompileArgs args = { 1, 1, 1 };
1920 Shader shader = compile({ kernel_source });
1921 run_shader(shader, args, out);
1922 for (int i = 0; i < out.size(); ++i)
1923 EXPECT_EQ(out[i], expected[i]);
1924
1925 args.work_props.work_dim = 2;
1926 args.work_props.global_offset_x = 100;
1927 args.work_props.group_id_offset_x = 2;
1928 args.work_props.group_count_total_x = 5;
1929 const uint32_t expected_withoffsets[] = { 2, 5, 1, 5, 2, 100, 102 };
1930 run_shader(shader, args, out);
1931 for (int i = 0; i < out.size(); ++i)
1932 EXPECT_EQ(out[i], expected_withoffsets[i]);
1933 }
1934
TEST_F(ComputeTest,convert_round_sat)1935 TEST_F(ComputeTest, convert_round_sat)
1936 {
1937 const char *kernel_source =
1938 "__kernel void main_test(__global float *f, __global uchar *u)\n\
1939 {\n\
1940 uint idx = get_global_id(0);\n\
1941 u[idx] = convert_uchar_sat_rtp(f[idx]);\n\
1942 }\n";
1943 auto f = ShaderArg<float>({ -1.0f, 1.1f, 20.0f, 255.5f }, SHADER_ARG_INPUT);
1944 auto u = ShaderArg<uint8_t>({ 255, 0, 0, 0 }, SHADER_ARG_OUTPUT);
1945 const uint8_t expected[] = {
1946 0, 2, 20, 255
1947 };
1948
1949 run_shader(kernel_source, f.size(), 1, 1, f, u);
1950 for (int i = 0; i < u.size(); ++i)
1951 EXPECT_EQ(u[i], expected[i]);
1952 }
1953
TEST_F(ComputeTest,convert_round_sat_vec)1954 TEST_F(ComputeTest, convert_round_sat_vec)
1955 {
1956 const char *kernel_source =
1957 "__kernel void main_test(__global float16 *f, __global uchar16 *u)\n\
1958 {\n\
1959 uint idx = get_global_id(0);\n\
1960 u[idx] = convert_uchar16_sat_rtp(f[idx]);\n\
1961 }\n";
1962 auto f = ShaderArg<float>({
1963 -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f,
1964 -0.5f, 1.9f, 20.0f, 254.5f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f,
1965 0.0f, 1.3f, 20.0f, 255.1f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f,
1966 -0.0f, 1.5555f, 20.0f, 254.9f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f,
1967 }, SHADER_ARG_INPUT);
1968 auto u = ShaderArg<uint8_t>({
1969 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0,
1970 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0,
1971 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0,
1972 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0,
1973 }, SHADER_ARG_OUTPUT);
1974 const uint8_t expected[] = {
1975 0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255,
1976 0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255,
1977 0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255,
1978 0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255,
1979 };
1980
1981 run_shader(kernel_source, 4, 1, 1, f, u);
1982 for (int i = 0; i < u.size(); ++i)
1983 EXPECT_EQ(u[i], expected[i]);
1984 }
1985
TEST_F(ComputeTest,convert_char2_uchar2)1986 TEST_F(ComputeTest, convert_char2_uchar2)
1987 {
1988 const char *kernel_source =
1989 "__kernel void main_test( __global char2 *src, __global uchar2 *dest )\n\
1990 {\n\
1991 size_t i = get_global_id(0);\n\
1992 dest[i] = convert_uchar2_sat( src[i] );\n\
1993 }\n";
1994
1995 auto c = ShaderArg<int8_t>({ -127, -4, 0, 4, 126, 127, 16, 32 }, SHADER_ARG_INPUT);
1996 auto u = ShaderArg<uint8_t>({ 99, 99, 99, 99, 99, 99, 99, 99 }, SHADER_ARG_OUTPUT);
1997 const uint8_t expected[] = { 0, 0, 0, 4, 126, 127, 16, 32 };
1998 run_shader(kernel_source, 4, 1, 1, c, u);
1999 for (int i = 0; i < u.size(); i++)
2000 EXPECT_EQ(u[i], expected[i]);
2001 }
2002
TEST_F(ComputeTest,async_copy)2003 TEST_F(ComputeTest, async_copy)
2004 {
2005 const char *kernel_source = R"(
2006 __kernel void main_test( const __global char *src, __global char *dst, __local char *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem )
2007 {
2008 int i;
2009 for(i=0; i<copiesPerWorkItem; i++)
2010 localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ] = (char)(char)0;
2011 barrier( CLK_LOCAL_MEM_FENCE );
2012 event_t event;
2013 event = async_work_group_copy( (__local char*)localBuffer, (__global const char*)(src+copiesPerWorkgroup*get_group_id(0)), (size_t)copiesPerWorkgroup, 0 );
2014 wait_group_events( 1, &event );
2015 for(i=0; i<copiesPerWorkItem; i++)
2016 dst[ get_global_id( 0 )*copiesPerWorkItem+i ] = localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ];
2017 })";
2018 Shader shader = compile({ kernel_source });
2019 validate(shader);
2020 }
2021
TEST_F(ComputeTest,packed_struct_global)2022 TEST_F(ComputeTest, packed_struct_global)
2023 {
2024 #pragma pack(push, 1)
2025 struct s { uint8_t uc; uint64_t ul; uint16_t us; };
2026 #pragma pack(pop)
2027
2028 const char *kernel_source =
2029 "struct __attribute__((packed)) s {uchar uc; ulong ul; ushort us; };\n\
2030 __kernel void main_test(__global struct s *inout, global uint *size)\n\
2031 {\n\
2032 uint idx = get_global_id(0);\n\
2033 inout[idx].uc = idx + 1;\n\
2034 inout[idx].ul = ((ulong)(idx + 1 + 0xfbfcfdfe) << 32) | 0x12345678;\n\
2035 inout[idx].us = ((ulong)(idx + 1 + 0xa0) << 8) | 0x12;\n\
2036 *size = sizeof(struct s);\n\
2037 }\n";
2038 auto inout = ShaderArg<struct s>({0, 0, 0}, SHADER_ARG_OUTPUT);
2039 auto size = ShaderArg<uint32_t>(0, SHADER_ARG_OUTPUT);
2040 const struct s expected[] = {
2041 { 1, 0xfbfcfdff12345678, 0xa112 }
2042 };
2043
2044 run_shader(kernel_source, inout.size(), 1, 1, inout, size);
2045 for (int i = 0; i < inout.size(); ++i) {
2046 EXPECT_EQ(inout[i].uc, expected[i].uc);
2047 EXPECT_EQ(inout[i].ul, expected[i].ul);
2048 EXPECT_EQ(inout[i].us, expected[i].us);
2049 }
2050 EXPECT_EQ(size, sizeof(struct s));
2051 }
2052
TEST_F(ComputeTest,packed_struct_arg)2053 TEST_F(ComputeTest, packed_struct_arg)
2054 {
2055 #pragma pack(push, 1)
2056 struct s { uint8_t uc; uint64_t ul; uint16_t us; };
2057 #pragma pack(pop)
2058
2059 const char *kernel_source =
2060 "struct __attribute__((packed)) s {uchar uc; ulong ul; ushort us; };\n\
2061 __kernel void main_test(__global struct s *out, struct s in)\n\
2062 {\n\
2063 uint idx = get_global_id(0);\n\
2064 out[idx].uc = in.uc + 0x12;\n\
2065 out[idx].ul = in.ul + 0x123456789abcdef;\n\
2066 out[idx].us = in.us + 0x1234;\n\
2067 }\n";
2068 auto out = ShaderArg<struct s>({0, 0, 0}, SHADER_ARG_OUTPUT);
2069 auto in = ShaderArg<struct s>({1, 2, 3}, SHADER_ARG_INPUT);
2070 const struct s expected[] = {
2071 { 0x12 + 1, 0x123456789abcdef + 2, 0x1234 + 3 }
2072 };
2073
2074 run_shader(kernel_source, out.size(), 1, 1, out, in);
2075 for (int i = 0; i < out.size(); ++i) {
2076 EXPECT_EQ(out[i].uc, expected[i].uc);
2077 EXPECT_EQ(out[i].ul, expected[i].ul);
2078 EXPECT_EQ(out[i].us, expected[i].us);
2079 }
2080 }
2081
TEST_F(ComputeTest,packed_struct_local)2082 TEST_F(ComputeTest, packed_struct_local)
2083 {
2084 #pragma pack(push, 1)
2085 struct s { uint8_t uc; uint64_t ul; uint16_t us; };
2086 #pragma pack(pop)
2087
2088 const char *kernel_source =
2089 "struct __attribute__((packed)) s {uchar uc; ulong ul; ushort us; };\n\
2090 __kernel void main_test(__global struct s *out, __constant struct s *in)\n\
2091 {\n\
2092 uint idx = get_global_id(0);\n\
2093 __local struct s tmp[2];\n\
2094 tmp[get_local_id(0)] = in[idx];\n\
2095 barrier(CLK_LOCAL_MEM_FENCE);\n\
2096 out[idx] = tmp[(get_local_id(0) + 1) % 2];\n\
2097 }\n";
2098 auto out = ShaderArg<struct s>({{0, 0, 0}, {0, 0, 0}}, SHADER_ARG_OUTPUT);
2099 auto in = ShaderArg<struct s>({{1, 2, 3}, {0x12, 0x123456789abcdef, 0x1234} }, SHADER_ARG_INPUT);
2100 const struct s expected[] = {
2101 { 0x12, 0x123456789abcdef, 0x1234 },
2102 { 1, 2, 3 },
2103 };
2104
2105 run_shader(kernel_source, out.size(), 1, 1, out, in);
2106 for (int i = 0; i < out.size(); ++i) {
2107 EXPECT_EQ(out[i].uc, expected[i].uc);
2108 EXPECT_EQ(out[i].ul, expected[i].ul);
2109 EXPECT_EQ(out[i].us, expected[i].us);
2110 }
2111 }
2112
2113 /* DISABLED because current release versions of WARP either return
2114 * rubbish from reads or crash: they are not prepared to handle
2115 * non-float global constants */
TEST_F(ComputeTest,DISABLED_packed_struct_const)2116 TEST_F(ComputeTest, DISABLED_packed_struct_const)
2117 {
2118 #pragma pack(push, 1)
2119 struct s { uint8_t uc; uint64_t ul; uint16_t us; };
2120 #pragma pack(pop)
2121
2122 const char *kernel_source =
2123 "struct __attribute__((packed)) s {uchar uc; ulong ul; ushort us; };\n\
2124 __kernel void main_test(__global struct s *out, struct s in)\n\
2125 {\n\
2126 __constant struct s base[] = {\n\
2127 {0x12, 0x123456789abcdef, 0x1234},\n\
2128 {0x11, 0x123456789abcdee, 0x1233},\n\
2129 };\n\
2130 uint idx = get_global_id(0);\n\
2131 out[idx].uc = base[idx % 2].uc + in.uc;\n\
2132 out[idx].ul = base[idx % 2].ul + in.ul;\n\
2133 out[idx].us = base[idx % 2].us + in.us;\n\
2134 }\n";
2135 auto out = ShaderArg<struct s>(std::vector<struct s>(2, {0, 0, 0}), SHADER_ARG_OUTPUT);
2136 auto in = ShaderArg<struct s>({1, 2, 3}, SHADER_ARG_INPUT);
2137 const struct s expected[] = {
2138 { 0x12 + 1, 0x123456789abcdef + 2, 0x1234 + 3 },
2139 { 0x11 + 1, 0x123456789abcdee + 2, 0x1233 + 3 },
2140 };
2141
2142 run_shader(kernel_source, out.size(), 1, 1, out, in);
2143 for (int i = 0; i < out.size(); ++i) {
2144 EXPECT_EQ(out[i].uc, expected[i].uc);
2145 EXPECT_EQ(out[i].ul, expected[i].ul);
2146 EXPECT_EQ(out[i].us, expected[i].us);
2147 }
2148 }
2149
TEST_F(ComputeTest,DISABLED_printf)2150 TEST_F(ComputeTest, DISABLED_printf)
2151 {
2152 const char *kernel_source = R"(
2153 __kernel void main_test(__global float *src, __global uint *dest)
2154 {
2155 __constant char *format_str = "%s: %f";
2156 __constant char *str_val = "Test";
2157 *dest = printf(format_str, str_val, src[0]);
2158 })";
2159
2160 auto src = ShaderArg<float>({ 1.0f }, SHADER_ARG_INPUT);
2161 auto dest = ShaderArg<uint32_t>({ 0xdeadbeef }, SHADER_ARG_OUTPUT);
2162 run_shader(kernel_source, 1, 1, 1, src, dest);
2163 EXPECT_EQ(dest[0], 0);
2164 }
2165
TEST_F(ComputeTest,vload_half)2166 TEST_F(ComputeTest, vload_half)
2167 {
2168 const char *kernel_source = R"(
2169 __kernel void main_test(__global half *src, __global float4 *dest)
2170 {
2171 int offset = get_global_id(0);
2172 dest[offset] = vload_half4(offset, src);
2173 })";
2174 auto src = ShaderArg<uint16_t>({ 0x3c00, 0x4000, 0x4200, 0x4400,
2175 0x4500, 0x4600, 0x4700, 0x4800 }, SHADER_ARG_INPUT);
2176 auto dest = ShaderArg<float>({ FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX,
2177 FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX }, SHADER_ARG_OUTPUT);
2178 run_shader(kernel_source, 2, 1, 1, src, dest);
2179 for (unsigned i = 0; i < 8; ++i)
2180 EXPECT_FLOAT_EQ(dest[i], (float)(i + 1));
2181 }
2182
TEST_F(ComputeTest,vstore_half)2183 TEST_F(ComputeTest, vstore_half)
2184 {
2185 const char *kernel_source = R"(
2186 __kernel void main_test(__global half *dst, __global float4 *src)
2187 {
2188 int offset = get_global_id(0);
2189 vstore_half4(src[offset], offset, dst);
2190 })";
2191 auto dest = ShaderArg<uint16_t>({0xdead, 0xdead, 0xdead, 0xdead,
2192 0xdead, 0xdead, 0xdead, 0xdead}, SHADER_ARG_OUTPUT);
2193 auto src = ShaderArg<float>({ 1.0, 2.0, 3.0, 4.0,
2194 5.0, 6.0, 7.0, 8.0 }, SHADER_ARG_INPUT);
2195 run_shader(kernel_source, 2, 1, 1, dest, src);
2196 const uint16_t expected[] = { 0x3c00, 0x4000, 0x4200, 0x4400,
2197 0x4500, 0x4600, 0x4700, 0x4800 };
2198 for (unsigned i = 0; i < 8; ++i)
2199 EXPECT_EQ(dest[i], expected[i]);
2200 }
2201
TEST_F(ComputeTest,inline_function)2202 TEST_F(ComputeTest, inline_function)
2203 {
2204 const char *kernel_source = R"(
2205 inline float helper(float foo)
2206 {
2207 return foo * 2;
2208 }
2209
2210 __kernel void main_test(__global float *dst, __global float *src)
2211 {
2212 *dst = helper(*src);
2213 })";
2214 auto dest = ShaderArg<float>({ NAN }, SHADER_ARG_OUTPUT);
2215 auto src = ShaderArg<float>({ 1.0f }, SHADER_ARG_INPUT);
2216 run_shader(kernel_source, 1, 1, 1, dest, src);
2217 EXPECT_EQ(dest[0], 2.0f);
2218 }
2219
TEST_F(ComputeTest,unused_arg)2220 TEST_F(ComputeTest, unused_arg)
2221 {
2222 const char *kernel_source = R"(
2223 __kernel void main_test(__global int *dst, __global int *unused, __global int *src)
2224 {
2225 int i = get_global_id(0);
2226 dst[i] = src[i];
2227 })";
2228 auto dest = ShaderArg<int>({ -1, -1, -1, -1 }, SHADER_ARG_OUTPUT);
2229 auto src = ShaderArg<int>({ 1, 2, 3, 4 }, SHADER_ARG_INPUT);
2230 auto unused = ShaderArg<int>({ -1, -1, -1, -1 }, SHADER_ARG_INPUT);
2231 run_shader(kernel_source, 4, 1, 1, dest, unused, src);
2232 for (int i = 0; i < 4; ++i)
2233 EXPECT_EQ(dest[i], i + 1);
2234 }
2235
TEST_F(ComputeTest,spec_constant)2236 TEST_F(ComputeTest, spec_constant)
2237 {
2238 const char *spirv_asm = R"(
2239 OpCapability Addresses
2240 OpCapability Kernel
2241 OpCapability Int64
2242 %1 = OpExtInstImport "OpenCL.std"
2243 OpMemoryModel Physical64 OpenCL
2244 OpEntryPoint Kernel %2 "main_test" %__spirv_BuiltInGlobalInvocationId
2245 %4 = OpString "kernel_arg_type.main_test.uint*,"
2246 OpSource OpenCL_C 102000
2247 OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId"
2248 OpName %output "output"
2249 OpName %entry "entry"
2250 OpName %output_addr "output.addr"
2251 OpName %id "id"
2252 OpName %call "call"
2253 OpName %conv "conv"
2254 OpName %idxprom "idxprom"
2255 OpName %arrayidx "arrayidx"
2256 OpName %add "add"
2257 OpName %mul "mul"
2258 OpName %idxprom1 "idxprom1"
2259 OpName %arrayidx2 "arrayidx2"
2260 OpDecorate %__spirv_BuiltInGlobalInvocationId BuiltIn GlobalInvocationId
2261 OpDecorate %__spirv_BuiltInGlobalInvocationId Constant
2262 OpDecorate %id Alignment 4
2263 OpDecorate %output_addr Alignment 8
2264 OpDecorate %uint_1 SpecId 1
2265 %ulong = OpTypeInt 64 0
2266 %uint = OpTypeInt 32 0
2267 %uint_1 = OpSpecConstant %uint 1
2268 %v3ulong = OpTypeVector %ulong 3
2269 %_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
2270 %void = OpTypeVoid
2271 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
2272 %24 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint
2273 %_ptr_Function__ptr_CrossWorkgroup_uint = OpTypePointer Function %_ptr_CrossWorkgroup_uint
2274 %_ptr_Function_uint = OpTypePointer Function %uint
2275 %__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_Input_v3ulong Input
2276 %2 = OpFunction %void DontInline %24
2277 %output = OpFunctionParameter %_ptr_CrossWorkgroup_uint
2278 %entry = OpLabel
2279 %output_addr = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uint Function
2280 %id = OpVariable %_ptr_Function_uint Function
2281 OpStore %output_addr %output Aligned 8
2282 %27 = OpLoad %v3ulong %__spirv_BuiltInGlobalInvocationId Aligned 32
2283 %call = OpCompositeExtract %ulong %27 0
2284 %conv = OpUConvert %uint %call
2285 OpStore %id %conv Aligned 4
2286 %28 = OpLoad %_ptr_CrossWorkgroup_uint %output_addr Aligned 8
2287 %29 = OpLoad %uint %id Aligned 4
2288 %idxprom = OpUConvert %ulong %29
2289 %arrayidx = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uint %28 %idxprom
2290 %30 = OpLoad %uint %arrayidx Aligned 4
2291 %31 = OpLoad %uint %id Aligned 4
2292 %add = OpIAdd %uint %31 %uint_1
2293 %mul = OpIMul %uint %30 %add
2294 %32 = OpLoad %_ptr_CrossWorkgroup_uint %output_addr Aligned 8
2295 %33 = OpLoad %uint %id Aligned 4
2296 %idxprom1 = OpUConvert %ulong %33
2297 %arrayidx2 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uint %32 %idxprom1
2298 OpStore %arrayidx2 %mul Aligned 4
2299 OpReturn
2300 OpFunctionEnd)";
2301 Shader shader = assemble(spirv_asm);
2302 Shader spec_shader = specialize(shader, 1, 5);
2303
2304 auto inout = ShaderArg<uint32_t>({ 0x00000001, 0x10000001, 0x00020002, 0x04010203 },
2305 SHADER_ARG_INOUT);
2306 const uint32_t expected[] = {
2307 0x00000005, 0x60000006, 0x000e000e, 0x20081018
2308 };
2309 CompileArgs args = { inout.size(), 1, 1 };
2310 run_shader(spec_shader, args, inout);
2311 for (int i = 0; i < inout.size(); ++i)
2312 EXPECT_EQ(inout[i], expected[i]);
2313 }
2314