1 /*
2  * Copyright (C) 2011 Francisco Jerez.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining
6  * a copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sublicense, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the
14  * next paragraph) shall be included in all copies or substantial
15  * portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
20  * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
21  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24  *
25  */
26 
#include <fcntl.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "pipe/p_defines.h"
#include "pipe/p_shader_tokens.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "util/u_sampler.h"
#include "util/format/u_format.h"
#include "tgsi/tgsi_text.h"
#include "pipe-loader/pipe_loader.h"
42 
/* Number of slots in each per-test object array of struct context. */
#define MAX_RESOURCES 4

/*
 * State shared by all tests: the loaded device with its screen and
 * context, plus the objects created by the test that is currently running.
 */
struct context {
        /* Device probed by init_ctx(). */
        struct pipe_loader_device *dev;
        /* Screen created from dev. */
        struct pipe_screen *screen;
        /* Context created from screen. */
        struct pipe_context *pipe;
        /* Compute state handle set by init_prog(). */
        void *hwcs;
        /* Sampler state handles set by init_sampler_states(). */
        void *hwsmp[MAX_RESOURCES];
        /* Resources created by init_tex(), indexed by slot. */
        struct pipe_resource *tex[MAX_RESOURCES];
        /* Per-slot flag from init_tex(), used as the surface "writable" bit. */
        bool tex_rw[MAX_RESOURCES];
        /* Sampler views created by init_sampler_views(). */
        struct pipe_sampler_view *view[MAX_RESOURCES];
        /* Surfaces created by init_compute_resources(). */
        struct pipe_surface *surf[MAX_RESOURCES];
};
56 
/*
 * Query compute capability `c` from the screen of the `ctx` variable that
 * must be in scope at the expansion site, and print the returned values as
 * a brace-enclosed list of unsigned 64-bit integers.  The first macro
 * argument is accepted but not used.
 *
 * The value returned by get_compute_param() is treated as a byte count.  A
 * non-positive count prints an empty list, and the count is clamped to the
 * capacity of the local buffer so the print loop never reads past it.
 */
#define DUMP_COMPUTE_PARAM(p, c) do {                                   \
                uint64_t dcp_val[4];                                    \
                const size_t dcp_max = sizeof(dcp_val) / sizeof(*dcp_val); \
                size_t dcp_i, dcp_cnt;                                  \
                int dcp_sz;                                             \
                                                                        \
                dcp_sz = ctx->screen->get_compute_param(ctx->screen,    \
                                                        PIPE_SHADER_IR_TGSI, \
                                                        c, dcp_val);    \
                dcp_cnt = dcp_sz > 0 ?                                  \
                        (size_t)dcp_sz / sizeof(*dcp_val) : 0;          \
                if (dcp_cnt > dcp_max)                                  \
                        dcp_cnt = dcp_max;                              \
                                                                        \
                printf("%s: {", #c);                                    \
                                                                        \
                for (dcp_i = 0; dcp_i < dcp_cnt; ++dcp_i)               \
                        printf(" %"PRIu64, dcp_val[dcp_i]);             \
                                                                        \
                printf(" }\n");                                         \
        } while (0)
71 
init_ctx(struct context * ctx)72 static void init_ctx(struct context *ctx)
73 {
74         ASSERTED int ret;
75 
76         ret = pipe_loader_probe(&ctx->dev, 1);
77         assert(ret);
78 
79         ctx->screen = pipe_loader_create_screen(ctx->dev);
80         assert(ctx->screen);
81 
82         ctx->pipe = ctx->screen->context_create(ctx->screen, NULL, 0);
83         assert(ctx->pipe);
84 
85         DUMP_COMPUTE_PARAM(p, PIPE_COMPUTE_CAP_GRID_DIMENSION);
86         DUMP_COMPUTE_PARAM(p, PIPE_COMPUTE_CAP_MAX_GRID_SIZE);
87         DUMP_COMPUTE_PARAM(p, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE);
88 }
89 
destroy_ctx(struct context * ctx)90 static void destroy_ctx(struct context *ctx)
91 {
92         ctx->pipe->destroy(ctx->pipe);
93         ctx->screen->destroy(ctx->screen);
94         pipe_loader_release(&ctx->dev, 1);
95         FREE(ctx);
96 }
97 
/*
 * Report a fatal preprocessing failure, remove the temporary file (when one
 * has been created) and abort the test program.
 */
static void
preprocess_fail(const char *what, const char *tmp)
{
        fprintf(stderr, "preprocess_prog: %s failed\n", what);
        if (tmp)
                unlink(tmp);
        abort();
}

/*
 * Run a shader source through the C preprocessor.
 *
 * A fixed header defining RGLOBAL, RLOCAL, RPRIVATE and RINPUT as
 * RES[32767] .. RES[32764] is fed to `cpp` ahead of `src`.  `defs`, when
 * non-NULL, is pasted verbatim into the cpp command line (e.g. -D options).
 * The output is collected through a temporary file under /tmp, which is
 * removed before returning.
 *
 * ctx:  unused.
 * src:  NUL-terminated shader text.
 * defs: extra cpp arguments, or NULL for none.
 *
 * Returns a malloc()ed, NUL-terminated string that the caller must free().
 * Never returns NULL: any failure is reported on stderr and aborts.
 */
static char *
preprocess_prog(struct context *ctx, const char *src, const char *defs)
{
        static const char header[] =
                "#define RGLOBAL        RES[32767]\n"
                "#define RLOCAL         RES[32766]\n"
                "#define RPRIVATE       RES[32765]\n"
                "#define RINPUT         RES[32764]\n";
        const size_t header_len = strlen(header);
        const size_t src_len = strlen(src);
        char cmd[512];
        char tmp[] = "/tmp/test-compute.tgsi-XXXXXX";
        struct stat st;
        size_t len, done = 0;
        char *buf;
        FILE *p;
        int fd, n;

        (void)ctx;

        /* Open a temporary file */
        fd = mkstemp(tmp);
        if (fd < 0)
                preprocess_fail("mkstemp", NULL);

        /* A truncated command line would run something unintended. */
        n = snprintf(cmd, sizeof(cmd), "cpp -P -nostdinc -undef %s > %s",
                     defs ? defs : "", tmp);
        if (n < 0 || (size_t)n >= sizeof(cmd))
                preprocess_fail("building the cpp command line", tmp);

        /* Preprocess */
        p = popen(cmd, "w");
        if (!p)
                preprocess_fail("popen", tmp);

        if (fwrite(header, 1, header_len, p) != header_len ||
            fwrite(src, 1, src_len, p) != src_len) {
                pclose(p);
                preprocess_fail("writing to cpp", tmp);
        }

        if (pclose(p) != 0)
                preprocess_fail("cpp", tmp);

        /* Read back; fd still refers to the file cpp wrote through the shell. */
        if (fstat(fd, &st) != 0)
                preprocess_fail("fstat", tmp);

        len = (size_t)st.st_size;
        buf = malloc(len + 1);
        if (!buf)
                preprocess_fail("malloc", tmp);

        /* read() may legitimately return fewer bytes than requested. */
        while (done < len) {
                ssize_t r = read(fd, buf + done, len - done);

                if (r <= 0) {
                        free(buf);
                        preprocess_fail("read", tmp);
                }
                done += (size_t)r;
        }
        buf[len] = '\0';

        /* Clean up */
        close(fd);
        unlink(tmp);

        return buf;
}
141 
init_prog(struct context * ctx,unsigned local_sz,unsigned private_sz,unsigned input_sz,const char * src,const char * defs)142 static void init_prog(struct context *ctx, unsigned local_sz,
143                       unsigned private_sz, unsigned input_sz,
144                       const char *src, const char *defs)
145 {
146         struct pipe_context *pipe = ctx->pipe;
147         struct tgsi_token prog[1024];
148         struct pipe_compute_state cs = {
149                 .ir_type = PIPE_SHADER_IR_TGSI,
150                 .prog = prog,
151                 .req_local_mem = local_sz,
152                 .req_private_mem = private_sz,
153                 .req_input_mem = input_sz
154         };
155         char *psrc = preprocess_prog(ctx, src, defs);
156         ASSERTED int ret;
157 
158         ret = tgsi_text_translate(psrc, prog, ARRAY_SIZE(prog));
159         assert(ret);
160         free(psrc);
161 
162         ctx->hwcs = pipe->create_compute_state(pipe, &cs);
163         assert(ctx->hwcs);
164 
165         pipe->bind_compute_state(pipe, ctx->hwcs);
166 }
167 
destroy_prog(struct context * ctx)168 static void destroy_prog(struct context *ctx)
169 {
170         struct pipe_context *pipe = ctx->pipe;
171 
172         pipe->delete_compute_state(pipe, ctx->hwcs);
173         ctx->hwcs = NULL;
174 }
175 
init_tex(struct context * ctx,int slot,enum pipe_texture_target target,bool rw,enum pipe_format format,int w,int h,void (* init)(void *,int,int,int))176 static void init_tex(struct context *ctx, int slot,
177                      enum pipe_texture_target target, bool rw,
178                      enum pipe_format format, int w, int h,
179                      void (*init)(void *, int, int, int))
180 {
181         struct pipe_context *pipe = ctx->pipe;
182         struct pipe_resource **tex = &ctx->tex[slot];
183         struct pipe_resource ttex = {
184                 .target = target,
185                 .format = format,
186                 .width0 = w,
187                 .height0 = h,
188                 .depth0 = 1,
189                 .array_size = 1,
190                 .bind = (PIPE_BIND_SAMPLER_VIEW |
191                          PIPE_BIND_COMPUTE_RESOURCE |
192                          PIPE_BIND_GLOBAL)
193         };
194         int dx = util_format_get_blocksize(format);
195         int dy = util_format_get_stride(format, w);
196         int nx = (target == PIPE_BUFFER ? (w / dx) :
197                   util_format_get_nblocksx(format, w));
198         int ny = (target == PIPE_BUFFER ? 1 :
199                   util_format_get_nblocksy(format, h));
200         struct pipe_transfer *xfer;
201         char *map;
202         int x, y;
203 
204         *tex = ctx->screen->resource_create(ctx->screen, &ttex);
205         assert(*tex);
206 
207         map = pipe->texture_map(pipe, *tex, 0, PIPE_MAP_WRITE,
208                                   &(struct pipe_box) { .width = w,
209                                                   .height = h,
210                                                   .depth = 1 }, &xfer);
211         assert(xfer);
212         assert(map);
213 
214         for (y = 0; y < ny; ++y) {
215                 for (x = 0; x < nx; ++x) {
216                         init(map + y * dy + x * dx, slot, x, y);
217                 }
218         }
219 
220         pipe->texture_unmap(pipe, xfer);
221 
222         ctx->tex_rw[slot] = rw;
223 }
224 
/*
 * Default comparison used by check_tex(): true when the first `sz` bytes
 * at `x` and `y` are identical.
 */
static bool default_check(void *x, void *y, int sz)
{
        return memcmp(x, y, sz) == 0;
}
228 
check_tex(struct context * ctx,int slot,void (* expect)(void *,int,int,int),bool (* check)(void *,void *,int))229 static void check_tex(struct context *ctx, int slot,
230                       void (*expect)(void *, int, int, int),
231                       bool (*check)(void *, void *, int))
232 {
233         struct pipe_context *pipe = ctx->pipe;
234         struct pipe_resource *tex = ctx->tex[slot];
235         int dx = util_format_get_blocksize(tex->format);
236         int dy = util_format_get_stride(tex->format, tex->width0);
237         int nx = (tex->target == PIPE_BUFFER ? (tex->width0 / dx) :
238                   util_format_get_nblocksx(tex->format, tex->width0));
239         int ny = (tex->target == PIPE_BUFFER ? 1 :
240                   util_format_get_nblocksy(tex->format, tex->height0));
241         struct pipe_transfer *xfer;
242         char *map;
243         int x = 0, y, i;
244         int err = 0;
245 
246         if (!check)
247                 check = default_check;
248 
249         map = pipe->texture_map(pipe, tex, 0, PIPE_MAP_READ,
250                                   &(struct pipe_box) { .width = tex->width0,
251                                         .height = tex->height0,
252                                         .depth = 1 }, &xfer);
253         assert(xfer);
254         assert(map);
255 
256         for (y = 0; y < ny; ++y) {
257                 for (x = 0; x < nx; ++x) {
258                         uint32_t exp[4];
259                         uint32_t *res = (uint32_t *)(map + y * dy + x * dx);
260 
261                         expect(exp, slot, x, y);
262                         if (check(res, exp, dx) || (++err) > 20)
263                                 continue;
264 
265                         if (dx < 4) {
266                                 uint32_t u = 0, v = 0;
267 
268                                 for (i = 0; i < dx; i++) {
269                                         u |= ((uint8_t *)exp)[i] << (8 * i);
270                                         v |= ((uint8_t *)res)[i] << (8 * i);
271                                 }
272                                 printf("(%d, %d): got 0x%x, expected 0x%x\n",
273                                        x, y, v, u);
274                         } else {
275                                 for (i = 0; i < dx / 4; i++) {
276                                         printf("(%d, %d)[%d]: got 0x%x/%f,"
277                                                " expected 0x%x/%f\n", x, y, i,
278                                                res[i], ((float *)res)[i],
279                                                exp[i], ((float *)exp)[i]);
280                                 }
281                         }
282                 }
283         }
284 
285         pipe->texture_unmap(pipe, xfer);
286 
287         if (err)
288                 printf("(%d, %d): \x1b[31mFAIL\x1b[0m (%d)\n", x, y, err);
289         else
290                 printf("(%d, %d): \x1b[32mOK\x1b[0m\n", x, y);
291 }
292 
destroy_tex(struct context * ctx)293 static void destroy_tex(struct context *ctx)
294 {
295         int i;
296 
297         for (i = 0; i < MAX_RESOURCES; ++i) {
298                 if (ctx->tex[i])
299                         pipe_resource_reference(&ctx->tex[i], NULL);
300         }
301 }
302 
init_sampler_views(struct context * ctx,const int * slots)303 static void init_sampler_views(struct context *ctx, const int *slots)
304 {
305         struct pipe_context *pipe = ctx->pipe;
306         struct pipe_sampler_view tview;
307         int i;
308 
309         for (i = 0; *slots >= 0; ++i, ++slots) {
310                 u_sampler_view_default_template(&tview, ctx->tex[*slots],
311                                                 ctx->tex[*slots]->format);
312 
313                 ctx->view[i] = pipe->create_sampler_view(pipe, ctx->tex[*slots],
314                                                          &tview);
315                 assert(ctx->view[i]);
316         }
317 
318         pipe->set_sampler_views(pipe, PIPE_SHADER_COMPUTE, 0, i, 0, false, ctx->view);
319 }
320 
destroy_sampler_views(struct context * ctx)321 static void destroy_sampler_views(struct context *ctx)
322 {
323         struct pipe_context *pipe = ctx->pipe;
324         int i;
325 
326         pipe->set_sampler_views(pipe, PIPE_SHADER_COMPUTE, 0, 0, MAX_RESOURCES, false, NULL);
327 
328         for (i = 0; i < MAX_RESOURCES; ++i) {
329                 if (ctx->view[i]) {
330                         pipe->sampler_view_destroy(pipe, ctx->view[i]);
331                         ctx->view[i] = NULL;
332                 }
333         }
334 }
335 
init_compute_resources(struct context * ctx,const int * slots)336 static void init_compute_resources(struct context *ctx, const int *slots)
337 {
338         struct pipe_context *pipe = ctx->pipe;
339         int i;
340 
341         for (i = 0; *slots >= 0; ++i, ++slots) {
342                 struct pipe_surface tsurf = {
343                         .format = ctx->tex[*slots]->format,
344                         .writable = ctx->tex_rw[*slots]
345                 };
346 
347                 if (ctx->tex[*slots]->target == PIPE_BUFFER)
348                         tsurf.u.buf.last_element = ctx->tex[*slots]->width0 - 1;
349 
350                 ctx->surf[i] = pipe->create_surface(pipe, ctx->tex[*slots],
351                                                     &tsurf);
352                 assert(ctx->surf[i]);
353         }
354 
355         pipe->set_compute_resources(pipe, 0, i, ctx->surf);
356 }
357 
destroy_compute_resources(struct context * ctx)358 static void destroy_compute_resources(struct context *ctx)
359 {
360         struct pipe_context *pipe = ctx->pipe;
361         int i;
362 
363         pipe->set_compute_resources(pipe, 0, MAX_RESOURCES, NULL);
364 
365         for (i = 0; i < MAX_RESOURCES; ++i) {
366                 if (ctx->surf[i]) {
367                         pipe->surface_destroy(pipe, ctx->surf[i]);
368                         ctx->surf[i] = NULL;
369                 }
370         }
371 }
372 
init_sampler_states(struct context * ctx,int n)373 static void init_sampler_states(struct context *ctx, int n)
374 {
375         struct pipe_context *pipe = ctx->pipe;
376         struct pipe_sampler_state smp = {
377                 .normalized_coords = 1,
378         };
379         int i;
380 
381         for (i = 0; i < n; ++i) {
382                 ctx->hwsmp[i] = pipe->create_sampler_state(pipe, &smp);
383                 assert(ctx->hwsmp[i]);
384         }
385 
386         pipe->bind_sampler_states(pipe, PIPE_SHADER_COMPUTE, 0, i, ctx->hwsmp);
387 }
388 
destroy_sampler_states(struct context * ctx)389 static void destroy_sampler_states(struct context *ctx)
390 {
391         struct pipe_context *pipe = ctx->pipe;
392         int i;
393 
394         pipe->bind_sampler_states(pipe, PIPE_SHADER_COMPUTE,
395 				  0, MAX_RESOURCES, NULL);
396 
397         for (i = 0; i < MAX_RESOURCES; ++i) {
398                 if (ctx->hwsmp[i]) {
399                         pipe->delete_sampler_state(pipe, ctx->hwsmp[i]);
400                         ctx->hwsmp[i] = NULL;
401                 }
402         }
403 }
404 
init_globals(struct context * ctx,const int * slots,uint32_t ** handles)405 static void init_globals(struct context *ctx, const int *slots,
406                          uint32_t **handles)
407 {
408         struct pipe_context *pipe = ctx->pipe;
409         struct pipe_resource *res[MAX_RESOURCES];
410         int i;
411 
412         for (i = 0; *slots >= 0; ++i, ++slots)
413                 res[i] = ctx->tex[*slots];
414 
415         pipe->set_global_binding(pipe, 0, i, res, handles);
416 }
417 
destroy_globals(struct context * ctx)418 static void destroy_globals(struct context *ctx)
419 {
420         struct pipe_context *pipe = ctx->pipe;
421 
422         pipe->set_global_binding(pipe, 0, MAX_RESOURCES, NULL, NULL);
423 }
424 
launch_grid(struct context * ctx,const uint * block_layout,const uint * grid_layout,uint32_t pc,void * input)425 static void launch_grid(struct context *ctx, const uint *block_layout,
426                         const uint *grid_layout, uint32_t pc,
427                         void *input)
428 {
429         struct pipe_context *pipe = ctx->pipe;
430         struct pipe_grid_info info;
431         int i;
432 
433         for (i = 0; i < 3; i++) {
434                 info.block[i] = block_layout[i];
435                 info.grid[i] = grid_layout[i];
436         }
437         info.pc = pc;
438         info.input = input;
439 
440         pipe->launch_grid(pipe, &info);
441 }
442 
/*
 * Default initialiser: stores the 32-bit marker 0xdeadbeef at `p`.  The
 * slot and coordinates are ignored.
 */
static void test_default_init(void *p, int s, int x, int y)
{
        uint32_t *texel = p;

        (void)s;
        (void)x;
        (void)y;

        *texel = 0xdeadbeef;
}
447 
448 /* test_system_values */
/*
 * Expected contents of 32-bit word `x` of the test_system_values() output.
 *
 * Each run of 16 words belongs to one invocation and holds four
 * 4-component vectors in the order the shader stores them: SV[0]
 * (BLOCK_ID), SV[1] (BLOCK_SIZE), SV[2] (GRID_SIZE), SV[3] (THREAD_ID).
 * The invocation index is split into a 3D position using extents of 20
 * and 12 for the first two axes; block id and thread id are then its
 * quotient and remainder by the block size { 4, 3, 5 }.
 */
static void test_system_values_expect(void *p, int s, int x, int y)
{
        static const int block_size[4] = { 4, 3, 5, 1 };
        static const int grid_size[4] = { 5, 4, 1, 1 };
        const int invocation = x / 16;
        const int vector = (x % 16) / 4;
        const int comp = x % 4;
        const int position[4] = {
                invocation % 20,
                (invocation % 240) / 20,
                invocation / 240,
                0
        };
        uint32_t *out = p;

        (void)s;
        (void)y;

        if (vector == 0)
                *out = position[comp] / block_size[comp];
        else if (vector == 1)
                *out = block_size[comp];
        else if (vector == 2)
                *out = grid_size[comp];
        else if (vector == 3)
                *out = position[comp] % block_size[comp];
}
471 
test_system_values(struct context * ctx)472 static void test_system_values(struct context *ctx)
473 {
474         const char *src = "COMP\n"
475                 "DCL RES[0], BUFFER, RAW, WR\n"
476                 "DCL SV[0], BLOCK_ID[0]\n"
477                 "DCL SV[1], BLOCK_SIZE[0]\n"
478                 "DCL SV[2], GRID_SIZE[0]\n"
479                 "DCL SV[3], THREAD_ID[0]\n"
480                 "DCL TEMP[0], LOCAL\n"
481                 "DCL TEMP[1], LOCAL\n"
482                 "IMM UINT32 { 64, 0, 0, 0 }\n"
483                 "IMM UINT32 { 16, 0, 0, 0 }\n"
484                 "IMM UINT32 { 0, 0, 0, 0 }\n"
485                 "\n"
486                 "BGNSUB"
487                 "  UMUL TEMP[0], SV[0], SV[1]\n"
488                 "  UADD TEMP[0], TEMP[0], SV[3]\n"
489                 "  UMUL TEMP[1], SV[1], SV[2]\n"
490                 "  UMUL TEMP[0].w, TEMP[0], TEMP[1].zzzz\n"
491                 "  UMUL TEMP[0].zw, TEMP[0], TEMP[1].yyyy\n"
492                 "  UMUL TEMP[0].yzw, TEMP[0], TEMP[1].xxxx\n"
493                 "  UADD TEMP[0].xy, TEMP[0].xyxy, TEMP[0].zwzw\n"
494                 "  UADD TEMP[0].x, TEMP[0].xxxx, TEMP[0].yyyy\n"
495                 "  UMUL TEMP[0].x, TEMP[0], IMM[0]\n"
496                 "  STORE RES[0].xyzw, TEMP[0], SV[0]\n"
497                 "  UADD TEMP[0].x, TEMP[0], IMM[1]\n"
498                 "  STORE RES[0].xyzw, TEMP[0], SV[1]\n"
499                 "  UADD TEMP[0].x, TEMP[0], IMM[1]\n"
500                 "  STORE RES[0].xyzw, TEMP[0], SV[2]\n"
501                 "  UADD TEMP[0].x, TEMP[0], IMM[1]\n"
502                 "  STORE RES[0].xyzw, TEMP[0], SV[3]\n"
503                 "  RET\n"
504                 "ENDSUB\n";
505 
506         printf("- %s\n", __func__);
507 
508         init_prog(ctx, 0, 0, 0, src, NULL);
509         init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
510                  76800, 0, test_default_init);
511         init_compute_resources(ctx, (int []) { 0, -1 });
512         launch_grid(ctx, (uint []){4, 3, 5}, (uint []){5, 4, 1}, 0, NULL);
513         check_tex(ctx, 0, test_system_values_expect, NULL);
514         destroy_compute_resources(ctx);
515         destroy_tex(ctx);
516         destroy_prog(ctx);
517 }
518 
519 /* test_resource_access */
/*
 * Initialiser for the source buffer of test_resource_access(): element `x`
 * receives the float value 8 - x.
 */
static void test_resource_access_init0(void *p, int s, int x, int y)
{
        float *out = p;

        (void)s;
        (void)y;

        *out = 8.0 - (float)x;
}
524 
/*
 * Expected texel (x, y) of the destination of test_resource_access():
 * 8 minus the source element index (x + 4 * y), taken modulo 64.
 */
static void test_resource_access_expect(void *p, int s, int x, int y)
{
        const int index = (x + 4 * y) & 0x3f;
        float *out = p;

        (void)s;

        *out = 8.0 - (float)index;
}
529 
test_resource_access(struct context * ctx)530 static void test_resource_access(struct context *ctx)
531 {
532         const char *src = "COMP\n"
533                 "DCL RES[0], BUFFER, RAW, WR\n"
534                 "DCL RES[1], 2D, RAW, WR\n"
535                 "DCL SV[0], BLOCK_ID[0]\n"
536                 "DCL TEMP[0], LOCAL\n"
537                 "DCL TEMP[1], LOCAL\n"
538                 "IMM UINT32 { 15, 0, 0, 0 }\n"
539                 "IMM UINT32 { 16, 1, 0, 0 }\n"
540                 "\n"
541                 "    BGNSUB\n"
542                 "       UADD TEMP[0].x, SV[0].xxxx, SV[0].yyyy\n"
543                 "       AND TEMP[0].x, TEMP[0], IMM[0]\n"
544                 "       UMUL TEMP[0].x, TEMP[0], IMM[1]\n"
545                 "       LOAD TEMP[0].xyzw, RES[0], TEMP[0]\n"
546                 "       UMUL TEMP[1], SV[0], IMM[1]\n"
547                 "       STORE RES[1].xyzw, TEMP[1], TEMP[0]\n"
548                 "       RET\n"
549                 "    ENDSUB\n";
550 
551         printf("- %s\n", __func__);
552 
553         init_prog(ctx, 0, 0, 0, src, NULL);
554         init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
555                  256, 0, test_resource_access_init0);
556         init_tex(ctx, 1, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT,
557                  60, 12, test_default_init);
558         init_compute_resources(ctx, (int []) { 0, 1, -1 });
559         launch_grid(ctx, (uint []){1, 1, 1}, (uint []){15, 12, 1}, 0, NULL);
560         check_tex(ctx, 1, test_resource_access_expect, NULL);
561         destroy_compute_resources(ctx);
562         destroy_tex(ctx);
563         destroy_prog(ctx);
564 }
565 
566 /* test_function_calls */
/*
 * Initialiser for test_function_calls(): texel (x, y) receives the value
 * 15 * y + x.
 */
static void test_function_calls_init(void *p, int s, int x, int y)
{
        uint32_t *out = p;

        (void)s;

        *out = 15 * y + x;
}
571 
/*
 * Expected texel (x, y) for test_function_calls(): 2 where the initial
 * value 15 * y + x is below 4, otherwise 1.
 */
static void test_function_calls_expect(void *p, int s, int x, int y)
{
        const int initial = 15 * y + x;
        uint32_t *out = p;

        (void)s;

        if (initial < 4)
                *out = 2;
        else
                *out = 1;
}
576 
test_function_calls(struct context * ctx)577 static void test_function_calls(struct context *ctx)
578 {
579         const char *src = "COMP\n"
580                 "DCL RES[0], 2D, RAW, WR\n"
581                 "DCL SV[0], BLOCK_ID[0]\n"
582                 "DCL SV[1], BLOCK_SIZE[0]\n"
583                 "DCL SV[2], GRID_SIZE[0]\n"
584                 "DCL SV[3], THREAD_ID[0]\n"
585                 "DCL TEMP[0]\n"
586                 "DCL TEMP[1]\n"
587                 "DCL TEMP[2], LOCAL\n"
588                 "IMM UINT32 { 0, 11, 22, 33 }\n"
589                 "IMM FLT32 { 11, 33, 55, 99 }\n"
590                 "IMM UINT32 { 4, 1, 0, 0 }\n"
591                 "IMM UINT32 { 12, 0, 0, 0 }\n"
592                 "\n"
593                 "00: BGNSUB\n"
594                 "01:  UMUL TEMP[0].x, TEMP[0], TEMP[0]\n"
595                 "02:  UADD TEMP[1].x, TEMP[1], IMM[2].yyyy\n"
596                 "03:  USLT TEMP[0].x, TEMP[0], IMM[0]\n"
597                 "04:  RET\n"
598                 "05: ENDSUB\n"
599                 "06: BGNSUB\n"
600                 "07:  UMUL TEMP[0].x, TEMP[0], TEMP[0]\n"
601                 "08:  UADD TEMP[1].x, TEMP[1], IMM[2].yyyy\n"
602                 "09:  USLT TEMP[0].x, TEMP[0], IMM[0].yyyy\n"
603                 "10:  IF TEMP[0].xxxx\n"
604                 "11:   CAL :0\n"
605                 "12:  ENDIF\n"
606                 "13:  RET\n"
607                 "14: ENDSUB\n"
608                 "15: BGNSUB\n"
609                 "16:  UMUL TEMP[2], SV[0], SV[1]\n"
610                 "17:  UADD TEMP[2], TEMP[2], SV[3]\n"
611                 "18:  UMUL TEMP[2], TEMP[2], IMM[2]\n"
612                 "00:  MOV TEMP[1].x, IMM[2].wwww\n"
613                 "19:  LOAD TEMP[0].x, RES[0].xxxx, TEMP[2]\n"
614                 "20:  CAL :6\n"
615                 "21:  STORE RES[0].x, TEMP[2], TEMP[1].xxxx\n"
616                 "22:  RET\n"
617                 "23: ENDSUB\n";
618 
619         printf("- %s\n", __func__);
620 
621         init_prog(ctx, 0, 0, 0, src, NULL);
622         init_tex(ctx, 0, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT,
623                  15, 12, test_function_calls_init);
624         init_compute_resources(ctx, (int []) { 0, -1 });
625         launch_grid(ctx, (uint []){3, 3, 3}, (uint []){5, 4, 1}, 15, NULL);
626         check_tex(ctx, 0, test_function_calls_expect, NULL);
627         destroy_compute_resources(ctx);
628         destroy_tex(ctx);
629         destroy_prog(ctx);
630 }
631 
632 /* test_input_global */
/*
 * Expected word `x` of buffer `s` for test_input_global(): word 0 holds
 * 0xdeadbeef minus (0x10001 + 2 * s); every other word keeps 0xdeadbeef.
 */
static void test_input_global_expect(void *p, int s, int x, int y)
{
        uint32_t value = 0xdeadbeef;

        (void)y;

        if (x == 0)
                value -= 0x10001 + 2 * s;

        *(uint32_t *)p = value;
}
637 
test_input_global(struct context * ctx)638 static void test_input_global(struct context *ctx)
639 {
640         const char *src = "COMP\n"
641                 "DCL SV[0], THREAD_ID[0]\n"
642                 "DCL TEMP[0], LOCAL\n"
643                 "DCL TEMP[1], LOCAL\n"
644                 "IMM UINT32 { 8, 0, 0, 0 }\n"
645                 "\n"
646                 "    BGNSUB\n"
647                 "       UMUL TEMP[0], SV[0], IMM[0]\n"
648                 "       LOAD TEMP[1].xy, RINPUT, TEMP[0]\n"
649                 "       LOAD TEMP[0].x, RGLOBAL, TEMP[1].yyyy\n"
650                 "       UADD TEMP[1].x, TEMP[0], -TEMP[1]\n"
651                 "       STORE RGLOBAL.x, TEMP[1].yyyy, TEMP[1]\n"
652                 "       RET\n"
653                 "    ENDSUB\n";
654         uint32_t input[8] = { 0x10001, 0x10002, 0x10003, 0x10004,
655                               0x10005, 0x10006, 0x10007, 0x10008 };
656 
657         printf("- %s\n", __func__);
658 
659         init_prog(ctx, 0, 0, 32, src, NULL);
660         init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 32, 0,
661                  test_default_init);
662         init_tex(ctx, 1, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 32, 0,
663                  test_default_init);
664         init_tex(ctx, 2, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 32, 0,
665                  test_default_init);
666         init_tex(ctx, 3, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 32, 0,
667                  test_default_init);
668         init_globals(ctx, (int []){ 0, 1, 2, 3, -1 },
669                      (uint32_t *[]){ &input[1], &input[3],
670                                      &input[5], &input[7] });
671         launch_grid(ctx, (uint []){4, 1, 1}, (uint []){1, 1, 1}, 0, input);
672         check_tex(ctx, 0, test_input_global_expect, NULL);
673         check_tex(ctx, 1, test_input_global_expect, NULL);
674         check_tex(ctx, 2, test_input_global_expect, NULL);
675         check_tex(ctx, 3, test_input_global_expect, NULL);
676         destroy_globals(ctx);
677         destroy_tex(ctx);
678         destroy_prog(ctx);
679 }
680 
681 /* test_private */
/*
 * Expected word `x` of the test_private() output: the sum of x / 32 and
 * x % 32, i.e. the index of the 32-word group plus the position within it.
 */
static void test_private_expect(void *p, int s, int x, int y)
{
        const int group = x / 32;
        const int offset = x % 32;

        (void)s;
        (void)y;

        *(uint32_t *)p = group + offset;
}
686 
test_private(struct context * ctx)687 static void test_private(struct context *ctx)
688 {
689         const char *src = "COMP\n"
690                 "DCL RES[0], BUFFER, RAW, WR\n"
691                 "DCL SV[0], BLOCK_ID[0]\n"
692                 "DCL SV[1], BLOCK_SIZE[0]\n"
693                 "DCL SV[2], THREAD_ID[0]\n"
694                 "DCL TEMP[0], LOCAL\n"
695                 "DCL TEMP[1], LOCAL\n"
696                 "DCL TEMP[2], LOCAL\n"
697                 "IMM UINT32 { 128, 0, 0, 0 }\n"
698                 "IMM UINT32 { 4, 0, 0, 0 }\n"
699                 "\n"
700                 "    BGNSUB\n"
701                 "       UMUL TEMP[0].x, SV[0], SV[1]\n"
702                 "       UADD TEMP[0].x, TEMP[0], SV[2]\n"
703                 "       MOV TEMP[1].x, IMM[0].wwww\n"
704                 "       BGNLOOP\n"
705                 "               USEQ TEMP[2].x, TEMP[1], IMM[0]\n"
706                 "               IF TEMP[2]\n"
707                 "                       BRK\n"
708                 "               ENDIF\n"
709                 "               UDIV TEMP[2].x, TEMP[1], IMM[1]\n"
710                 "               UADD TEMP[2].x, TEMP[2], TEMP[0]\n"
711                 "               STORE RPRIVATE.x, TEMP[1], TEMP[2]\n"
712                 "               UADD TEMP[1].x, TEMP[1], IMM[1]\n"
713                 "       ENDLOOP\n"
714                 "       MOV TEMP[1].x, IMM[0].wwww\n"
715                 "       UMUL TEMP[0].x, TEMP[0], IMM[0]\n"
716                 "       BGNLOOP\n"
717                 "               USEQ TEMP[2].x, TEMP[1], IMM[0]\n"
718                 "               IF TEMP[2]\n"
719                 "                       BRK\n"
720                 "               ENDIF\n"
721                 "               LOAD TEMP[2].x, RPRIVATE, TEMP[1]\n"
722                 "               STORE RES[0].x, TEMP[0], TEMP[2]\n"
723                 "               UADD TEMP[0].x, TEMP[0], IMM[1]\n"
724                 "               UADD TEMP[1].x, TEMP[1], IMM[1]\n"
725                 "       ENDLOOP\n"
726                 "       RET\n"
727                 "    ENDSUB\n";
728 
729         printf("- %s\n", __func__);
730 
731         init_prog(ctx, 0, 128, 0, src, NULL);
732         init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
733                  32768, 0, test_default_init);
734         init_compute_resources(ctx, (int []) { 0, -1 });
735         launch_grid(ctx, (uint []){16, 1, 1}, (uint []){16, 1, 1}, 0, NULL);
736         check_tex(ctx, 0, test_private_expect, NULL);
737         destroy_compute_resources(ctx);
738         destroy_tex(ctx);
739         destroy_prog(ctx);
740 }
741 
742 /* test_local */
/*
 * Expected word `x` of the test_local() output: 2 when bit 5 (0x20) of x
 * is set, otherwise 1.
 */
static void test_local_expect(void *p, int s, int x, int y)
{
        uint32_t *out = p;

        (void)s;
        (void)y;

        if (x & 0x20)
                *out = 2;
        else
                *out = 1;
}
747 
test_local(struct context * ctx)748 static void test_local(struct context *ctx)
749 {
750         const char *src = "COMP\n"
751                 "DCL RES[0], BUFFER, RAW, WR\n"
752                 "DCL SV[0], BLOCK_ID[0]\n"
753                 "DCL SV[1], BLOCK_SIZE[0]\n"
754                 "DCL SV[2], THREAD_ID[0]\n"
755                 "DCL TEMP[0], LOCAL\n"
756                 "DCL TEMP[1], LOCAL\n"
757                 "DCL TEMP[2], LOCAL\n"
758                 "IMM UINT32 { 1, 0, 0, 0 }\n"
759                 "IMM UINT32 { 2, 0, 0, 0 }\n"
760                 "IMM UINT32 { 4, 0, 0, 0 }\n"
761                 "IMM UINT32 { 32, 0, 0, 0 }\n"
762                 "IMM UINT32 { 128, 0, 0, 0 }\n"
763                 "\n"
764                 "    BGNSUB\n"
765                 "       UMUL TEMP[0].x, SV[2], IMM[2]\n"
766                 "       STORE RLOCAL.x, TEMP[0], IMM[0].wwww\n"
767                 "       MFENCE RLOCAL\n"
768                 "       USLT TEMP[1].x, SV[2], IMM[3]\n"
769                 "       IF TEMP[1]\n"
770                 "               UADD TEMP[1].x, TEMP[0], IMM[4]\n"
771                 "               BGNLOOP\n"
772                 "                       LOAD TEMP[2].x, RLOCAL, TEMP[1]\n"
773                 "                       USEQ TEMP[2].x, TEMP[2], IMM[0]\n"
774                 "                       IF TEMP[2]\n"
775                 "                               BRK\n"
776                 "                       ENDIF\n"
777                 "               ENDLOOP\n"
778                 "               STORE RLOCAL.x, TEMP[0], IMM[0]\n"
779                 "               MFENCE RLOCAL\n"
780                 "               BGNLOOP\n"
781                 "                       LOAD TEMP[2].x, RLOCAL, TEMP[1]\n"
782                 "                       USEQ TEMP[2].x, TEMP[2], IMM[1]\n"
783                 "                       IF TEMP[2]\n"
784                 "                               BRK\n"
785                 "                       ENDIF\n"
786                 "               ENDLOOP\n"
787                 "       ELSE\n"
788                 "               UADD TEMP[1].x, TEMP[0], -IMM[4]\n"
789                 "               BGNLOOP\n"
790                 "                       LOAD TEMP[2].x, RLOCAL, TEMP[1]\n"
791                 "                       USEQ TEMP[2].x, TEMP[2], IMM[0].wwww\n"
792                 "                       IF TEMP[2]\n"
793                 "                               BRK\n"
794                 "                       ENDIF\n"
795                 "               ENDLOOP\n"
796                 "               STORE RLOCAL.x, TEMP[0], IMM[0]\n"
797                 "               MFENCE RLOCAL\n"
798                 "               BGNLOOP\n"
799                 "                       LOAD TEMP[2].x, RLOCAL, TEMP[1]\n"
800                 "                       USEQ TEMP[2].x, TEMP[2], IMM[0]\n"
801                 "                       IF TEMP[2]\n"
802                 "                               BRK\n"
803                 "                       ENDIF\n"
804                 "               ENDLOOP\n"
805                 "               STORE RLOCAL.x, TEMP[0], IMM[1]\n"
806                 "               MFENCE RLOCAL\n"
807                 "       ENDIF\n"
808                 "       UMUL TEMP[1].x, SV[0], SV[1]\n"
809                 "       UMUL TEMP[1].x, TEMP[1], IMM[2]\n"
810                 "       UADD TEMP[1].x, TEMP[1], TEMP[0]\n"
811                 "       LOAD TEMP[0].x, RLOCAL, TEMP[0]\n"
812                 "       STORE RES[0].x, TEMP[1], TEMP[0]\n"
813                 "       RET\n"
814                 "    ENDSUB\n";
815 
816         printf("- %s\n", __func__);
817 
818         init_prog(ctx, 256, 0, 0, src, NULL);
819         init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
820                  4096, 0, test_default_init);
821         init_compute_resources(ctx, (int []) { 0, -1 });
822         launch_grid(ctx, (uint []){64, 1, 1}, (uint []){16, 1, 1}, 0, NULL);
823         check_tex(ctx, 0, test_local_expect, NULL);
824         destroy_compute_resources(ctx);
825         destroy_tex(ctx);
826         destroy_prog(ctx);
827 }
828 
829 /* test_sample */
/*
 * Texel initializer for test_sample: a non-zero s yields 1, otherwise the
 * texel receives the integer product x * y converted to float.
 */
static void test_sample_init(void *p, int s, int x, int y)
{
        float *texel = p;

        if (s)
                *texel = 1;
        else
                *texel = x * y;
}
834 
/*
 * Expected-value callback for test_sample.  The value depends on x % 4:
 *   0    -> (x / 4) * y
 *   1, 2 -> 0
 *   3    -> 1
 * Any other remainder (negative x) leaves *p untouched.
 */
static void test_sample_expect(void *p, int s, int x, int y)
{
        float *out = p;
        const int lane = x % 4;

        if (lane == 0)
                *out = x / 4 * y;
        else if (lane == 1 || lane == 2)
                *out = 0;
        else if (lane == 3)
                *out = 1;
}
850 
test_sample(struct context * ctx)851 static void test_sample(struct context *ctx)
852 {
853         const char *src = "COMP\n"
854                 "DCL SVIEW[0], 2D, FLOAT\n"
855                 "DCL RES[0], 2D, RAW, WR\n"
856                 "DCL SAMP[0]\n"
857                 "DCL SV[0], BLOCK_ID[0]\n"
858                 "DCL TEMP[0], LOCAL\n"
859                 "DCL TEMP[1], LOCAL\n"
860                 "IMM UINT32 { 16, 1, 0, 0 }\n"
861                 "IMM FLT32 { 128, 32, 0, 0 }\n"
862                 "\n"
863                 "    BGNSUB\n"
864                 "       I2F TEMP[1], SV[0]\n"
865                 "       DIV TEMP[1], TEMP[1], IMM[1]\n"
866                 "       SAMPLE TEMP[1], TEMP[1], SVIEW[0], SAMP[0]\n"
867                 "       UMUL TEMP[0], SV[0], IMM[0]\n"
868                 "       STORE RES[0].xyzw, TEMP[0], TEMP[1]\n"
869                 "       RET\n"
870                 "    ENDSUB\n";
871 
872         printf("- %s\n", __func__);
873 
874         init_prog(ctx, 0, 0, 0, src, NULL);
875         init_tex(ctx, 0, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT,
876                  128, 32, test_sample_init);
877         init_tex(ctx, 1, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT,
878                  512, 32, test_sample_init);
879         init_compute_resources(ctx, (int []) { 1, -1 });
880         init_sampler_views(ctx, (int []) { 0, -1 });
881         init_sampler_states(ctx, 2);
882         launch_grid(ctx, (uint []){1, 1, 1}, (uint []){128, 32, 1}, 0, NULL);
883         check_tex(ctx, 1, test_sample_expect, NULL);
884         destroy_sampler_states(ctx);
885         destroy_sampler_views(ctx);
886         destroy_compute_resources(ctx);
887         destroy_tex(ctx);
888         destroy_prog(ctx);
889 }
890 
891 /* test_many_kern */
/*
 * Expected-value callback for test_many_kern: element x holds the value x.
 * s and y are not consulted.
 */
static void test_many_kern_expect(void *p, int s, int x, int y)
{
        uint32_t *out = p;

        out[0] = x;
}
896 
test_many_kern(struct context * ctx)897 static void test_many_kern(struct context *ctx)
898 {
899         const char *src = "COMP\n"
900                 "DCL RES[0], BUFFER, RAW, WR\n"
901                 "DCL TEMP[0], LOCAL\n"
902                 "IMM UINT32 { 0, 1, 2, 3 }\n"
903                 "IMM UINT32 { 4, 0, 0, 0 }\n"
904                 "\n"
905                 "    BGNSUB\n"
906                 "       UMUL TEMP[0].x, IMM[0].xxxx, IMM[1].xxxx\n"
907                 "       STORE RES[0].x, TEMP[0], IMM[0].xxxx\n"
908                 "       RET\n"
909                 "    ENDSUB\n"
910                 "    BGNSUB\n"
911                 "       UMUL TEMP[0].x, IMM[0].yyyy, IMM[1].xxxx\n"
912                 "       STORE RES[0].x, TEMP[0], IMM[0].yyyy\n"
913                 "       RET\n"
914                 "    ENDSUB\n"
915                 "    BGNSUB\n"
916                 "       UMUL TEMP[0].x, IMM[0].zzzz, IMM[1].xxxx\n"
917                 "       STORE RES[0].x, TEMP[0], IMM[0].zzzz\n"
918                 "       RET\n"
919                 "    ENDSUB\n"
920                 "    BGNSUB\n"
921                 "       UMUL TEMP[0].x, IMM[0].wwww, IMM[1].xxxx\n"
922                 "       STORE RES[0].x, TEMP[0], IMM[0].wwww\n"
923                 "       RET\n"
924                 "    ENDSUB\n";
925 
926         printf("- %s\n", __func__);
927 
928         init_prog(ctx, 0, 0, 0, src, NULL);
929         init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
930                  16, 0, test_default_init);
931         init_compute_resources(ctx, (int []) { 0, -1 });
932         launch_grid(ctx, (uint []){1, 1, 1}, (uint []){1, 1, 1}, 0, NULL);
933         launch_grid(ctx, (uint []){1, 1, 1}, (uint []){1, 1, 1}, 5, NULL);
934         launch_grid(ctx, (uint []){1, 1, 1}, (uint []){1, 1, 1}, 10, NULL);
935         launch_grid(ctx, (uint []){1, 1, 1}, (uint []){1, 1, 1}, 15, NULL);
936         check_tex(ctx, 0, test_many_kern_expect, NULL);
937         destroy_compute_resources(ctx);
938         destroy_tex(ctx);
939         destroy_prog(ctx);
940 }
941 
942 /* test_constant */
/*
 * Buffer initializer for test_constant: a non-zero s fills the element
 * with 0xdeadbeef converted to float, otherwise with 8.0 - x.
 */
static void test_constant_init(void *p, int s, int x, int y)
{
        float *out = p;

        if (s)
                *out = 0xdeadbeef;
        else
                *out = 8.0 - (float)x;
}
947 
/*
 * Expected-value callback for test_constant: element x holds 8.0 - x.
 * s and y are not consulted.
 */
static void test_constant_expect(void *p, int s, int x, int y)
{
        float *out = p;
        const double expected = 8.0 - (float)x;

        *out = expected;
}
952 
test_constant(struct context * ctx)953 static void test_constant(struct context *ctx)
954 {
955         const char *src = "COMP\n"
956                 "DCL RES[0], BUFFER, RAW\n"
957                 "DCL RES[1], BUFFER, RAW, WR\n"
958                 "DCL SV[0], BLOCK_ID[0]\n"
959                 "DCL TEMP[0], LOCAL\n"
960                 "DCL TEMP[1], LOCAL\n"
961                 "IMM UINT32 { 4, 0, 0, 0 }\n"
962                 "\n"
963                 "    BGNSUB\n"
964                 "       UMUL TEMP[0].x, SV[0], IMM[0]\n"
965                 "       LOAD TEMP[1].x, RES[0], TEMP[0]\n"
966                 "       STORE RES[1].x, TEMP[0], TEMP[1]\n"
967                 "       RET\n"
968                 "    ENDSUB\n";
969 
970         printf("- %s\n", __func__);
971 
972         init_prog(ctx, 0, 0, 0, src, NULL);
973         init_tex(ctx, 0, PIPE_BUFFER, false, PIPE_FORMAT_R32_FLOAT,
974                  256, 0, test_constant_init);
975         init_tex(ctx, 1, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
976                  256, 0, test_constant_init);
977         init_compute_resources(ctx, (int []) { 0, 1, -1 });
978         launch_grid(ctx, (uint []){1, 1, 1}, (uint []){64, 1, 1}, 0, NULL);
979         check_tex(ctx, 1, test_constant_expect, NULL);
980         destroy_compute_resources(ctx);
981         destroy_tex(ctx);
982         destroy_prog(ctx);
983 }
984 
985 /* test_resource_indirect */
/*
 * Buffer initializer for test_resource_indirect, selected by slot s:
 *   0     -> 0xdeadbeef
 *   1     -> x % 2
 *   2     -> 2 * x
 *   other -> 2 * x + 1
 */
static void test_resource_indirect_init(void *p, int s, int x, int y)
{
        uint32_t value;

        switch (s) {
        case 0:
                value = 0xdeadbeef;
                break;
        case 1:
                value = x % 2;
                break;
        case 2:
                value = 2 * x;
                break;
        default:
                value = 2 * x + 1;
                break;
        }

        *(uint32_t *)p = value;
}
993 
/*
 * Expected-value callback for test_resource_indirect: element x holds
 * 2 * x, plus one when x is odd.  s and y are not consulted.
 */
static void test_resource_indirect_expect(void *p, int s, int x, int y)
{
        uint32_t *out = p;
        int bump = 0;

        if (x % 2)
                bump = 1;

        *out = 2 * x + bump;
}
998 
test_resource_indirect(struct context * ctx)999 static void test_resource_indirect(struct context *ctx)
1000 {
1001         const char *src = "COMP\n"
1002                 "DCL RES[0], BUFFER, RAW, WR\n"
1003                 "DCL RES[1..3], BUFFER, RAW\n"
1004                 "DCL SV[0], BLOCK_ID[0]\n"
1005                 "DCL TEMP[0], LOCAL\n"
1006                 "DCL TEMP[1], LOCAL\n"
1007                 "IMM UINT32 { 4, 0, 0, 0 }\n"
1008                 "\n"
1009                 "    BGNSUB\n"
1010                 "       UMUL TEMP[0].x, SV[0], IMM[0]\n"
1011                 "       LOAD TEMP[1].x, RES[1], TEMP[0]\n"
1012                 "       LOAD TEMP[1].x, RES[TEMP[1].x+2], TEMP[0]\n"
1013                 "       STORE RES[0].x, TEMP[0], TEMP[1]\n"
1014                 "       RET\n"
1015                 "    ENDSUB\n";
1016 
1017         printf("- %s\n", __func__);
1018 
1019         init_prog(ctx, 0, 0, 0, src, NULL);
1020         init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
1021                  256, 0, test_resource_indirect_init);
1022         init_tex(ctx, 1, PIPE_BUFFER, false, PIPE_FORMAT_R32_FLOAT,
1023                  256, 0, test_resource_indirect_init);
1024         init_tex(ctx, 2, PIPE_BUFFER, false, PIPE_FORMAT_R32_FLOAT,
1025                  256, 0, test_resource_indirect_init);
1026         init_tex(ctx, 3, PIPE_BUFFER, false, PIPE_FORMAT_R32_FLOAT,
1027                  256, 0, test_resource_indirect_init);
1028         init_compute_resources(ctx, (int []) { 0, 1, 2, 3, -1 });
1029         launch_grid(ctx, (uint []){1, 1, 1}, (uint []){64, 1, 1}, 0, NULL);
1030         check_tex(ctx, 0, test_resource_indirect_expect, NULL);
1031         destroy_compute_resources(ctx);
1032         destroy_tex(ctx);
1033         destroy_prog(ctx);
1034 }
1035 
1036 /* test_surface_ld */
/*
 * Formats iterated by test_surface_ld() and test_surface_st().  Each test
 * loops over the whole table and skips entries the screen reports as
 * unsupported for PIPE_BIND_COMPUTE_RESOURCE.
 */
enum pipe_format surface_fmts[] = {
        PIPE_FORMAT_B8G8R8A8_UNORM,
        PIPE_FORMAT_B8G8R8X8_UNORM,
        PIPE_FORMAT_A8R8G8B8_UNORM,
        PIPE_FORMAT_X8R8G8B8_UNORM,
        /* NOTE(review): repeats the previous entry, so this format is
         * exercised twice -- confirm whether another format was meant. */
        PIPE_FORMAT_X8R8G8B8_UNORM,
        PIPE_FORMAT_L8_UNORM,
        PIPE_FORMAT_A8_UNORM,
        PIPE_FORMAT_I8_UNORM,
        PIPE_FORMAT_L8A8_UNORM,
        PIPE_FORMAT_R32_FLOAT,
        PIPE_FORMAT_R32G32_FLOAT,
        PIPE_FORMAT_R32G32B32A32_FLOAT,
        PIPE_FORMAT_R32_UNORM,
        PIPE_FORMAT_R32G32_UNORM,
        PIPE_FORMAT_R32G32B32A32_UNORM,
        PIPE_FORMAT_R32_SNORM,
        PIPE_FORMAT_R32G32_SNORM,
        PIPE_FORMAT_R32G32B32A32_SNORM,
        PIPE_FORMAT_R8_UINT,
        PIPE_FORMAT_R8G8_UINT,
        PIPE_FORMAT_R8G8B8A8_UINT,
        PIPE_FORMAT_R8_SINT,
        PIPE_FORMAT_R8G8_SINT,
        PIPE_FORMAT_R8G8B8A8_SINT,
        PIPE_FORMAT_R32_UINT,
        PIPE_FORMAT_R32G32_UINT,
        PIPE_FORMAT_R32G32B32A32_UINT,
        PIPE_FORMAT_R32_SINT,
        PIPE_FORMAT_R32G32_SINT,
        PIPE_FORMAT_R32G32B32A32_SINT
};
1069 
test_surface_ld_init0f(void * p,int s,int x,int y)1070 static void test_surface_ld_init0f(void *p, int s, int x, int y)
1071 {
1072         float v[] = { 1.0, -.75, .50, -.25 };
1073         int i = 0;
1074 
1075         util_format_pack_rgba(surface_fmts[i], p, v, 1);
1076 }
1077 
test_surface_ld_init0i(void * p,int s,int x,int y)1078 static void test_surface_ld_init0i(void *p, int s, int x, int y)
1079 {
1080         int v[] = { 0xffffffff, 0xffff, 0xff, 0xf };
1081         int i = 0;
1082 
1083         util_format_pack_rgba(surface_fmts[i], p, v, 1);
1084 }
1085 
test_surface_ld_expectf(void * p,int s,int x,int y)1086 static void test_surface_ld_expectf(void *p, int s, int x, int y)
1087 {
1088         float v[4], w[4];
1089         int i = 0;
1090 
1091         test_surface_ld_init0f(v, s, x / 4, y);
1092         util_format_unpack_rgba(surface_fmts[i], w, v, 1);
1093         *(float *)p = w[x % 4];
1094 }
1095 
test_surface_ld_expecti(void * p,int s,int x,int y)1096 static void test_surface_ld_expecti(void *p, int s, int x, int y)
1097 {
1098         int32_t v[4], w[4];
1099         int i = 0;
1100 
1101         test_surface_ld_init0i(v, s, x / 4, y);
1102         util_format_unpack_rgba(surface_fmts[i], w, v, 1);
1103         *(uint32_t *)p = w[x % 4];
1104 }
1105 
test_surface_ld(struct context * ctx)1106 static void test_surface_ld(struct context *ctx)
1107 {
1108         const char *src = "COMP\n"
1109                 "DCL RES[0], 2D\n"
1110                 "DCL RES[1], 2D, RAW, WR\n"
1111                 "DCL SV[0], BLOCK_ID[0]\n"
1112                 "DCL TEMP[0], LOCAL\n"
1113                 "DCL TEMP[1], LOCAL\n"
1114                 "IMM UINT32 { 16, 1, 0, 0 }\n"
1115                 "\n"
1116                 "    BGNSUB\n"
1117                 "       LOAD TEMP[1], RES[0], SV[0]\n"
1118                 "       UMUL TEMP[0], SV[0], IMM[0]\n"
1119                 "       STORE RES[1].xyzw, TEMP[0], TEMP[1]\n"
1120                 "       RET\n"
1121                 "    ENDSUB\n";
1122         int i = 0;
1123 
1124         printf("- %s\n", __func__);
1125 
1126         init_prog(ctx, 0, 0, 0, src, NULL);
1127 
1128         for (i = 0; i < ARRAY_SIZE(surface_fmts); i++) {
1129                 bool is_int = util_format_is_pure_integer(surface_fmts[i]);
1130 
1131                 printf("   - %s\n", util_format_name(surface_fmts[i]));
1132 
1133                 if (!ctx->screen->is_format_supported(ctx->screen,
1134                        surface_fmts[i], PIPE_TEXTURE_2D, 1, 1,
1135                        PIPE_BIND_COMPUTE_RESOURCE)) {
1136                    printf("(unsupported)\n");
1137                    continue;
1138                 }
1139 
1140                 init_tex(ctx, 0, PIPE_TEXTURE_2D, true, surface_fmts[i],
1141                          128, 32, (is_int ? test_surface_ld_init0i : test_surface_ld_init0f));
1142                 init_tex(ctx, 1, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT,
1143                          512, 32, test_default_init);
1144                 init_compute_resources(ctx, (int []) { 0, 1, -1 });
1145                 init_sampler_states(ctx, 2);
1146                 launch_grid(ctx, (uint []){1, 1, 1}, (uint []){128, 32, 1}, 0,
1147                             NULL);
1148                 check_tex(ctx, 1, (is_int ? test_surface_ld_expecti : test_surface_ld_expectf), NULL);
1149                 destroy_sampler_states(ctx);
1150                 destroy_compute_resources(ctx);
1151                 destroy_tex(ctx);
1152         }
1153 
1154         destroy_prog(ctx);
1155 }
1156 
1157 /* test_surface_st */
/*
 * Source initializer for test_surface_st (non-integer formats): element x
 * receives entry x % 4 of a fixed four-float pattern.
 */
static void test_surface_st_init0f(void *p, int s, int x, int y)
{
        float pattern[] = { 1.0, -.75, 0.5, -.25 };
        float *out = p;

        out[0] = pattern[x % 4];
}
1163 
/*
 * Source initializer for test_surface_st (pure-integer formats): element
 * x receives entry x % 4 of a fixed four-integer pattern.
 */
static void test_surface_st_init0i(void *p, int s, int x, int y)
{
        int pattern[] = { 0xffffffff, 0xffff, 0xff, 0xf };
        int32_t *out = p;

        out[0] = pattern[x % 4];
}
1169 
test_surface_st_init1(void * p,int s,int x,int y)1170 static void test_surface_st_init1(void *p, int s, int x, int y)
1171 {
1172         int i = 0;
1173         memset(p, 1, util_format_get_blocksize(surface_fmts[i]));
1174 }
1175 
test_surface_st_expectf(void * p,int s,int x,int y)1176 static void test_surface_st_expectf(void *p, int s, int x, int y)
1177 {
1178         float vf[4];
1179         int i = 0, j;
1180 
1181         for (j = 0; j < 4; j++)
1182                 test_surface_st_init0f(&vf[j], s, 4 * x + j, y);
1183         util_format_pack_rgba(surface_fmts[i], p, vf, 1);
1184 }
1185 
test_surface_st_expects(void * p,int s,int x,int y)1186 static void test_surface_st_expects(void *p, int s, int x, int y)
1187 {
1188         int32_t v[4];
1189         int i = 0, j;
1190 
1191         for (j = 0; j < 4; j++)
1192                 test_surface_st_init0i(&v[j], s, 4 * x + j, y);
1193         util_format_pack_rgba(surface_fmts[i], p, v, 1);
1194 }
1195 
test_surface_st_expectu(void * p,int s,int x,int y)1196 static void test_surface_st_expectu(void *p, int s, int x, int y)
1197 {
1198         uint32_t v[4];
1199         int i = 0, j;
1200 
1201         for (j = 0; j < 4; j++)
1202                 test_surface_st_init0i(&v[j], s, 4 * x + j, y);
1203         util_format_pack_rgba(surface_fmts[i], p, v, 1);
1204 }
1205 
absdiff(uint32_t a,uint32_t b)1206 static unsigned absdiff(uint32_t a, uint32_t b)
1207 {
1208         return (a > b) ? (a - b) : (b - a);
1209 }
1210 
test_surface_st_check(void * x,void * y,int sz)1211 static bool test_surface_st_check(void *x, void *y, int sz)
1212 {
1213         int i = 0, j;
1214 
1215         if (util_format_is_float(surface_fmts[i])) {
1216                 return fabs(*(float *)x - *(float *)y) < 3.92156863e-3;
1217 
1218         } else if ((sz % 4) == 0) {
1219                 for (j = 0; j < sz / 4; j++)
1220                         if (absdiff(((uint32_t *)x)[j],
1221                                     ((uint32_t *)y)[j]) > 1)
1222                                 return false;
1223                 return true;
1224         } else {
1225                 return !memcmp(x, y, sz);
1226         }
1227 }
1228 
test_surface_st(struct context * ctx)1229 static void test_surface_st(struct context *ctx)
1230 {
1231         const char *src = "COMP\n"
1232                 "DCL RES[0], 2D, RAW\n"
1233                 "DCL RES[1], 2D, WR\n"
1234                 "DCL SV[0], BLOCK_ID[0]\n"
1235                 "DCL TEMP[0], LOCAL\n"
1236                 "DCL TEMP[1], LOCAL\n"
1237                 "IMM UINT32 { 16, 1, 0, 0 }\n"
1238                 "\n"
1239                 "    BGNSUB\n"
1240                 "       UMUL TEMP[0], SV[0], IMM[0]\n"
1241                 "       LOAD TEMP[1], RES[0], TEMP[0]\n"
1242                 "       STORE RES[1], SV[0], TEMP[1]\n"
1243                 "       RET\n"
1244                 "    ENDSUB\n";
1245         int i = 0;
1246 
1247         printf("- %s\n", __func__);
1248 
1249         init_prog(ctx, 0, 0, 0, src, NULL);
1250 
1251         for (i = 0; i < ARRAY_SIZE(surface_fmts); i++) {
1252                 bool is_signed = (util_format_description(surface_fmts[i])
1253                                   ->channel[0].type == UTIL_FORMAT_TYPE_SIGNED);
1254                 bool is_int = util_format_is_pure_integer(surface_fmts[i]);
1255 
1256                 printf("   - %s\n", util_format_name(surface_fmts[i]));
1257 
1258                 if (!ctx->screen->is_format_supported(ctx->screen,
1259                        surface_fmts[i], PIPE_TEXTURE_2D, 1, 1,
1260                        PIPE_BIND_COMPUTE_RESOURCE)) {
1261                    printf("(unsupported)\n");
1262                    continue;
1263                 }
1264 
1265                 init_tex(ctx, 0, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT,
1266                          512, 32, (is_int ? test_surface_st_init0i : test_surface_st_init0f));
1267                 init_tex(ctx, 1, PIPE_TEXTURE_2D, true, surface_fmts[i],
1268                          128, 32, test_surface_st_init1);
1269                 init_compute_resources(ctx, (int []) { 0, 1, -1 });
1270                 init_sampler_states(ctx, 2);
1271                 launch_grid(ctx, (uint []){1, 1, 1}, (uint []){128, 32, 1}, 0,
1272                             NULL);
1273                 check_tex(ctx, 1, (is_int && is_signed ? test_surface_st_expects :
1274                                    is_int && !is_signed ? test_surface_st_expectu :
1275                                    test_surface_st_expectf), test_surface_st_check);
1276                 destroy_sampler_states(ctx);
1277                 destroy_compute_resources(ctx);
1278                 destroy_tex(ctx);
1279         }
1280 
1281         destroy_prog(ctx);
1282 }
1283 
1284 /* test_barrier */
/*
 * Expected-value callback for test_barrier: every element holds 31.
 * s, x and y are not consulted.
 */
static void test_barrier_expect(void *p, int s, int x, int y)
{
        uint32_t *out = p;

        out[0] = 31;
}
1289 
test_barrier(struct context * ctx)1290 static void test_barrier(struct context *ctx)
1291 {
1292         const char *src = "COMP\n"
1293                 "DCL RES[0], BUFFER, RAW, WR\n"
1294                 "DCL SV[0], BLOCK_ID[0]\n"
1295                 "DCL SV[1], BLOCK_SIZE[0]\n"
1296                 "DCL SV[2], THREAD_ID[0]\n"
1297                 "DCL TEMP[0], LOCAL\n"
1298                 "DCL TEMP[1], LOCAL\n"
1299                 "DCL TEMP[2], LOCAL\n"
1300                 "DCL TEMP[3], LOCAL\n"
1301                 "IMM UINT32 { 1, 0, 0, 0 }\n"
1302                 "IMM UINT32 { 4, 0, 0, 0 }\n"
1303                 "IMM UINT32 { 32, 0, 0, 0 }\n"
1304                 "\n"
1305                 "    BGNSUB\n"
1306                 "       UMUL TEMP[0].x, SV[2], IMM[1]\n"
1307                 "       MOV TEMP[1].x, IMM[0].wwww\n"
1308                 "       BGNLOOP\n"
1309                 "               BARRIER\n"
1310                 "               STORE RLOCAL.x, TEMP[0], TEMP[1]\n"
1311                 "               BARRIER\n"
1312                 "               MOV TEMP[2].x, IMM[0].wwww\n"
1313                 "               BGNLOOP\n"
1314                 "                       UMUL TEMP[3].x, TEMP[2], IMM[1]\n"
1315                 "                       LOAD TEMP[3].x, RLOCAL, TEMP[3]\n"
1316                 "                       USNE TEMP[3].x, TEMP[3], TEMP[1]\n"
1317                 "                       IF TEMP[3]\n"
1318                 "                               END\n"
1319                 "                       ENDIF\n"
1320                 "                       UADD TEMP[2].x, TEMP[2], IMM[0]\n"
1321                 "                       USEQ TEMP[3].x, TEMP[2], SV[1]\n"
1322                 "                       IF TEMP[3]\n"
1323                 "                               BRK\n"
1324                 "                       ENDIF\n"
1325                 "               ENDLOOP\n"
1326                 "               UADD TEMP[1].x, TEMP[1], IMM[0]\n"
1327                 "               USEQ TEMP[2].x, TEMP[1], IMM[2]\n"
1328                 "               IF TEMP[2]\n"
1329                 "                       BRK\n"
1330                 "               ENDIF\n"
1331                 "       ENDLOOP\n"
1332                 "       UMUL TEMP[1].x, SV[0], SV[1]\n"
1333                 "       UMUL TEMP[1].x, TEMP[1], IMM[1]\n"
1334                 "       UADD TEMP[1].x, TEMP[1], TEMP[0]\n"
1335                 "       LOAD TEMP[0].x, RLOCAL, TEMP[0]\n"
1336                 "       STORE RES[0].x, TEMP[1], TEMP[0]\n"
1337                 "       RET\n"
1338                 "    ENDSUB\n";
1339 
1340         printf("- %s\n", __func__);
1341 
1342         init_prog(ctx, 256, 0, 0, src, NULL);
1343         init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
1344                  4096, 0, test_default_init);
1345         init_compute_resources(ctx, (int []) { 0, -1 });
1346         launch_grid(ctx, (uint []){64, 1, 1}, (uint []){16, 1, 1}, 0, NULL);
1347         check_tex(ctx, 0, test_barrier_expect, NULL);
1348         destroy_compute_resources(ctx);
1349         destroy_tex(ctx);
1350         destroy_prog(ctx);
1351 }
1352 
1353 /* test_atom_ops */
/*
 * Buffer initializer for test_atom_ops: every element starts as 0xbad.
 * s, x and y are not consulted.
 */
static void test_atom_ops_init(void *p, int s, int x, int y)
{
        uint32_t *out = p;

        out[0] = 0xbad;
}
1358 
/*
 * Expected-value callback for test_atom_ops.  Elements 0..9 each have a
 * fixed expected word (one per atomic operation exercised by the
 * program); for any other x the destination is left untouched.
 */
static void test_atom_ops_expect(void *p, int s, int x, int y)
{
        static const uint32_t expected[] = {
                0xce6c8eef,     /* 0 */
                0xdeadbeef,     /* 1 */
                0x11111111,     /* 2 */
                0x10011001,     /* 3 */
                0xdfbdbfff,     /* 4 */
                0x11111111,     /* 5 */
                0x11111111,     /* 6 */
                0xdeadbeef,     /* 7 */
                0xdeadbeef,     /* 8 */
                0x11111111,     /* 9 */
        };
        const int count = sizeof(expected) / sizeof(expected[0]);

        if (x < 0 || x >= count)
                return;

        *(uint32_t *)p = expected[x];
}
1394 
test_atom_ops(struct context * ctx,bool global)1395 static void test_atom_ops(struct context *ctx, bool global)
1396 {
1397         const char *src = "COMP\n"
1398                 "#ifdef TARGET_GLOBAL\n"
1399                 "#define target RES[0]\n"
1400                 "#else\n"
1401                 "#define target RLOCAL\n"
1402                 "#endif\n"
1403                 ""
1404                 "DCL RES[0], BUFFER, RAW, WR\n"
1405                 "#define threadid SV[0]\n"
1406                 "DCL threadid, THREAD_ID[0]\n"
1407                 ""
1408                 "#define offset TEMP[0]\n"
1409                 "DCL offset, LOCAL\n"
1410                 "#define tmp TEMP[1]\n"
1411                 "DCL tmp, LOCAL\n"
1412                 ""
1413                 "#define k0 IMM[0]\n"
1414                 "IMM UINT32 { 0, 0, 0, 0 }\n"
1415                 "#define k1 IMM[1]\n"
1416                 "IMM UINT32 { 1, 0, 0, 0 }\n"
1417                 "#define k2 IMM[2]\n"
1418                 "IMM UINT32 { 2, 0, 0, 0 }\n"
1419                 "#define k3 IMM[3]\n"
1420                 "IMM UINT32 { 3, 0, 0, 0 }\n"
1421                 "#define k4 IMM[4]\n"
1422                 "IMM UINT32 { 4, 0, 0, 0 }\n"
1423                 "#define k5 IMM[5]\n"
1424                 "IMM UINT32 { 5, 0, 0, 0 }\n"
1425                 "#define k6 IMM[6]\n"
1426                 "IMM UINT32 { 6, 0, 0, 0 }\n"
1427                 "#define k7 IMM[7]\n"
1428                 "IMM UINT32 { 7, 0, 0, 0 }\n"
1429                 "#define k8 IMM[8]\n"
1430                 "IMM UINT32 { 8, 0, 0, 0 }\n"
1431                 "#define k9 IMM[9]\n"
1432                 "IMM UINT32 { 9, 0, 0, 0 }\n"
1433                 "#define korig IMM[10].xxxx\n"
1434                 "#define karg IMM[10].yyyy\n"
1435                 "IMM UINT32 { 3735928559, 286331153, 0, 0 }\n"
1436                 "\n"
1437                 "    BGNSUB\n"
1438                 "       UMUL offset.x, threadid, k4\n"
1439                 "       STORE target.x, offset, korig\n"
1440                 "       USEQ tmp.x, threadid, k0\n"
1441                 "       IF tmp\n"
1442                 "               ATOMUADD tmp.x, target, offset, karg\n"
1443                 "               ATOMUADD tmp.x, target, offset, tmp\n"
1444                 "       ENDIF\n"
1445                 "       USEQ tmp.x, threadid, k1\n"
1446                 "       IF tmp\n"
1447                 "               ATOMXCHG tmp.x, target, offset, karg\n"
1448                 "               ATOMXCHG tmp.x, target, offset, tmp\n"
1449                 "       ENDIF\n"
1450                 "       USEQ tmp.x, threadid, k2\n"
1451                 "       IF tmp\n"
1452                 "               ATOMCAS tmp.x, target, offset, korig, karg\n"
1453                 "               ATOMCAS tmp.x, target, offset, tmp, k0\n"
1454                 "       ENDIF\n"
1455                 "       USEQ tmp.x, threadid, k3\n"
1456                 "       IF tmp\n"
1457                 "               ATOMAND tmp.x, target, offset, karg\n"
1458                 "               ATOMAND tmp.x, target, offset, tmp\n"
1459                 "       ENDIF\n"
1460                 "       USEQ tmp.x, threadid, k4\n"
1461                 "       IF tmp\n"
1462                 "               ATOMOR tmp.x, target, offset, karg\n"
1463                 "               ATOMOR tmp.x, target, offset, tmp\n"
1464                 "       ENDIF\n"
1465                 "       USEQ tmp.x, threadid, k5\n"
1466                 "       IF tmp\n"
1467                 "               ATOMXOR tmp.x, target, offset, karg\n"
1468                 "               ATOMXOR tmp.x, target, offset, tmp\n"
1469                 "       ENDIF\n"
1470                 "       USEQ tmp.x, threadid, k6\n"
1471                 "       IF tmp\n"
1472                 "               ATOMUMIN tmp.x, target, offset, karg\n"
1473                 "               ATOMUMIN tmp.x, target, offset, tmp\n"
1474                 "       ENDIF\n"
1475                 "       USEQ tmp.x, threadid, k7\n"
1476                 "       IF tmp\n"
1477                 "               ATOMUMAX tmp.x, target, offset, karg\n"
1478                 "               ATOMUMAX tmp.x, target, offset, tmp\n"
1479                 "       ENDIF\n"
1480                 "       USEQ tmp.x, threadid, k8\n"
1481                 "       IF tmp\n"
1482                 "               ATOMIMIN tmp.x, target, offset, karg\n"
1483                 "               ATOMIMIN tmp.x, target, offset, tmp\n"
1484                 "       ENDIF\n"
1485                 "       USEQ tmp.x, threadid, k9\n"
1486                 "       IF tmp\n"
1487                 "               ATOMIMAX tmp.x, target, offset, karg\n"
1488                 "               ATOMIMAX tmp.x, target, offset, tmp\n"
1489                 "       ENDIF\n"
1490                 "#ifdef TARGET_LOCAL\n"
1491                 "       LOAD tmp.x, RLOCAL, offset\n"
1492                 "       STORE RES[0].x, offset, tmp\n"
1493                 "#endif\n"
1494                 "       RET\n"
1495                 "    ENDSUB\n";
1496 
1497         printf("- %s (%s)\n", __func__, global ? "global" : "local");
1498 
1499         init_prog(ctx, 40, 0, 0, src,
1500                   (global ? "-DTARGET_GLOBAL" : "-DTARGET_LOCAL"));
1501         init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
1502                  40, 0, test_atom_ops_init);
1503         init_compute_resources(ctx, (int []) { 0, -1 });
1504         launch_grid(ctx, (uint []){10, 1, 1}, (uint []){1, 1, 1}, 0, NULL);
1505         check_tex(ctx, 0, test_atom_ops_expect, NULL);
1506         destroy_compute_resources(ctx);
1507         destroy_tex(ctx);
1508         destroy_prog(ctx);
1509 }
1510 
1511 /* test_atom_race */
/*
 * Expected-value callback handed to check_tex() by test_atom_race.
 *
 * Writes the 32-bit word expected at horizontal position x into *p:
 * 0x11111111 when bit 5 (0x20) of x is set, 0xffffffff otherwise.
 * The s and y arguments are not used.
 */
static void test_atom_race_expect(void *p, int s, int x, int y)
{
        uint32_t *out = p;

        if (x & 0x20)
                *out = 0x11111111;
        else
                *out = 0xffffffff;
}
1516 
/*
 * Atomic race test.
 *
 * Hands the compute program text below to init_prog() together with
 * either "-DTARGET_GLOBAL" or "-DTARGET_LOCAL" (chosen by `global`), which
 * makes the `target` macro in the program expand to RES[0] or RLOCAL
 * respectively.  It then creates resource 0, launches the program and has
 * check_tex() compare the result against test_atom_race_expect(), before
 * tearing everything down again.
 *
 * In the program, threads whose THREAD_ID is below 32 hammer their own
 * word with ATOMUADD, while the remaining threads watch the word 128
 * bytes below their own and finally overwrite both words with
 * ATOMXCHG.
 *
 * ctx:    test context passed through to every helper.
 * global: true selects the RES[0] variant, false the RLOCAL variant.
 */
static void test_atom_race(struct context *ctx, bool global)
{
        const char *src = "COMP\n"
                "#ifdef TARGET_GLOBAL\n"
                "#define target RES[0]\n"
                "#else\n"
                "#define target RLOCAL\n"
                "#endif\n"
                ""
                "DCL RES[0], BUFFER, RAW, WR\n"
                ""
                "#define blockid SV[0]\n"
                "DCL blockid, BLOCK_ID[0]\n"
                "#define blocksz SV[1]\n"
                "DCL blocksz, BLOCK_SIZE[0]\n"
                "#define threadid SV[2]\n"
                "DCL threadid, THREAD_ID[0]\n"
                ""
                "#define offset TEMP[0]\n"
                "DCL offset, LOCAL\n"
                "#define arg TEMP[1]\n"
                "DCL arg, LOCAL\n"
                "#define count TEMP[2]\n"
                "DCL count, LOCAL\n"
                "#define vlocal TEMP[3]\n"
                "DCL vlocal, LOCAL\n"
                "#define vshared TEMP[4]\n"
                "DCL vshared, LOCAL\n"
                "#define last TEMP[5]\n"
                "DCL last, LOCAL\n"
                "#define tmp0 TEMP[6]\n"
                "DCL tmp0, LOCAL\n"
                "#define tmp1 TEMP[7]\n"
                "DCL tmp1, LOCAL\n"
                ""
                "#define k0 IMM[0]\n"
                "IMM UINT32 { 0, 0, 0, 0 }\n"
                "#define k1 IMM[1]\n"
                "IMM UINT32 { 1, 0, 0, 0 }\n"
                "#define k4 IMM[2]\n"
                "IMM UINT32 { 4, 0, 0, 0 }\n"
                "#define k32 IMM[3]\n"
                "IMM UINT32 { 32, 0, 0, 0 }\n"
                "#define k128 IMM[4]\n"
                "IMM UINT32 { 128, 0, 0, 0 }\n"
                /* 3735931646 == 0xdeadcafe */
                "#define kdeadcafe IMM[5]\n"
                "IMM UINT32 { 3735931646, 0, 0, 0 }\n"
                /*
                 * 559035650 == 0x21523502, the 32-bit negation of
                 * 0xdeadcafe; the .y component used below is 0.
                 */
                "#define kallowed_set IMM[6]\n"
                "IMM UINT32 { 559035650, 0, 0, 0 }\n"
                /* 286331153 == 0x11111111 */
                "#define k11111111 IMM[7]\n"
                "IMM UINT32 { 286331153, 0, 0, 0 }\n"
                "\n"
                "    BGNSUB\n"
                /*
                 * offset = 4 * threadid, with blockid * blocksz added to
                 * the index first in the TARGET_GLOBAL variant.
                 */
                "       MOV offset.x, threadid\n"
                "#ifdef TARGET_GLOBAL\n"
                "       UMUL tmp0.x, blockid, blocksz\n"
                "       UADD offset.x, offset, tmp0\n"
                "#endif\n"
                "       UMUL offset.x, offset, k4\n"
                "       USLT tmp0.x, threadid, k32\n"
                /* Every thread zeroes its own word before the barrier. */
                "       STORE target.x, offset, k0\n"
                "       BARRIER\n"
                "       IF tmp0\n"
                /*
                 * threadid < 32: keep adding the alternately negated arg
                 * to this thread's word.  The loop ends once the value
                 * ATOMUADD hands back (presumably the pre-add contents)
                 * no longer matches the locally tracked vlocal.
                 */
                "               MOV vlocal.x, k0\n"
                "               MOV arg.x, kdeadcafe\n"
                "               BGNLOOP\n"
                "                       INEG arg.x, arg\n"
                "                       ATOMUADD vshared.x, target, offset, arg\n"
                "                       SFENCE target\n"
                "                       USNE tmp0.x, vshared, vlocal\n"
                "                       IF tmp0\n"
                "                               BRK\n"
                "                       ENDIF\n"
                "                       UADD vlocal.x, vlocal, arg\n"
                "               ENDLOOP\n"
                /*
                 * Recompute the value expected after the last add, reload
                 * the word and store the comparison result in its place.
                 */
                "               UADD vlocal.x, vshared, arg\n"
                "               LOAD vshared.x, target, offset\n"
                "               USEQ tmp0.x, vshared, vlocal\n"
                "               STORE target.x, offset, tmp0\n"
                "       ELSE\n"
                /*
                 * threadid >= 32: watch the word 128 bytes below this
                 * thread's own.  A loaded value that is neither
                 * kallowed_set.x nor kallowed_set.y reaches END; a value
                 * equal to the previous observation is skipped with
                 * CONT; otherwise count is bumped until it reaches 128.
                 */
                "               UADD offset.x, offset, -k128\n"
                "               MOV count.x, k0\n"
                "               MOV last.x, k0\n"
                "               BGNLOOP\n"
                "                       LOAD vshared.x, target, offset\n"
                "                       USEQ tmp0.x, vshared, kallowed_set.xxxx\n"
                "                       USEQ tmp1.x, vshared, kallowed_set.yyyy\n"
                "                       OR tmp0.x, tmp0, tmp1\n"
                "                       IF tmp0\n"
                "                               USEQ tmp0.x, vshared, last\n"
                "                               IF tmp0\n"
                "                                       CONT\n"
                "                               ENDIF\n"
                "                               MOV last.x, vshared\n"
                "                       ELSE\n"
                "                               END\n"
                "                       ENDIF\n"
                "                       UADD count.x, count, k1\n"
                "                       USEQ tmp0.x, count, k128\n"
                "                       IF tmp0\n"
                "                               BRK\n"
                "                       ENDIF\n"
                "               ENDLOOP\n"
                /*
                 * Exchange 0x11111111 into the watched word, restore
                 * offset and do the same to this thread's own word.
                 */
                "               ATOMXCHG tmp0.x, target, offset, k11111111\n"
                "               UADD offset.x, offset, k128\n"
                "               ATOMXCHG tmp0.x, target, offset, k11111111\n"
                "               SFENCE target\n"
                "       ENDIF\n"
                /*
                 * TARGET_LOCAL only: copy this thread's RLOCAL word into
                 * RES[0] at offset + 4 * blockid * blocksz.
                 */
                "#ifdef TARGET_LOCAL\n"
                "       LOAD tmp0.x, RLOCAL, offset\n"
                "       UMUL tmp1.x, blockid, blocksz\n"
                "       UMUL tmp1.x, tmp1, k4\n"
                "       UADD offset.x, offset, tmp1\n"
                "       STORE RES[0].x, offset, tmp0\n"
                "#endif\n"
                "       RET\n"
                "    ENDSUB\n";

        printf("- %s (%s)\n", __func__, global ? "global" : "local");

        /* The define string selects which `target` the program uses. */
        init_prog(ctx, 256, 0, 0, src,
                  (global ? "-DTARGET_GLOBAL" : "-DTARGET_LOCAL"));
        init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
                 4096, 0, test_default_init);
        init_compute_resources(ctx, (int []) { 0, -1 });
        /*
         * NOTE(review): presumably 64 threads per block and 16 blocks
         * (64 * 16 words * 4 bytes == 4096) - confirm against launch_grid().
         */
        launch_grid(ctx, (uint []){64, 1, 1}, (uint []){16, 1, 1}, 0, NULL);
        check_tex(ctx, 0, test_atom_race_expect, NULL);
        destroy_compute_resources(ctx);
        destroy_tex(ctx);
        destroy_prog(ctx);
}
1648 
main(int argc,char * argv[])1649 int main(int argc, char *argv[])
1650 {
1651         struct context *ctx = CALLOC_STRUCT(context);
1652 
1653         unsigned tests = (argc > 1) ? strtoul(argv[1], NULL, 0) : ~0;
1654 
1655         init_ctx(ctx);
1656 
1657         if (tests & (1 << 0))
1658            test_system_values(ctx);
1659         if (tests & (1 << 1))
1660            test_resource_access(ctx);
1661         if (tests & (1 << 2))
1662            test_function_calls(ctx);
1663         if (tests & (1 << 3))
1664            test_input_global(ctx);
1665         if (tests & (1 << 4))
1666            test_private(ctx);
1667         if (tests & (1 << 5))
1668            test_local(ctx);
1669         if (tests & (1 << 6))
1670            test_sample(ctx);
1671         if (tests & (1 << 7))
1672            test_many_kern(ctx);
1673         if (tests & (1 << 8))
1674            test_constant(ctx);
1675         if (tests & (1 << 9))
1676            test_resource_indirect(ctx);
1677         if (tests & (1 << 10))
1678            test_surface_ld(ctx);
1679         if (tests & (1 << 11))
1680            test_surface_st(ctx);
1681         if (tests & (1 << 12))
1682            test_barrier(ctx);
1683         if (tests & (1 << 13))
1684            test_atom_ops(ctx, true);
1685         if (tests & (1 << 14))
1686            test_atom_race(ctx, true);
1687         if (tests & (1 << 15))
1688            test_atom_ops(ctx, false);
1689         if (tests & (1 << 16))
1690            test_atom_race(ctx, false);
1691 
1692         destroy_ctx(ctx);
1693 
1694         return 0;
1695 }
1696