/*
 * Copyright (C) 2014 Intel Corporation
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/** @file atomicity.c
 *
 * Test the atomicity of the read-modify-write image operations
 * defined by the spec.  The subtests fall into two groups:
 *
 * The ones that test the bitwise operations (imageAtomicAnd(),
 * imageAtomicOr(), imageAtomicXor()) and imageAtomicExchange() use an
 * image as a bitmap that is written to by a large number of shader
 * invocations in parallel; each invocation uses a bitwise built-in to
 * flip an individual bit of the image.  If the read-modify-write
 * operation is implemented atomically, no write will overwrite a
 * concurrent write meant to flip a different bit of the same dword,
 * so the whole bitmap will be inverted when rendering completes.
 *
 * The remaining subtests (imageAtomicAdd(), imageAtomicMin(),
 * imageAtomicMax(), imageAtomicCompSwap()) operate on a single 32-bit
 * location of the image which is accessed concurrently by all shader
 * invocations.  In each case a function written in terms of one of
 * the built-ins is guaranteed to return a unique 32-bit value for
 * each concurrent invocation as long as the read-modify-write
 * operation is implemented atomically.  The way in which this is
 * achieved differs for each built-in and is described in more detail
 * below.
 */


#include "common.h"

/** Window width. */
#define W 16

/** Window height. */
#define H 96

/** Total number of pixels in the window and image. */
#define N (W * H)

static struct piglit_gl_test_config *piglit_config;

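/**
 * Fill every texel of \a img with the constant value \a v.
 */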
static bool
init_image(const struct image_info img, uint32_t v)
{
        uint32_t pixels[N];

        return init_pixels(img, pixels, v, 0, 0, 0) &&
                upload_image(img, 0, pixels);
}

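/**
 * Check that no two pixels of the framebuffer contain the same value
 * modulo N.  The subtests that require uniqueness produce N
 * consecutive counter values, so a collision modulo N means that two
 * fragments obtained the same result from the atomic built-in.
 */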
static bool
check_fb_unique(const struct grid_info grid)
{
        uint32_t pixels[H][W];
        int frequency[N] = { 0 };
        int i, j;

        if (!download_result(grid, pixels[0]))
                return false;

        for (i = 0; i < W; ++i) {
                for (j = 0; j < H; ++j) {
                        if (frequency[pixels[j][i] % N]++) {
                                printf("Probe value at (%d, %d)\n", i, j);
                                printf("  Observed: 0x%08x\n", pixels[j][i]);
                                printf("  Value not unique.\n");
                                return false;
                        }
                }
        }

        return true;
}

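/**
 * Check that the first \a n texels of the image contain the constant
 * value \a v.
 */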
static bool
check_image_const(const struct image_info img, unsigned n, uint32_t v)
{
        uint32_t pixels[N];

        return download_image(img, 0, pixels) &&
                check_pixels(set_image_size(img, n, 1, 1, 1),
                             pixels, v, 0, 0, 0);
}

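/**
 * Parameters of a single atomicity subtest: the value the image is
 * initialized to, the number of leading texels expected to equal
 * check_value after rendering, whether the fragment values written to
 * the framebuffer must additionally be unique, and the GLSL source of
 * the op() function run by every shader invocation.
 */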
struct testcase
{
	uint32_t init_value;
	unsigned check_sz;
	uint32_t check_value;
	bool check_unique;
	const char *op;
};

static struct testcase testdata[] = {
        /*
         * If imageAtomicAdd() is atomic, the return values obtained
         * from each call are guaranteed to be unique.
         */
	{
		0, 1, N, true,
		"GRID_T op(ivec2 idx, GRID_T x) {\n"
		"       return GRID_T("
		"          imageAtomicAdd(img, IMAGE_ADDR(ivec2(0)), 1u),"
		"          0, 0, 1);\n"
		"}\n",
	},

        /*
         * Call imageAtomicMin() on a fixed location from within a
         * loop, passing the most recent guess of the counter value
         * decremented by one.
         *
         * If no race occurs the counter is decremented by one and
         * we're done; if another thread updates the counter in
         * parallel, imageAtomicMin() has no effect since
         * min(x-n, x-1) = x-n for n >= 1, so we update our guess and
         * repeat.  In the end we obtain a unique counter value for
         * each fragment if the read-modify-write operation is atomic.
         */
	{
                0xffffffff, 1, 0xffffffff - N, true,
                "GRID_T op(ivec2 idx, GRID_T x) {\n"
                "       uint old, v = 0xffffffffu;"
                "\n"
                "       do {\n"
                "               old = v;\n"
                "               v = imageAtomicMin(img, IMAGE_ADDR(ivec2(0)),"
                "                                  v - 1u);\n"
                "       } while (v != old);\n"
                "\n"
                "       return GRID_T(v, 0, 0, 1);\n"
                "}\n",
	},

        /*
         * Use imageAtomicMax() on a fixed location to increment a
         * counter as explained above for imageAtomicMin().  The
         * atomicity of the built-in guarantees that the obtained
         * values will be unique for each fragment.
         */
	{
                0, 1, N, true,
                "GRID_T op(ivec2 idx, GRID_T x) {\n"
                "       uint old, v = 0u;"
                "\n"
                "       do {\n"
                "               old = v;\n"
                "               v = imageAtomicMax(img, IMAGE_ADDR(ivec2(0)),"
                "                                  v + 1u);\n"
                "       } while (v != old);\n"
                "\n"
                "       return GRID_T(v, 0, 0, 1);\n"
                "}\n",
	},

        /*
         * Use imageAtomicAnd() to clear individual bits of a bitmap
         * atomically.  The atomicity of the built-in guarantees that
         * all bits will be clear on termination.
         */
	{
                0xffffffff, N / 32, 0, false,
                "GRID_T op(ivec2 idx, GRID_T x) {\n"
                "       int i = IMAGE_ADDR(idx);\n"
                "       uint m = ~(1u << (i % 32));\n"
                "\n"
                "       imageAtomicAnd(img, i / 32, m);\n"
                "\n"
                "       return GRID_T(0, 0, 0, 1);\n"
                "}\n",
	},

        /*
         * Use imageAtomicOr() to set individual bits of a bitmap
         * atomically.  The atomicity of the built-in guarantees that
         * all bits will be set on termination.
         */
	{
                0, N / 32, 0xffffffff, false,
                "GRID_T op(ivec2 idx, GRID_T x) {\n"
                "       int i = IMAGE_ADDR(idx);\n"
                "       uint m = (1u << (i % 32));\n"
                "\n"
                "       imageAtomicOr(img, i / 32, m);\n"
                "\n"
                "       return GRID_T(0, 0, 0, 1);\n"
                "}\n",
	},

        /*
         * Use imageAtomicXor() to toggle individual bits of a bitmap
         * atomically.  The atomicity of the built-in guarantees that
         * all bits will have been inverted on termination.
         */
	{
                0x55555555, N / 32, 0xaaaaaaaa, false,
                "GRID_T op(ivec2 idx, GRID_T x) {\n"
                "       int i = IMAGE_ADDR(idx);\n"
                "       uint m = (1u << (i % 32));\n"
                "\n"
                "       imageAtomicXor(img, i / 32, m);\n"
                "\n"
                "       return GRID_T(0, 0, 0, 1);\n"
                "}\n",
	},

        /*
         * Use imageAtomicExchange() to set individual bits of a
         * bitmap.  Since the exchange replaces the whole dword, each
         * invocation keeps re-writing the union of its own bit and
         * any bits it displaced until no concurrently written bit is
         * lost.  The atomicity of the built-in guarantees that all
         * bits will be set on termination.
         */
	{
                0, N / 32, 0xffffffff, false,
                "GRID_T op(ivec2 idx, GRID_T x) {\n"
                "       int i = IMAGE_ADDR(idx);\n"
                "       uint m = (1u << (i % 32));\n"
                "       uint old = 0u;\n"
                "\n"
                "       do {\n"
                "               m |= old;\n"
                "               old = imageAtomicExchange("
                "                       img, i / 32, m);\n"
                "       } while ((old & ~m) != 0u);\n"
                "\n"
                "       return GRID_T(0, 0, 0, 1);\n"
                "}\n",
	},
#if 0
        /*
         * Use imageAtomicExchange() on a fixed location to increment
         * a counter, implementing a sort of spin-lock.
         *
         * The counter has two states: locked (0xffffffff) and
         * unlocked (any other value).  While locked, a single thread
         * owns the value of the counter, increments it and puts it
         * back to the same location, atomically releasing the
         * counter.  The atomicity of the built-in guarantees that the
         * obtained values will be unique for each fragment.
         *
         * Unlike the classic spin-lock implementation, this uses the
         * same atomic call to perform either a lock or an unlock
         * operation depending on the current thread state.  This is
         * critical to avoid a dead-lock situation on machines where
         * neighboring threads have limited parallelism (e.g. share
         * the same instruction pointer).
         *
         * This could lead to a different kind of dead-lock on devices
         * that simulate concurrency by context-switching threads
         * based on some sort of priority queue: if a low-priority
         * thread can acquire the lock and be preempted before the end
         * of the critical section, it will prevent higher-priority
         * threads from making progress, while the higher-priority
         * threads may prevent the lock-owning thread from being
         * scheduled again and releasing the lock.
         *
         * Disabled for now because the latter dead-lock can easily be
         * reproduced on current Intel hardware, where it causes a GPU
         * hang.  It seems to work fine on nVidia, though; it would be
         * interesting to see if it works on other platforms.
         */
	{
                0, 1, N, true,
                "GRID_T op(ivec2 idx, GRID_T x) {\n"
                "       uint p = 0xffffffffu, v = 0xffffffffu;\n"
                "\n"
                "       do {\n"
                "               if (p != 0xffffffffu)\n"
                "                       v = p++;\n"
                "               p = imageAtomicExchange("
                "                  img, IMAGE_ADDR(ivec2(0)), p);\n"
                "       } while (v == 0xffffffffu);\n"
                "\n"
                "       return GRID_T(v, 0, 0, 1);\n"
                "}\n",
	},
#endif

        /*
         * Use imageAtomicCompSwap() on a fixed location from within a
         * loop, passing the most recent guess of the counter value as
         * comparison value and the same value incremented by one as
         * data argument.  The atomicity of the built-in guarantees
         * that the obtained values will be unique for each fragment.
         */
	{
                0, 1, N, true,
                "GRID_T op(ivec2 idx, GRID_T x) {\n"
                "       uint old, v = 0u;"
                "\n"
                "       do {\n"
                "               old = v;\n"
                "               v = imageAtomicCompSwap("
                "                  img, IMAGE_ADDR(ivec2(0)), v, v + 1u);\n"
                "       } while (v != old);\n"
                "\n"
                "       return GRID_T(v, 0, 0, 1);\n"
                "}\n",
	},
};

/**
 * Test skeleton: Init image to \a init_value, run the provided shader
 * \a op, check that the first \a check_sz pixels of the image equal
 * \a check_value and optionally check that the resulting fragment
 * values on the framebuffer are unique.
 */
static enum piglit_result
run_test(void *data)
{
	const struct testcase *test = (const struct testcase *)data;

        const struct grid_info grid =
                grid_info(GL_FRAGMENT_SHADER, GL_R32UI, W, H);
        const struct image_info img =
                image_info(GL_TEXTURE_1D, GL_R32UI, W, H);
        GLuint prog = generate_program(
                grid, GL_FRAGMENT_SHADER,
                concat(image_hunk(img, ""),
                       hunk("volatile IMAGE_UNIFORM_T img;\n"),
                       hunk(test->op), NULL));
        bool ret = prog &&
                init_fb(grid) &&
                init_image(img, test->init_value) &&
                set_uniform_int(prog, "img", 0) &&
                draw_grid(grid, prog) &&
                check_image_const(img, test->check_sz, test->check_value) &&
                (!test->check_unique || check_fb_unique(grid));

        glDeleteProgram(prog);
        return ret ? PIGLIT_PASS : PIGLIT_FAIL;
}

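/**
 * Subtest table: one entry per atomic built-in, each one running
 * run_test() on the corresponding element of testdata above.
 */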
static struct piglit_subtest tests[] = {
	{
		"imageAtomicAdd",
		"add",
		run_test,
		(void *)&testdata[0],
	},
	{
		"imageAtomicMin",
		"min",
		run_test,
		(void *)&testdata[1],
	},
	{
		"imageAtomicMax",
		"max",
		run_test,
		(void *)&testdata[2],
	},
	{
		"imageAtomicAnd",
		"and",
		run_test,
		(void *)&testdata[3],
	},
	{
		"imageAtomicOr",
		"or",
		run_test,
		(void *)&testdata[4],
	},
	{
		"imageAtomicXor",
		"xor",
		run_test,
		(void *)&testdata[5],
	},
	{
		"imageAtomicExchange",
		"exchange",
		run_test,
		(void *)&testdata[6],
	},
	{
		"imageAtomicCompSwap",
		"comp_swap",
		run_test,
		(void *)&testdata[7],
	},
	{0},
};

PIGLIT_GL_TEST_CONFIG_BEGIN

piglit_config = &config;
config.subtests = tests;
config.supports_gl_core_version = 32;

config.window_width = W;
config.window_height = H;
config.window_visual = PIGLIT_GL_VISUAL_DOUBLE | PIGLIT_GL_VISUAL_RGBA;
config.khr_no_error_support = PIGLIT_NO_ERRORS;

PIGLIT_GL_TEST_CONFIG_END

void
piglit_init(int argc, char **argv)
{
	piglit_require_extension("GL_ARB_shader_image_load_store");

	enum piglit_result result = PIGLIT_PASS;

	result = piglit_run_selected_subtests(
		tests,
		piglit_config->selected_subtests,
		piglit_config->num_selected_subtests,
		result);

	piglit_report_result(result);
}

enum piglit_result
piglit_display(void)
{
        return PIGLIT_FAIL;
}