/*
 * Copyright (C) 2014 Intel Corporation
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
24
/** @file atomicity.c
 *
 * Test the atomicity of the read-modify-write image operations
 * defined by the spec.  The subtests can be classified in two groups:
 *
 * The ones that test bitwise operations (imageAtomicAnd(),
 * imageAtomicOr(), imageAtomicXor()) and imageAtomicExchange() work
 * by using an image as a bitmap which is written to by a large number
 * of shader invocations in parallel; each of them will use a bitwise
 * built-in to flip an individual bit on the image.  If the
 * read-modify-write operation is implemented atomically no write will
 * overwrite any concurrent write supposed to flip a different bit in
 * the same dword, so the whole bitmap will be inverted when the
 * rendering completes.
 *
 * The remaining subtests (imageAtomicAdd(), imageAtomicMin(),
 * imageAtomicMax(), imageAtomicCompSwap()) operate on a single 32-bit
 * location of the image which is accessed concurrently from all
 * shader invocations.  In each case a function written in terms of
 * one of the built-ins is guaranteed to return a unique 32-bit value
 * for each concurrent invocation as long as the read-modify-write
 * operation is implemented atomically.  The way in which this is
 * achieved differs for each built-in and is described in more detail
 * below.
 */
50
51 #include "common.h"
52
53 /** Window width. */
54 #define W 16
55
56 /** Window height. */
57 #define H 96
58
59 /** Total number of pixels in the window and image. */
60 #define N (W * H)
61
62 static struct piglit_gl_test_config *piglit_config;
63
64 static bool
init_image(const struct image_info img,uint32_t v)65 init_image(const struct image_info img, uint32_t v)
66 {
67 uint32_t pixels[N];
68
69 return init_pixels(img, pixels, v, 0, 0, 0) &&
70 upload_image(img, 0, pixels);
71 }
72
73 static bool
check_fb_unique(const struct grid_info grid)74 check_fb_unique(const struct grid_info grid)
75 {
76 uint32_t pixels[H][W];
77 int frequency[N] = { 0 };
78 int i, j;
79
80 if (!download_result(grid, pixels[0]))
81 return false;
82
83 for (i = 0; i < W; ++i) {
84 for (j = 0; j < H; ++j) {
85 if (frequency[pixels[j][i] % N]++) {
86 printf("Probe value at (%d, %d)\n", i, j);
87 printf(" Observed: 0x%08x\n", pixels[j][i]);
88 printf(" Value not unique.\n");
89 return false;
90 }
91 }
92 }
93
94 return true;
95 }
96
97 static bool
check_image_const(const struct image_info img,unsigned n,uint32_t v)98 check_image_const(const struct image_info img, unsigned n, uint32_t v)
99 {
100 uint32_t pixels[N];
101
102 return download_image(img, 0, pixels) &&
103 check_pixels(set_image_size(img, n, 1, 1, 1),
104 pixels, v, 0, 0, 0);
105 }
106
107 struct testcase
108 {
109 uint32_t init_value;
110 unsigned check_sz;
111 uint32_t check_value;
112 bool check_unique;
113 const char * op;
114 };
115
116 static struct testcase testdata[] = {
117 /*
118 * If imageAtomicAdd() is atomic the return values obtained
119 * from each call are guaranteed to be unique.
120 */
121 {
122 0, 1, N, true,
123 "GRID_T op(ivec2 idx, GRID_T x) {\n"
124 " return GRID_T("
125 " imageAtomicAdd(img, IMAGE_ADDR(ivec2(0)), 1u),"
126 " 0, 0, 1);\n"
127 "}\n",
128 },
129
130 /*
131 * Call imageAtomicMin() on a fixed location from within a
132 * loop passing the most recent guess of the counter value
133 * decremented by one.
134 *
135 * If no race occurs the counter will be decremented by one
136 * and we're done, if another thread updates the counter in
137 * parallel imageAtomicMin() has no effect since
138 * min(x-n, x-1) = x-n for n >= 1, so we update our guess and
139 * repeat. In the end we obtain a unique counter value for
140 * each fragment if the read-modify-write operation is atomic.
141 */
142 {
143 0xffffffff, 1, 0xffffffff - N, true,
144 "GRID_T op(ivec2 idx, GRID_T x) {\n"
145 " uint old, v = 0xffffffffu;"
146 "\n"
147 " do {\n"
148 " old = v;\n"
149 " v = imageAtomicMin(img, IMAGE_ADDR(ivec2(0)),"
150 " v - 1u);\n"
151 " } while (v != old);\n"
152 "\n"
153 " return GRID_T(v, 0, 0, 1);\n"
154 "}\n",
155 },
156
157 /*
158 * Use imageAtomicMax() on a fixed location to increment a
159 * counter as explained above for imageAtomicMin(). The
160 * atomicity of the built-in guarantees that the obtained
161 * values will be unique for each fragment.
162 */
163 {
164 0, 1, N, true,
165 "GRID_T op(ivec2 idx, GRID_T x) {\n"
166 " uint old, v = 0u;"
167 "\n"
168 " do {\n"
169 " old = v;\n"
170 " v = imageAtomicMax(img, IMAGE_ADDR(ivec2(0)),"
171 " v + 1u);\n"
172 " } while (v != old);\n"
173 "\n"
174 " return GRID_T(v, 0, 0, 1);\n"
175 "}\n",
176 },
177
178 /*
179 * Use imageAtomicAnd() to flip individual bits of a bitmap
180 * atomically. The atomicity of the built-in guarantees that
181 * all bits will be clear on termination.
182 */
183 {
184 0xffffffff, N / 32, 0, false,
185 "GRID_T op(ivec2 idx, GRID_T x) {\n"
186 " int i = IMAGE_ADDR(idx);\n"
187 " uint m = ~(1u << (i % 32));\n"
188 "\n"
189 " imageAtomicAnd(img, i / 32, m);\n"
190 "\n"
191 " return GRID_T(0, 0, 0, 1);\n"
192 "}\n",
193 },
194
195 /*
196 * Use imageAtomicOr() to flip individual bits of a bitmap
197 * atomically. The atomicity of the built-in guarantees that
198 * all bits will be set on termination.
199 */
200 {
201 0, N / 32, 0xffffffff, false,
202 "GRID_T op(ivec2 idx, GRID_T x) {\n"
203 " int i = IMAGE_ADDR(idx);\n"
204 " uint m = (1u << (i % 32));\n"
205 "\n"
206 " imageAtomicOr(img, i / 32, m);\n"
207 "\n"
208 " return GRID_T(0, 0, 0, 1);\n"
209 "}\n",
210 },
211
212 /*
213 * Use imageAtomicXor() to flip individual bits of a bitmap
214 * atomically. The atomicity of the built-in guarantees that
215 * all bits will have been inverted on termination.
216 */
217 {
218 0x55555555, N / 32, 0xaaaaaaaa, false,
219 "GRID_T op(ivec2 idx, GRID_T x) {\n"
220 " int i = IMAGE_ADDR(idx);\n"
221 " uint m = (1u << (i % 32));\n"
222 "\n"
223 " imageAtomicXor(img, i / 32, m);\n"
224 "\n"
225 " return GRID_T(0, 0, 0, 1);\n"
226 "}\n",
227 },
228
229 /*
230 * Use imageAtomicExchange() to flip individual bits of a
231 * bitmap atomically. The atomicity of the built-in
232 * guarantees that all bits will be set on termination.
233 */
234 {
235 0, N / 32, 0xffffffff, false,
236 "GRID_T op(ivec2 idx, GRID_T x) {\n"
237 " int i = IMAGE_ADDR(idx);\n"
238 " uint m = (1u << (i % 32));\n"
239 " uint old = 0u;\n"
240 "\n"
241 " do {\n"
242 " m |= old;\n"
243 " old = imageAtomicExchange("
244 " img, i / 32, m);\n"
245 " } while ((old & ~m) != 0u);\n"
246 "\n"
247 " return GRID_T(0, 0, 0, 1);\n"
248 "}\n",
249 },
250 #if 0
251 /*
252 * Use imageAtomicExchange() on a fixed location to increment
253 * a counter, implementing a sort of spin-lock.
254 *
255 * The counter has two states: locked (0xffffffff) and
256 * unlocked (any other value). While locked a single thread
257 * owns the value of the counter, increments its value and
258 * puts it back to the same location, atomically releasing the
259 * counter. The atomicity of the built-in guarantees that the
260 * obtained values will be unique for each fragment.
261 *
262 * Unlike the classic spin-lock implementation, this uses the
263 * same atomic call to perform either a lock or an unlock
264 * operation depending on the current thread state. This is
265 * critical to avoid a dead-lock situation on machines where
266 * neighboring threads have limited parallelism (e.g. share
267 * the same instruction pointer).
268 *
269 * This could lead to a different kind of dead-lock on devices
270 * that simulate concurrency by context-switching threads
271 * based on some sort of priority queue: If there is a
272 * possibility for a low-priority thread to acquire the lock
273 * and be preempted before the end of the critical section, it
274 * will prevent higher priority threads from making progress
275 * while the higher priority threads may prevent the
276 * lock-owning thread from being scheduled again and releasing
277 * the lock.
278 *
279 * Disabled for now because the latter dead-lock can easily be
280 * reproduced on current Intel hardware where it causes a GPU
281 * hang. It seems to work fine on nVidia though, it would be
282 * interesting to see if it works on other platforms.
283 */
284 {
285 0, 1, N, true,
286 "GRID_T op(ivec2 idx, GRID_T x) {\n"
287 " uint p = 0xffffffffu, v = 0xffffffffu;\n"
288 "\n"
289 " do {\n"
290 " if (p != 0xffffffffu)\n"
291 " v = p++;\n"
292 " p = imageAtomicExchange("
293 " img, IMAGE_ADDR(ivec2(0)), p);\n"
294 " } while (v == 0xffffffffu);\n"
295 "\n"
296 " return GRID_T(v, 0, 0, 1);\n"
297 "}\n",
298 "imageAtomicExchange (locking)",
299 },
300 #endif
301
302 /*
303 * Use imageAtomicCompSwap() on a fixed location from within a
304 * loop passing the most recent guess of the counter value as
305 * comparison value and the same value incremented by one as
306 * argument. The atomicity of the built-in guarantees that
307 * the obtained values will be unique for each fragment.
308 */
309 {
310 0, 1, N, true,
311 "GRID_T op(ivec2 idx, GRID_T x) {\n"
312 " uint old, v = 0u;"
313 "\n"
314 " do {\n"
315 " old = v;\n"
316 " v = imageAtomicCompSwap("
317 " img, IMAGE_ADDR(ivec2(0)), v, v + 1u);\n"
318 " } while (v != old);\n"
319 "\n"
320 " return GRID_T(v, 0, 0, 1);\n"
321 "}\n",
322 },
323 };
324
325 /**
326 * Test skeleton: Init image to \a init_value, run the provided shader
327 * \a op, check that the first \a check_sz pixels of the image equal
328 * \a check_value and optionally check that the resulting fragment
329 * values on the framebuffer are unique.
330 */
331 static enum piglit_result
run_test(void * data)332 run_test(void * data)
333 {
334 const struct testcase * test = (const struct testcase *)data;
335
336 const struct grid_info grid =
337 grid_info(GL_FRAGMENT_SHADER, GL_R32UI, W, H);
338 const struct image_info img =
339 image_info(GL_TEXTURE_1D, GL_R32UI, W, H);
340 GLuint prog = generate_program(
341 grid, GL_FRAGMENT_SHADER,
342 concat(image_hunk(img, ""),
343 hunk("volatile IMAGE_UNIFORM_T img;\n"),
344 hunk(test->op), NULL));
345 bool ret = prog &&
346 init_fb(grid) &&
347 init_image(img, test->init_value) &&
348 set_uniform_int(prog, "img", 0) &&
349 draw_grid(grid, prog) &&
350 check_image_const(img, test->check_sz, test->check_value) &&
351 (!test->check_unique || check_fb_unique(grid));
352
353 glDeleteProgram(prog);
354 return ret ? PIGLIT_PASS : PIGLIT_FAIL;
355 }
356
357 static struct piglit_subtest tests[] = {
358 {
359 "imageAtomicAdd",
360 "add",
361 run_test,
362 (void *)&testdata[0],
363 },
364 {
365 "imageAtomicMin",
366 "min",
367 run_test,
368 (void *)&testdata[1],
369 },
370 {
371 "imageAtomicMax",
372 "max",
373 run_test,
374 (void *)&testdata[2],
375 },
376 {
377 "imageAtomicAnd",
378 "and",
379 run_test,
380 (void *)&testdata[3],
381 },
382 {
383 "imageAtomicOr",
384 "or",
385 run_test,
386 (void *)&testdata[4],
387 },
388 {
389 "imageAtomicXor",
390 "xor",
391 run_test,
392 (void *)&testdata[5],
393 },
394 {
395 "imageAtomicExchange",
396 "exchange",
397 run_test,
398 (void *)&testdata[6],
399 },
400 {
401 "imageAtomicCompSwap",
402 "comp_swap",
403 run_test,
404 (void *)&testdata[7],
405 },
406 {0},
407 };
408
409 PIGLIT_GL_TEST_CONFIG_BEGIN
410
411 piglit_config = &config;
412 config.subtests = tests;
413 config.supports_gl_core_version = 32;
414
415 config.window_width = W;
416 config.window_height = H;
417 config.window_visual = PIGLIT_GL_VISUAL_DOUBLE | PIGLIT_GL_VISUAL_RGBA;
418 config.khr_no_error_support = PIGLIT_NO_ERRORS;
419
420 PIGLIT_GL_TEST_CONFIG_END
421
422 void
piglit_init(int argc,char ** argv)423 piglit_init(int argc, char **argv)
424 {
425 piglit_require_extension("GL_ARB_shader_image_load_store");
426
427 enum piglit_result result = PIGLIT_PASS;
428
429 result = piglit_run_selected_subtests(
430 tests,
431 piglit_config->selected_subtests,
432 piglit_config->num_selected_subtests,
433 result);
434
435 piglit_report_result(result);
436 }
437
438 enum piglit_result
piglit_display(void)439 piglit_display(void)
440 {
441 return PIGLIT_FAIL;
442 }