/*
 * Copyright (C) 2014 Intel Corporation
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/** @file atomicity.c
 *
 * Test the atomicity of the read-modify-write image operations
 * defined by the spec.  The subtests fall into two groups:
 *
 * The ones that test the bitwise operations (imageAtomicAnd(),
 * imageAtomicOr(), imageAtomicXor()) and imageAtomicExchange() use an
 * image as a bitmap that is written to by a large number of shader
 * invocations in parallel; each invocation uses a bitwise built-in to
 * flip an individual bit of the image.  If the read-modify-write
 * operation is implemented atomically, no write will overwrite a
 * concurrent write meant to flip a different bit of the same dword,
 * so the whole bitmap will be inverted when rendering completes.
 *
 * The remaining subtests (imageAtomicAdd(), imageAtomicMin(),
 * imageAtomicMax(), imageAtomicCompSwap()) operate on a single 32-bit
 * location of the image which is accessed concurrently by all shader
 * invocations.  In each case a function written in terms of one of
 * the built-ins is guaranteed to return a unique 32-bit value for
 * each concurrent invocation as long as the read-modify-write
 * operation is implemented atomically.  The way in which this is
 * achieved differs for each built-in and is described in more detail
 * below.
 */


#include "common.h"

/** Window width. */
#define W 16

/** Window height. */
#define H 96

/** Total number of pixels in the window and image. */
#define N (W * H)

static struct piglit_gl_test_config *piglit_config;

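/**
 * Fill every texel of \a img with the constant value \a v.
 */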
static bool
init_image(const struct image_info img, uint32_t v)
{
        uint32_t pixels[N];

        return init_pixels(img, pixels, v, 0, 0, 0) &&
                upload_image(img, 0, pixels);
}

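/**
 * Check that no two pixels of the framebuffer contain the same value
 * modulo N.  The subtests that require uniqueness produce N
 * consecutive counter values, so a collision modulo N means that two
 * fragments obtained the same result from the atomic built-in.
 */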
static bool
check_fb_unique(const struct grid_info grid)
{
        uint32_t pixels[H][W];
        int frequency[N] = { 0 };
        int i, j;

        if (!download_result(grid, pixels[0]))
                return false;

        for (i = 0; i < W; ++i) {
                for (j = 0; j < H; ++j) {
                        if (frequency[pixels[j][i] % N]++) {
                                printf("Probe value at (%d, %d)\n", i, j);
                                printf("  Observed: 0x%08x\n", pixels[j][i]);
                                printf("  Value not unique.\n");
                                return false;
                        }
                }
        }

        return true;
}

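/**
 * Check that the first \a n texels of the image contain the constant
 * value \a v.
 */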
static bool
check_image_const(const struct image_info img, unsigned n, uint32_t v)
{
        uint32_t pixels[N];

        return download_image(img, 0, pixels) &&
                check_pixels(set_image_size(img, n, 1, 1, 1),
                             pixels, v, 0, 0, 0);
}

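/**
 * Parameters of a single atomicity subtest: the value the image is
 * initialized to, the number of leading texels expected to equal
 * check_value after rendering, whether the fragment values written to
 * the framebuffer must additionally be unique, and the GLSL source of
 * the op() function run by every shader invocation.
 */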
struct testcase
{
	uint32_t init_value;
	unsigned check_sz;
	uint32_t check_value;
	bool check_unique;
	const char *op;
};

static struct testcase testdata[] = {
        /*
         * If imageAtomicAdd() is atomic, the return values obtained
         * from each call are guaranteed to be unique.
         */
	{
		0, 1, N, true,
		"GRID_T op(ivec2 idx, GRID_T x) {\n"
		"       return GRID_T("
		"          imageAtomicAdd(img, IMAGE_ADDR(ivec2(0)), 1u),"
		"          0, 0, 1);\n"
		"}\n",
	},

        /*
         * Call imageAtomicMin() on a fixed location from within a
         * loop, passing the most recent guess of the counter value
         * decremented by one.
         *
         * If no race occurs the counter is decremented by one and
         * we're done; if another thread updates the counter in
         * parallel, imageAtomicMin() has no effect since
         * min(x-n, x-1) = x-n for n >= 1, so we update our guess and
         * repeat.  In the end we obtain a unique counter value for
         * each fragment if the read-modify-write operation is atomic.
         */
	{
                0xffffffff, 1, 0xffffffff - N, true,
                "GRID_T op(ivec2 idx, GRID_T x) {\n"
                "       uint old, v = 0xffffffffu;"
                "\n"
                "       do {\n"
                "               old = v;\n"
                "               v = imageAtomicMin(img, IMAGE_ADDR(ivec2(0)),"
                "                                  v - 1u);\n"
                "       } while (v != old);\n"
                "\n"
                "       return GRID_T(v, 0, 0, 1);\n"
                "}\n",
	},

        /*
         * Use imageAtomicMax() on a fixed location to increment a
         * counter as explained above for imageAtomicMin().  The
         * atomicity of the built-in guarantees that the obtained
         * values will be unique for each fragment.
         */
	{
                0, 1, N, true,
                "GRID_T op(ivec2 idx, GRID_T x) {\n"
                "       uint old, v = 0u;"
                "\n"
                "       do {\n"
                "               old = v;\n"
                "               v = imageAtomicMax(img, IMAGE_ADDR(ivec2(0)),"
                "                                  v + 1u);\n"
                "       } while (v != old);\n"
                "\n"
                "       return GRID_T(v, 0, 0, 1);\n"
                "}\n",
	},

        /*
         * Use imageAtomicAnd() to clear individual bits of a bitmap
         * atomically.  The atomicity of the built-in guarantees that
         * all bits will be clear on termination.
         */
	{
                0xffffffff, N / 32, 0, false,
                "GRID_T op(ivec2 idx, GRID_T x) {\n"
                "       int i = IMAGE_ADDR(idx);\n"
                "       uint m = ~(1u << (i % 32));\n"
                "\n"
                "       imageAtomicAnd(img, i / 32, m);\n"
                "\n"
                "       return GRID_T(0, 0, 0, 1);\n"
                "}\n",
	},

        /*
         * Use imageAtomicOr() to set individual bits of a bitmap
         * atomically.  The atomicity of the built-in guarantees that
         * all bits will be set on termination.
         */
	{
                0, N / 32, 0xffffffff, false,
                "GRID_T op(ivec2 idx, GRID_T x) {\n"
                "       int i = IMAGE_ADDR(idx);\n"
                "       uint m = (1u << (i % 32));\n"
                "\n"
                "       imageAtomicOr(img, i / 32, m);\n"
                "\n"
                "       return GRID_T(0, 0, 0, 1);\n"
                "}\n",
	},

        /*
         * Use imageAtomicXor() to toggle individual bits of a bitmap
         * atomically.  The atomicity of the built-in guarantees that
         * all bits will have been inverted on termination.
         */
	{
                0x55555555, N / 32, 0xaaaaaaaa, false,
                "GRID_T op(ivec2 idx, GRID_T x) {\n"
                "       int i = IMAGE_ADDR(idx);\n"
                "       uint m = (1u << (i % 32));\n"
                "\n"
                "       imageAtomicXor(img, i / 32, m);\n"
                "\n"
                "       return GRID_T(0, 0, 0, 1);\n"
                "}\n",
	},

        /*
         * Use imageAtomicExchange() to set individual bits of a
         * bitmap.  Since the exchange replaces the whole dword, each
         * invocation keeps re-writing the union of its own bit and
         * any bits it displaced until no concurrently written bit is
         * lost.  The atomicity of the built-in guarantees that all
         * bits will be set on termination.
         */
	{
                0, N / 32, 0xffffffff, false,
                "GRID_T op(ivec2 idx, GRID_T x) {\n"
                "       int i = IMAGE_ADDR(idx);\n"
                "       uint m = (1u << (i % 32));\n"
                "       uint old = 0u;\n"
                "\n"
                "       do {\n"
                "               m |= old;\n"
                "               old = imageAtomicExchange("
                "                       img, i / 32, m);\n"
                "       } while ((old & ~m) != 0u);\n"
                "\n"
                "       return GRID_T(0, 0, 0, 1);\n"
                "}\n",
	},
#if 0
        /*
         * Use imageAtomicExchange() on a fixed location to increment
         * a counter, implementing a sort of spin-lock.
         *
         * The counter has two states: locked (0xffffffff) and
         * unlocked (any other value).  While locked, a single thread
         * owns the value of the counter, increments it and puts it
         * back to the same location, atomically releasing the
         * counter.  The atomicity of the built-in guarantees that the
         * obtained values will be unique for each fragment.
         *
         * Unlike the classic spin-lock implementation, this uses the
         * same atomic call to perform either a lock or an unlock
         * operation depending on the current thread state.  This is
         * critical to avoid a dead-lock situation on machines where
         * neighboring threads have limited parallelism (e.g. share
         * the same instruction pointer).
         *
         * This could lead to a different kind of dead-lock on devices
         * that simulate concurrency by context-switching threads
         * based on some sort of priority queue: if a low-priority
         * thread can acquire the lock and be preempted before the end
         * of the critical section, it will prevent higher-priority
         * threads from making progress, while the higher-priority
         * threads may prevent the lock-owning thread from being
         * scheduled again and releasing the lock.
         *
         * Disabled for now because the latter dead-lock can easily be
         * reproduced on current Intel hardware, where it causes a GPU
         * hang.  It seems to work fine on nVidia, though; it would be
         * interesting to see if it works on other platforms.
         */
	{
                0, 1, N, true,
                "GRID_T op(ivec2 idx, GRID_T x) {\n"
                "       uint p = 0xffffffffu, v = 0xffffffffu;\n"
                "\n"
                "       do {\n"
                "               if (p != 0xffffffffu)\n"
                "                       v = p++;\n"
                "               p = imageAtomicExchange("
                "                  img, IMAGE_ADDR(ivec2(0)), p);\n"
                "       } while (v == 0xffffffffu);\n"
                "\n"
                "       return GRID_T(v, 0, 0, 1);\n"
                "}\n",
	},
#endif

        /*
         * Use imageAtomicCompSwap() on a fixed location from within a
         * loop, passing the most recent guess of the counter value as
         * comparison value and the same value incremented by one as
         * data argument.  The atomicity of the built-in guarantees
         * that the obtained values will be unique for each fragment.
         */
	{
                0, 1, N, true,
                "GRID_T op(ivec2 idx, GRID_T x) {\n"
                "       uint old, v = 0u;"
                "\n"
                "       do {\n"
                "               old = v;\n"
                "               v = imageAtomicCompSwap("
                "                  img, IMAGE_ADDR(ivec2(0)), v, v + 1u);\n"
                "       } while (v != old);\n"
                "\n"
                "       return GRID_T(v, 0, 0, 1);\n"
                "}\n",
	},
};

/**
 * Test skeleton: Init image to \a init_value, run the provided shader
 * \a op, check that the first \a check_sz pixels of the image equal
 * \a check_value and optionally check that the resulting fragment
 * values on the framebuffer are unique.
 */
static enum piglit_result
run_test(void *data)
{
	const struct testcase *test = (const struct testcase *)data;

        const struct grid_info grid =
                grid_info(GL_FRAGMENT_SHADER, GL_R32UI, W, H);
        const struct image_info img =
                image_info(GL_TEXTURE_1D, GL_R32UI, W, H);
        GLuint prog = generate_program(
                grid, GL_FRAGMENT_SHADER,
                concat(image_hunk(img, ""),
                       hunk("volatile IMAGE_UNIFORM_T img;\n"),
                       hunk(test->op), NULL));
        bool ret = prog &&
                init_fb(grid) &&
                init_image(img, test->init_value) &&
                set_uniform_int(prog, "img", 0) &&
                draw_grid(grid, prog) &&
                check_image_const(img, test->check_sz, test->check_value) &&
                (!test->check_unique || check_fb_unique(grid));

        glDeleteProgram(prog);
        return ret ? PIGLIT_PASS : PIGLIT_FAIL;
}

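/**
 * Subtest table: one entry per atomic built-in, each one running
 * run_test() on the corresponding element of testdata above.
 */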
static struct piglit_subtest tests[] = {
	{
		"imageAtomicAdd",
		"add",
		run_test,
		(void *)&testdata[0],
	},
	{
		"imageAtomicMin",
		"min",
		run_test,
		(void *)&testdata[1],
	},
	{
		"imageAtomicMax",
		"max",
		run_test,
		(void *)&testdata[2],
	},
	{
		"imageAtomicAnd",
		"and",
		run_test,
		(void *)&testdata[3],
	},
	{
		"imageAtomicOr",
		"or",
		run_test,
		(void *)&testdata[4],
	},
	{
		"imageAtomicXor",
		"xor",
		run_test,
		(void *)&testdata[5],
	},
	{
		"imageAtomicExchange",
		"exchange",
		run_test,
		(void *)&testdata[6],
	},
	{
		"imageAtomicCompSwap",
		"comp_swap",
		run_test,
		(void *)&testdata[7],
	},
	{0},
};

PIGLIT_GL_TEST_CONFIG_BEGIN

piglit_config = &config;
config.subtests = tests;
config.supports_gl_core_version = 32;

config.window_width = W;
config.window_height = H;
config.window_visual = PIGLIT_GL_VISUAL_DOUBLE | PIGLIT_GL_VISUAL_RGBA;
config.khr_no_error_support = PIGLIT_NO_ERRORS;

PIGLIT_GL_TEST_CONFIG_END

void
piglit_init(int argc, char **argv)
{
	piglit_require_extension("GL_ARB_shader_image_load_store");

	enum piglit_result result = PIGLIT_PASS;

	result = piglit_run_selected_subtests(
		tests,
		piglit_config->selected_subtests,
		piglit_config->num_selected_subtests,
		result);

	piglit_report_result(result);
}

enum piglit_result
piglit_display(void)
{
        return PIGLIT_FAIL;
}