1 /*
2  * Copyright 2016 Red Hat.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * on the rights to use, copy, modify, merge, publish, distribute, sub
8  * license, and/or sell copies of the Software, and to permit persons to whom
9  * the Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
22  */
23 #include "util/u_inlines.h"
24 #include "util/u_math.h"
25 #include "util/u_memory.h"
26 #include "util/u_pstipple.h"
27 #include "pipe/p_shader_tokens.h"
28 #include "draw/draw_context.h"
29 #include "draw/draw_vertex.h"
30 #include "sp_context.h"
31 #include "sp_screen.h"
32 #include "sp_state.h"
33 #include "sp_texture.h"
34 #include "sp_tex_sample.h"
35 #include "sp_tex_tile_cache.h"
36 #include "tgsi/tgsi_parse.h"
37 
38 static void
cs_prepare(const struct sp_compute_shader * cs,struct tgsi_exec_machine * machine,int w,int h,int d,int g_w,int g_h,int g_d,int b_w,int b_h,int b_d,struct tgsi_sampler * sampler,struct tgsi_image * image,struct tgsi_buffer * buffer)39 cs_prepare(const struct sp_compute_shader *cs,
40            struct tgsi_exec_machine *machine,
41            int w, int h, int d,
42            int g_w, int g_h, int g_d,
43            int b_w, int b_h, int b_d,
44            struct tgsi_sampler *sampler,
45            struct tgsi_image *image,
46            struct tgsi_buffer *buffer )
47 {
48    int j;
49    /*
50     * Bind tokens/shader to the interpreter's machine state.
51     */
52    tgsi_exec_machine_bind_shader(machine,
53                                  cs->tokens,
54                                  sampler, image, buffer);
55 
56    if (machine->SysSemanticToIndex[TGSI_SEMANTIC_THREAD_ID] != -1) {
57       unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_THREAD_ID];
58       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
59          machine->SystemValue[i].xyzw[0].i[j] = w;
60          machine->SystemValue[i].xyzw[1].i[j] = h;
61          machine->SystemValue[i].xyzw[2].i[j] = d;
62       }
63    }
64 
65    if (machine->SysSemanticToIndex[TGSI_SEMANTIC_GRID_SIZE] != -1) {
66       unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_GRID_SIZE];
67       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
68          machine->SystemValue[i].xyzw[0].i[j] = g_w;
69          machine->SystemValue[i].xyzw[1].i[j] = g_h;
70          machine->SystemValue[i].xyzw[2].i[j] = g_d;
71       }
72    }
73 
74    if (machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_SIZE] != -1) {
75       unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_SIZE];
76       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
77          machine->SystemValue[i].xyzw[0].i[j] = b_w;
78          machine->SystemValue[i].xyzw[1].i[j] = b_h;
79          machine->SystemValue[i].xyzw[2].i[j] = b_d;
80       }
81    }
82 }
83 
84 static bool
cs_run(const struct sp_compute_shader * cs,int g_w,int g_h,int g_d,struct tgsi_exec_machine * machine,bool restart)85 cs_run(const struct sp_compute_shader *cs,
86        int g_w, int g_h, int g_d,
87        struct tgsi_exec_machine *machine, bool restart)
88 {
89    if (!restart) {
90       if (machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_ID] != -1) {
91          unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_ID];
92          int j;
93          for (j = 0; j < TGSI_QUAD_SIZE; j++) {
94             machine->SystemValue[i].xyzw[0].i[j] = g_w;
95             machine->SystemValue[i].xyzw[1].i[j] = g_h;
96             machine->SystemValue[i].xyzw[2].i[j] = g_d;
97          }
98       }
99       machine->NonHelperMask = (1 << 1) - 1;
100    }
101 
102    tgsi_exec_machine_run(machine, restart ? machine->pc : 0);
103 
104    if (machine->pc != -1)
105       return true;
106    return false;
107 }
108 
/**
 * Execute every thread of one workgroup until none of them asks to be
 * resumed.
 *
 * \param cs           compute shader, forwarded to cs_run()
 * \param g_w          workgroup x coordinate, forwarded to cs_run()
 * \param g_h          workgroup y coordinate, forwarded to cs_run()
 * \param g_d          workgroup z coordinate, forwarded to cs_run()
 * \param num_threads  number of entries in \p machines
 * \param machines     one interpreter machine per thread of the group
 *
 * The first pass starts every machine from scratch.  If cs_run() returns
 * true for at least one machine, all machines are run again in resume
 * mode, and this repeats until a pass reports no pending machine.
 */
static void
run_workgroup(const struct sp_compute_shader *cs,
              int g_w, int g_h, int g_d, int num_threads,
              struct tgsi_exec_machine **machines)
{
   bool resume = false;

   for (;;) {
      bool any_pending = false;
      int t;

      /* Every machine is run on every pass, regardless of what earlier
       * machines in this pass reported.
       */
      for (t = 0; t < num_threads; t++) {
         if (cs_run(cs, g_w, g_h, g_d, machines[t], resume))
            any_pending = true;
      }

      if (!any_pending)
         break;

      resume = true;
   }
}
129 
130 static void
cs_delete(const struct sp_compute_shader * cs,struct tgsi_exec_machine * machine)131 cs_delete(const struct sp_compute_shader *cs,
132           struct tgsi_exec_machine *machine)
133 {
134    if (machine->Tokens == cs->tokens) {
135       tgsi_exec_machine_bind_shader(machine, NULL, NULL, NULL, NULL);
136    }
137 }
138 
139 static void
fill_grid_size(struct pipe_context * context,const struct pipe_grid_info * info,uint32_t grid_size[3])140 fill_grid_size(struct pipe_context *context,
141                const struct pipe_grid_info *info,
142                uint32_t grid_size[3])
143 {
144    struct pipe_transfer *transfer;
145    uint32_t *params;
146    if (!info->indirect) {
147       grid_size[0] = info->grid[0];
148       grid_size[1] = info->grid[1];
149       grid_size[2] = info->grid[2];
150       return;
151    }
152    params = pipe_buffer_map_range(context, info->indirect,
153                                   info->indirect_offset,
154                                   3 * sizeof(uint32_t),
155                                   PIPE_MAP_READ,
156                                   &transfer);
157 
158    if (!transfer)
159       return;
160 
161    grid_size[0] = params[0];
162    grid_size[1] = params[1];
163    grid_size[2] = params[2];
164    pipe_buffer_unmap(context, transfer);
165 }
166 
167 void
softpipe_launch_grid(struct pipe_context * context,const struct pipe_grid_info * info)168 softpipe_launch_grid(struct pipe_context *context,
169                      const struct pipe_grid_info *info)
170 {
171    struct softpipe_context *softpipe = softpipe_context(context);
172    struct sp_compute_shader *cs = softpipe->cs;
173    int num_threads_in_group;
174    struct tgsi_exec_machine **machines;
175    int bwidth, bheight, bdepth;
176    int w, h, d, i;
177    int g_w, g_h, g_d;
178    uint32_t grid_size[3] = {0};
179    void *local_mem = NULL;
180 
181    softpipe_update_compute_samplers(softpipe);
182    bwidth = cs->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH];
183    bheight = cs->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT];
184    bdepth = cs->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH];
185    num_threads_in_group = bwidth * bheight * bdepth;
186 
187    fill_grid_size(context, info, grid_size);
188 
189    if (cs->shader.req_local_mem) {
190       local_mem = CALLOC(1, cs->shader.req_local_mem);
191    }
192 
193    machines = CALLOC(sizeof(struct tgsi_exec_machine *), num_threads_in_group);
194    if (!machines) {
195       FREE(local_mem);
196       return;
197    }
198 
199    /* initialise machines + GRID_SIZE + THREAD_ID  + BLOCK_SIZE */
200    for (d = 0; d < bdepth; d++) {
201       for (h = 0; h < bheight; h++) {
202          for (w = 0; w < bwidth; w++) {
203             int idx = w + (h * bwidth) + (d * bheight * bwidth);
204             machines[idx] = tgsi_exec_machine_create(PIPE_SHADER_COMPUTE);
205 
206             machines[idx]->LocalMem = local_mem;
207             machines[idx]->LocalMemSize = cs->shader.req_local_mem;
208             cs_prepare(cs, machines[idx],
209                        w, h, d,
210                        grid_size[0], grid_size[1], grid_size[2],
211                        bwidth, bheight, bdepth,
212                        (struct tgsi_sampler *)softpipe->tgsi.sampler[PIPE_SHADER_COMPUTE],
213                        (struct tgsi_image *)softpipe->tgsi.image[PIPE_SHADER_COMPUTE],
214                        (struct tgsi_buffer *)softpipe->tgsi.buffer[PIPE_SHADER_COMPUTE]);
215             tgsi_exec_set_constant_buffers(machines[idx], PIPE_MAX_CONSTANT_BUFFERS,
216                                            softpipe->mapped_constants[PIPE_SHADER_COMPUTE],
217                                            softpipe->const_buffer_size[PIPE_SHADER_COMPUTE]);
218          }
219       }
220    }
221 
222    for (g_d = 0; g_d < grid_size[2]; g_d++) {
223       for (g_h = 0; g_h < grid_size[1]; g_h++) {
224          for (g_w = 0; g_w < grid_size[0]; g_w++) {
225             run_workgroup(cs, g_w, g_h, g_d, num_threads_in_group, machines);
226          }
227       }
228    }
229 
230    if (softpipe->active_statistics_queries) {
231       softpipe->pipeline_statistics.cs_invocations +=
232           grid_size[0] * grid_size[1] * grid_size[2];
233    }
234 
235    for (i = 0; i < num_threads_in_group; i++) {
236       cs_delete(cs, machines[i]);
237       tgsi_exec_machine_destroy(machines[i]);
238    }
239 
240    FREE(local_mem);
241    FREE(machines);
242 }
243