/*
 * Copyright (c) 2014 - 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
23 
24 #include "util/ralloc.h"
25 #include "brw_context.h"
26 #include "brw_cs.h"
27 #include "brw_wm.h"
28 #include "intel_mipmap_tree.h"
29 #include "brw_state.h"
30 #include "intel_batchbuffer.h"
31 #include "compiler/brw_nir.h"
32 #include "brw_program.h"
33 #include "compiler/glsl/ir_uniform.h"
34 
35 struct brw_cs_parameters
brw_cs_get_parameters(const struct brw_context * brw)36 brw_cs_get_parameters(const struct brw_context *brw)
37 {
38    assert(brw->cs.base.prog_data);
39    struct brw_cs_prog_data *cs_prog_data =
40       brw_cs_prog_data(brw->cs.base.prog_data);
41 
42    struct brw_cs_parameters params = {};
43 
44    if (brw->compute.group_size) {
45       /* With ARB_compute_variable_group_size the group size is set at
46        * dispatch time, so we can't use the one provided by the compiler.
47        */
48       params.group_size = brw->compute.group_size[0] *
49                           brw->compute.group_size[1] *
50                           brw->compute.group_size[2];
51    } else {
52       params.group_size = cs_prog_data->local_size[0] *
53                           cs_prog_data->local_size[1] *
54                           cs_prog_data->local_size[2];
55    }
56 
57    params.simd_size =
58       brw_cs_simd_size_for_group_size(&brw->screen->devinfo,
59                                       cs_prog_data, params.group_size);
60    params.threads = DIV_ROUND_UP(params.group_size, params.simd_size);
61 
62    return params;
63 }
64 
65 static void
assign_cs_binding_table_offsets(const struct gen_device_info * devinfo,const struct gl_program * prog,struct brw_cs_prog_data * prog_data)66 assign_cs_binding_table_offsets(const struct gen_device_info *devinfo,
67                                 const struct gl_program *prog,
68                                 struct brw_cs_prog_data *prog_data)
69 {
70    uint32_t next_binding_table_offset = 0;
71 
72    /* May not be used if the gl_NumWorkGroups variable is not accessed. */
73    prog_data->binding_table.work_groups_start = next_binding_table_offset;
74    next_binding_table_offset++;
75 
76    brw_assign_common_binding_table_offsets(devinfo, prog, &prog_data->base,
77                                            next_binding_table_offset);
78 }
79 
80 static bool
brw_codegen_cs_prog(struct brw_context * brw,struct brw_program * cp,struct brw_cs_prog_key * key)81 brw_codegen_cs_prog(struct brw_context *brw,
82                     struct brw_program *cp,
83                     struct brw_cs_prog_key *key)
84 {
85    const struct gen_device_info *devinfo = &brw->screen->devinfo;
86    const GLuint *program;
87    void *mem_ctx = ralloc_context(NULL);
88    struct brw_cs_prog_data prog_data;
89    bool start_busy = false;
90    double start_time = 0;
91    nir_shader *nir = nir_shader_clone(mem_ctx, cp->program.nir);
92 
93    memset(&prog_data, 0, sizeof(prog_data));
94 
95    if (cp->program.info.cs.shared_size > 64 * 1024) {
96       cp->program.sh.data->LinkStatus = LINKING_FAILURE;
97       const char *error_str =
98          "Compute shader used more than 64KB of shared variables";
99       ralloc_strcat(&cp->program.sh.data->InfoLog, error_str);
100       _mesa_problem(NULL, "Failed to link compute shader: %s\n", error_str);
101 
102       ralloc_free(mem_ctx);
103       return false;
104    }
105 
106    assign_cs_binding_table_offsets(devinfo, &cp->program, &prog_data);
107 
108    brw_nir_setup_glsl_uniforms(mem_ctx, nir,
109                                &cp->program, &prog_data.base, true);
110 
111    if (unlikely(brw->perf_debug)) {
112       start_busy = (brw->batch.last_bo &&
113                     brw_bo_busy(brw->batch.last_bo));
114       start_time = get_time();
115    }
116 
117    int st_index = -1;
118    if (INTEL_DEBUG & DEBUG_SHADER_TIME)
119       st_index = brw_get_shader_time_index(brw, &cp->program, ST_CS, true);
120 
121    brw_nir_lower_cs_intrinsics(nir);
122 
123    char *error_str;
124    program = brw_compile_cs(brw->screen->compiler, brw, mem_ctx, key,
125                             &prog_data, nir, st_index, NULL, &error_str);
126    if (program == NULL) {
127       cp->program.sh.data->LinkStatus = LINKING_FAILURE;
128       ralloc_strcat(&cp->program.sh.data->InfoLog, error_str);
129       _mesa_problem(NULL, "Failed to compile compute shader: %s\n", error_str);
130 
131       ralloc_free(mem_ctx);
132       return false;
133    }
134 
135    if (unlikely(brw->perf_debug)) {
136       if (cp->compiled_once) {
137          brw_debug_recompile(brw, MESA_SHADER_COMPUTE, cp->program.Id,
138                              &key->base);
139       }
140       cp->compiled_once = true;
141 
142       if (start_busy && !brw_bo_busy(brw->batch.last_bo)) {
143          perf_debug("CS compile took %.03f ms and stalled the GPU\n",
144                     (get_time() - start_time) * 1000);
145       }
146    }
147 
148    brw_alloc_stage_scratch(brw, &brw->cs.base, prog_data.base.total_scratch);
149 
150    /* The param and pull_param arrays will be freed by the shader cache. */
151    ralloc_steal(NULL, prog_data.base.param);
152    ralloc_steal(NULL, prog_data.base.pull_param);
153    brw_upload_cache(&brw->cache, BRW_CACHE_CS_PROG,
154                     key, sizeof(*key),
155                     program, prog_data.base.program_size,
156                     &prog_data, sizeof(prog_data),
157                     &brw->cs.base.prog_offset, &brw->cs.base.prog_data);
158    ralloc_free(mem_ctx);
159 
160    return true;
161 }
162 
163 
164 void
brw_cs_populate_key(struct brw_context * brw,struct brw_cs_prog_key * key)165 brw_cs_populate_key(struct brw_context *brw, struct brw_cs_prog_key *key)
166 {
167    struct gl_context *ctx = &brw->ctx;
168    /* BRW_NEW_COMPUTE_PROGRAM */
169    const struct brw_program *cp =
170       (struct brw_program *) brw->programs[MESA_SHADER_COMPUTE];
171 
172    memset(key, 0, sizeof(*key));
173 
174    /* _NEW_TEXTURE */
175    brw_populate_base_prog_key(ctx, cp, &key->base);
176 }
177 
178 
179 void
brw_upload_cs_prog(struct brw_context * brw)180 brw_upload_cs_prog(struct brw_context *brw)
181 {
182    struct gl_context *ctx = &brw->ctx;
183    struct brw_cs_prog_key key;
184    struct brw_program *cp =
185       (struct brw_program *) brw->programs[MESA_SHADER_COMPUTE];
186 
187    if (!cp)
188       return;
189 
190    if (!brw_state_dirty(brw, _NEW_TEXTURE, BRW_NEW_COMPUTE_PROGRAM))
191       return;
192 
193    brw->cs.base.sampler_count =
194       util_last_bit(ctx->ComputeProgram._Current->SamplersUsed);
195 
196    brw_cs_populate_key(brw, &key);
197 
198    if (brw_search_cache(&brw->cache, BRW_CACHE_CS_PROG, &key, sizeof(key),
199                         &brw->cs.base.prog_offset, &brw->cs.base.prog_data,
200                         true))
201       return;
202 
203    if (brw_disk_cache_upload_program(brw, MESA_SHADER_COMPUTE))
204       return;
205 
206    cp = (struct brw_program *) brw->programs[MESA_SHADER_COMPUTE];
207    cp->id = key.base.program_string_id;
208 
209    ASSERTED bool success = brw_codegen_cs_prog(brw, cp, &key);
210    assert(success);
211 }
212 
213 void
brw_cs_populate_default_key(const struct brw_compiler * compiler,struct brw_cs_prog_key * key,struct gl_program * prog)214 brw_cs_populate_default_key(const struct brw_compiler *compiler,
215                             struct brw_cs_prog_key *key,
216                             struct gl_program *prog)
217 {
218    const struct gen_device_info *devinfo = compiler->devinfo;
219    memset(key, 0, sizeof(*key));
220    brw_populate_default_base_prog_key(devinfo, brw_program(prog), &key->base);
221 }
222 
223 bool
brw_cs_precompile(struct gl_context * ctx,struct gl_program * prog)224 brw_cs_precompile(struct gl_context *ctx, struct gl_program *prog)
225 {
226    struct brw_context *brw = brw_context(ctx);
227    struct brw_cs_prog_key key;
228 
229    struct brw_program *bcp = brw_program(prog);
230 
231    brw_cs_populate_default_key(brw->screen->compiler, &key, prog);
232 
233    uint32_t old_prog_offset = brw->cs.base.prog_offset;
234    struct brw_stage_prog_data *old_prog_data = brw->cs.base.prog_data;
235 
236    bool success = brw_codegen_cs_prog(brw, bcp, &key);
237 
238    brw->cs.base.prog_offset = old_prog_offset;
239    brw->cs.base.prog_data = old_prog_data;
240 
241    return success;
242 }
243