/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
 /*
  * Authors:
  *   Keith Whitwell <keithw@vmware.com>
  */

#include <pthread.h>
#include "main/glspirv.h"
#include "program/prog_parameter.h"
#include "program/prog_print.h"
#include "program/prog_to_nir.h"
#include "program/program.h"
#include "program/programopt.h"
#include "tnl/tnl.h"
#include "util/ralloc.h"
#include "compiler/glsl/ir.h"
#include "compiler/glsl/program.h"
#include "compiler/glsl/gl_nir.h"
#include "compiler/glsl/glsl_to_nir.h"

#include "brw_program.h"
#include "brw_context.h"
#include "compiler/brw_nir.h"
#include "brw_defines.h"
#include "brw_batch.h"

#include "brw_cs.h"
#include "brw_gs.h"
#include "brw_vs.h"
#include "brw_wm.h"
#include "brw_state.h"

#include "main/shaderapi.h"
#include "main/shaderobj.h"

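/* Assign uniform locations and lower uniform access to offsets, using
 * scalar sizing for scalar back-end stages and vec4 sizing otherwise.
 */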
static bool
brw_nir_lower_uniforms(nir_shader *nir, bool is_scalar)
{
   if (is_scalar) {
      nir_assign_var_locations(nir, nir_var_uniform, &nir->num_uniforms,
                               type_size_scalar_bytes);
      return nir_lower_io(nir, nir_var_uniform, type_size_scalar_bytes, 0);
   } else {
      nir_assign_var_locations(nir, nir_var_uniform, &nir->num_uniforms,
                               type_size_vec4_bytes);
      return nir_lower_io(nir, nir_var_uniform, type_size_vec4_bytes, 0);
   }
}

static struct gl_program *brw_new_program(struct gl_context *ctx,
                                          gl_shader_stage stage,
                                          GLuint id, bool is_arb_asm);

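/**
 * Create NIR for a gl_program: from GLSL IR or SPIR-V when a linked shader
 * program is given, or from Mesa IR (ARB programs and fixed function)
 * otherwise.  Also runs brw_preprocess_nir() and the stage-specific
 * lowering below (patch vertices, window-space position transform).
 */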
nir_shader *
brw_create_nir(struct brw_context *brw,
               const struct gl_shader_program *shader_prog,
               struct gl_program *prog,
               gl_shader_stage stage,
               bool is_scalar)
{
   const struct intel_device_info *devinfo = &brw->screen->devinfo;
   struct gl_context *ctx = &brw->ctx;
   const nir_shader_compiler_options *options =
      ctx->Const.ShaderCompilerOptions[stage].NirOptions;
   nir_shader *nir;

   /* First, lower the GLSL/Mesa IR or SPIR-V to NIR */
   if (shader_prog) {
      if (shader_prog->data->spirv) {
         nir = _mesa_spirv_to_nir(ctx, shader_prog, stage, options);
      } else {
         nir = glsl_to_nir(ctx, shader_prog, stage, options);

         /* Remap the locations to slots so those requiring two slots will
          * occupy two locations. For instance, if we have in the IR code a
          * dvec3 attr0 in location 0 and vec4 attr1 in location 1, in NIR attr0
          * will use locations/slots 0 and 1, and attr1 will use location/slot 2
          */
         if (nir->info.stage == MESA_SHADER_VERTEX)
            nir_remap_dual_slot_attributes(nir, &prog->DualSlotInputs);
      }
      assert(nir);

      nir_remove_dead_variables(nir, nir_var_shader_in | nir_var_shader_out,
                                NULL);
      nir_validate_shader(nir, "after glsl_to_nir or spirv_to_nir");
      NIR_PASS_V(nir, nir_lower_io_to_temporaries,
                 nir_shader_get_entrypoint(nir), true, false);
   } else {
      nir = prog_to_nir(prog, options);
      NIR_PASS_V(nir, nir_lower_regs_to_ssa); /* turn registers into SSA */
   }
   nir_validate_shader(nir, "before brw_preprocess_nir");

   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));

   if (!ctx->SoftFP64 && ((nir->info.bit_sizes_int | nir->info.bit_sizes_float) & 64) &&
       (options->lower_doubles_options & nir_lower_fp64_full_software)) {
      ctx->SoftFP64 = glsl_float64_funcs_to_nir(ctx, options);
   }

   brw_preprocess_nir(brw->screen->compiler, nir, ctx->SoftFP64);

   if (stage == MESA_SHADER_TESS_CTRL) {
      /* Lower gl_PatchVerticesIn from a system value to a uniform on Gfx8+. */
      static const gl_state_index16 tokens[STATE_LENGTH] =
         { STATE_TCS_PATCH_VERTICES_IN };
      nir_lower_patch_vertices(nir, 0, devinfo->ver >= 8 ? tokens : NULL);
   }

   if (stage == MESA_SHADER_TESS_EVAL) {
      /* Lower gl_PatchVerticesIn to a constant if we have a TCS, or
       * a uniform if we don't.
       */
      struct gl_linked_shader *tcs =
         shader_prog->_LinkedShaders[MESA_SHADER_TESS_CTRL];
      uint32_t static_patch_vertices =
         tcs ? tcs->Program->nir->info.tess.tcs_vertices_out : 0;
      static const gl_state_index16 tokens[STATE_LENGTH] =
         { STATE_TES_PATCH_VERTICES_IN };
      nir_lower_patch_vertices(nir, static_patch_vertices, tokens);
   }

   if (stage == MESA_SHADER_FRAGMENT) {
      static const struct nir_lower_wpos_ytransform_options wpos_options = {
         .state_tokens = {STATE_FB_WPOS_Y_TRANSFORM, 0, 0},
         .fs_coord_pixel_center_integer = 1,
         .fs_coord_origin_upper_left = 1,
      };

      bool progress = false;
      NIR_PASS(progress, nir, nir_lower_wpos_ytransform, &wpos_options);
      if (progress) {
         _mesa_add_state_reference(prog->Parameters,
                                   wpos_options.state_tokens);
      }
   }

   return nir;
}

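/* Size/alignment callback for lowering shared variables to explicit types:
 * booleans are stored as 32-bit values, and vec3s are padded out to vec4
 * alignment.
 */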
static void
shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
{
   assert(glsl_type_is_vector_or_scalar(type));

   uint32_t comp_size = glsl_type_is_boolean(type)
      ? 4 : glsl_get_bit_size(type) / 8;
   unsigned length = glsl_get_vector_elements(type);
   *size = comp_size * length;
   *align = comp_size * (length == 3 ? 4 : length);
}

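/**
 * Lower uniform, sampler, storage-image, shared-memory, and buffer access
 * into the forms the brw back-end expects, then constant-fold the
 * resulting address calculations.
 */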
void
brw_nir_lower_resources(nir_shader *nir, struct gl_shader_program *shader_prog,
                        struct gl_program *prog,
                        const struct intel_device_info *devinfo)
{
   NIR_PASS_V(nir, brw_nir_lower_uniforms, nir->options->lower_to_scalar);
   NIR_PASS_V(prog->nir, gl_nir_lower_samplers, shader_prog);
   BITSET_COPY(prog->info.textures_used, prog->nir->info.textures_used);
   BITSET_COPY(prog->info.textures_used_by_txf, prog->nir->info.textures_used_by_txf);

   NIR_PASS_V(prog->nir, brw_nir_lower_storage_image, devinfo);

   if (prog->nir->info.stage == MESA_SHADER_COMPUTE &&
       shader_prog->data->spirv) {
      NIR_PASS_V(prog->nir, nir_lower_vars_to_explicit_types,
                 nir_var_mem_shared, shared_type_info);
      NIR_PASS_V(prog->nir, nir_lower_explicit_io,
                 nir_var_mem_shared, nir_address_format_32bit_offset);
   }

   NIR_PASS_V(prog->nir, gl_nir_lower_buffers, shader_prog);
   /* Do a round of constant folding to clean up address calculations */
   NIR_PASS_V(prog->nir, nir_opt_constant_folding);
}

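/**
 * Re-gather the shader info from NIR and copy it into the gl_program,
 * preserving the program's original name and label.
 */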
void
brw_shader_gather_info(nir_shader *nir, struct gl_program *prog)
{
   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));

   /* Copy the info we just generated back into the gl_program */
   const char *prog_name = prog->info.name;
   const char *prog_label = prog->info.label;
   prog->info = nir->info;
   prog->info.name = prog_name;
   prog->info.label = prog_label;
}

static unsigned
get_new_program_id(struct brw_screen *screen)
{
   return p_atomic_inc_return(&screen->program_id);
}

static struct gl_program *
brw_new_program(struct gl_context *ctx,
                gl_shader_stage stage,
                GLuint id, bool is_arb_asm)
{
   struct brw_context *brw = brw_context(ctx);
   struct brw_program *prog = rzalloc(NULL, struct brw_program);

   if (prog) {
      prog->id = get_new_program_id(brw->screen);

      return _mesa_init_gl_program(&prog->program, stage, id, is_arb_asm);
   }

   return NULL;
}

static void
brw_delete_program(struct gl_context *ctx, struct gl_program *prog)
{
   struct brw_context *brw = brw_context(ctx);

   /* Beware!  prog's refcount has reached zero, and it's about to be freed.
    *
    * In brw_upload_pipeline_state(), we compare brw->programs[i] to
    * ctx->FooProgram._Current, and flag BRW_NEW_FOO_PROGRAM if the
    * pointer has changed.
    *
    * We cannot leave brw->programs[i] as a dangling pointer to the dead
    * program.  malloc() may allocate the same memory for a new gl_program,
    * causing us to see matching pointers...but totally different programs.
    *
    * We cannot set brw->programs[i] to NULL, either.  If we've deleted the
    * active program, Mesa may set ctx->FooProgram._Current to NULL.  That
    * would cause us to see matching pointers (NULL == NULL), and fail to
    * detect that a program has changed since our last draw.
    *
    * So, set it to a bogus gl_program pointer that will never match,
    * causing us to properly reevaluate the state on our next draw.
    *
    * Getting this wrong causes heisenbugs which are very hard to catch,
    * as you need a very specific allocation pattern to hit the problem.
    */
   static const struct gl_program deleted_program;

   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      if (brw->programs[i] == prog)
         brw->programs[i] = (struct gl_program *) &deleted_program;
   }

   _mesa_delete_program(ctx, prog);
}


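/* Called when the source string of an ARB vertex or fragment program
 * changes: flag the state atom if the active program changed, rebuild the
 * program's NIR, and kick off a precompile.
 */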
static GLboolean
brw_program_string_notify(struct gl_context *ctx,
                          GLenum target,
                          struct gl_program *prog)
{
   assert(target == GL_VERTEX_PROGRAM_ARB || !prog->arb.IsPositionInvariant);

   struct brw_context *brw = brw_context(ctx);
   const struct brw_compiler *compiler = brw->screen->compiler;

   switch (target) {
   case GL_FRAGMENT_PROGRAM_ARB: {
      struct brw_program *newFP = brw_program(prog);
      const struct brw_program *curFP =
         brw_program_const(brw->programs[MESA_SHADER_FRAGMENT]);

      if (newFP == curFP)
         brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
      _mesa_program_fragment_position_to_sysval(&newFP->program);
      newFP->id = get_new_program_id(brw->screen);

      prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_FRAGMENT, true);

      brw_nir_lower_resources(prog->nir, NULL, prog, &brw->screen->devinfo);

      brw_shader_gather_info(prog->nir, prog);

      brw_fs_precompile(ctx, prog);
      break;
   }
   case GL_VERTEX_PROGRAM_ARB: {
      struct brw_program *newVP = brw_program(prog);
      const struct brw_program *curVP =
         brw_program_const(brw->programs[MESA_SHADER_VERTEX]);

      if (newVP == curVP)
         brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM;
      if (newVP->program.arb.IsPositionInvariant) {
         _mesa_insert_mvp_code(ctx, &newVP->program);
      }
      newVP->id = get_new_program_id(brw->screen);

      /* Also tell tnl about it:
       */
      _tnl_program_string(ctx, target, prog);

      prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_VERTEX,
                                 compiler->scalar_stage[MESA_SHADER_VERTEX]);

      brw_nir_lower_resources(prog->nir, NULL, prog, &brw->screen->devinfo);

      brw_shader_gather_info(prog->nir, prog);

      brw_vs_precompile(ctx, prog);
      break;
   }
   default:
      /*
       * driver->ProgramStringNotify is only called for ARB programs, fixed
       * function vertex programs, and ir_to_mesa (which isn't used by the
       * i965 back-end).  Therefore, even after geometry shaders are added,
       * this function should only ever be called with a target of
       * GL_VERTEX_PROGRAM_ARB or GL_FRAGMENT_PROGRAM_ARB.
       */
      unreachable("Unexpected target in brwProgramStringNotify");
   }

   return true;
}

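/* Implement glMemoryBarrier() by translating the given GL barrier bits
 * into the corresponding PIPE_CONTROL cache flushes and invalidations.
 */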
static void
brw_memory_barrier(struct gl_context *ctx, GLbitfield barriers)
{
   struct brw_context *brw = brw_context(ctx);
   const struct intel_device_info *devinfo = &brw->screen->devinfo;
   unsigned bits = PIPE_CONTROL_DATA_CACHE_FLUSH | PIPE_CONTROL_CS_STALL;
   assert(devinfo->ver >= 7 && devinfo->ver <= 11);

   if (barriers & (GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT |
                   GL_ELEMENT_ARRAY_BARRIER_BIT |
                   GL_COMMAND_BARRIER_BIT))
      bits |= PIPE_CONTROL_VF_CACHE_INVALIDATE;

   if (barriers & GL_UNIFORM_BARRIER_BIT)
      bits |= (PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
               PIPE_CONTROL_CONST_CACHE_INVALIDATE);

   if (barriers & GL_TEXTURE_FETCH_BARRIER_BIT)
      bits |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;

   if (barriers & (GL_TEXTURE_UPDATE_BARRIER_BIT |
                   GL_PIXEL_BUFFER_BARRIER_BIT))
      bits |= (PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
               PIPE_CONTROL_RENDER_TARGET_FLUSH);

   if (barriers & GL_FRAMEBUFFER_BARRIER_BIT)
      bits |= (PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
               PIPE_CONTROL_RENDER_TARGET_FLUSH);

   /* Typed surface messages are handled by the render cache on IVB, so we
    * need to flush it too.
    */
   if (devinfo->verx10 == 70)
      bits |= PIPE_CONTROL_RENDER_TARGET_FLUSH;

   brw_emit_pipe_control_flush(brw, bits);
}

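/* When coherent framebuffer fetch isn't supported, flush the render target
 * and invalidate the texture cache so prior rendering is visible to
 * subsequent framebuffer reads.
 */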
static void
brw_framebuffer_fetch_barrier(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);
   const struct intel_device_info *devinfo = &brw->screen->devinfo;

   if (!ctx->Extensions.EXT_shader_framebuffer_fetch) {
      if (devinfo->ver >= 6) {
         brw_emit_pipe_control_flush(brw,
                                     PIPE_CONTROL_RENDER_TARGET_FLUSH |
                                     PIPE_CONTROL_CS_STALL);
         brw_emit_pipe_control_flush(brw,
                                     PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
      } else {
         brw_emit_pipe_control_flush(brw,
                                     PIPE_CONTROL_RENDER_TARGET_FLUSH);
      }
   }
}

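/**
 * Return (in *scratch_bo) a scratch BO of at least \p size bytes,
 * allocating a new one if the current BO is missing or too small.  Note
 * that the old contents are not copied over.
 */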
void
brw_get_scratch_bo(struct brw_context *brw,
                   struct brw_bo **scratch_bo, int size)
{
   struct brw_bo *old_bo = *scratch_bo;

   if (old_bo && old_bo->size < size) {
      brw_bo_unreference(old_bo);
      old_bo = NULL;
   }

   if (!old_bo) {
      *scratch_bo =
         brw_bo_alloc(brw->bufmgr, "scratch bo", size, BRW_MEMZONE_SCRATCH);
   }
}

/**
 * Reserve enough scratch space for the given stage to hold \p per_thread_size
 * bytes times the device's maximum number of scratch IDs for that stage.
 */
void
brw_alloc_stage_scratch(struct brw_context *brw,
                        struct brw_stage_state *stage_state,
                        unsigned per_thread_size)
{
   if (stage_state->per_thread_scratch >= per_thread_size)
      return;

   stage_state->per_thread_scratch = per_thread_size;

   if (stage_state->scratch_bo)
      brw_bo_unreference(stage_state->scratch_bo);

   const struct intel_device_info *devinfo = &brw->screen->devinfo;
   assert(stage_state->stage < ARRAY_SIZE(devinfo->max_scratch_ids));
   unsigned max_ids = devinfo->max_scratch_ids[stage_state->stage];
   stage_state->scratch_bo =
      brw_bo_alloc(brw->bufmgr, "shader scratch space",
                   per_thread_size * max_ids, BRW_MEMZONE_SCRATCH);
}

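/* Plug the brw program hooks into the driver function table, replacing the
 * TNL defaults installed before us.
 */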
void
brw_init_frag_prog_functions(struct dd_function_table *functions)
{
   assert(functions->ProgramStringNotify == _tnl_program_string);

   functions->NewProgram = brw_new_program;
   functions->DeleteProgram = brw_delete_program;
   functions->ProgramStringNotify = brw_program_string_notify;

   functions->LinkShader = brw_link_shader;

   functions->MemoryBarrier = brw_memory_barrier;
   functions->FramebufferFetchBarrier = brw_framebuffer_fetch_barrier;
}

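/* Cumulative counters for one shader_time entry, mirroring the three
 * (time, written, reset) slots each shader writes into the BO.
 */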
struct shader_times {
   uint64_t time;
   uint64_t written;
   uint64_t reset;
};

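/* Allocate the shader_time BO (three slots per entry) along with the
 * arrays tracking each entry's name, ID, type, and cumulative counts.
 */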
void
brw_init_shader_time(struct brw_context *brw)
{
   const int max_entries = 2048;
   brw->shader_time.bo =
      brw_bo_alloc(brw->bufmgr, "shader time",
                   max_entries * BRW_SHADER_TIME_STRIDE * 3,
                   BRW_MEMZONE_OTHER);
   brw->shader_time.names = rzalloc_array(brw, const char *, max_entries);
   brw->shader_time.ids = rzalloc_array(brw, int, max_entries);
   brw->shader_time.types = rzalloc_array(brw, enum shader_time_shader_type,
                                          max_entries);
   brw->shader_time.cumulative = rzalloc_array(brw, struct shader_times,
                                               max_entries);
   brw->shader_time.max_entries = max_entries;
}

static int
compare_time(const void *a, const void *b)
{
   uint64_t * const *a_val = a;
   uint64_t * const *b_val = b;

   /* We don't just subtract because we're turning the value to an int. */
   if (**a_val < **b_val)
      return -1;
   else if (**a_val == **b_val)
      return 0;
   else
      return 1;
}

static void
print_shader_time_line(const char *stage, const char *name,
                       int shader_num, uint64_t time, uint64_t total)
{
   fprintf(stderr, "%-6s%-18s", stage, name);

   if (shader_num != 0)
      fprintf(stderr, "%4d: ", shader_num);
   else
      fprintf(stderr, "    : ");

   fprintf(stderr, "%16lld (%7.2f Gcycles)      %4.1f%%\n",
           (long long)time,
           (double)time / 1000000000.0,
           (double)time / total * 100.0);
}

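/* Print a breakdown of the accumulated GPU cycles, sorted by time.  When
 * an entry's counters were reset mid-collection, its time is scaled by
 * (written + reset) / written to approximate the lost samples.
 */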
static void
brw_report_shader_time(struct brw_context *brw)
{
   if (!brw->shader_time.bo || !brw->shader_time.num_entries)
      return;

   uint64_t scaled[brw->shader_time.num_entries];
   uint64_t *sorted[brw->shader_time.num_entries];
   uint64_t total_by_type[ST_CS + 1];
   memset(total_by_type, 0, sizeof(total_by_type));
   double total = 0;
   for (int i = 0; i < brw->shader_time.num_entries; i++) {
      uint64_t written = 0, reset = 0;
      enum shader_time_shader_type type = brw->shader_time.types[i];

      sorted[i] = &scaled[i];

      switch (type) {
      case ST_VS:
      case ST_TCS:
      case ST_TES:
      case ST_GS:
      case ST_FS8:
      case ST_FS16:
      case ST_FS32:
      case ST_CS:
         written = brw->shader_time.cumulative[i].written;
         reset = brw->shader_time.cumulative[i].reset;
         break;

      default:
         /* I sometimes want to print things that aren't the 3 shader times.
          * Just print the sum in that case.
          */
         written = 1;
         reset = 0;
         break;
      }

      uint64_t time = brw->shader_time.cumulative[i].time;
      if (written) {
         scaled[i] = time / written * (written + reset);
      } else {
         scaled[i] = time;
      }

      switch (type) {
      case ST_VS:
      case ST_TCS:
      case ST_TES:
      case ST_GS:
      case ST_FS8:
      case ST_FS16:
      case ST_FS32:
      case ST_CS:
         total_by_type[type] += scaled[i];
         break;
      default:
         break;
      }

      total += scaled[i];
   }

   if (total == 0) {
      fprintf(stderr, "No shader time collected yet\n");
      return;
   }

   qsort(sorted, brw->shader_time.num_entries, sizeof(sorted[0]), compare_time);

   fprintf(stderr, "\n");
   fprintf(stderr, "type          ID                  cycles spent                   %% of total\n");
   for (int s = 0; s < brw->shader_time.num_entries; s++) {
      const char *stage;
      /* Work back from the sorted pointer to the entry index to print. */
      int i = sorted[s] - scaled;

      if (scaled[i] == 0)
         continue;

      int shader_num = brw->shader_time.ids[i];
      const char *shader_name = brw->shader_time.names[i];

      switch (brw->shader_time.types[i]) {
      case ST_VS:
         stage = "vs";
         break;
      case ST_TCS:
         stage = "tcs";
         break;
      case ST_TES:
         stage = "tes";
         break;
      case ST_GS:
         stage = "gs";
         break;
      case ST_FS8:
         stage = "fs8";
         break;
      case ST_FS16:
         stage = "fs16";
         break;
      case ST_FS32:
         stage = "fs32";
         break;
      case ST_CS:
         stage = "cs";
         break;
      default:
         stage = "other";
         break;
      }

      print_shader_time_line(stage, shader_name, shader_num,
                             scaled[i], total);
   }

   fprintf(stderr, "\n");
   print_shader_time_line("total", "vs", 0, total_by_type[ST_VS], total);
   print_shader_time_line("total", "tcs", 0, total_by_type[ST_TCS], total);
   print_shader_time_line("total", "tes", 0, total_by_type[ST_TES], total);
   print_shader_time_line("total", "gs", 0, total_by_type[ST_GS], total);
   print_shader_time_line("total", "fs8", 0, total_by_type[ST_FS8], total);
   print_shader_time_line("total", "fs16", 0, total_by_type[ST_FS16], total);
   print_shader_time_line("total", "fs32", 0, total_by_type[ST_FS32], total);
   print_shader_time_line("total", "cs", 0, total_by_type[ST_CS], total);
}

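/* Read back the per-entry counters from the shader_time BO and add them to
 * the cumulative totals, then zero the BO for the next collection period.
 */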
static void
brw_collect_shader_time(struct brw_context *brw)
{
   if (!brw->shader_time.bo)
      return;

   /* This probably stalls on the last rendering.  We could fix that by
    * delaying reading the reports, but it doesn't look like it's a big
    * overhead compared to the cost of tracking the time in the first place.
    */
   void *bo_map = brw_bo_map(brw, brw->shader_time.bo, MAP_READ | MAP_WRITE);

   for (int i = 0; i < brw->shader_time.num_entries; i++) {
      uint32_t *times = bo_map + i * 3 * BRW_SHADER_TIME_STRIDE;

      brw->shader_time.cumulative[i].time += times[BRW_SHADER_TIME_STRIDE * 0 / 4];
      brw->shader_time.cumulative[i].written += times[BRW_SHADER_TIME_STRIDE * 1 / 4];
      brw->shader_time.cumulative[i].reset += times[BRW_SHADER_TIME_STRIDE * 2 / 4];
   }

   /* Zero the BO to clear it for our next collection. */
   memset(bo_map, 0, brw->shader_time.bo->size);
   brw_bo_unmap(brw->shader_time.bo);
}

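/* Accumulate the latest shader_time counters, printing a report at most
 * once per second.
 */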
void
brw_collect_and_report_shader_time(struct brw_context *brw)
{
   brw_collect_shader_time(brw);

   if (brw->shader_time.report_time == 0 ||
       get_time() - brw->shader_time.report_time >= 1.0) {
      brw_report_shader_time(brw);
      brw->shader_time.report_time = get_time();
   }
}

/**
 * Chooses an index in the shader_time buffer and sets up tracking information
 * for our printouts.
 *
 * Note that this holds on to references to the underlying programs, which may
 * change their lifetimes compared to normal operation.
 */
int
brw_get_shader_time_index(struct brw_context *brw, struct gl_program *prog,
                          enum shader_time_shader_type type, bool is_glsl_sh)
{
   int shader_time_index = brw->shader_time.num_entries++;
   assert(shader_time_index < brw->shader_time.max_entries);
   brw->shader_time.types[shader_time_index] = type;

   const char *name;
   if (prog->Id == 0) {
      name = "ff";
   } else if (is_glsl_sh) {
      name = prog->info.label ?
         ralloc_strdup(brw->shader_time.names, prog->info.label) : "glsl";
   } else {
      name = "prog";
   }

   brw->shader_time.names[shader_time_index] = name;
   brw->shader_time.ids[shader_time_index] = prog->Id;

   return shader_time_index;
}

void
brw_destroy_shader_time(struct brw_context *brw)
{
   brw_bo_unreference(brw->shader_time.bo);
   brw->shader_time.bo = NULL;
}

void
brw_stage_prog_data_free(const void *p)
{
   struct brw_stage_prog_data *prog_data = (struct brw_stage_prog_data *)p;

   ralloc_free(prog_data->param);
   ralloc_free(prog_data->pull_param);
}

void
brw_dump_arb_asm(const char *stage, struct gl_program *prog)
{
   fprintf(stderr, "ARB_%s_program %d ir for native %s shader\n",
           stage, prog->Id, stage);
   _mesa_print_program(prog);
}

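/* Set up the sampler portion of a precompile key: assume no swizzling,
 * except for shadow samplers on hardware without shader channel select
 * (pre-Haswell), which get the (X, X, X, 1) swizzle that the default
 * DEPTH_TEXTURE_MODE implies.
 */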
void
brw_setup_tex_for_precompile(const struct intel_device_info *devinfo,
                             struct brw_sampler_prog_key_data *tex,
                             const struct gl_program *prog)
{
   const bool has_shader_channel_select = devinfo->verx10 >= 75;
   unsigned sampler_count = util_last_bit(prog->SamplersUsed);
   for (unsigned i = 0; i < sampler_count; i++) {
      if (!has_shader_channel_select && (prog->ShadowSamplers & (1 << i))) {
         /* Assume DEPTH_TEXTURE_MODE is the default: X, X, X, 1 */
         tex->swizzles[i] =
            MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_ONE);
      } else {
         /* Color sampler: assume no swizzling. */
         tex->swizzles[i] = SWIZZLE_XYZW;
      }
   }
}

/**
 * Sets up the starting offsets for the groups of binding table entries
 * common to all pipeline stages.
 *
 * Unused groups are initialized to 0xd0d0d0d0 to make it obvious that they're
 * unused, and also to ensure that adding small offsets to them will trigger
 * some of our asserts that surface indices are < BRW_MAX_SURFACES.
 */
uint32_t
brw_assign_common_binding_table_offsets(const struct intel_device_info *devinfo,
                                        const struct gl_program *prog,
                                        struct brw_stage_prog_data *stage_prog_data,
                                        uint32_t next_binding_table_offset)
{
   int num_textures = util_last_bit(prog->SamplersUsed);

   stage_prog_data->binding_table.texture_start = next_binding_table_offset;
   next_binding_table_offset += num_textures;

   if (prog->info.num_ubos) {
      assert(prog->info.num_ubos <= BRW_MAX_UBO);
      stage_prog_data->binding_table.ubo_start = next_binding_table_offset;
      next_binding_table_offset += prog->info.num_ubos;
   } else {
      stage_prog_data->binding_table.ubo_start = 0xd0d0d0d0;
   }

   if (prog->info.num_ssbos || prog->info.num_abos) {
      assert(prog->info.num_abos <= BRW_MAX_ABO);
      assert(prog->info.num_ssbos <= BRW_MAX_SSBO);
      stage_prog_data->binding_table.ssbo_start = next_binding_table_offset;
      next_binding_table_offset += prog->info.num_abos + prog->info.num_ssbos;
   } else {
      stage_prog_data->binding_table.ssbo_start = 0xd0d0d0d0;
   }

   if (INTEL_DEBUG(DEBUG_SHADER_TIME)) {
      stage_prog_data->binding_table.shader_time_start = next_binding_table_offset;
      next_binding_table_offset++;
   } else {
      stage_prog_data->binding_table.shader_time_start = 0xd0d0d0d0;
   }

   if (prog->info.uses_texture_gather) {
      if (devinfo->ver >= 8) {
         stage_prog_data->binding_table.gather_texture_start =
            stage_prog_data->binding_table.texture_start;
      } else {
         stage_prog_data->binding_table.gather_texture_start = next_binding_table_offset;
         next_binding_table_offset += num_textures;
      }
   } else {
      stage_prog_data->binding_table.gather_texture_start = 0xd0d0d0d0;
   }

   if (prog->info.num_images) {
      stage_prog_data->binding_table.image_start = next_binding_table_offset;
      next_binding_table_offset += prog->info.num_images;
   } else {
      stage_prog_data->binding_table.image_start = 0xd0d0d0d0;
   }

   /* This may or may not be used depending on how the compile goes. */
   stage_prog_data->binding_table.pull_constants_start = next_binding_table_offset;
   next_binding_table_offset++;

   /* Plane 0 is just the regular texture section */
   stage_prog_data->binding_table.plane_start[0] = stage_prog_data->binding_table.texture_start;

   stage_prog_data->binding_table.plane_start[1] = next_binding_table_offset;
   next_binding_table_offset += num_textures;

   stage_prog_data->binding_table.plane_start[2] = next_binding_table_offset;
   next_binding_table_offset += num_textures;

   /* Set the binding table size.  Some callers may append new entries
    * and increase this accordingly.
    */
   stage_prog_data->binding_table.size_bytes = next_binding_table_offset * 4;

   assert(next_binding_table_offset <= BRW_MAX_SURFACES);
   return next_binding_table_offset;
}

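/* Populate a default (guessed) program key for the given stage, as used
 * when precompiling before the actual draw-time state is known.
 */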
void
brw_populate_default_key(const struct brw_compiler *compiler,
                         union brw_any_prog_key *prog_key,
                         struct gl_shader_program *sh_prog,
                         struct gl_program *prog)
{
   switch (prog->info.stage) {
   case MESA_SHADER_VERTEX:
      brw_vs_populate_default_key(compiler, &prog_key->vs, prog);
      break;
   case MESA_SHADER_TESS_CTRL:
      brw_tcs_populate_default_key(compiler, &prog_key->tcs, sh_prog, prog);
      break;
   case MESA_SHADER_TESS_EVAL:
      brw_tes_populate_default_key(compiler, &prog_key->tes, sh_prog, prog);
      break;
   case MESA_SHADER_GEOMETRY:
      brw_gs_populate_default_key(compiler, &prog_key->gs, prog);
      break;
   case MESA_SHADER_FRAGMENT:
      brw_wm_populate_default_key(compiler, &prog_key->wm, prog);
      break;
   case MESA_SHADER_COMPUTE:
      brw_cs_populate_default_key(compiler, &prog_key->cs, prog);
      break;
   default:
      unreachable("Unsupported stage!");
   }
}

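/* Log that a shader is being recompiled and, if a previous compile can be
 * found in the program cache, report which key fields differed.
 */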
void
brw_debug_recompile(struct brw_context *brw,
                    gl_shader_stage stage,
                    unsigned api_id,
                    struct brw_base_prog_key *key)
{
   const struct brw_compiler *compiler = brw->screen->compiler;
   enum brw_cache_id cache_id = brw_stage_cache_id(stage);

   brw_shader_perf_log(compiler, brw, "Recompiling %s shader for program %d\n",
                       _mesa_shader_stage_to_string(stage), api_id);

   const void *old_key =
      brw_find_previous_compile(&brw->cache, cache_id, key->program_string_id);

   brw_debug_key_recompile(compiler, brw, stage, old_key, key);
}