1 /**************************************************************************
2  *
3  * Copyright 2003 VMware, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial portions
16  * of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  **************************************************************************/
27 
28 #include "i915_batch.h"
29 #include "i915_context.h"
30 #include "i915_debug.h"
31 #include "i915_fpc.h"
32 #include "i915_reg.h"
33 #include "i915_resource.h"
34 
35 #include "pipe/p_context.h"
36 #include "pipe/p_defines.h"
37 #include "pipe/p_format.h"
38 
39 #include "util/format/u_format.h"
40 #include "util/u_math.h"
41 #include "util/u_memory.h"
42 
/**
 * Descriptor for one tracked group of hardware state.
 *
 * NOTE(review): nothing in the visible part of this file instantiates this
 * struct; the field notes below only restate the declarations.
 */
struct i915_tracked_hw_state {
   /* Name of the state group. */
   const char *name;

   /* Callback taking the context and an out-parameter for batch space. */
   void (*validate)(struct i915_context *, unsigned *batch_space);

   /* Callback taking only the context. */
   void (*emit)(struct i915_context *);

   unsigned dirty;
   unsigned batch_space;
};
49 
50 static void
validate_flush(struct i915_context * i915,unsigned * batch_space)51 validate_flush(struct i915_context *i915, unsigned *batch_space)
52 {
53    *batch_space = i915->flush_dirty ? 1 : 0;
54 }
55 
56 static void
emit_flush(struct i915_context * i915)57 emit_flush(struct i915_context *i915)
58 {
59    /* Cache handling is very cheap atm. State handling can request to flushes:
60     * - I915_FLUSH_CACHE which is a flush everything request and
61     * - I915_PIPELINE_FLUSH which is specifically for the draw_offset flush.
62     * Because the cache handling is so dumb, no explicit "invalidate map cache".
63     * Also, the first is a strict superset of the latter, so the following logic
64     * works. */
65    if (i915->flush_dirty & I915_FLUSH_CACHE)
66       OUT_BATCH(MI_FLUSH | FLUSH_MAP_CACHE);
67    else if (i915->flush_dirty & I915_PIPELINE_FLUSH)
68       OUT_BATCH(MI_FLUSH | INHIBIT_FLUSH_RENDER_CACHE);
69 }
70 
71 uint32_t invariant_state[] = {
72    _3DSTATE_AA_CMD | AA_LINE_ECAAR_WIDTH_ENABLE | AA_LINE_ECAAR_WIDTH_1_0 |
73       AA_LINE_REGION_WIDTH_ENABLE | AA_LINE_REGION_WIDTH_1_0,
74 
75    _3DSTATE_DFLT_DIFFUSE_CMD, 0,
76 
77    _3DSTATE_DFLT_SPEC_CMD, 0,
78 
79    _3DSTATE_DFLT_Z_CMD, 0,
80 
81    _3DSTATE_COORD_SET_BINDINGS | CSB_TCB(0, 0) | CSB_TCB(1, 1) | CSB_TCB(2, 2) |
82       CSB_TCB(3, 3) | CSB_TCB(4, 4) | CSB_TCB(5, 5) | CSB_TCB(6, 6) |
83       CSB_TCB(7, 7),
84 
85    _3DSTATE_RASTER_RULES_CMD | ENABLE_POINT_RASTER_RULE |
86       OGL_POINT_RASTER_RULE | ENABLE_LINE_STRIP_PROVOKE_VRTX |
87       ENABLE_TRI_FAN_PROVOKE_VRTX | LINE_STRIP_PROVOKE_VRTX(1) |
88       TRI_FAN_PROVOKE_VRTX(2) | ENABLE_TEXKILL_3D_4D | TEXKILL_4D,
89 
90    _3DSTATE_DEPTH_SUBRECT_DISABLE,
91 
92    /* disable indirect state for now
93     */
94    _3DSTATE_LOAD_INDIRECT | 0, 0};
95 
96 static void
emit_invariant(struct i915_context * i915)97 emit_invariant(struct i915_context *i915)
98 {
99    i915_winsys_batchbuffer_write(
100       i915->batch, invariant_state,
101       ARRAY_SIZE(invariant_state) * sizeof(uint32_t));
102 }
103 
104 static void
validate_immediate(struct i915_context * i915,unsigned * batch_space)105 validate_immediate(struct i915_context *i915, unsigned *batch_space)
106 {
107    unsigned dirty = (1 << I915_IMMEDIATE_S0 | 1 << I915_IMMEDIATE_S1 |
108                      1 << I915_IMMEDIATE_S2 | 1 << I915_IMMEDIATE_S3 |
109                      1 << I915_IMMEDIATE_S3 | 1 << I915_IMMEDIATE_S4 |
110                      1 << I915_IMMEDIATE_S5 | 1 << I915_IMMEDIATE_S6) &
111                     i915->immediate_dirty;
112 
113    if (i915->immediate_dirty & (1 << I915_IMMEDIATE_S0) && i915->vbo)
114       i915->validation_buffers[i915->num_validation_buffers++] = i915->vbo;
115 
116    *batch_space = 1 + util_bitcount(dirty);
117 }
118 
119 static void
emit_immediate_s5(struct i915_context * i915,uint32_t imm)120 emit_immediate_s5(struct i915_context *i915, uint32_t imm)
121 {
122    struct i915_surface *surf = i915_surface(i915->framebuffer.cbufs[0]);
123 
124    if (surf) {
125       uint32_t writemask = imm & S5_WRITEDISABLE_MASK;
126       imm &= ~S5_WRITEDISABLE_MASK;
127 
128       /* The register bits are not in order. */
129       static const uint32_t writedisables[4] = {
130          S5_WRITEDISABLE_RED,
131          S5_WRITEDISABLE_GREEN,
132          S5_WRITEDISABLE_BLUE,
133          S5_WRITEDISABLE_ALPHA,
134       };
135 
136       for (int i = 0; i < 4; i++) {
137          if (writemask & writedisables[surf->color_swizzle[i]])
138             imm |= writedisables[i];
139       }
140    }
141 
142    OUT_BATCH(imm);
143 }
144 
145 static void
emit_immediate(struct i915_context * i915)146 emit_immediate(struct i915_context *i915)
147 {
148    /* remove unwanted bits and S7 */
149    unsigned dirty = (1 << I915_IMMEDIATE_S0 | 1 << I915_IMMEDIATE_S1 |
150                      1 << I915_IMMEDIATE_S2 | 1 << I915_IMMEDIATE_S3 |
151                      1 << I915_IMMEDIATE_S3 | 1 << I915_IMMEDIATE_S4 |
152                      1 << I915_IMMEDIATE_S5 | 1 << I915_IMMEDIATE_S6) &
153                     i915->immediate_dirty;
154    int i, num = util_bitcount(dirty);
155    assert(num && num <= I915_MAX_IMMEDIATE);
156 
157    OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | dirty << 4 | (num - 1));
158 
159    if (i915->immediate_dirty & (1 << I915_IMMEDIATE_S0)) {
160       if (i915->vbo)
161          OUT_RELOC(i915->vbo, I915_USAGE_VERTEX,
162                    i915->current.immediate[I915_IMMEDIATE_S0]);
163       else
164          OUT_BATCH(0);
165    }
166 
167    for (i = 1; i < I915_MAX_IMMEDIATE; i++) {
168       if (dirty & (1 << i)) {
169          if (i == I915_IMMEDIATE_S5)
170             emit_immediate_s5(i915, i915->current.immediate[i]);
171          else
172             OUT_BATCH(i915->current.immediate[i]);
173       }
174    }
175 }
176 
177 static void
validate_dynamic(struct i915_context * i915,unsigned * batch_space)178 validate_dynamic(struct i915_context *i915, unsigned *batch_space)
179 {
180    *batch_space =
181       util_bitcount(i915->dynamic_dirty & ((1 << I915_MAX_DYNAMIC) - 1));
182 }
183 
184 static void
emit_dynamic(struct i915_context * i915)185 emit_dynamic(struct i915_context *i915)
186 {
187    int i;
188    for (i = 0; i < I915_MAX_DYNAMIC; i++) {
189       if (i915->dynamic_dirty & (1 << i))
190          OUT_BATCH(i915->current.dynamic[i]);
191    }
192 }
193 
194 static void
validate_static(struct i915_context * i915,unsigned * batch_space)195 validate_static(struct i915_context *i915, unsigned *batch_space)
196 {
197    *batch_space = 0;
198 
199    if (i915->current.cbuf_bo && (i915->static_dirty & I915_DST_BUF_COLOR)) {
200       i915->validation_buffers[i915->num_validation_buffers++] =
201          i915->current.cbuf_bo;
202       *batch_space += 3;
203    }
204 
205    if (i915->current.depth_bo && (i915->static_dirty & I915_DST_BUF_DEPTH)) {
206       i915->validation_buffers[i915->num_validation_buffers++] =
207          i915->current.depth_bo;
208       *batch_space += 3;
209    }
210 
211    if (i915->static_dirty & I915_DST_VARS)
212       *batch_space += 2;
213 
214    if (i915->static_dirty & I915_DST_RECT)
215       *batch_space += 5;
216 }
217 
218 static void
emit_static(struct i915_context * i915)219 emit_static(struct i915_context *i915)
220 {
221    if (i915->current.cbuf_bo && (i915->static_dirty & I915_DST_BUF_COLOR)) {
222       OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
223       OUT_BATCH(i915->current.cbuf_flags);
224       OUT_RELOC(i915->current.cbuf_bo, I915_USAGE_RENDER, 0);
225    }
226 
227    /* What happens if no zbuf??
228     */
229    if (i915->current.depth_bo && (i915->static_dirty & I915_DST_BUF_DEPTH)) {
230       OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
231       OUT_BATCH(i915->current.depth_flags);
232       OUT_RELOC(i915->current.depth_bo, I915_USAGE_RENDER, 0);
233    }
234 
235    if (i915->static_dirty & I915_DST_VARS) {
236       OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD);
237       OUT_BATCH(i915->current.dst_buf_vars);
238    }
239 }
240 
241 static void
validate_map(struct i915_context * i915,unsigned * batch_space)242 validate_map(struct i915_context *i915, unsigned *batch_space)
243 {
244    const uint32_t enabled = i915->current.sampler_enable_flags;
245    uint32_t unit;
246    struct i915_texture *tex;
247 
248    *batch_space = i915->current.sampler_enable_nr
249                      ? 2 + 3 * i915->current.sampler_enable_nr
250                      : 0;
251 
252    for (unit = 0; unit < I915_TEX_UNITS; unit++) {
253       if (enabled & (1 << unit)) {
254          tex = i915_texture(i915->fragment_sampler_views[unit]->texture);
255          i915->validation_buffers[i915->num_validation_buffers++] = tex->buffer;
256       }
257    }
258 }
259 
260 static void
emit_map(struct i915_context * i915)261 emit_map(struct i915_context *i915)
262 {
263    const uint32_t nr = i915->current.sampler_enable_nr;
264    if (nr) {
265       const uint32_t enabled = i915->current.sampler_enable_flags;
266       uint32_t unit;
267       uint32_t count = 0;
268       OUT_BATCH(_3DSTATE_MAP_STATE | (3 * nr));
269       OUT_BATCH(enabled);
270       for (unit = 0; unit < I915_TEX_UNITS; unit++) {
271          if (enabled & (1 << unit)) {
272             struct i915_texture *texture =
273                i915_texture(i915->fragment_sampler_views[unit]->texture);
274             struct i915_winsys_buffer *buf = texture->buffer;
275             unsigned offset = i915->current.texbuffer[unit][2];
276 
277             assert(buf);
278 
279             count++;
280 
281             OUT_RELOC(buf, I915_USAGE_SAMPLER, offset);
282             OUT_BATCH(i915->current.texbuffer[unit][0]); /* MS3 */
283             OUT_BATCH(i915->current.texbuffer[unit][1]); /* MS4 */
284          }
285       }
286       assert(count == nr);
287    }
288 }
289 
290 static void
validate_sampler(struct i915_context * i915,unsigned * batch_space)291 validate_sampler(struct i915_context *i915, unsigned *batch_space)
292 {
293    *batch_space = i915->current.sampler_enable_nr
294                      ? 2 + 3 * i915->current.sampler_enable_nr
295                      : 0;
296 }
297 
298 static void
emit_sampler(struct i915_context * i915)299 emit_sampler(struct i915_context *i915)
300 {
301    if (i915->current.sampler_enable_nr) {
302       int i;
303 
304       OUT_BATCH(_3DSTATE_SAMPLER_STATE | (3 * i915->current.sampler_enable_nr));
305 
306       OUT_BATCH(i915->current.sampler_enable_flags);
307 
308       for (i = 0; i < I915_TEX_UNITS; i++) {
309          if (i915->current.sampler_enable_flags & (1 << i)) {
310             OUT_BATCH(i915->current.sampler[i][0]);
311             OUT_BATCH(i915->current.sampler[i][1]);
312             OUT_BATCH(i915->current.sampler[i][2]);
313          }
314       }
315    }
316 }
317 
318 static void
validate_constants(struct i915_context * i915,unsigned * batch_space)319 validate_constants(struct i915_context *i915, unsigned *batch_space)
320 {
321    int nr = i915->fs->num_constants ? 2 + 4 * i915->fs->num_constants : 0;
322 
323    *batch_space = nr;
324 }
325 
326 static void
emit_constants(struct i915_context * i915)327 emit_constants(struct i915_context *i915)
328 {
329    /* Collate the user-defined constants with the fragment shader's
330     * immediates according to the constant_flags[] array.
331     */
332    const uint32_t nr = i915->fs->num_constants;
333 
334    assert(nr <= I915_MAX_CONSTANT);
335    if (nr) {
336       uint32_t i;
337 
338       OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | (nr * 4));
339       OUT_BATCH((1 << nr) - 1);
340 
341       for (i = 0; i < nr; i++) {
342          const uint32_t *c;
343          if (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER) {
344             /* grab user-defined constant */
345             c = (uint32_t *)i915_buffer(i915->constants[PIPE_SHADER_FRAGMENT])
346                    ->data;
347             c += 4 * i;
348          } else {
349             /* emit program constant */
350             c = (uint32_t *)i915->fs->constants[i];
351          }
352 #if 0 /* debug */
353          {
354             float *f = (float *) c;
355             printf("Const %2d: %f %f %f %f %s\n", i, f[0], f[1], f[2], f[3],
356                    (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER
357                     ? "user" : "immediate"));
358          }
359 #endif
360          OUT_BATCH(*c++);
361          OUT_BATCH(*c++);
362          OUT_BATCH(*c++);
363          OUT_BATCH(*c++);
364       }
365    }
366 }
367 
368 static void
validate_program(struct i915_context * i915,unsigned * batch_space)369 validate_program(struct i915_context *i915, unsigned *batch_space)
370 {
371    /* we need more batch space if we want to emulate rgba framebuffers */
372    *batch_space = i915->fs->program_len + (i915->current.fixup_swizzle ? 3 : 0);
373 }
374 
375 static void
emit_program(struct i915_context * i915)376 emit_program(struct i915_context *i915)
377 {
378    /* we should always have, at least, a pass-through program */
379    assert(i915->fs->program_len > 0);
380 
381    /* If we're doing a fixup swizzle, that's 3 more dwords to add. */
382    uint32_t additional_size = 0;
383    if (i915->current.fixup_swizzle)
384       additional_size = 3;
385 
386    /* output the program: 1 dword of header, then 3 dwords per decl/instruction */
387    assert(i915->fs->program_len % 3 == 1);
388 
389    /* first word has the size, adjust it for fixup swizzle */
390    OUT_BATCH(i915->fs->program[0] + additional_size);
391 
392    for (int i = 1; i < i915->fs->program_len; i++)
393       OUT_BATCH(i915->fs->program[i]);
394 
395    /* we emit an additional mov with swizzle to fake RGBA framebuffers */
396    if (i915->current.fixup_swizzle) {
397       /* mov out_color, out_color.zyxw */
398       OUT_BATCH(A0_MOV | (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) |
399                 A0_DEST_CHANNEL_ALL | (REG_TYPE_OC << A0_SRC0_TYPE_SHIFT) |
400                 (T_DIFFUSE << A0_SRC0_NR_SHIFT));
401       OUT_BATCH(i915->current.fixup_swizzle);
402       OUT_BATCH(0);
403    }
404 }
405 
406 static void
emit_draw_rect(struct i915_context * i915)407 emit_draw_rect(struct i915_context *i915)
408 {
409    if (i915->static_dirty & I915_DST_RECT) {
410       OUT_BATCH(_3DSTATE_DRAW_RECT_CMD);
411       OUT_BATCH(DRAW_RECT_DIS_DEPTH_OFS);
412       OUT_BATCH(i915->current.draw_offset);
413       OUT_BATCH(i915->current.draw_size);
414       OUT_BATCH(i915->current.draw_offset);
415    }
416 }
417 
418 static bool
i915_validate_state(struct i915_context * i915,unsigned * batch_space)419 i915_validate_state(struct i915_context *i915, unsigned *batch_space)
420 {
421    unsigned tmp;
422 
423    i915->num_validation_buffers = 0;
424    if (i915->hardware_dirty & I915_HW_INVARIANT)
425       *batch_space = ARRAY_SIZE(invariant_state);
426    else
427       *batch_space = 0;
428 
429 #if 0
430 static int counter_total = 0;
431 #define VALIDATE_ATOM(atom, hw_dirty)                                          \
432    if (i915->hardware_dirty & hw_dirty) {                                      \
433       static int counter_##atom = 0;                                           \
434       validate_##atom(i915, &tmp);                                             \
435       *batch_space += tmp;                                                     \
436       counter_##atom += tmp;                                                   \
437       counter_total += tmp;                                                    \
438       printf("%s: \t%d/%d \t%2.2f\n", #atom, counter_##atom, counter_total,    \
439              counter_##atom * 100.f / counter_total);                          \
440    }
441 #else
442 #define VALIDATE_ATOM(atom, hw_dirty)                                          \
443    if (i915->hardware_dirty & hw_dirty) {                                      \
444       validate_##atom(i915, &tmp);                                             \
445       *batch_space += tmp;                                                     \
446    }
447 #endif
448    VALIDATE_ATOM(flush, I915_HW_FLUSH);
449    VALIDATE_ATOM(immediate, I915_HW_IMMEDIATE);
450    VALIDATE_ATOM(dynamic, I915_HW_DYNAMIC);
451    VALIDATE_ATOM(static, I915_HW_STATIC);
452    VALIDATE_ATOM(map, I915_HW_MAP);
453    VALIDATE_ATOM(sampler, I915_HW_SAMPLER);
454    VALIDATE_ATOM(constants, I915_HW_CONSTANTS);
455    VALIDATE_ATOM(program, I915_HW_PROGRAM);
456 #undef VALIDATE_ATOM
457 
458    if (i915->num_validation_buffers == 0)
459       return true;
460 
461    if (!i915_winsys_validate_buffers(i915->batch, i915->validation_buffers,
462                                      i915->num_validation_buffers))
463       return false;
464 
465    return true;
466 }
467 
468 /* Push the state into the sarea and/or texture memory.
469  */
470 void
i915_emit_hardware_state(struct i915_context * i915)471 i915_emit_hardware_state(struct i915_context *i915)
472 {
473    unsigned batch_space;
474    uintptr_t save_ptr;
475 
476    assert(i915->dirty == 0);
477 
478    if (I915_DBG_ON(DBG_ATOMS))
479       i915_dump_hardware_dirty(i915, __FUNCTION__);
480 
481    if (!i915_validate_state(i915, &batch_space)) {
482       FLUSH_BATCH(NULL, I915_FLUSH_ASYNC);
483       assert(i915_validate_state(i915, &batch_space));
484    }
485 
486    if (!BEGIN_BATCH(batch_space)) {
487       FLUSH_BATCH(NULL, I915_FLUSH_ASYNC);
488       assert(i915_validate_state(i915, &batch_space));
489       assert(BEGIN_BATCH(batch_space));
490    }
491 
492    save_ptr = (uintptr_t)i915->batch->ptr;
493 
494 #define EMIT_ATOM(atom, hw_dirty)                                              \
495    if (i915->hardware_dirty & hw_dirty)                                        \
496       emit_##atom(i915);
497    EMIT_ATOM(flush, I915_HW_FLUSH);
498    EMIT_ATOM(invariant, I915_HW_INVARIANT);
499    EMIT_ATOM(immediate, I915_HW_IMMEDIATE);
500    EMIT_ATOM(dynamic, I915_HW_DYNAMIC);
501    EMIT_ATOM(static, I915_HW_STATIC);
502    EMIT_ATOM(map, I915_HW_MAP);
503    EMIT_ATOM(sampler, I915_HW_SAMPLER);
504    EMIT_ATOM(constants, I915_HW_CONSTANTS);
505    EMIT_ATOM(program, I915_HW_PROGRAM);
506    EMIT_ATOM(draw_rect, I915_HW_STATIC);
507 #undef EMIT_ATOM
508 
509    I915_DBG(DBG_EMIT, "%s: used %d dwords, %d dwords reserved\n", __FUNCTION__,
510             ((uintptr_t)i915->batch->ptr - save_ptr) / 4, batch_space);
511    assert(((uintptr_t)i915->batch->ptr - save_ptr) / 4 == batch_space);
512 
513    i915->hardware_dirty = 0;
514    i915->immediate_dirty = 0;
515    i915->dynamic_dirty = 0;
516    i915->static_dirty = 0;
517    i915->flush_dirty = 0;
518 }
519