1 /*
2  * Copyright © 2017 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "genX_boilerplate.h"
25 #include "brw_defines.h"
26 #include "brw_state.h"
27 
28 static unsigned
flags_to_post_sync_op(uint32_t flags)29 flags_to_post_sync_op(uint32_t flags)
30 {
31    if (flags & PIPE_CONTROL_WRITE_IMMEDIATE)
32       return WriteImmediateData;
33 
34    if (flags & PIPE_CONTROL_WRITE_DEPTH_COUNT)
35       return WritePSDepthCount;
36 
37    if (flags & PIPE_CONTROL_WRITE_TIMESTAMP)
38       return WriteTimestamp;
39 
40    return 0;
41 }
42 
43 /**
44  * Do the given flags have a Post Sync or LRI Post Sync operation?
45  */
46 static enum pipe_control_flags
get_post_sync_flags(enum pipe_control_flags flags)47 get_post_sync_flags(enum pipe_control_flags flags)
48 {
49    flags &= PIPE_CONTROL_WRITE_IMMEDIATE |
50             PIPE_CONTROL_WRITE_DEPTH_COUNT |
51             PIPE_CONTROL_WRITE_TIMESTAMP |
52             PIPE_CONTROL_LRI_POST_SYNC_OP;
53 
54    /* Only one "Post Sync Op" is allowed, and it's mutually exclusive with
55     * "LRI Post Sync Operation".  So more than one bit set would be illegal.
56     */
57    assert(util_bitcount(flags) <= 1);
58 
59    return flags;
60 }
61 
/* True when building for Gfx7+ and brw->last_pipeline is
 * BRW_COMPUTE_PIPELINE.  The argument is parenthesized so that any
 * pointer-valued expression can be passed safely.
 */
#define IS_COMPUTE_PIPELINE(brw) \
   (GFX_VER >= 7 && (brw)->last_pipeline == BRW_COMPUTE_PIPELINE)

/* Closed interval - GFX_VER \in [x, y]
 *
 * Both bounds are parenthesized so that expressions with lower precedence
 * than the comparison operators (e.g. "cond ? a : b") are evaluated as a
 * unit instead of being split by the macro expansion.
 */
#define IS_GFX_VER_BETWEEN(x, y) (GFX_VER >= (x) && GFX_VER <= (y))
#define IS_GFX_VERx10_BETWEEN(x, y) \
   (GFX_VERx10 >= (x) && GFX_VERx10 <= (y))
69 
/**
 * Emit a series of PIPE_CONTROL commands, taking into account any
 * workarounds necessary to actually accomplish the caller's request.
 *
 * Unless otherwise noted, spec quotations in this function come from:
 *
 * Synchronization of the 3D Pipeline > PIPE_CONTROL Command > Programming
 * Restrictions for PIPE_CONTROL.
 *
 * You should not use this function directly.  Use the helpers in
 * brw_pipe_control.c instead, which may split the pipe control further.
 *
 * The body runs in phases, in this order:
 *  1. Emit any extra PIPE_CONTROLs that must precede the requested one
 *     (via brw_emit_post_sync_nonzero_flush() or by recursing).
 *  2. Adjust the local copy of \p flags (and possibly \p bo / \p offset)
 *     to satisfy workarounds, or assert that the caller already did.
 *  3. Emit exactly one PIPE_CONTROL packet built from the adjusted flags.
 *
 * \param brw     Context whose batch receives the command(s).
 * \param flags   Bitmask of PIPE_CONTROL_* bits describing the request.
 * \param bo      Buffer used for the packet's Address field; may be NULL.
 *                When NULL and the VF-cache-invalidate workaround applies,
 *                brw->workaround_bo is substituted.
 * \param offset  Offset passed to ggtt_bo() together with \p bo.
 * \param imm     Value programmed into the packet's ImmediateData field.
 */
void
genX(emit_raw_pipe_control)(struct brw_context *brw, uint32_t flags,
                            struct brw_bo *bo, uint32_t offset, uint64_t imm)
{
   /* devinfo is only read by the Gfx9 GT4 check further down. */
   UNUSED const struct intel_device_info *devinfo = &brw->screen->devinfo;
   /* Snapshot of the post-sync bits requested by the caller; the VF cache
    * invalidate workaround below may add PIPE_CONTROL_WRITE_IMMEDIATE to
    * both of these.
    */
   enum pipe_control_flags post_sync_flags = get_post_sync_flags(flags);
   enum pipe_control_flags non_lri_post_sync_flags =
      post_sync_flags & ~PIPE_CONTROL_LRI_POST_SYNC_OP;

   /* Recursive PIPE_CONTROL workarounds --------------------------------
    * (http://knowyourmeme.com/memes/xzibit-yo-dawg)
    *
    * We do these first because we want to look at the original operation,
    * rather than any workarounds we set.
    */
   if (GFX_VER == 6 && (flags & PIPE_CONTROL_RENDER_TARGET_FLUSH)) {
      /* Hardware workaround: SNB B-Spec says:
       *
       *    "[Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush
       *     Enable = 1, a PIPE_CONTROL with any non-zero post-sync-op is
       *     required."
       */
      brw_emit_post_sync_nonzero_flush(brw);
   }

   if (GFX_VER == 9 && (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE)) {
      /* The PIPE_CONTROL "VF Cache Invalidation Enable" bit description
       * lists several workarounds:
       *
       *    "Project: SKL, KBL, BXT
       *
       *     If the VF Cache Invalidation Enable is set to a 1 in a
       *     PIPE_CONTROL, a separate Null PIPE_CONTROL, all bitfields
       *     sets to 0, with the VF Cache Invalidation Enable set to 0
       *     needs to be sent prior to the PIPE_CONTROL with VF Cache
       *     Invalidation Enable set to a 1."
       *
       * The recursive call passes flags == 0, so none of the flag-gated
       * workarounds in this function trigger for the null PIPE_CONTROL.
       */
      genX(emit_raw_pipe_control)(brw, 0, NULL, 0, 0);
   }

   if (GFX_VER == 9 && IS_COMPUTE_PIPELINE(brw) && post_sync_flags) {
      /* Project: SKL / Argument: LRI Post Sync Operation [23]
       *
       * "PIPECONTROL command with “Command Streamer Stall Enable” must be
       *  programmed prior to programming a PIPECONTROL command with "LRI
       *  Post Sync Operation" in GPGPU mode of operation (i.e when
       *  PIPELINE_SELECT command is set to GPGPU mode of operation)."
       *
       * The same text exists a few rows below for Post Sync Op.
       *
       * The recursive call carries no post-sync bits, so this branch is not
       * re-entered by it.
       */
      genX(emit_raw_pipe_control)(brw, PIPE_CONTROL_CS_STALL, NULL, 0, 0);
   }

   /* "Flush Types" workarounds ---------------------------------------------
    * We do these now because they may add post-sync operations or CS stalls.
    */

   if (IS_GFX_VER_BETWEEN(8, 10) &&
       (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE)) {
      /* Project: BDW, SKL+ (stopping at CNL) / Argument: VF Invalidate
       *
       * "'Post Sync Operation' must be enabled to 'Write Immediate Data' or
       *  'Write PS Depth Count' or 'Write Timestamp'."
       *
       * Only done when the caller supplied no buffer: in that case we add a
       * Write Immediate post-sync op targeting the context's workaround BO
       * and keep the cached post-sync flag copies in sync with flags.
       */
      if (!bo) {
         flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
         post_sync_flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
         non_lri_post_sync_flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
         bo = brw->workaround_bo;
         offset = brw->workaround_bo_offset;
      }
   }

   if (GFX_VERx10 < 75 && (flags & PIPE_CONTROL_DEPTH_STALL)) {
      /* Project: PRE-HSW / Argument: Depth Stall
       *
       * "The following bits must be clear:
       *  - Render Target Cache Flush Enable ([12] of DW1)
       *  - Depth Cache Flush Enable ([0] of DW1)"
       */
      assert(!(flags & (PIPE_CONTROL_RENDER_TARGET_FLUSH |
                        PIPE_CONTROL_DEPTH_CACHE_FLUSH)));
   }

   /* Intentionally empty: kept only to record the spec text below and why
    * it is not enforced.
    */
   if (GFX_VER >= 6 && (flags & PIPE_CONTROL_DEPTH_STALL)) {
      /* From the PIPE_CONTROL instruction table, bit 13 (Depth Stall Enable):
       *
       *    "This bit must be DISABLED for operations other than writing
       *     PS_DEPTH_COUNT."
       *
       * This seems like nonsense.  An Ivybridge workaround requires us to
       * emit a PIPE_CONTROL with a depth stall and write immediate post-sync
       * operation.  Gfx8+ requires us to emit depth stalls and depth cache
       * flushes together.  So, it's hard to imagine this means anything other
       * than "we originally intended this to be used for PS_DEPTH_COUNT".
       *
       * We ignore the supposed restriction and do nothing.
       */
   }

   if (GFX_VERx10 < 75 && (flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH)) {
      /* Project: PRE-HSW / Argument: Depth Cache Flush
       *
       * "Depth Stall must be clear ([13] of DW1)."
       */
      assert(!(flags & PIPE_CONTROL_DEPTH_STALL));
   }

   if (flags & (PIPE_CONTROL_RENDER_TARGET_FLUSH |
                PIPE_CONTROL_STALL_AT_SCOREBOARD)) {
      /* From the PIPE_CONTROL instruction table, bit 12 and bit 1:
       *
       *    "This bit must be DISABLED for End-of-pipe (Read) fences,
       *     PS_DEPTH_COUNT or TIMESTAMP queries."
       *
       * TODO: Implement end-of-pipe checking.
       */
      assert(!(post_sync_flags & (PIPE_CONTROL_WRITE_DEPTH_COUNT |
                                  PIPE_CONTROL_WRITE_TIMESTAMP)));
   }

   if (GFX_VER < 11 && (flags & PIPE_CONTROL_STALL_AT_SCOREBOARD)) {
      /* From the PIPE_CONTROL instruction table, bit 1:
       *
       *    "This bit is ignored if Depth Stall Enable is set.
       *     Further, the render cache is not flushed even if Write Cache
       *     Flush Enable bit is set."
       *
       * We assert that the caller doesn't do this combination, to try and
       * prevent mistakes.  It shouldn't hurt the GPU, though.
       *
       * We skip this check on Gfx11+ as the "Stall and Pixel Scoreboard"
       * and "Render Target Flush" combo is explicitly required for BTI
       * update workarounds.
       */
      assert(!(flags & (PIPE_CONTROL_DEPTH_STALL |
                        PIPE_CONTROL_RENDER_TARGET_FLUSH)));
   }

   /* PIPE_CONTROL page workarounds ------------------------------------- */

   if (IS_GFX_VER_BETWEEN(7, 8) &&
       (flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE)) {
      /* From the PIPE_CONTROL page itself:
       *
       *    "IVB, HSW, BDW
       *     Restriction: Pipe_control with CS-stall bit set must be issued
       *     before a pipe-control command that has the State Cache
       *     Invalidate bit set."
       *
       * Note that this sets the CS stall bit on this same PIPE_CONTROL
       * rather than emitting a separate one beforehand.
       */
      flags |= PIPE_CONTROL_CS_STALL;
   }

   /* Intentionally empty: placeholder for an unimplemented workaround. */
   if (GFX_VERx10 == 75) {
      /* From the PIPE_CONTROL page itself:
       *
       *    "HSW - Programming Note: PIPECONTROL with RO Cache Invalidation:
       *     Prior to programming a PIPECONTROL command with any of the RO
       *     cache invalidation bit set, program a PIPECONTROL flush command
       *     with “CS stall” bit and “HDC Flush” bit set."
       *
       * TODO: Actually implement this.  What's an HDC Flush?
       */
   }

   if (flags & PIPE_CONTROL_FLUSH_LLC) {
      /* From the PIPE_CONTROL instruction table, bit 26 (Flush LLC):
       *
       *    "Project: ALL
       *     SW must always program Post-Sync Operation to "Write Immediate
       *     Data" when Flush LLC is set."
       *
       * For now, we just require the caller to do it.
       */
      assert(flags & PIPE_CONTROL_WRITE_IMMEDIATE);
   }

   /* "Post-Sync Operation" workarounds -------------------------------- */

   /* Project: All / Argument: Global Snapshot Count Reset [19]
    *
    * "This bit must not be exercised on any product.
    *  Requires stall bit ([20] of DW1) set."
    *
    * We don't use this, so we just assert that it isn't used.  The
    * PIPE_CONTROL instruction page indicates that they intended this
    * as a debug feature and don't think it is useful in production,
    * but it may actually be usable, should we ever want to.
    */
   assert((flags & PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET) == 0);

   if (flags & (PIPE_CONTROL_MEDIA_STATE_CLEAR |
                PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE)) {
      /* Project: All / Arguments:
       *
       * - Generic Media State Clear [16]
       * - Indirect State Pointers Disable [16]
       *
       *    "Requires stall bit ([20] of DW1) set."
       *
       * Also, the PIPE_CONTROL instruction table, bit 16 (Generic Media
       * State Clear) says:
       *
       *    "PIPECONTROL command with “Command Streamer Stall Enable” must be
       *     programmed prior to programming a PIPECONTROL command with "Media
       *     State Clear" set in GPGPU mode of operation"
       *
       * This is a subset of the earlier rule, so there's nothing extra to
       * do for it beyond setting the stall bit below.
       */
      flags |= PIPE_CONTROL_CS_STALL;
   }

   if (flags & PIPE_CONTROL_STORE_DATA_INDEX) {
      /* Project: All / Argument: Store Data Index
       *
       * "Post-Sync Operation ([15:14] of DW1) must be set to something other
       *  than '0'."
       *
       * For now, we just assert that the caller does this.  We might want to
       * automatically add a write to the workaround BO...
       */
      assert(non_lri_post_sync_flags != 0);
   }

   if (flags & PIPE_CONTROL_SYNC_GFDT) {
      /* Project: All / Argument: Sync GFDT
       *
       * "Post-Sync Operation ([15:14] of DW1) must be set to something other
       *  than '0' or 0x2520[13] must be set."
       *
       * For now, we just assert that the caller does this.
       */
      assert(non_lri_post_sync_flags != 0);
   }

   if (IS_GFX_VERx10_BETWEEN(60, 75) &&
       (flags & PIPE_CONTROL_TLB_INVALIDATE)) {
      /* Project: SNB, IVB, HSW / Argument: TLB inv
       *
       * "{All SKUs}{All Steppings}: Post-Sync Operation ([15:14] of DW1)
       *  must be set to something other than '0'."
       *
       * For now, we just assert that the caller does this.
       */
      assert(non_lri_post_sync_flags != 0);
   }

   if (GFX_VER >= 7 && (flags & PIPE_CONTROL_TLB_INVALIDATE)) {
      /* Project: IVB+ / Argument: TLB inv
       *
       *    "Requires stall bit ([20] of DW1) set."
       *
       * Also, from the PIPE_CONTROL instruction table:
       *
       *    "Project: SKL+
       *     Post Sync Operation or CS stall must be set to ensure a TLB
       *     invalidation occurs.  Otherwise no cycle will occur to the TLB
       *     cache to invalidate."
       *
       * This is not a subset of the earlier rule, so there's nothing to do.
       *
       * NOTE(review): the sentence above reads oddly next to the code, which
       * unconditionally adds a CS stall whenever a TLB invalidate is
       * requested on Gfx7+ -- confirm the intended wording.
       */
      flags |= PIPE_CONTROL_CS_STALL;
   }

   /* Intentionally empty: placeholder for an unimplemented workaround. */
   if (GFX_VER == 9 && devinfo->gt == 4) {
      /* TODO: The big Skylake GT4 post sync op workaround */
   }

   /* "GPGPU specific workarounds" (both post-sync and flush) ------------ */

   if (IS_COMPUTE_PIPELINE(brw)) {
      if (GFX_VER >= 9 && (flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE)) {
         /* Project: SKL+ / Argument: Tex Invalidate
          * "Requires stall bit ([20] of DW) set for all GPGPU Workloads."
          */
         flags |= PIPE_CONTROL_CS_STALL;
      }

      if (GFX_VER == 8 && (post_sync_flags ||
                           (flags & (PIPE_CONTROL_NOTIFY_ENABLE |
                                     PIPE_CONTROL_DEPTH_STALL |
                                     PIPE_CONTROL_RENDER_TARGET_FLUSH |
                                     PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                                     PIPE_CONTROL_DATA_CACHE_FLUSH)))) {
         /* Project: BDW / Arguments:
          *
          * - LRI Post Sync Operation   [23]
          * - Post Sync Op              [15:14]
          * - Notify En                 [8]
          * - Depth Stall               [13]
          * - Render Target Cache Flush [12]
          * - Depth Cache Flush         [0]
          * - DC Flush Enable           [5]
          *
          *    "Requires stall bit ([20] of DW) set for all GPGPU and Media
          *     Workloads."
          *
          * (The docs have separate table rows for each bit, with essentially
          * the same workaround text.  We've combined them here.)
          */
         flags |= PIPE_CONTROL_CS_STALL;

         /* Also, from the PIPE_CONTROL instruction table, bit 20:
          *
          *    "Project: BDW
          *     This bit must be always set when PIPE_CONTROL command is
          *     programmed by GPGPU and MEDIA workloads, except for the cases
          *     when only Read Only Cache Invalidation bits are set (State
          *     Cache Invalidation Enable, Instruction cache Invalidation
          *     Enable, Texture Cache Invalidation Enable, Constant Cache
          *     Invalidation Enable). This is to WA FFDOP CG issue, this WA
          *     need not implemented when FF_DOP_CG is disable via "Fixed
          *     Function DOP Clock Gate Disable" bit in RC_PSMI_CTRL register."
          *
          * It sounds like we could avoid CS stalls in some cases, but we
          * don't currently bother.  This list isn't exactly the list above,
          * either...
          */
      }
   }

   /* Implement the WaCsStallAtEveryFourthPipecontrol workaround on IVB, BYT:
    *
    * "Every 4th PIPE_CONTROL command, not counting the PIPE_CONTROL with
    *  only read-cache-invalidate bit(s) set, must have a CS_STALL bit set."
    *
    * Note that the kernel does CS stalls between batches, so we only need
    * to count them within a batch.  We currently naively count every 4, and
    * don't skip the ones with only read-cache-invalidate bits set.  This
    * may or may not be a problem...
    *
    * Note on the counting below: a PIPE_CONTROL that already carries a CS
    * stall zeroes the counter and is then still counted by the increment,
    * so the counter reads 1 after it.
    */
   if (GFX_VERx10 == 70) {
      if (flags & PIPE_CONTROL_CS_STALL) {
         /* If we're doing a CS stall, reset the counter and carry on. */
         brw->pipe_controls_since_last_cs_stall = 0;
      }

      /* If this is the fourth pipe control without a CS stall, do one now. */
      if (++brw->pipe_controls_since_last_cs_stall == 4) {
         brw->pipe_controls_since_last_cs_stall = 0;
         flags |= PIPE_CONTROL_CS_STALL;
      }
   }

   /* "Stall" workarounds ----------------------------------------------
    * These have to come after the earlier ones because we may have added
    * some additional CS stalls above.
    */

   if (GFX_VER < 9 && (flags & PIPE_CONTROL_CS_STALL)) {
      /* Project: PRE-SKL, VLV, CHV
       *
       * "[All Stepping][All SKUs]:
       *
       *  One of the following must also be set:
       *
       *  - Render Target Cache Flush Enable ([12] of DW1)
       *  - Depth Cache Flush Enable ([0] of DW1)
       *  - Stall at Pixel Scoreboard ([1] of DW1)
       *  - Depth Stall ([13] of DW1)
       *  - Post-Sync Operation ([13] of DW1)
       *  - DC Flush Enable ([5] of DW1)"
       *
       * If we don't already have one of those bits set, we choose to add
       * "Stall at Pixel Scoreboard".  Some of the other bits require a
       * CS stall as a workaround (see above), which would send us into
       * an infinite recursion of PIPE_CONTROLs.  "Stall at Pixel Scoreboard"
       * appears to be safe, so we choose that.
       */
      /* Any one of these bits already satisfies the restriction above. */
      const uint32_t wa_bits = PIPE_CONTROL_RENDER_TARGET_FLUSH |
                               PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                               PIPE_CONTROL_WRITE_IMMEDIATE |
                               PIPE_CONTROL_WRITE_DEPTH_COUNT |
                               PIPE_CONTROL_WRITE_TIMESTAMP |
                               PIPE_CONTROL_STALL_AT_SCOREBOARD |
                               PIPE_CONTROL_DEPTH_STALL |
                               PIPE_CONTROL_DATA_CACHE_FLUSH;
      if (!(flags & wa_bits))
         flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;
   }

   /* Emit --------------------------------------------------------------- */

   /* Build the single PIPE_CONTROL packet from the (possibly adjusted)
    * flags.  Which fields are programmed depends on the GFX_VER this file
    * is compiled for.  FlushLLC, LRIPostSyncOperation and StoreDataIndex
    * are hard-coded here and are not derived from the corresponding
    * PIPE_CONTROL_* flag bits.
    */
   brw_batch_emit(brw, GENX(PIPE_CONTROL), pc) {
   #if GFX_VER >= 9
      pc.FlushLLC = 0;
   #endif
   #if GFX_VER >= 7
      pc.LRIPostSyncOperation = NoLRIOperation;
      pc.PipeControlFlushEnable = flags & PIPE_CONTROL_FLUSH_ENABLE;
      pc.DCFlushEnable = flags & PIPE_CONTROL_DATA_CACHE_FLUSH;
   #endif
   #if GFX_VER >= 6
      pc.StoreDataIndex = 0;
      pc.CommandStreamerStallEnable = flags & PIPE_CONTROL_CS_STALL;
      pc.GlobalSnapshotCountReset =
         flags & PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET;
      pc.TLBInvalidate = flags & PIPE_CONTROL_TLB_INVALIDATE;
      pc.GenericMediaStateClear = flags & PIPE_CONTROL_MEDIA_STATE_CLEAR;
      pc.StallAtPixelScoreboard = flags & PIPE_CONTROL_STALL_AT_SCOREBOARD;
      pc.RenderTargetCacheFlushEnable =
         flags & PIPE_CONTROL_RENDER_TARGET_FLUSH;
      pc.DepthCacheFlushEnable = flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH;
      pc.StateCacheInvalidationEnable =
         flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE;
      pc.VFCacheInvalidationEnable = flags & PIPE_CONTROL_VF_CACHE_INVALIDATE;
      pc.ConstantCacheInvalidationEnable =
         flags & PIPE_CONTROL_CONST_CACHE_INVALIDATE;
   #else
      /* Pre-Gfx6 packets expose a single WriteCacheFlush field instead. */
      pc.WriteCacheFlush = flags & PIPE_CONTROL_RENDER_TARGET_FLUSH;
   #endif
      pc.PostSyncOperation = flags_to_post_sync_op(flags);
      pc.DepthStallEnable = flags & PIPE_CONTROL_DEPTH_STALL;
      pc.InstructionCacheInvalidateEnable =
         flags & PIPE_CONTROL_INSTRUCTION_INVALIDATE;
      pc.NotifyEnable = flags & PIPE_CONTROL_NOTIFY_ENABLE;
   #if GFX_VERx10 >= 45
      pc.IndirectStatePointersDisable =
         flags & PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE;
   #endif
   #if GFX_VER >= 6
      pc.TextureCacheInvalidationEnable =
         flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
   #elif GFX_VER == 5 || GFX_VERx10 == 45
      /* On these versions the same flag drives TextureCacheFlushEnable. */
      pc.TextureCacheFlushEnable =
         flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
   #endif
      pc.Address = ggtt_bo(bo, offset);
      /* DestinationAddressType is only set before Gfx7, and only when a
       * buffer is actually present.
       */
      if (GFX_VER < 7 && bo)
         pc.DestinationAddressType = DAT_GGTT;
      pc.ImmediateData = imm;
   }
}
515