1 /*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "genX_boilerplate.h"
25 #include "brw_defines.h"
26 #include "brw_state.h"
27
28 static unsigned
flags_to_post_sync_op(uint32_t flags)29 flags_to_post_sync_op(uint32_t flags)
30 {
31 if (flags & PIPE_CONTROL_WRITE_IMMEDIATE)
32 return WriteImmediateData;
33
34 if (flags & PIPE_CONTROL_WRITE_DEPTH_COUNT)
35 return WritePSDepthCount;
36
37 if (flags & PIPE_CONTROL_WRITE_TIMESTAMP)
38 return WriteTimestamp;
39
40 return 0;
41 }
42
43 /**
44 * Do the given flags have a Post Sync or LRI Post Sync operation?
45 */
46 static enum pipe_control_flags
get_post_sync_flags(enum pipe_control_flags flags)47 get_post_sync_flags(enum pipe_control_flags flags)
48 {
49 flags &= PIPE_CONTROL_WRITE_IMMEDIATE |
50 PIPE_CONTROL_WRITE_DEPTH_COUNT |
51 PIPE_CONTROL_WRITE_TIMESTAMP |
52 PIPE_CONTROL_LRI_POST_SYNC_OP;
53
54 /* Only one "Post Sync Op" is allowed, and it's mutually exclusive with
55 * "LRI Post Sync Operation". So more than one bit set would be illegal.
56 */
57 assert(util_bitcount(flags) <= 1);
58
59 return flags;
60 }
61
/* Nonzero when GFX_VER >= 7 and the context's last_pipeline field equals
 * BRW_COMPUTE_PIPELINE.  The argument is parenthesized so that any pointer
 * expression (not just a plain identifier) can be passed.
 */
#define IS_COMPUTE_PIPELINE(brw) \
   (GFX_VER >= 7 && (brw)->last_pipeline == BRW_COMPUTE_PIPELINE)

/* Closed interval - GFX_VER \in [x, y].  Both bounds are parenthesized so
 * that low-precedence argument expressions (e.g. a ?: conditional) are
 * compared as a whole.
 */
#define IS_GFX_VER_BETWEEN(x, y) (GFX_VER >= (x) && GFX_VER <= (y))
#define IS_GFX_VERx10_BETWEEN(x, y) \
   (GFX_VERx10 >= (x) && GFX_VERx10 <= (y))
69
70 /**
71 * Emit a series of PIPE_CONTROL commands, taking into account any
72 * workarounds necessary to actually accomplish the caller's request.
73 *
74 * Unless otherwise noted, spec quotations in this function come from:
75 *
76 * Synchronization of the 3D Pipeline > PIPE_CONTROL Command > Programming
77 * Restrictions for PIPE_CONTROL.
78 *
79 * You should not use this function directly. Use the helpers in
80 * brw_pipe_control.c instead, which may split the pipe control further.
81 */
82 void
genX(emit_raw_pipe_control)83 genX(emit_raw_pipe_control)(struct brw_context *brw, uint32_t flags,
84 struct brw_bo *bo, uint32_t offset, uint64_t imm)
85 {
86 UNUSED const struct intel_device_info *devinfo = &brw->screen->devinfo;
87 enum pipe_control_flags post_sync_flags = get_post_sync_flags(flags);
88 enum pipe_control_flags non_lri_post_sync_flags =
89 post_sync_flags & ~PIPE_CONTROL_LRI_POST_SYNC_OP;
90
91 /* Recursive PIPE_CONTROL workarounds --------------------------------
92 * (http://knowyourmeme.com/memes/xzibit-yo-dawg)
93 *
94 * We do these first because we want to look at the original operation,
95 * rather than any workarounds we set.
96 */
97 if (GFX_VER == 6 && (flags & PIPE_CONTROL_RENDER_TARGET_FLUSH)) {
98 /* Hardware workaround: SNB B-Spec says:
99 *
100 * "[Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush
101 * Enable = 1, a PIPE_CONTROL with any non-zero post-sync-op is
102 * required."
103 */
104 brw_emit_post_sync_nonzero_flush(brw);
105 }
106
107 if (GFX_VER == 9 && (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE)) {
108 /* The PIPE_CONTROL "VF Cache Invalidation Enable" bit description
109 * lists several workarounds:
110 *
111 * "Project: SKL, KBL, BXT
112 *
113 * If the VF Cache Invalidation Enable is set to a 1 in a
114 * PIPE_CONTROL, a separate Null PIPE_CONTROL, all bitfields
115 * sets to 0, with the VF Cache Invalidation Enable set to 0
116 * needs to be sent prior to the PIPE_CONTROL with VF Cache
117 * Invalidation Enable set to a 1."
118 */
119 genX(emit_raw_pipe_control)(brw, 0, NULL, 0, 0);
120 }
121
122 if (GFX_VER == 9 && IS_COMPUTE_PIPELINE(brw) && post_sync_flags) {
123 /* Project: SKL / Argument: LRI Post Sync Operation [23]
124 *
125 * "PIPECONTROL command with “Command Streamer Stall Enable” must be
126 * programmed prior to programming a PIPECONTROL command with "LRI
127 * Post Sync Operation" in GPGPU mode of operation (i.e when
128 * PIPELINE_SELECT command is set to GPGPU mode of operation)."
129 *
130 * The same text exists a few rows below for Post Sync Op.
131 */
132 genX(emit_raw_pipe_control)(brw, PIPE_CONTROL_CS_STALL, NULL, 0, 0);
133 }
134
135 /* "Flush Types" workarounds ---------------------------------------------
136 * We do these now because they may add post-sync operations or CS stalls.
137 */
138
139 if (IS_GFX_VER_BETWEEN(8, 10) &&
140 (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE)) {
141 /* Project: BDW, SKL+ (stopping at CNL) / Argument: VF Invalidate
142 *
143 * "'Post Sync Operation' must be enabled to 'Write Immediate Data' or
144 * 'Write PS Depth Count' or 'Write Timestamp'."
145 */
146 if (!bo) {
147 flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
148 post_sync_flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
149 non_lri_post_sync_flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
150 bo = brw->workaround_bo;
151 offset = brw->workaround_bo_offset;
152 }
153 }
154
155 if (GFX_VERx10 < 75 && (flags & PIPE_CONTROL_DEPTH_STALL)) {
156 /* Project: PRE-HSW / Argument: Depth Stall
157 *
158 * "The following bits must be clear:
159 * - Render Target Cache Flush Enable ([12] of DW1)
160 * - Depth Cache Flush Enable ([0] of DW1)"
161 */
162 assert(!(flags & (PIPE_CONTROL_RENDER_TARGET_FLUSH |
163 PIPE_CONTROL_DEPTH_CACHE_FLUSH)));
164 }
165
166 if (GFX_VER >= 6 && (flags & PIPE_CONTROL_DEPTH_STALL)) {
167 /* From the PIPE_CONTROL instruction table, bit 13 (Depth Stall Enable):
168 *
169 * "This bit must be DISABLED for operations other than writing
170 * PS_DEPTH_COUNT."
171 *
172 * This seems like nonsense. An Ivybridge workaround requires us to
173 * emit a PIPE_CONTROL with a depth stall and write immediate post-sync
174 * operation. Gfx8+ requires us to emit depth stalls and depth cache
175 * flushes together. So, it's hard to imagine this means anything other
176 * than "we originally intended this to be used for PS_DEPTH_COUNT".
177 *
178 * We ignore the supposed restriction and do nothing.
179 */
180 }
181
182 if (GFX_VERx10 < 75 && (flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH)) {
183 /* Project: PRE-HSW / Argument: Depth Cache Flush
184 *
185 * "Depth Stall must be clear ([13] of DW1)."
186 */
187 assert(!(flags & PIPE_CONTROL_DEPTH_STALL));
188 }
189
190 if (flags & (PIPE_CONTROL_RENDER_TARGET_FLUSH |
191 PIPE_CONTROL_STALL_AT_SCOREBOARD)) {
192 /* From the PIPE_CONTROL instruction table, bit 12 and bit 1:
193 *
194 * "This bit must be DISABLED for End-of-pipe (Read) fences,
195 * PS_DEPTH_COUNT or TIMESTAMP queries."
196 *
197 * TODO: Implement end-of-pipe checking.
198 */
199 assert(!(post_sync_flags & (PIPE_CONTROL_WRITE_DEPTH_COUNT |
200 PIPE_CONTROL_WRITE_TIMESTAMP)));
201 }
202
203 if (GFX_VER < 11 && (flags & PIPE_CONTROL_STALL_AT_SCOREBOARD)) {
204 /* From the PIPE_CONTROL instruction table, bit 1:
205 *
206 * "This bit is ignored if Depth Stall Enable is set.
207 * Further, the render cache is not flushed even if Write Cache
208 * Flush Enable bit is set."
209 *
210 * We assert that the caller doesn't do this combination, to try and
211 * prevent mistakes. It shouldn't hurt the GPU, though.
212 *
213 * We skip this check on Gfx11+ as the "Stall and Pixel Scoreboard"
214 * and "Render Target Flush" combo is explicitly required for BTI
215 * update workarounds.
216 */
217 assert(!(flags & (PIPE_CONTROL_DEPTH_STALL |
218 PIPE_CONTROL_RENDER_TARGET_FLUSH)));
219 }
220
221 /* PIPE_CONTROL page workarounds ------------------------------------- */
222
223 if (IS_GFX_VER_BETWEEN(7, 8) &&
224 (flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE)) {
225 /* From the PIPE_CONTROL page itself:
226 *
227 * "IVB, HSW, BDW
228 * Restriction: Pipe_control with CS-stall bit set must be issued
229 * before a pipe-control command that has the State Cache
230 * Invalidate bit set."
231 */
232 flags |= PIPE_CONTROL_CS_STALL;
233 }
234
235 if (GFX_VERx10 == 75) {
236 /* From the PIPE_CONTROL page itself:
237 *
238 * "HSW - Programming Note: PIPECONTROL with RO Cache Invalidation:
239 * Prior to programming a PIPECONTROL command with any of the RO
240 * cache invalidation bit set, program a PIPECONTROL flush command
241 * with “CS stall” bit and “HDC Flush” bit set."
242 *
243 * TODO: Actually implement this. What's an HDC Flush?
244 */
245 }
246
247 if (flags & PIPE_CONTROL_FLUSH_LLC) {
248 /* From the PIPE_CONTROL instruction table, bit 26 (Flush LLC):
249 *
250 * "Project: ALL
251 * SW must always program Post-Sync Operation to "Write Immediate
252 * Data" when Flush LLC is set."
253 *
254 * For now, we just require the caller to do it.
255 */
256 assert(flags & PIPE_CONTROL_WRITE_IMMEDIATE);
257 }
258
259 /* "Post-Sync Operation" workarounds -------------------------------- */
260
261 /* Project: All / Argument: Global Snapshot Count Reset [19]
262 *
263 * "This bit must not be exercised on any product.
264 * Requires stall bit ([20] of DW1) set."
265 *
266 * We don't use this, so we just assert that it isn't used. The
267 * PIPE_CONTROL instruction page indicates that they intended this
268 * as a debug feature and don't think it is useful in production,
269 * but it may actually be usable, should we ever want to.
270 */
271 assert((flags & PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET) == 0);
272
273 if (flags & (PIPE_CONTROL_MEDIA_STATE_CLEAR |
274 PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE)) {
275 /* Project: All / Arguments:
276 *
277 * - Generic Media State Clear [16]
278 * - Indirect State Pointers Disable [16]
279 *
280 * "Requires stall bit ([20] of DW1) set."
281 *
282 * Also, the PIPE_CONTROL instruction table, bit 16 (Generic Media
283 * State Clear) says:
284 *
285 * "PIPECONTROL command with “Command Streamer Stall Enable” must be
286 * programmed prior to programming a PIPECONTROL command with "Media
287 * State Clear" set in GPGPU mode of operation"
288 *
289 * This is a subset of the earlier rule, so there's nothing to do.
290 */
291 flags |= PIPE_CONTROL_CS_STALL;
292 }
293
294 if (flags & PIPE_CONTROL_STORE_DATA_INDEX) {
295 /* Project: All / Argument: Store Data Index
296 *
297 * "Post-Sync Operation ([15:14] of DW1) must be set to something other
298 * than '0'."
299 *
300 * For now, we just assert that the caller does this. We might want to
301 * automatically add a write to the workaround BO...
302 */
303 assert(non_lri_post_sync_flags != 0);
304 }
305
306 if (flags & PIPE_CONTROL_SYNC_GFDT) {
307 /* Project: All / Argument: Sync GFDT
308 *
309 * "Post-Sync Operation ([15:14] of DW1) must be set to something other
310 * than '0' or 0x2520[13] must be set."
311 *
312 * For now, we just assert that the caller does this.
313 */
314 assert(non_lri_post_sync_flags != 0);
315 }
316
317 if (IS_GFX_VERx10_BETWEEN(60, 75) &&
318 (flags & PIPE_CONTROL_TLB_INVALIDATE)) {
319 /* Project: SNB, IVB, HSW / Argument: TLB inv
320 *
321 * "{All SKUs}{All Steppings}: Post-Sync Operation ([15:14] of DW1)
322 * must be set to something other than '0'."
323 *
324 * For now, we just assert that the caller does this.
325 */
326 assert(non_lri_post_sync_flags != 0);
327 }
328
329 if (GFX_VER >= 7 && (flags & PIPE_CONTROL_TLB_INVALIDATE)) {
330 /* Project: IVB+ / Argument: TLB inv
331 *
332 * "Requires stall bit ([20] of DW1) set."
333 *
334 * Also, from the PIPE_CONTROL instruction table:
335 *
336 * "Project: SKL+
337 * Post Sync Operation or CS stall must be set to ensure a TLB
338 * invalidation occurs. Otherwise no cycle will occur to the TLB
339 * cache to invalidate."
340 *
341 * This is not a subset of the earlier rule, so there's nothing to do.
342 */
343 flags |= PIPE_CONTROL_CS_STALL;
344 }
345
346 if (GFX_VER == 9 && devinfo->gt == 4) {
347 /* TODO: The big Skylake GT4 post sync op workaround */
348 }
349
350 /* "GPGPU specific workarounds" (both post-sync and flush) ------------ */
351
352 if (IS_COMPUTE_PIPELINE(brw)) {
353 if (GFX_VER >= 9 && (flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE)) {
354 /* Project: SKL+ / Argument: Tex Invalidate
355 * "Requires stall bit ([20] of DW) set for all GPGPU Workloads."
356 */
357 flags |= PIPE_CONTROL_CS_STALL;
358 }
359
360 if (GFX_VER == 8 && (post_sync_flags ||
361 (flags & (PIPE_CONTROL_NOTIFY_ENABLE |
362 PIPE_CONTROL_DEPTH_STALL |
363 PIPE_CONTROL_RENDER_TARGET_FLUSH |
364 PIPE_CONTROL_DEPTH_CACHE_FLUSH |
365 PIPE_CONTROL_DATA_CACHE_FLUSH)))) {
366 /* Project: BDW / Arguments:
367 *
368 * - LRI Post Sync Operation [23]
369 * - Post Sync Op [15:14]
370 * - Notify En [8]
371 * - Depth Stall [13]
372 * - Render Target Cache Flush [12]
373 * - Depth Cache Flush [0]
374 * - DC Flush Enable [5]
375 *
376 * "Requires stall bit ([20] of DW) set for all GPGPU and Media
377 * Workloads."
378 *
379 * (The docs have separate table rows for each bit, with essentially
380 * the same workaround text. We've combined them here.)
381 */
382 flags |= PIPE_CONTROL_CS_STALL;
383
384 /* Also, from the PIPE_CONTROL instruction table, bit 20:
385 *
386 * "Project: BDW
387 * This bit must be always set when PIPE_CONTROL command is
388 * programmed by GPGPU and MEDIA workloads, except for the cases
389 * when only Read Only Cache Invalidation bits are set (State
390 * Cache Invalidation Enable, Instruction cache Invalidation
391 * Enable, Texture Cache Invalidation Enable, Constant Cache
392 * Invalidation Enable). This is to WA FFDOP CG issue, this WA
393 * need not implemented when FF_DOP_CG is disable via "Fixed
394 * Function DOP Clock Gate Disable" bit in RC_PSMI_CTRL register."
395 *
396 * It sounds like we could avoid CS stalls in some cases, but we
397 * don't currently bother. This list isn't exactly the list above,
398 * either...
399 */
400 }
401 }
402
403 /* Implement the WaCsStallAtEveryFourthPipecontrol workaround on IVB, BYT:
404 *
405 * "Every 4th PIPE_CONTROL command, not counting the PIPE_CONTROL with
406 * only read-cache-invalidate bit(s) set, must have a CS_STALL bit set."
407 *
408 * Note that the kernel does CS stalls between batches, so we only need
409 * to count them within a batch. We currently naively count every 4, and
410 * don't skip the ones with only read-cache-invalidate bits set. This
411 * may or may not be a problem...
412 */
413 if (GFX_VERx10 == 70) {
414 if (flags & PIPE_CONTROL_CS_STALL) {
415 /* If we're doing a CS stall, reset the counter and carry on. */
416 brw->pipe_controls_since_last_cs_stall = 0;
417 }
418
419 /* If this is the fourth pipe control without a CS stall, do one now. */
420 if (++brw->pipe_controls_since_last_cs_stall == 4) {
421 brw->pipe_controls_since_last_cs_stall = 0;
422 flags |= PIPE_CONTROL_CS_STALL;
423 }
424 }
425
426 /* "Stall" workarounds ----------------------------------------------
427 * These have to come after the earlier ones because we may have added
428 * some additional CS stalls above.
429 */
430
431 if (GFX_VER < 9 && (flags & PIPE_CONTROL_CS_STALL)) {
432 /* Project: PRE-SKL, VLV, CHV
433 *
434 * "[All Stepping][All SKUs]:
435 *
436 * One of the following must also be set:
437 *
438 * - Render Target Cache Flush Enable ([12] of DW1)
439 * - Depth Cache Flush Enable ([0] of DW1)
440 * - Stall at Pixel Scoreboard ([1] of DW1)
441 * - Depth Stall ([13] of DW1)
442 * - Post-Sync Operation ([13] of DW1)
443 * - DC Flush Enable ([5] of DW1)"
444 *
445 * If we don't already have one of those bits set, we choose to add
446 * "Stall at Pixel Scoreboard". Some of the other bits require a
447 * CS stall as a workaround (see above), which would send us into
448 * an infinite recursion of PIPE_CONTROLs. "Stall at Pixel Scoreboard"
449 * appears to be safe, so we choose that.
450 */
451 const uint32_t wa_bits = PIPE_CONTROL_RENDER_TARGET_FLUSH |
452 PIPE_CONTROL_DEPTH_CACHE_FLUSH |
453 PIPE_CONTROL_WRITE_IMMEDIATE |
454 PIPE_CONTROL_WRITE_DEPTH_COUNT |
455 PIPE_CONTROL_WRITE_TIMESTAMP |
456 PIPE_CONTROL_STALL_AT_SCOREBOARD |
457 PIPE_CONTROL_DEPTH_STALL |
458 PIPE_CONTROL_DATA_CACHE_FLUSH;
459 if (!(flags & wa_bits))
460 flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;
461 }
462
463 /* Emit --------------------------------------------------------------- */
464
465 brw_batch_emit(brw, GENX(PIPE_CONTROL), pc) {
466 #if GFX_VER >= 9
467 pc.FlushLLC = 0;
468 #endif
469 #if GFX_VER >= 7
470 pc.LRIPostSyncOperation = NoLRIOperation;
471 pc.PipeControlFlushEnable = flags & PIPE_CONTROL_FLUSH_ENABLE;
472 pc.DCFlushEnable = flags & PIPE_CONTROL_DATA_CACHE_FLUSH;
473 #endif
474 #if GFX_VER >= 6
475 pc.StoreDataIndex = 0;
476 pc.CommandStreamerStallEnable = flags & PIPE_CONTROL_CS_STALL;
477 pc.GlobalSnapshotCountReset =
478 flags & PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET;
479 pc.TLBInvalidate = flags & PIPE_CONTROL_TLB_INVALIDATE;
480 pc.GenericMediaStateClear = flags & PIPE_CONTROL_MEDIA_STATE_CLEAR;
481 pc.StallAtPixelScoreboard = flags & PIPE_CONTROL_STALL_AT_SCOREBOARD;
482 pc.RenderTargetCacheFlushEnable =
483 flags & PIPE_CONTROL_RENDER_TARGET_FLUSH;
484 pc.DepthCacheFlushEnable = flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH;
485 pc.StateCacheInvalidationEnable =
486 flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE;
487 pc.VFCacheInvalidationEnable = flags & PIPE_CONTROL_VF_CACHE_INVALIDATE;
488 pc.ConstantCacheInvalidationEnable =
489 flags & PIPE_CONTROL_CONST_CACHE_INVALIDATE;
490 #else
491 pc.WriteCacheFlush = flags & PIPE_CONTROL_RENDER_TARGET_FLUSH;
492 #endif
493 pc.PostSyncOperation = flags_to_post_sync_op(flags);
494 pc.DepthStallEnable = flags & PIPE_CONTROL_DEPTH_STALL;
495 pc.InstructionCacheInvalidateEnable =
496 flags & PIPE_CONTROL_INSTRUCTION_INVALIDATE;
497 pc.NotifyEnable = flags & PIPE_CONTROL_NOTIFY_ENABLE;
498 #if GFX_VERx10 >= 45
499 pc.IndirectStatePointersDisable =
500 flags & PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE;
501 #endif
502 #if GFX_VER >= 6
503 pc.TextureCacheInvalidationEnable =
504 flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
505 #elif GFX_VER == 5 || GFX_VERx10 == 45
506 pc.TextureCacheFlushEnable =
507 flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
508 #endif
509 pc.Address = ggtt_bo(bo, offset);
510 if (GFX_VER < 7 && bo)
511 pc.DestinationAddressType = DAT_GGTT;
512 pc.ImmediateData = imm;
513 }
514 }
515