// SPDX-License-Identifier: MIT
/*
 * Copyright © 2014 Intel Corporation
 */

/**
 * DOC: Logical Rings, Logical Ring Contexts and Execlists
 *
 * Motivation:
 * GEN8 brings an expansion of the HW contexts: "Logical Ring Contexts".
 * These expanded contexts enable a number of new abilities, especially
 * "Execlists" (also implemented in this file).
 *
 * One of the main differences with the legacy HW contexts is that logical
 * ring contexts incorporate many more things into the context's state, like
 * PDPs or ringbuffer control registers:
 *
 * The reason why PDPs are included in the context is straightforward: as
 * PPGTTs (per-process GTTs) are actually per-context, having the PDPs
 * contained there means you don't need to do a ppgtt->switch_mm yourself;
 * instead, the GPU will do it for you on the context switch.
 *
 * But what about the ringbuffer control registers (head, tail, etc..)?
 * Shouldn't we just need a set of those per engine command streamer? This is
 * where the name "Logical Rings" starts to make sense: by virtualizing the
 * rings, the engine cs shifts to a new "ring buffer" with every context
 * switch. When you want to submit a workload to the GPU you: A) choose your
 * context, B) find its appropriate virtualized ring, C) write commands to it
 * and then, finally, D) tell the GPU to switch to that context.
 *
 * Instead of the legacy MI_SET_CONTEXT, the way you tell the GPU to switch
 * to a context is via a context execution list, ergo "Execlists".
 *
 * LRC implementation:
 * Regarding the creation of contexts, we have:
 *
 * - One global default context.
 * - One local default context for each opened fd.
 * - One local extra context for each context create ioctl call.
 *
 * Now that ringbuffers belong per-context (and not per-engine, like before)
 * and that contexts are uniquely tied to a given engine (and not reusable,
 * like before) we need:
 *
 * - One ringbuffer per-engine inside each context.
 * - One backing object per-engine inside each context.
 *
 * The global default context starts its life with these new objects fully
 * allocated and populated. The local default context for each opened fd is
 * more complex, because we don't know at creation time which engine is going
 * to use them. To handle this, we have implemented a deferred creation of LR
 * contexts:
 *
 * The local context starts its life as a hollow or blank holder, that only
 * gets populated for a given engine once we receive an execbuffer. If later
 * on we receive another execbuffer ioctl for the same context but a different
 * engine, we allocate/populate a new ringbuffer and context backing object and
 * so on.
 *
 * Finally, regarding local contexts created using the ioctl call: as they are
 * only allowed with the render ring, we can allocate & populate them right
 * away (no need to defer anything, at least for now).
 *
 * Execlists implementation:
 * Execlists are the new method by which, on gen8+ hardware, workloads are
 * submitted for execution (as opposed to the legacy, ringbuffer-based, method).
 * This method works as follows:
 *
 * When a request is committed, its commands (the BB start and any leading or
 * trailing commands, like the seqno breadcrumbs) are placed in the ringbuffer
 * for the appropriate context.
 * The tail pointer in the hardware context is not
 * updated at this time, but instead, kept by the driver in the ringbuffer
 * structure. A structure representing this request is added to a request queue
 * for the appropriate engine: this structure contains a copy of the context's
 * tail after the request was written to the ring buffer and a pointer to the
 * context itself.
 *
 * If the engine's request queue was empty before the request was added, the
 * queue is processed immediately. Otherwise the queue will be processed during
 * a context switch interrupt. In any case, elements on the queue will get sent
 * (in pairs) to the GPU's ExecLists Submit Port (ELSP, for short) with a
 * globally unique 20-bit submission ID.
 *
 * When execution of a request completes, the GPU updates the context status
 * buffer with a context complete event and generates a context switch interrupt.
 * During the interrupt handling, the driver examines the events in the buffer:
 * for each context complete event, if the announced ID matches that on the head
 * of the request queue, then that request is retired and removed from the queue.
 *
 * After processing, if any requests were retired and the queue is not empty
 * then a new execution list can be submitted. The two requests at the front of
 * the queue are next to be submitted but since a context may not occur twice in
 * an execution list, if subsequent requests have the same ID as the first then
 * the two requests must be combined. This is done simply by discarding requests
 * at the head of the queue until either only one request is left (in which case
 * we use a NULL second context) or the first two requests have unique IDs.
 *
 * By always executing the first two requests in the queue the driver ensures
 * that the GPU is kept as busy as possible. In the case where a single context
 * completes but a second context is still executing, the request for this second
 * context will be at the head of the queue when we remove the first one. This
 * request will then be resubmitted along with a new request for a different context,
 * which will cause the hardware to continue executing the second request and queue
 * the new request (the GPU detects the condition of a context getting preempted
 * with the same context and optimizes the context switch flow by not doing
 * preemption, but just sampling the new tail pointer).
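 *
 * As an illustration only (a sketch, not the driver's actual dequeue code;
 * the helper names below are hypothetical), the pairing rule above amounts
 * to:
 *
 *	first = head_of_queue();
 *	while ((next = request_after(first)) && same_context(first, next))
 *		first = next;		// coalesce into one RING_TAIL update
 *	submit_elsp(first, next);	// next may be NULL (single context)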
 *
 */
#include <linux/interrupt.h>

#include "i915_drv.h"
#include "i915_trace.h"
#include "i915_vgpu.h"
#include "gen8_engine_cs.h"
#include "intel_breadcrumbs.h"
#include "intel_context.h"
#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_engine_regs.h"
#include "intel_engine_stats.h"
#include "intel_execlists_submission.h"
#include "intel_gt.h"
#include "intel_gt_irq.h"
#include "intel_gt_pm.h"
#include "intel_gt_regs.h"
#include "intel_gt_requests.h"
#include "intel_lrc.h"
#include "intel_lrc_reg.h"
#include "intel_mocs.h"
#include "intel_reset.h"
#include "intel_ring.h"
#include "intel_workarounds.h"
#include "shmem_utils.h"

#define RING_EXECLIST_QFULL		(1 << 0x2)
#define RING_EXECLIST1_VALID		(1 << 0x3)
#define RING_EXECLIST0_VALID		(1 << 0x4)
#define RING_EXECLIST_ACTIVE_STATUS	(3 << 0xE)
#define RING_EXECLIST1_ACTIVE		(1 << 0x11)
#define RING_EXECLIST0_ACTIVE		(1 << 0x12)

#define GEN8_CTX_STATUS_IDLE_ACTIVE	(1 << 0)
#define GEN8_CTX_STATUS_PREEMPTED	(1 << 1)
#define GEN8_CTX_STATUS_ELEMENT_SWITCH	(1 << 2)
#define GEN8_CTX_STATUS_ACTIVE_IDLE	(1 << 3)
#define GEN8_CTX_STATUS_COMPLETE	(1 << 4)
#define GEN8_CTX_STATUS_LITE_RESTORE	(1 << 15)

#define GEN8_CTX_STATUS_COMPLETED_MASK \
	 (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)

#define GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE	(0x1) /* lower csb dword */
#define GEN12_CTX_SWITCH_DETAIL(csb_dw)	((csb_dw) & 0xF) /* upper csb dword */
#define GEN12_CSB_SW_CTX_ID_MASK		GENMASK(25, 15)
#define GEN12_IDLE_CTX_ID			0x7FF
#define GEN12_CSB_CTX_VALID(csb_dw) \
	(FIELD_GET(GEN12_CSB_SW_CTX_ID_MASK, csb_dw) != GEN12_IDLE_CTX_ID)

#define XEHP_CTX_STATUS_SWITCHED_TO_NEW_QUEUE	BIT(1) /* upper csb dword */
#define XEHP_CSB_SW_CTX_ID_MASK			GENMASK(31, 10)
#define XEHP_IDLE_CTX_ID			0xFFFF
#define XEHP_CSB_CTX_VALID(csb_dw) \
	(FIELD_GET(XEHP_CSB_SW_CTX_ID_MASK, csb_dw) != XEHP_IDLE_CTX_ID)

/* Typical size of the average request (2 pipecontrols and a MI_BB) */
#define EXECLISTS_REQUEST_SIZE 64 /* bytes */

struct virtual_engine {
	struct intel_engine_cs base;
	struct intel_context context;
	struct rcu_work rcu;

	/*
	 * We allow only a single request through the virtual engine at a time
	 * (each request in the timeline waits for the completion fence of
	 * the previous before being submitted). By restricting ourselves to
	 * only submitting a single request, each request is placed on to a
	 * physical engine to maximise load spreading (by virtue of the late
	 * greedy scheduling -- each real engine takes the next available
	 * request upon idling).
	 */
	struct i915_request *request;

	/*
	 * We keep a rbtree of available virtual engines inside each physical
	 * engine, sorted by priority. Here we preallocate the nodes we need
	 * for the virtual engine, indexed by physical_engine->id.
	 */
	struct ve_node {
		struct rb_node rb;
		int prio;
	} nodes[I915_NUM_ENGINES];

	/* And finally, which physical engines this virtual engine maps onto. */
	unsigned int num_siblings;
	struct intel_engine_cs *siblings[];
};

static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
{
	GEM_BUG_ON(!intel_engine_is_virtual(engine));
	return container_of(engine, struct virtual_engine, base);
}

static struct intel_context *
execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
			 unsigned long flags);

static struct i915_request *
__active_request(const struct intel_timeline * const tl,
		 struct i915_request *rq,
		 int error)
{
	struct i915_request *active = rq;

	list_for_each_entry_from_reverse(rq, &tl->requests, link) {
		if (__i915_request_is_complete(rq))
			break;

		if (error) {
			i915_request_set_error_once(rq, error);
			__i915_request_skip(rq);
		}
		active = rq;
	}

	return active;
}

static struct i915_request *
active_request(const struct intel_timeline * const tl, struct i915_request *rq)
{
	return __active_request(tl, rq, 0);
}

static void ring_set_paused(const struct intel_engine_cs *engine, int state)
{
	/*
	 * We inspect HWS_PREEMPT with a semaphore inside
	 * engine->emit_fini_breadcrumb. If the dword is true,
	 * the ring is paused as the semaphore will busywait
	 * until the dword is false.
	 */
	engine->status_page.addr[I915_GEM_HWS_PREEMPT] = state;
	if (state)
		wmb();
}

static struct i915_priolist *to_priolist(struct rb_node *rb)
{
	return rb_entry(rb, struct i915_priolist, node);
}

static int rq_prio(const struct i915_request *rq)
{
	return READ_ONCE(rq->sched.attr.priority);
}

static int effective_prio(const struct i915_request *rq)
{
	int prio = rq_prio(rq);

	/*
	 * If this request is special and must not be interrupted at any
	 * cost, so be it. Note we are only checking the most recent request
	 * in the context and so may be masking an earlier vip request. It
	 * is hoped that under the conditions where nopreempt is used, this
	 * will not matter (i.e. all requests to that context will be
	 * nopreempt for as long as desired).
	 */
	if (i915_request_has_nopreempt(rq))
		prio = I915_PRIORITY_UNPREEMPTABLE;

	return prio;
}

static int queue_prio(const struct i915_sched_engine *sched_engine)
{
	struct rb_node *rb;

	rb = rb_first_cached(&sched_engine->queue);
	if (!rb)
		return INT_MIN;

	return to_priolist(rb)->priority;
}

static int virtual_prio(const struct intel_engine_execlists *el)
{
	struct rb_node *rb = rb_first_cached(&el->virtual);

	return rb ? rb_entry(rb, struct ve_node, rb)->prio : INT_MIN;
}

static bool need_preempt(const struct intel_engine_cs *engine,
			 const struct i915_request *rq)
{
	int last_prio;

	if (!intel_engine_has_semaphores(engine))
		return false;

	/*
	 * Check if the current priority hint merits a preemption attempt.
	 *
	 * We record the highest value priority we saw during rescheduling
	 * prior to this dequeue, therefore we know that if it is strictly
	 * less than the current tail of ELSP[0], we do not need to force
	 * a preempt-to-idle cycle.
	 *
	 * However, the priority hint is a mere hint that we may need to
	 * preempt. If that hint is stale or we may be trying to preempt
	 * ourselves, ignore the request.
	 *
	 * More naturally we would write
	 *	prio >= max(0, last);
	 * except that we wish to prevent triggering preemption at the same
	 * priority level: the task that is running should remain running
	 * to preserve FIFO ordering of dependencies.
	 */
	last_prio = max(effective_prio(rq), I915_PRIORITY_NORMAL - 1);
	if (engine->sched_engine->queue_priority_hint <= last_prio)
		return false;

	/*
	 * Check against the first request in ELSP[1], it will, thanks to the
	 * power of PI, be the highest priority of that context.
	 */
	if (!list_is_last(&rq->sched.link, &engine->sched_engine->requests) &&
	    rq_prio(list_next_entry(rq, sched.link)) > last_prio)
		return true;

	/*
	 * If the inflight context did not trigger the preemption, then maybe
	 * it was the set of queued requests? Pick the highest priority in
	 * the queue (the first active priolist) and see if it deserves to be
	 * running instead of ELSP[0].
	 *
	 * The highest priority request in the queue cannot be either
	 * ELSP[0] or ELSP[1] as, thanks again to PI, if it was the same
	 * context, its priority would not exceed ELSP[0] aka last_prio.
	 */
	return max(virtual_prio(&engine->execlists),
		   queue_prio(engine->sched_engine)) > last_prio;
}

__maybe_unused static bool
assert_priority_queue(const struct i915_request *prev,
		      const struct i915_request *next)
{
	/*
	 * Without preemption, the prev may refer to the still active element
	 * which we refuse to let go.
	 *
	 * Even with preemption, there are times when we think it is better not
	 * to preempt and leave an ostensibly lower priority request in flight.
	 */
	if (i915_request_is_active(prev))
		return true;

	return rq_prio(prev) >= rq_prio(next);
}

static struct i915_request *
__unwind_incomplete_requests(struct intel_engine_cs *engine)
{
	struct i915_request *rq, *rn, *active = NULL;
	struct list_head *pl;
	int prio = I915_PRIORITY_INVALID;

	lockdep_assert_held(&engine->sched_engine->lock);

	list_for_each_entry_safe_reverse(rq, rn,
					 &engine->sched_engine->requests,
					 sched.link) {
		if (__i915_request_is_complete(rq)) {
			list_del_init(&rq->sched.link);
			continue;
		}

		__i915_request_unsubmit(rq);

		GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
		if (rq_prio(rq) != prio) {
			prio = rq_prio(rq);
			pl = i915_sched_lookup_priolist(engine->sched_engine,
							prio);
		}
		GEM_BUG_ON(i915_sched_engine_is_empty(engine->sched_engine));

		list_move(&rq->sched.link, pl);
		set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);

		/* Check in case we rollback so far we wrap [size/2] */
		if (intel_ring_direction(rq->ring,
					 rq->tail,
					 rq->ring->tail + 8) > 0)
			rq->context->lrc.desc |= CTX_DESC_FORCE_RESTORE;

		active = rq;
	}

	return active;
}

struct i915_request *
execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
{
	struct intel_engine_cs *engine =
		container_of(execlists, typeof(*engine), execlists);

	return __unwind_incomplete_requests(engine);
}

static void
execlists_context_status_change(struct i915_request *rq, unsigned long status)
{
	/*
	 * Only used when GVT-g is enabled now. When GVT-g is disabled,
	 * the compiler should eliminate this function as dead-code.
421 */ 422 if (!IS_ENABLED(CONFIG_DRM_I915_GVT)) 423 return; 424 425 atomic_notifier_call_chain(&rq->engine->context_status_notifier, 426 status, rq); 427 } 428 429 static void reset_active(struct i915_request *rq, 430 struct intel_engine_cs *engine) 431 { 432 struct intel_context * const ce = rq->context; 433 u32 head; 434 435 /* 436 * The executing context has been cancelled. We want to prevent 437 * further execution along this context and propagate the error on 438 * to anything depending on its results. 439 * 440 * In __i915_request_submit(), we apply the -EIO and remove the 441 * requests' payloads for any banned requests. But first, we must 442 * rewind the context back to the start of the incomplete request so 443 * that we do not jump back into the middle of the batch. 444 * 445 * We preserve the breadcrumbs and semaphores of the incomplete 446 * requests so that inter-timeline dependencies (i.e other timelines) 447 * remain correctly ordered. And we defer to __i915_request_submit() 448 * so that all asynchronous waits are correctly handled. 449 */ 450 ENGINE_TRACE(engine, "{ reset rq=%llx:%lld }\n", 451 rq->fence.context, rq->fence.seqno); 452 453 /* On resubmission of the active request, payload will be scrubbed */ 454 if (__i915_request_is_complete(rq)) 455 head = rq->tail; 456 else 457 head = __active_request(ce->timeline, rq, -EIO)->head; 458 head = intel_ring_wrap(ce->ring, head); 459 460 /* Scrub the context image to prevent replaying the previous batch */ 461 lrc_init_regs(ce, engine, true); 462 463 /* We've switched away, so this should be a no-op, but intent matters */ 464 ce->lrc.lrca = lrc_update_regs(ce, engine, head); 465 } 466 467 static bool bad_request(const struct i915_request *rq) 468 { 469 return rq->fence.error && i915_request_started(rq); 470 } 471 472 static struct intel_engine_cs * 473 __execlists_schedule_in(struct i915_request *rq) 474 { 475 struct intel_engine_cs * const engine = rq->engine; 476 struct intel_context * const ce = rq->context; 477 478 intel_context_get(ce); 479 480 if (unlikely(intel_context_is_closed(ce) && 481 !intel_engine_has_heartbeat(engine))) 482 intel_context_set_banned(ce); 483 484 if (unlikely(intel_context_is_banned(ce) || bad_request(rq))) 485 reset_active(rq, engine); 486 487 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) 488 lrc_check_regs(ce, engine, "before"); 489 490 if (ce->tag) { 491 /* Use a fixed tag for OA and friends */ 492 GEM_BUG_ON(ce->tag <= BITS_PER_LONG); 493 ce->lrc.ccid = ce->tag; 494 } else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) { 495 /* We don't need a strict matching tag, just different values */ 496 unsigned int tag = ffs(READ_ONCE(engine->context_tag)); 497 498 GEM_BUG_ON(tag == 0 || tag >= BITS_PER_LONG); 499 clear_bit(tag - 1, &engine->context_tag); 500 ce->lrc.ccid = tag << (XEHP_SW_CTX_ID_SHIFT - 32); 501 502 BUILD_BUG_ON(BITS_PER_LONG > GEN12_MAX_CONTEXT_HW_ID); 503 504 } else { 505 /* We don't need a strict matching tag, just different values */ 506 unsigned int tag = __ffs(engine->context_tag); 507 508 GEM_BUG_ON(tag >= BITS_PER_LONG); 509 __clear_bit(tag, &engine->context_tag); 510 ce->lrc.ccid = (1 + tag) << (GEN11_SW_CTX_ID_SHIFT - 32); 511 512 BUILD_BUG_ON(BITS_PER_LONG > GEN12_MAX_CONTEXT_HW_ID); 513 } 514 515 ce->lrc.ccid |= engine->execlists.ccid; 516 517 __intel_gt_pm_get(engine->gt); 518 if (engine->fw_domain && !engine->fw_active++) 519 intel_uncore_forcewake_get(engine->uncore, engine->fw_domain); 520 execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN); 521 
intel_engine_context_in(engine); 522 523 CE_TRACE(ce, "schedule-in, ccid:%x\n", ce->lrc.ccid); 524 525 return engine; 526 } 527 528 static void execlists_schedule_in(struct i915_request *rq, int idx) 529 { 530 struct intel_context * const ce = rq->context; 531 struct intel_engine_cs *old; 532 533 GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine)); 534 trace_i915_request_in(rq, idx); 535 536 old = ce->inflight; 537 if (!old) 538 old = __execlists_schedule_in(rq); 539 WRITE_ONCE(ce->inflight, ptr_inc(old)); 540 541 GEM_BUG_ON(intel_context_inflight(ce) != rq->engine); 542 } 543 544 static void 545 resubmit_virtual_request(struct i915_request *rq, struct virtual_engine *ve) 546 { 547 struct intel_engine_cs *engine = rq->engine; 548 549 spin_lock_irq(&engine->sched_engine->lock); 550 551 clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 552 WRITE_ONCE(rq->engine, &ve->base); 553 ve->base.submit_request(rq); 554 555 spin_unlock_irq(&engine->sched_engine->lock); 556 } 557 558 static void kick_siblings(struct i915_request *rq, struct intel_context *ce) 559 { 560 struct virtual_engine *ve = container_of(ce, typeof(*ve), context); 561 struct intel_engine_cs *engine = rq->engine; 562 563 /* 564 * After this point, the rq may be transferred to a new sibling, so 565 * before we clear ce->inflight make sure that the context has been 566 * removed from the b->signalers and furthermore we need to make sure 567 * that the concurrent iterator in signal_irq_work is no longer 568 * following ce->signal_link. 569 */ 570 if (!list_empty(&ce->signals)) 571 intel_context_remove_breadcrumbs(ce, engine->breadcrumbs); 572 573 /* 574 * This engine is now too busy to run this virtual request, so 575 * see if we can find an alternative engine for it to execute on. 576 * Once a request has become bonded to this engine, we treat it the 577 * same as other native request. 578 */ 579 if (i915_request_in_priority_queue(rq) && 580 rq->execution_mask != engine->mask) 581 resubmit_virtual_request(rq, ve); 582 583 if (READ_ONCE(ve->request)) 584 tasklet_hi_schedule(&ve->base.sched_engine->tasklet); 585 } 586 587 static void __execlists_schedule_out(struct i915_request * const rq, 588 struct intel_context * const ce) 589 { 590 struct intel_engine_cs * const engine = rq->engine; 591 unsigned int ccid; 592 593 /* 594 * NB process_csb() is not under the engine->sched_engine->lock and hence 595 * schedule_out can race with schedule_in meaning that we should 596 * refrain from doing non-trivial work here. 597 */ 598 599 CE_TRACE(ce, "schedule-out, ccid:%x\n", ce->lrc.ccid); 600 GEM_BUG_ON(ce->inflight != engine); 601 602 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) 603 lrc_check_regs(ce, engine, "after"); 604 605 /* 606 * If we have just completed this context, the engine may now be 607 * idle and we want to re-enter powersaving. 
608 */ 609 if (intel_timeline_is_last(ce->timeline, rq) && 610 __i915_request_is_complete(rq)) 611 intel_engine_add_retire(engine, ce->timeline); 612 613 ccid = ce->lrc.ccid; 614 if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) { 615 ccid >>= XEHP_SW_CTX_ID_SHIFT - 32; 616 ccid &= XEHP_MAX_CONTEXT_HW_ID; 617 } else { 618 ccid >>= GEN11_SW_CTX_ID_SHIFT - 32; 619 ccid &= GEN12_MAX_CONTEXT_HW_ID; 620 } 621 622 if (ccid < BITS_PER_LONG) { 623 GEM_BUG_ON(ccid == 0); 624 GEM_BUG_ON(test_bit(ccid - 1, &engine->context_tag)); 625 __set_bit(ccid - 1, &engine->context_tag); 626 } 627 628 lrc_update_runtime(ce); 629 intel_engine_context_out(engine); 630 execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); 631 if (engine->fw_domain && !--engine->fw_active) 632 intel_uncore_forcewake_put(engine->uncore, engine->fw_domain); 633 intel_gt_pm_put_async(engine->gt); 634 635 /* 636 * If this is part of a virtual engine, its next request may 637 * have been blocked waiting for access to the active context. 638 * We have to kick all the siblings again in case we need to 639 * switch (e.g. the next request is not runnable on this 640 * engine). Hopefully, we will already have submitted the next 641 * request before the tasklet runs and do not need to rebuild 642 * each virtual tree and kick everyone again. 643 */ 644 if (ce->engine != engine) 645 kick_siblings(rq, ce); 646 647 WRITE_ONCE(ce->inflight, NULL); 648 intel_context_put(ce); 649 } 650 651 static inline void execlists_schedule_out(struct i915_request *rq) 652 { 653 struct intel_context * const ce = rq->context; 654 655 trace_i915_request_out(rq); 656 657 GEM_BUG_ON(!ce->inflight); 658 ce->inflight = ptr_dec(ce->inflight); 659 if (!__intel_context_inflight_count(ce->inflight)) 660 __execlists_schedule_out(rq, ce); 661 662 i915_request_put(rq); 663 } 664 665 static u64 execlists_update_context(struct i915_request *rq) 666 { 667 struct intel_context *ce = rq->context; 668 u64 desc; 669 u32 tail, prev; 670 671 desc = ce->lrc.desc; 672 if (rq->engine->flags & I915_ENGINE_HAS_EU_PRIORITY) 673 desc |= lrc_desc_priority(rq_prio(rq)); 674 675 /* 676 * WaIdleLiteRestore:bdw,skl 677 * 678 * We should never submit the context with the same RING_TAIL twice 679 * just in case we submit an empty ring, which confuses the HW. 680 * 681 * We append a couple of NOOPs (gen8_emit_wa_tail) after the end of 682 * the normal request to be able to always advance the RING_TAIL on 683 * subsequent resubmissions (for lite restore). Should that fail us, 684 * and we try and submit the same tail again, force the context 685 * reload. 686 * 687 * If we need to return to a preempted context, we need to skip the 688 * lite-restore and force it to reload the RING_TAIL. Otherwise, the 689 * HW has a tendency to ignore us rewinding the TAIL to the end of 690 * an earlier request. 691 */ 692 GEM_BUG_ON(ce->lrc_reg_state[CTX_RING_TAIL] != rq->ring->tail); 693 prev = rq->ring->tail; 694 tail = intel_ring_set_tail(rq->ring, rq->tail); 695 if (unlikely(intel_ring_direction(rq->ring, tail, prev) <= 0)) 696 desc |= CTX_DESC_FORCE_RESTORE; 697 ce->lrc_reg_state[CTX_RING_TAIL] = tail; 698 rq->tail = rq->wa_tail; 699 700 /* 701 * Make sure the context image is complete before we submit it to HW. 702 * 703 * Ostensibly, writes (including the WCB) should be flushed prior to 704 * an uncached write such as our mmio register access, the empirical 705 * evidence (esp. 
on Braswell) suggests that the WC write into memory 706 * may not be visible to the HW prior to the completion of the UC 707 * register write and that we may begin execution from the context 708 * before its image is complete leading to invalid PD chasing. 709 */ 710 wmb(); 711 712 ce->lrc.desc &= ~CTX_DESC_FORCE_RESTORE; 713 return desc; 714 } 715 716 static void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port) 717 { 718 if (execlists->ctrl_reg) { 719 writel(lower_32_bits(desc), execlists->submit_reg + port * 2); 720 writel(upper_32_bits(desc), execlists->submit_reg + port * 2 + 1); 721 } else { 722 writel(upper_32_bits(desc), execlists->submit_reg); 723 writel(lower_32_bits(desc), execlists->submit_reg); 724 } 725 } 726 727 static __maybe_unused char * 728 dump_port(char *buf, int buflen, const char *prefix, struct i915_request *rq) 729 { 730 if (!rq) 731 return ""; 732 733 snprintf(buf, buflen, "%sccid:%x %llx:%lld%s prio %d", 734 prefix, 735 rq->context->lrc.ccid, 736 rq->fence.context, rq->fence.seqno, 737 __i915_request_is_complete(rq) ? "!" : 738 __i915_request_has_started(rq) ? "*" : 739 "", 740 rq_prio(rq)); 741 742 return buf; 743 } 744 745 static __maybe_unused noinline void 746 trace_ports(const struct intel_engine_execlists *execlists, 747 const char *msg, 748 struct i915_request * const *ports) 749 { 750 const struct intel_engine_cs *engine = 751 container_of(execlists, typeof(*engine), execlists); 752 char __maybe_unused p0[40], p1[40]; 753 754 if (!ports[0]) 755 return; 756 757 ENGINE_TRACE(engine, "%s { %s%s }\n", msg, 758 dump_port(p0, sizeof(p0), "", ports[0]), 759 dump_port(p1, sizeof(p1), ", ", ports[1])); 760 } 761 762 static bool 763 reset_in_progress(const struct intel_engine_cs *engine) 764 { 765 return unlikely(!__tasklet_is_enabled(&engine->sched_engine->tasklet)); 766 } 767 768 static __maybe_unused noinline bool 769 assert_pending_valid(const struct intel_engine_execlists *execlists, 770 const char *msg) 771 { 772 struct intel_engine_cs *engine = 773 container_of(execlists, typeof(*engine), execlists); 774 struct i915_request * const *port, *rq, *prev = NULL; 775 struct intel_context *ce = NULL; 776 u32 ccid = -1; 777 778 trace_ports(execlists, msg, execlists->pending); 779 780 /* We may be messing around with the lists during reset, lalala */ 781 if (reset_in_progress(engine)) 782 return true; 783 784 if (!execlists->pending[0]) { 785 GEM_TRACE_ERR("%s: Nothing pending for promotion!\n", 786 engine->name); 787 return false; 788 } 789 790 if (execlists->pending[execlists_num_ports(execlists)]) { 791 GEM_TRACE_ERR("%s: Excess pending[%d] for promotion!\n", 792 engine->name, execlists_num_ports(execlists)); 793 return false; 794 } 795 796 for (port = execlists->pending; (rq = *port); port++) { 797 unsigned long flags; 798 bool ok = true; 799 800 GEM_BUG_ON(!kref_read(&rq->fence.refcount)); 801 GEM_BUG_ON(!i915_request_is_active(rq)); 802 803 if (ce == rq->context) { 804 GEM_TRACE_ERR("%s: Dup context:%llx in pending[%zd]\n", 805 engine->name, 806 ce->timeline->fence_context, 807 port - execlists->pending); 808 return false; 809 } 810 ce = rq->context; 811 812 if (ccid == ce->lrc.ccid) { 813 GEM_TRACE_ERR("%s: Dup ccid:%x context:%llx in pending[%zd]\n", 814 engine->name, 815 ccid, ce->timeline->fence_context, 816 port - execlists->pending); 817 return false; 818 } 819 ccid = ce->lrc.ccid; 820 821 /* 822 * Sentinels are supposed to be the last request so they flush 823 * the current execution off the HW. 
Check that they are the only 824 * request in the pending submission. 825 * 826 * NB: Due to the async nature of preempt-to-busy and request 827 * cancellation we need to handle the case where request 828 * becomes a sentinel in parallel to CSB processing. 829 */ 830 if (prev && i915_request_has_sentinel(prev) && 831 !READ_ONCE(prev->fence.error)) { 832 GEM_TRACE_ERR("%s: context:%llx after sentinel in pending[%zd]\n", 833 engine->name, 834 ce->timeline->fence_context, 835 port - execlists->pending); 836 return false; 837 } 838 prev = rq; 839 840 /* 841 * We want virtual requests to only be in the first slot so 842 * that they are never stuck behind a hog and can be immediately 843 * transferred onto the next idle engine. 844 */ 845 if (rq->execution_mask != engine->mask && 846 port != execlists->pending) { 847 GEM_TRACE_ERR("%s: virtual engine:%llx not in prime position[%zd]\n", 848 engine->name, 849 ce->timeline->fence_context, 850 port - execlists->pending); 851 return false; 852 } 853 854 /* Hold tightly onto the lock to prevent concurrent retires! */ 855 if (!spin_trylock_irqsave(&rq->lock, flags)) 856 continue; 857 858 if (__i915_request_is_complete(rq)) 859 goto unlock; 860 861 if (i915_active_is_idle(&ce->active) && 862 !intel_context_is_barrier(ce)) { 863 GEM_TRACE_ERR("%s: Inactive context:%llx in pending[%zd]\n", 864 engine->name, 865 ce->timeline->fence_context, 866 port - execlists->pending); 867 ok = false; 868 goto unlock; 869 } 870 871 if (!i915_vma_is_pinned(ce->state)) { 872 GEM_TRACE_ERR("%s: Unpinned context:%llx in pending[%zd]\n", 873 engine->name, 874 ce->timeline->fence_context, 875 port - execlists->pending); 876 ok = false; 877 goto unlock; 878 } 879 880 if (!i915_vma_is_pinned(ce->ring->vma)) { 881 GEM_TRACE_ERR("%s: Unpinned ring:%llx in pending[%zd]\n", 882 engine->name, 883 ce->timeline->fence_context, 884 port - execlists->pending); 885 ok = false; 886 goto unlock; 887 } 888 889 unlock: 890 spin_unlock_irqrestore(&rq->lock, flags); 891 if (!ok) 892 return false; 893 } 894 895 return ce; 896 } 897 898 static void execlists_submit_ports(struct intel_engine_cs *engine) 899 { 900 struct intel_engine_execlists *execlists = &engine->execlists; 901 unsigned int n; 902 903 GEM_BUG_ON(!assert_pending_valid(execlists, "submit")); 904 905 /* 906 * We can skip acquiring intel_runtime_pm_get() here as it was taken 907 * on our behalf by the request (see i915_gem_mark_busy()) and it will 908 * not be relinquished until the device is idle (see 909 * i915_gem_idle_work_handler()). As a precaution, we make sure 910 * that all ELSP are drained i.e. we have processed the CSB, 911 * before allowing ourselves to idle and calling intel_runtime_pm_put(). 912 */ 913 GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); 914 915 /* 916 * ELSQ note: the submit queue is not cleared after being submitted 917 * to the HW so we need to make sure we always clean it up. This is 918 * currently ensured by the fact that we always write the same number 919 * of elsq entries, keep this in mind before changing the loop below. 920 */ 921 for (n = execlists_num_ports(execlists); n--; ) { 922 struct i915_request *rq = execlists->pending[n]; 923 924 write_desc(execlists, 925 rq ? 
execlists_update_context(rq) : 0, 926 n); 927 } 928 929 /* we need to manually load the submit queue */ 930 if (execlists->ctrl_reg) 931 writel(EL_CTRL_LOAD, execlists->ctrl_reg); 932 } 933 934 static bool ctx_single_port_submission(const struct intel_context *ce) 935 { 936 return (IS_ENABLED(CONFIG_DRM_I915_GVT) && 937 intel_context_force_single_submission(ce)); 938 } 939 940 static bool can_merge_ctx(const struct intel_context *prev, 941 const struct intel_context *next) 942 { 943 if (prev != next) 944 return false; 945 946 if (ctx_single_port_submission(prev)) 947 return false; 948 949 return true; 950 } 951 952 static unsigned long i915_request_flags(const struct i915_request *rq) 953 { 954 return READ_ONCE(rq->fence.flags); 955 } 956 957 static bool can_merge_rq(const struct i915_request *prev, 958 const struct i915_request *next) 959 { 960 GEM_BUG_ON(prev == next); 961 GEM_BUG_ON(!assert_priority_queue(prev, next)); 962 963 /* 964 * We do not submit known completed requests. Therefore if the next 965 * request is already completed, we can pretend to merge it in 966 * with the previous context (and we will skip updating the ELSP 967 * and tracking). Thus hopefully keeping the ELSP full with active 968 * contexts, despite the best efforts of preempt-to-busy to confuse 969 * us. 970 */ 971 if (__i915_request_is_complete(next)) 972 return true; 973 974 if (unlikely((i915_request_flags(prev) | i915_request_flags(next)) & 975 (BIT(I915_FENCE_FLAG_NOPREEMPT) | 976 BIT(I915_FENCE_FLAG_SENTINEL)))) 977 return false; 978 979 if (!can_merge_ctx(prev->context, next->context)) 980 return false; 981 982 GEM_BUG_ON(i915_seqno_passed(prev->fence.seqno, next->fence.seqno)); 983 return true; 984 } 985 986 static bool virtual_matches(const struct virtual_engine *ve, 987 const struct i915_request *rq, 988 const struct intel_engine_cs *engine) 989 { 990 const struct intel_engine_cs *inflight; 991 992 if (!rq) 993 return false; 994 995 if (!(rq->execution_mask & engine->mask)) /* We peeked too soon! */ 996 return false; 997 998 /* 999 * We track when the HW has completed saving the context image 1000 * (i.e. when we have seen the final CS event switching out of 1001 * the context) and must not overwrite the context image before 1002 * then. This restricts us to only using the active engine 1003 * while the previous virtualized request is inflight (so 1004 * we reuse the register offsets). This is a very small 1005 * hystersis on the greedy seelction algorithm. 
1006 */ 1007 inflight = intel_context_inflight(&ve->context); 1008 if (inflight && inflight != engine) 1009 return false; 1010 1011 return true; 1012 } 1013 1014 static struct virtual_engine * 1015 first_virtual_engine(struct intel_engine_cs *engine) 1016 { 1017 struct intel_engine_execlists *el = &engine->execlists; 1018 struct rb_node *rb = rb_first_cached(&el->virtual); 1019 1020 while (rb) { 1021 struct virtual_engine *ve = 1022 rb_entry(rb, typeof(*ve), nodes[engine->id].rb); 1023 struct i915_request *rq = READ_ONCE(ve->request); 1024 1025 /* lazily cleanup after another engine handled rq */ 1026 if (!rq || !virtual_matches(ve, rq, engine)) { 1027 rb_erase_cached(rb, &el->virtual); 1028 RB_CLEAR_NODE(rb); 1029 rb = rb_first_cached(&el->virtual); 1030 continue; 1031 } 1032 1033 return ve; 1034 } 1035 1036 return NULL; 1037 } 1038 1039 static void virtual_xfer_context(struct virtual_engine *ve, 1040 struct intel_engine_cs *engine) 1041 { 1042 unsigned int n; 1043 1044 if (likely(engine == ve->siblings[0])) 1045 return; 1046 1047 GEM_BUG_ON(READ_ONCE(ve->context.inflight)); 1048 if (!intel_engine_has_relative_mmio(engine)) 1049 lrc_update_offsets(&ve->context, engine); 1050 1051 /* 1052 * Move the bound engine to the top of the list for 1053 * future execution. We then kick this tasklet first 1054 * before checking others, so that we preferentially 1055 * reuse this set of bound registers. 1056 */ 1057 for (n = 1; n < ve->num_siblings; n++) { 1058 if (ve->siblings[n] == engine) { 1059 swap(ve->siblings[n], ve->siblings[0]); 1060 break; 1061 } 1062 } 1063 } 1064 1065 static void defer_request(struct i915_request *rq, struct list_head * const pl) 1066 { 1067 LIST_HEAD(list); 1068 1069 /* 1070 * We want to move the interrupted request to the back of 1071 * the round-robin list (i.e. its priority level), but 1072 * in doing so, we must then move all requests that were in 1073 * flight and were waiting for the interrupted request to 1074 * be run after it again. 1075 */ 1076 do { 1077 struct i915_dependency *p; 1078 1079 GEM_BUG_ON(i915_request_is_active(rq)); 1080 list_move_tail(&rq->sched.link, pl); 1081 1082 for_each_waiter(p, rq) { 1083 struct i915_request *w = 1084 container_of(p->waiter, typeof(*w), sched); 1085 1086 if (p->flags & I915_DEPENDENCY_WEAK) 1087 continue; 1088 1089 /* Leave semaphores spinning on the other engines */ 1090 if (w->engine != rq->engine) 1091 continue; 1092 1093 /* No waiter should start before its signaler */ 1094 GEM_BUG_ON(i915_request_has_initial_breadcrumb(w) && 1095 __i915_request_has_started(w) && 1096 !__i915_request_is_complete(rq)); 1097 1098 if (!i915_request_is_ready(w)) 1099 continue; 1100 1101 if (rq_prio(w) < rq_prio(rq)) 1102 continue; 1103 1104 GEM_BUG_ON(rq_prio(w) > rq_prio(rq)); 1105 GEM_BUG_ON(i915_request_is_active(w)); 1106 list_move_tail(&w->sched.link, &list); 1107 } 1108 1109 rq = list_first_entry_or_null(&list, typeof(*rq), sched.link); 1110 } while (rq); 1111 } 1112 1113 static void defer_active(struct intel_engine_cs *engine) 1114 { 1115 struct i915_request *rq; 1116 1117 rq = __unwind_incomplete_requests(engine); 1118 if (!rq) 1119 return; 1120 1121 defer_request(rq, i915_sched_lookup_priolist(engine->sched_engine, 1122 rq_prio(rq))); 1123 } 1124 1125 static bool 1126 timeslice_yield(const struct intel_engine_execlists *el, 1127 const struct i915_request *rq) 1128 { 1129 /* 1130 * Once bitten, forever smitten! 
1131 * 1132 * If the active context ever busy-waited on a semaphore, 1133 * it will be treated as a hog until the end of its timeslice (i.e. 1134 * until it is scheduled out and replaced by a new submission, 1135 * possibly even its own lite-restore). The HW only sends an interrupt 1136 * on the first miss, and we do know if that semaphore has been 1137 * signaled, or even if it is now stuck on another semaphore. Play 1138 * safe, yield if it might be stuck -- it will be given a fresh 1139 * timeslice in the near future. 1140 */ 1141 return rq->context->lrc.ccid == READ_ONCE(el->yield); 1142 } 1143 1144 static bool needs_timeslice(const struct intel_engine_cs *engine, 1145 const struct i915_request *rq) 1146 { 1147 if (!intel_engine_has_timeslices(engine)) 1148 return false; 1149 1150 /* If not currently active, or about to switch, wait for next event */ 1151 if (!rq || __i915_request_is_complete(rq)) 1152 return false; 1153 1154 /* We do not need to start the timeslice until after the ACK */ 1155 if (READ_ONCE(engine->execlists.pending[0])) 1156 return false; 1157 1158 /* If ELSP[1] is occupied, always check to see if worth slicing */ 1159 if (!list_is_last_rcu(&rq->sched.link, 1160 &engine->sched_engine->requests)) { 1161 ENGINE_TRACE(engine, "timeslice required for second inflight context\n"); 1162 return true; 1163 } 1164 1165 /* Otherwise, ELSP[0] is by itself, but may be waiting in the queue */ 1166 if (!i915_sched_engine_is_empty(engine->sched_engine)) { 1167 ENGINE_TRACE(engine, "timeslice required for queue\n"); 1168 return true; 1169 } 1170 1171 if (!RB_EMPTY_ROOT(&engine->execlists.virtual.rb_root)) { 1172 ENGINE_TRACE(engine, "timeslice required for virtual\n"); 1173 return true; 1174 } 1175 1176 return false; 1177 } 1178 1179 static bool 1180 timeslice_expired(struct intel_engine_cs *engine, const struct i915_request *rq) 1181 { 1182 const struct intel_engine_execlists *el = &engine->execlists; 1183 1184 if (i915_request_has_nopreempt(rq) && __i915_request_has_started(rq)) 1185 return false; 1186 1187 if (!needs_timeslice(engine, rq)) 1188 return false; 1189 1190 return timer_expired(&el->timer) || timeslice_yield(el, rq); 1191 } 1192 1193 static unsigned long timeslice(const struct intel_engine_cs *engine) 1194 { 1195 return READ_ONCE(engine->props.timeslice_duration_ms); 1196 } 1197 1198 static void start_timeslice(struct intel_engine_cs *engine) 1199 { 1200 struct intel_engine_execlists *el = &engine->execlists; 1201 unsigned long duration; 1202 1203 /* Disable the timer if there is nothing to switch to */ 1204 duration = 0; 1205 if (needs_timeslice(engine, *el->active)) { 1206 /* Avoid continually prolonging an active timeslice */ 1207 if (timer_active(&el->timer)) { 1208 /* 1209 * If we just submitted a new ELSP after an old 1210 * context, that context may have already consumed 1211 * its timeslice, so recheck. 1212 */ 1213 if (!timer_pending(&el->timer)) 1214 tasklet_hi_schedule(&engine->sched_engine->tasklet); 1215 return; 1216 } 1217 1218 duration = timeslice(engine); 1219 } 1220 1221 set_timer_ms(&el->timer, duration); 1222 } 1223 1224 static void record_preemption(struct intel_engine_execlists *execlists) 1225 { 1226 (void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++); 1227 } 1228 1229 static unsigned long active_preempt_timeout(struct intel_engine_cs *engine, 1230 const struct i915_request *rq) 1231 { 1232 if (!rq) 1233 return 0; 1234 1235 /* Force a fast reset for terminated contexts (ignoring sysfs!) 
*/ 1236 if (unlikely(intel_context_is_banned(rq->context) || bad_request(rq))) 1237 return 1; 1238 1239 return READ_ONCE(engine->props.preempt_timeout_ms); 1240 } 1241 1242 static void set_preempt_timeout(struct intel_engine_cs *engine, 1243 const struct i915_request *rq) 1244 { 1245 if (!intel_engine_has_preempt_reset(engine)) 1246 return; 1247 1248 set_timer_ms(&engine->execlists.preempt, 1249 active_preempt_timeout(engine, rq)); 1250 } 1251 1252 static bool completed(const struct i915_request *rq) 1253 { 1254 if (i915_request_has_sentinel(rq)) 1255 return false; 1256 1257 return __i915_request_is_complete(rq); 1258 } 1259 1260 static void execlists_dequeue(struct intel_engine_cs *engine) 1261 { 1262 struct intel_engine_execlists * const execlists = &engine->execlists; 1263 struct i915_sched_engine * const sched_engine = engine->sched_engine; 1264 struct i915_request **port = execlists->pending; 1265 struct i915_request ** const last_port = port + execlists->port_mask; 1266 struct i915_request *last, * const *active; 1267 struct virtual_engine *ve; 1268 struct rb_node *rb; 1269 bool submit = false; 1270 1271 /* 1272 * Hardware submission is through 2 ports. Conceptually each port 1273 * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is 1274 * static for a context, and unique to each, so we only execute 1275 * requests belonging to a single context from each ring. RING_HEAD 1276 * is maintained by the CS in the context image, it marks the place 1277 * where it got up to last time, and through RING_TAIL we tell the CS 1278 * where we want to execute up to this time. 1279 * 1280 * In this list the requests are in order of execution. Consecutive 1281 * requests from the same context are adjacent in the ringbuffer. We 1282 * can combine these requests into a single RING_TAIL update: 1283 * 1284 * RING_HEAD...req1...req2 1285 * ^- RING_TAIL 1286 * since to execute req2 the CS must first execute req1. 1287 * 1288 * Our goal then is to point each port to the end of a consecutive 1289 * sequence of requests as being the most optimal (fewest wake ups 1290 * and context switches) submission. 1291 */ 1292 1293 spin_lock(&sched_engine->lock); 1294 1295 /* 1296 * If the queue is higher priority than the last 1297 * request in the currently active context, submit afresh. 1298 * We will resubmit again afterwards in case we need to split 1299 * the active context to interject the preemption request, 1300 * i.e. we will retrigger preemption following the ack in case 1301 * of trouble. 1302 * 1303 */ 1304 active = execlists->active; 1305 while ((last = *active) && completed(last)) 1306 active++; 1307 1308 if (last) { 1309 if (need_preempt(engine, last)) { 1310 ENGINE_TRACE(engine, 1311 "preempting last=%llx:%lld, prio=%d, hint=%d\n", 1312 last->fence.context, 1313 last->fence.seqno, 1314 last->sched.attr.priority, 1315 sched_engine->queue_priority_hint); 1316 record_preemption(execlists); 1317 1318 /* 1319 * Don't let the RING_HEAD advance past the breadcrumb 1320 * as we unwind (and until we resubmit) so that we do 1321 * not accidentally tell it to go backwards. 1322 */ 1323 ring_set_paused(engine, 1); 1324 1325 /* 1326 * Note that we have not stopped the GPU at this point, 1327 * so we are unwinding the incomplete requests as they 1328 * remain inflight and so by the time we do complete 1329 * the preemption, some of the unwound requests may 1330 * complete! 
1331 */ 1332 __unwind_incomplete_requests(engine); 1333 1334 last = NULL; 1335 } else if (timeslice_expired(engine, last)) { 1336 ENGINE_TRACE(engine, 1337 "expired:%s last=%llx:%lld, prio=%d, hint=%d, yield?=%s\n", 1338 yesno(timer_expired(&execlists->timer)), 1339 last->fence.context, last->fence.seqno, 1340 rq_prio(last), 1341 sched_engine->queue_priority_hint, 1342 yesno(timeslice_yield(execlists, last))); 1343 1344 /* 1345 * Consume this timeslice; ensure we start a new one. 1346 * 1347 * The timeslice expired, and we will unwind the 1348 * running contexts and recompute the next ELSP. 1349 * If that submit will be the same pair of contexts 1350 * (due to dependency ordering), we will skip the 1351 * submission. If we don't cancel the timer now, 1352 * we will see that the timer has expired and 1353 * reschedule the tasklet; continually until the 1354 * next context switch or other preeemption event. 1355 * 1356 * Since we have decided to reschedule based on 1357 * consumption of this timeslice, if we submit the 1358 * same context again, grant it a full timeslice. 1359 */ 1360 cancel_timer(&execlists->timer); 1361 ring_set_paused(engine, 1); 1362 defer_active(engine); 1363 1364 /* 1365 * Unlike for preemption, if we rewind and continue 1366 * executing the same context as previously active, 1367 * the order of execution will remain the same and 1368 * the tail will only advance. We do not need to 1369 * force a full context restore, as a lite-restore 1370 * is sufficient to resample the monotonic TAIL. 1371 * 1372 * If we switch to any other context, similarly we 1373 * will not rewind TAIL of current context, and 1374 * normal save/restore will preserve state and allow 1375 * us to later continue executing the same request. 1376 */ 1377 last = NULL; 1378 } else { 1379 /* 1380 * Otherwise if we already have a request pending 1381 * for execution after the current one, we can 1382 * just wait until the next CS event before 1383 * queuing more. In either case we will force a 1384 * lite-restore preemption event, but if we wait 1385 * we hopefully coalesce several updates into a single 1386 * submission. 1387 */ 1388 if (active[1]) { 1389 /* 1390 * Even if ELSP[1] is occupied and not worthy 1391 * of timeslices, our queue might be. 1392 */ 1393 spin_unlock(&sched_engine->lock); 1394 return; 1395 } 1396 } 1397 } 1398 1399 /* XXX virtual is always taking precedence */ 1400 while ((ve = first_virtual_engine(engine))) { 1401 struct i915_request *rq; 1402 1403 spin_lock(&ve->base.sched_engine->lock); 1404 1405 rq = ve->request; 1406 if (unlikely(!virtual_matches(ve, rq, engine))) 1407 goto unlock; /* lost the race to a sibling */ 1408 1409 GEM_BUG_ON(rq->engine != &ve->base); 1410 GEM_BUG_ON(rq->context != &ve->context); 1411 1412 if (unlikely(rq_prio(rq) < queue_prio(sched_engine))) { 1413 spin_unlock(&ve->base.sched_engine->lock); 1414 break; 1415 } 1416 1417 if (last && !can_merge_rq(last, rq)) { 1418 spin_unlock(&ve->base.sched_engine->lock); 1419 spin_unlock(&engine->sched_engine->lock); 1420 return; /* leave this for another sibling */ 1421 } 1422 1423 ENGINE_TRACE(engine, 1424 "virtual rq=%llx:%lld%s, new engine? %s\n", 1425 rq->fence.context, 1426 rq->fence.seqno, 1427 __i915_request_is_complete(rq) ? "!" : 1428 __i915_request_has_started(rq) ? 
"*" : 1429 "", 1430 yesno(engine != ve->siblings[0])); 1431 1432 WRITE_ONCE(ve->request, NULL); 1433 WRITE_ONCE(ve->base.sched_engine->queue_priority_hint, INT_MIN); 1434 1435 rb = &ve->nodes[engine->id].rb; 1436 rb_erase_cached(rb, &execlists->virtual); 1437 RB_CLEAR_NODE(rb); 1438 1439 GEM_BUG_ON(!(rq->execution_mask & engine->mask)); 1440 WRITE_ONCE(rq->engine, engine); 1441 1442 if (__i915_request_submit(rq)) { 1443 /* 1444 * Only after we confirm that we will submit 1445 * this request (i.e. it has not already 1446 * completed), do we want to update the context. 1447 * 1448 * This serves two purposes. It avoids 1449 * unnecessary work if we are resubmitting an 1450 * already completed request after timeslicing. 1451 * But more importantly, it prevents us altering 1452 * ve->siblings[] on an idle context, where 1453 * we may be using ve->siblings[] in 1454 * virtual_context_enter / virtual_context_exit. 1455 */ 1456 virtual_xfer_context(ve, engine); 1457 GEM_BUG_ON(ve->siblings[0] != engine); 1458 1459 submit = true; 1460 last = rq; 1461 } 1462 1463 i915_request_put(rq); 1464 unlock: 1465 spin_unlock(&ve->base.sched_engine->lock); 1466 1467 /* 1468 * Hmm, we have a bunch of virtual engine requests, 1469 * but the first one was already completed (thanks 1470 * preempt-to-busy!). Keep looking at the veng queue 1471 * until we have no more relevant requests (i.e. 1472 * the normal submit queue has higher priority). 1473 */ 1474 if (submit) 1475 break; 1476 } 1477 1478 while ((rb = rb_first_cached(&sched_engine->queue))) { 1479 struct i915_priolist *p = to_priolist(rb); 1480 struct i915_request *rq, *rn; 1481 1482 priolist_for_each_request_consume(rq, rn, p) { 1483 bool merge = true; 1484 1485 /* 1486 * Can we combine this request with the current port? 1487 * It has to be the same context/ringbuffer and not 1488 * have any exceptions (e.g. GVT saying never to 1489 * combine contexts). 1490 * 1491 * If we can combine the requests, we can execute both 1492 * by updating the RING_TAIL to point to the end of the 1493 * second request, and so we never need to tell the 1494 * hardware about the first. 1495 */ 1496 if (last && !can_merge_rq(last, rq)) { 1497 /* 1498 * If we are on the second port and cannot 1499 * combine this request with the last, then we 1500 * are done. 1501 */ 1502 if (port == last_port) 1503 goto done; 1504 1505 /* 1506 * We must not populate both ELSP[] with the 1507 * same LRCA, i.e. we must submit 2 different 1508 * contexts if we submit 2 ELSP. 1509 */ 1510 if (last->context == rq->context) 1511 goto done; 1512 1513 if (i915_request_has_sentinel(last)) 1514 goto done; 1515 1516 /* 1517 * We avoid submitting virtual requests into 1518 * the secondary ports so that we can migrate 1519 * the request immediately to another engine 1520 * rather than wait for the primary request. 1521 */ 1522 if (rq->execution_mask != engine->mask) 1523 goto done; 1524 1525 /* 1526 * If GVT overrides us we only ever submit 1527 * port[0], leaving port[1] empty. Note that we 1528 * also have to be careful that we don't queue 1529 * the same context (even though a different 1530 * request) to the second port. 
1531 */ 1532 if (ctx_single_port_submission(last->context) || 1533 ctx_single_port_submission(rq->context)) 1534 goto done; 1535 1536 merge = false; 1537 } 1538 1539 if (__i915_request_submit(rq)) { 1540 if (!merge) { 1541 *port++ = i915_request_get(last); 1542 last = NULL; 1543 } 1544 1545 GEM_BUG_ON(last && 1546 !can_merge_ctx(last->context, 1547 rq->context)); 1548 GEM_BUG_ON(last && 1549 i915_seqno_passed(last->fence.seqno, 1550 rq->fence.seqno)); 1551 1552 submit = true; 1553 last = rq; 1554 } 1555 } 1556 1557 rb_erase_cached(&p->node, &sched_engine->queue); 1558 i915_priolist_free(p); 1559 } 1560 done: 1561 *port++ = i915_request_get(last); 1562 1563 /* 1564 * Here be a bit of magic! Or sleight-of-hand, whichever you prefer. 1565 * 1566 * We choose the priority hint such that if we add a request of greater 1567 * priority than this, we kick the submission tasklet to decide on 1568 * the right order of submitting the requests to hardware. We must 1569 * also be prepared to reorder requests as they are in-flight on the 1570 * HW. We derive the priority hint then as the first "hole" in 1571 * the HW submission ports and if there are no available slots, 1572 * the priority of the lowest executing request, i.e. last. 1573 * 1574 * When we do receive a higher priority request ready to run from the 1575 * user, see queue_request(), the priority hint is bumped to that 1576 * request triggering preemption on the next dequeue (or subsequent 1577 * interrupt for secondary ports). 1578 */ 1579 sched_engine->queue_priority_hint = queue_prio(sched_engine); 1580 i915_sched_engine_reset_on_empty(sched_engine); 1581 spin_unlock(&sched_engine->lock); 1582 1583 /* 1584 * We can skip poking the HW if we ended up with exactly the same set 1585 * of requests as currently running, e.g. trying to timeslice a pair 1586 * of ordered contexts. 1587 */ 1588 if (submit && 1589 memcmp(active, 1590 execlists->pending, 1591 (port - execlists->pending) * sizeof(*port))) { 1592 *port = NULL; 1593 while (port-- != execlists->pending) 1594 execlists_schedule_in(*port, port - execlists->pending); 1595 1596 WRITE_ONCE(execlists->yield, -1); 1597 set_preempt_timeout(engine, *active); 1598 execlists_submit_ports(engine); 1599 } else { 1600 ring_set_paused(engine, 0); 1601 while (port-- != execlists->pending) 1602 i915_request_put(*port); 1603 *execlists->pending = NULL; 1604 } 1605 } 1606 1607 static void execlists_dequeue_irq(struct intel_engine_cs *engine) 1608 { 1609 local_irq_disable(); /* Suspend interrupts across request submission */ 1610 execlists_dequeue(engine); 1611 local_irq_enable(); /* flush irq_work (e.g. breadcrumb enabling) */ 1612 } 1613 1614 static void clear_ports(struct i915_request **ports, int count) 1615 { 1616 memset_p((void **)ports, NULL, count); 1617 } 1618 1619 static void 1620 copy_ports(struct i915_request **dst, struct i915_request **src, int count) 1621 { 1622 /* A memcpy_p() would be very useful here! 
*/ 1623 while (count--) 1624 WRITE_ONCE(*dst++, *src++); /* avoid write tearing */ 1625 } 1626 1627 static struct i915_request ** 1628 cancel_port_requests(struct intel_engine_execlists * const execlists, 1629 struct i915_request **inactive) 1630 { 1631 struct i915_request * const *port; 1632 1633 for (port = execlists->pending; *port; port++) 1634 *inactive++ = *port; 1635 clear_ports(execlists->pending, ARRAY_SIZE(execlists->pending)); 1636 1637 /* Mark the end of active before we overwrite *active */ 1638 for (port = xchg(&execlists->active, execlists->pending); *port; port++) 1639 *inactive++ = *port; 1640 clear_ports(execlists->inflight, ARRAY_SIZE(execlists->inflight)); 1641 1642 smp_wmb(); /* complete the seqlock for execlists_active() */ 1643 WRITE_ONCE(execlists->active, execlists->inflight); 1644 1645 /* Having cancelled all outstanding process_csb(), stop their timers */ 1646 GEM_BUG_ON(execlists->pending[0]); 1647 cancel_timer(&execlists->timer); 1648 cancel_timer(&execlists->preempt); 1649 1650 return inactive; 1651 } 1652 1653 static void invalidate_csb_entries(const u64 *first, const u64 *last) 1654 { 1655 clflush((void *)first); 1656 clflush((void *)last); 1657 } 1658 1659 /* 1660 * Starting with Gen12, the status has a new format: 1661 * 1662 * bit 0: switched to new queue 1663 * bit 1: reserved 1664 * bit 2: semaphore wait mode (poll or signal), only valid when 1665 * switch detail is set to "wait on semaphore" 1666 * bits 3-5: engine class 1667 * bits 6-11: engine instance 1668 * bits 12-14: reserved 1669 * bits 15-25: sw context id of the lrc the GT switched to 1670 * bits 26-31: sw counter of the lrc the GT switched to 1671 * bits 32-35: context switch detail 1672 * - 0: ctx complete 1673 * - 1: wait on sync flip 1674 * - 2: wait on vblank 1675 * - 3: wait on scanline 1676 * - 4: wait on semaphore 1677 * - 5: context preempted (not on SEMAPHORE_WAIT or 1678 * WAIT_FOR_EVENT) 1679 * bit 36: reserved 1680 * bits 37-43: wait detail (for switch detail 1 to 4) 1681 * bits 44-46: reserved 1682 * bits 47-57: sw context id of the lrc the GT switched away from 1683 * bits 58-63: sw counter of the lrc the GT switched away from 1684 * 1685 * Xe_HP csb shuffles things around compared to TGL: 1686 * 1687 * bits 0-3: context switch detail (same possible values as TGL) 1688 * bits 4-9: engine instance 1689 * bits 10-25: sw context id of the lrc the GT switched to 1690 * bits 26-31: sw counter of the lrc the GT switched to 1691 * bit 32: semaphore wait mode (poll or signal), Only valid when 1692 * switch detail is set to "wait on semaphore" 1693 * bit 33: switched to new queue 1694 * bits 34-41: wait detail (for switch detail 1 to 4) 1695 * bits 42-57: sw context id of the lrc the GT switched away from 1696 * bits 58-63: sw counter of the lrc the GT switched away from 1697 */ 1698 static inline bool 1699 __gen12_csb_parse(bool ctx_to_valid, bool ctx_away_valid, bool new_queue, 1700 u8 switch_detail) 1701 { 1702 /* 1703 * The context switch detail is not guaranteed to be 5 when a preemption 1704 * occurs, so we can't just check for that. The check below works for 1705 * all the cases we care about, including preemptions of WAIT 1706 * instructions and lite-restore. Preempt-to-idle via the CTRL register 1707 * would require some extra handling, but we don't support that. 
1708 */ 1709 if (!ctx_away_valid || new_queue) { 1710 GEM_BUG_ON(!ctx_to_valid); 1711 return true; 1712 } 1713 1714 /* 1715 * switch detail = 5 is covered by the case above and we do not expect a 1716 * context switch on an unsuccessful wait instruction since we always 1717 * use polling mode. 1718 */ 1719 GEM_BUG_ON(switch_detail); 1720 return false; 1721 } 1722 1723 static bool xehp_csb_parse(const u64 csb) 1724 { 1725 return __gen12_csb_parse(XEHP_CSB_CTX_VALID(lower_32_bits(csb)), /* cxt to */ 1726 XEHP_CSB_CTX_VALID(upper_32_bits(csb)), /* cxt away */ 1727 upper_32_bits(csb) & XEHP_CTX_STATUS_SWITCHED_TO_NEW_QUEUE, 1728 GEN12_CTX_SWITCH_DETAIL(lower_32_bits(csb))); 1729 } 1730 1731 static bool gen12_csb_parse(const u64 csb) 1732 { 1733 return __gen12_csb_parse(GEN12_CSB_CTX_VALID(lower_32_bits(csb)), /* cxt to */ 1734 GEN12_CSB_CTX_VALID(upper_32_bits(csb)), /* cxt away */ 1735 lower_32_bits(csb) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE, 1736 GEN12_CTX_SWITCH_DETAIL(upper_32_bits(csb))); 1737 } 1738 1739 static bool gen8_csb_parse(const u64 csb) 1740 { 1741 return csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED); 1742 } 1743 1744 static noinline u64 1745 wa_csb_read(const struct intel_engine_cs *engine, u64 * const csb) 1746 { 1747 u64 entry; 1748 1749 /* 1750 * Reading from the HWSP has one particular advantage: we can detect 1751 * a stale entry. Since the write into HWSP is broken, we have no reason 1752 * to trust the HW at all, the mmio entry may equally be unordered, so 1753 * we prefer the path that is self-checking and as a last resort, 1754 * return the mmio value. 1755 * 1756 * tgl,dg1:HSDES#22011327657 1757 */ 1758 preempt_disable(); 1759 if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 10)) { 1760 int idx = csb - engine->execlists.csb_status; 1761 int status; 1762 1763 status = GEN8_EXECLISTS_STATUS_BUF; 1764 if (idx >= 6) { 1765 status = GEN11_EXECLISTS_STATUS_BUF2; 1766 idx -= 6; 1767 } 1768 status += sizeof(u64) * idx; 1769 1770 entry = intel_uncore_read64(engine->uncore, 1771 _MMIO(engine->mmio_base + status)); 1772 } 1773 preempt_enable(); 1774 1775 return entry; 1776 } 1777 1778 static u64 csb_read(const struct intel_engine_cs *engine, u64 * const csb) 1779 { 1780 u64 entry = READ_ONCE(*csb); 1781 1782 /* 1783 * Unfortunately, the GPU does not always serialise its write 1784 * of the CSB entries before its write of the CSB pointer, at least 1785 * from the perspective of the CPU, using what is known as a Global 1786 * Observation Point. We may read a new CSB tail pointer, but then 1787 * read the stale CSB entries, causing us to misinterpret the 1788 * context-switch events, and eventually declare the GPU hung. 1789 * 1790 * icl:HSDES#1806554093 1791 * tgl:HSDES#22011248461 1792 */ 1793 if (unlikely(entry == -1)) 1794 entry = wa_csb_read(engine, csb); 1795 1796 /* Consume this entry so that we can spot its future reuse. 
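 *
 * Writing -1 here means that, if the GPU has not rewritten this slot by
 * the time we next reach it, csb_read() sees the poison value and falls
 * back to wa_csb_read(), which waits briefly for the HWSP write to land
 * before resorting to the mmio copy.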
*/ 1797 WRITE_ONCE(*csb, -1); 1798 1799 /* ELSP is an implicit wmb() before the GPU wraps and overwrites csb */ 1800 return entry; 1801 } 1802 1803 static void new_timeslice(struct intel_engine_execlists *el) 1804 { 1805 /* By cancelling, we will start afresh in start_timeslice() */ 1806 cancel_timer(&el->timer); 1807 } 1808 1809 static struct i915_request ** 1810 process_csb(struct intel_engine_cs *engine, struct i915_request **inactive) 1811 { 1812 struct intel_engine_execlists * const execlists = &engine->execlists; 1813 u64 * const buf = execlists->csb_status; 1814 const u8 num_entries = execlists->csb_size; 1815 struct i915_request **prev; 1816 u8 head, tail; 1817 1818 /* 1819 * As we modify our execlists state tracking we require exclusive 1820 * access. Either we are inside the tasklet, or the tasklet is disabled 1821 * and we assume that is only inside the reset paths and so serialised. 1822 */ 1823 GEM_BUG_ON(!tasklet_is_locked(&engine->sched_engine->tasklet) && 1824 !reset_in_progress(engine)); 1825 1826 /* 1827 * Note that csb_write, csb_status may be either in HWSP or mmio. 1828 * When reading from the csb_write mmio register, we have to be 1829 * careful to only use the GEN8_CSB_WRITE_PTR portion, which is 1830 * the low 4bits. As it happens we know the next 4bits are always 1831 * zero and so we can simply masked off the low u8 of the register 1832 * and treat it identically to reading from the HWSP (without having 1833 * to use explicit shifting and masking, and probably bifurcating 1834 * the code to handle the legacy mmio read). 1835 */ 1836 head = execlists->csb_head; 1837 tail = READ_ONCE(*execlists->csb_write); 1838 if (unlikely(head == tail)) 1839 return inactive; 1840 1841 /* 1842 * We will consume all events from HW, or at least pretend to. 1843 * 1844 * The sequence of events from the HW is deterministic, and derived 1845 * from our writes to the ELSP, with a smidgen of variability for 1846 * the arrival of the asynchronous requests wrt to the inflight 1847 * execution. If the HW sends an event that does not correspond with 1848 * the one we are expecting, we have to abandon all hope as we lose 1849 * all tracking of what the engine is actually executing. We will 1850 * only detect we are out of sequence with the HW when we get an 1851 * 'impossible' event because we have already drained our own 1852 * preemption/promotion queue. If this occurs, we know that we likely 1853 * lost track of execution earlier and must unwind and restart, the 1854 * simplest way is by stop processing the event queue and force the 1855 * engine to reset. 1856 */ 1857 execlists->csb_head = tail; 1858 ENGINE_TRACE(engine, "cs-irq head=%d, tail=%d\n", head, tail); 1859 1860 /* 1861 * Hopefully paired with a wmb() in HW! 1862 * 1863 * We must complete the read of the write pointer before any reads 1864 * from the CSB, so that we do not see stale values. Without an rmb 1865 * (lfence) the HW may speculatively perform the CSB[] reads *before* 1866 * we perform the READ_ONCE(*csb_write). 1867 */ 1868 rmb(); 1869 1870 /* Remember who was last running under the timer */ 1871 prev = inactive; 1872 *prev = NULL; 1873 1874 do { 1875 bool promote; 1876 u64 csb; 1877 1878 if (++head == num_entries) 1879 head = 0; 1880 1881 /* 1882 * We are flying near dragons again. 1883 * 1884 * We hold a reference to the request in execlist_port[] 1885 * but no more than that. We are operating in softirq 1886 * context and so cannot hold any mutex or sleep. 
That 1887 * prevents us stopping the requests we are processing 1888 * in port[] from being retired simultaneously (the 1889 * breadcrumb will be complete before we see the 1890 * context-switch). As we only hold the reference to the 1891 * request, any pointer chasing underneath the request 1892 * is subject to a potential use-after-free. Thus we 1893 * store all of the bookkeeping within port[] as 1894 * required, and avoid using unguarded pointers beneath 1895 * request itself. The same applies to the atomic 1896 * status notifier. 1897 */ 1898 1899 csb = csb_read(engine, buf + head); 1900 ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n", 1901 head, upper_32_bits(csb), lower_32_bits(csb)); 1902 1903 if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) 1904 promote = xehp_csb_parse(csb); 1905 else if (GRAPHICS_VER(engine->i915) >= 12) 1906 promote = gen12_csb_parse(csb); 1907 else 1908 promote = gen8_csb_parse(csb); 1909 if (promote) { 1910 struct i915_request * const *old = execlists->active; 1911 1912 if (GEM_WARN_ON(!*execlists->pending)) { 1913 execlists->error_interrupt |= ERROR_CSB; 1914 break; 1915 } 1916 1917 ring_set_paused(engine, 0); 1918 1919 /* Point active to the new ELSP; prevent overwriting */ 1920 WRITE_ONCE(execlists->active, execlists->pending); 1921 smp_wmb(); /* notify execlists_active() */ 1922 1923 /* cancel old inflight, prepare for switch */ 1924 trace_ports(execlists, "preempted", old); 1925 while (*old) 1926 *inactive++ = *old++; 1927 1928 /* switch pending to inflight */ 1929 GEM_BUG_ON(!assert_pending_valid(execlists, "promote")); 1930 copy_ports(execlists->inflight, 1931 execlists->pending, 1932 execlists_num_ports(execlists)); 1933 smp_wmb(); /* complete the seqlock */ 1934 WRITE_ONCE(execlists->active, execlists->inflight); 1935 1936 /* XXX Magic delay for tgl */ 1937 ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR); 1938 1939 WRITE_ONCE(execlists->pending[0], NULL); 1940 } else { 1941 if (GEM_WARN_ON(!*execlists->active)) { 1942 execlists->error_interrupt |= ERROR_CSB; 1943 break; 1944 } 1945 1946 /* port0 completed, advanced to port1 */ 1947 trace_ports(execlists, "completed", execlists->active); 1948 1949 /* 1950 * We rely on the hardware being strongly 1951 * ordered, that the breadcrumb write is 1952 * coherent (visible from the CPU) before the 1953 * user interrupt is processed. One might assume 1954 * that the breadcrumb write being before the 1955 * user interrupt and the CS event for the context 1956 * switch would therefore be before the CS event 1957 * itself... 
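 * That assumption does not always appear to hold in practice, which is
 * why, when debugging is enabled, the block below dumps the ring,
 * request and context-image state if the context-switch event arrives
 * before the request reads back as complete.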
1958 */ 1959 if (GEM_SHOW_DEBUG() && 1960 !__i915_request_is_complete(*execlists->active)) { 1961 struct i915_request *rq = *execlists->active; 1962 const u32 *regs __maybe_unused = 1963 rq->context->lrc_reg_state; 1964 1965 ENGINE_TRACE(engine, 1966 "context completed before request!\n"); 1967 ENGINE_TRACE(engine, 1968 "ring:{start:0x%08x, head:%04x, tail:%04x, ctl:%08x, mode:%08x}\n", 1969 ENGINE_READ(engine, RING_START), 1970 ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR, 1971 ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR, 1972 ENGINE_READ(engine, RING_CTL), 1973 ENGINE_READ(engine, RING_MI_MODE)); 1974 ENGINE_TRACE(engine, 1975 "rq:{start:%08x, head:%04x, tail:%04x, seqno:%llx:%d, hwsp:%d}, ", 1976 i915_ggtt_offset(rq->ring->vma), 1977 rq->head, rq->tail, 1978 rq->fence.context, 1979 lower_32_bits(rq->fence.seqno), 1980 hwsp_seqno(rq)); 1981 ENGINE_TRACE(engine, 1982 "ctx:{start:%08x, head:%04x, tail:%04x}, ", 1983 regs[CTX_RING_START], 1984 regs[CTX_RING_HEAD], 1985 regs[CTX_RING_TAIL]); 1986 } 1987 1988 *inactive++ = *execlists->active++; 1989 1990 GEM_BUG_ON(execlists->active - execlists->inflight > 1991 execlists_num_ports(execlists)); 1992 } 1993 } while (head != tail); 1994 1995 /* 1996 * Gen11 has proven to fail wrt the global observation point between 1997 * entry and tail update, failing on the ordering and thus 1998 * we see an old entry in the context status buffer. 1999 * 2000 * Forcibly evict out entries for the next gpu csb update, 2001 * to increase the odds that we get fresh entries even with 2002 * non-working hardware. The cost of doing so mostly comes out in 2003 * the wash, as the hardware, working or not, will need to do the 2004 * invalidation anyway. 2005 */ 2006 invalidate_csb_entries(&buf[0], &buf[num_entries - 1]); 2007 2008 /* 2009 * We assume that any event reflects a change in context flow 2010 * and merits a fresh timeslice. We reinstall the timer after 2011 * inspecting the queue to see if we need to resubmit.
2012 */ 2013 if (*prev != *execlists->active) /* elide lite-restores */ 2014 new_timeslice(execlists); 2015 2016 return inactive; 2017 } 2018 2019 static void post_process_csb(struct i915_request **port, 2020 struct i915_request **last) 2021 { 2022 while (port != last) 2023 execlists_schedule_out(*port++); 2024 } 2025 2026 static void __execlists_hold(struct i915_request *rq) 2027 { 2028 LIST_HEAD(list); 2029 2030 do { 2031 struct i915_dependency *p; 2032 2033 if (i915_request_is_active(rq)) 2034 __i915_request_unsubmit(rq); 2035 2036 clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 2037 list_move_tail(&rq->sched.link, 2038 &rq->engine->sched_engine->hold); 2039 i915_request_set_hold(rq); 2040 RQ_TRACE(rq, "on hold\n"); 2041 2042 for_each_waiter(p, rq) { 2043 struct i915_request *w = 2044 container_of(p->waiter, typeof(*w), sched); 2045 2046 if (p->flags & I915_DEPENDENCY_WEAK) 2047 continue; 2048 2049 /* Leave semaphores spinning on the other engines */ 2050 if (w->engine != rq->engine) 2051 continue; 2052 2053 if (!i915_request_is_ready(w)) 2054 continue; 2055 2056 if (__i915_request_is_complete(w)) 2057 continue; 2058 2059 if (i915_request_on_hold(w)) 2060 continue; 2061 2062 list_move_tail(&w->sched.link, &list); 2063 } 2064 2065 rq = list_first_entry_or_null(&list, typeof(*rq), sched.link); 2066 } while (rq); 2067 } 2068 2069 static bool execlists_hold(struct intel_engine_cs *engine, 2070 struct i915_request *rq) 2071 { 2072 if (i915_request_on_hold(rq)) 2073 return false; 2074 2075 spin_lock_irq(&engine->sched_engine->lock); 2076 2077 if (__i915_request_is_complete(rq)) { /* too late! */ 2078 rq = NULL; 2079 goto unlock; 2080 } 2081 2082 /* 2083 * Transfer this request onto the hold queue to prevent it 2084 * being resubmitted to HW (and potentially completed) before we have 2085 * released it. Since we may have already submitted following 2086 * requests, we need to remove those as well. 2087 */ 2088 GEM_BUG_ON(i915_request_on_hold(rq)); 2089 GEM_BUG_ON(rq->engine != engine); 2090 __execlists_hold(rq); 2091 GEM_BUG_ON(list_empty(&engine->sched_engine->hold)); 2092 2093 unlock: 2094 spin_unlock_irq(&engine->sched_engine->lock); 2095 return rq; 2096 } 2097 2098 static bool hold_request(const struct i915_request *rq) 2099 { 2100 struct i915_dependency *p; 2101 bool result = false; 2102 2103 /* 2104 * If one of our ancestors is on hold, we must also be on hold, 2105 * otherwise we will bypass it and execute before it.
2106 */ 2107 rcu_read_lock(); 2108 for_each_signaler(p, rq) { 2109 const struct i915_request *s = 2110 container_of(p->signaler, typeof(*s), sched); 2111 2112 if (s->engine != rq->engine) 2113 continue; 2114 2115 result = i915_request_on_hold(s); 2116 if (result) 2117 break; 2118 } 2119 rcu_read_unlock(); 2120 2121 return result; 2122 } 2123 2124 static void __execlists_unhold(struct i915_request *rq) 2125 { 2126 LIST_HEAD(list); 2127 2128 do { 2129 struct i915_dependency *p; 2130 2131 RQ_TRACE(rq, "hold release\n"); 2132 2133 GEM_BUG_ON(!i915_request_on_hold(rq)); 2134 GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit)); 2135 2136 i915_request_clear_hold(rq); 2137 list_move_tail(&rq->sched.link, 2138 i915_sched_lookup_priolist(rq->engine->sched_engine, 2139 rq_prio(rq))); 2140 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 2141 2142 /* Also release any children on this engine that are ready */ 2143 for_each_waiter(p, rq) { 2144 struct i915_request *w = 2145 container_of(p->waiter, typeof(*w), sched); 2146 2147 if (p->flags & I915_DEPENDENCY_WEAK) 2148 continue; 2149 2150 if (w->engine != rq->engine) 2151 continue; 2152 2153 if (!i915_request_on_hold(w)) 2154 continue; 2155 2156 /* Check that no other parents are also on hold */ 2157 if (hold_request(w)) 2158 continue; 2159 2160 list_move_tail(&w->sched.link, &list); 2161 } 2162 2163 rq = list_first_entry_or_null(&list, typeof(*rq), sched.link); 2164 } while (rq); 2165 } 2166 2167 static void execlists_unhold(struct intel_engine_cs *engine, 2168 struct i915_request *rq) 2169 { 2170 spin_lock_irq(&engine->sched_engine->lock); 2171 2172 /* 2173 * Move this request back to the priority queue, and all of its 2174 * children and grandchildren that were suspended along with it. 2175 */ 2176 __execlists_unhold(rq); 2177 2178 if (rq_prio(rq) > engine->sched_engine->queue_priority_hint) { 2179 engine->sched_engine->queue_priority_hint = rq_prio(rq); 2180 tasklet_hi_schedule(&engine->sched_engine->tasklet); 2181 } 2182 2183 spin_unlock_irq(&engine->sched_engine->lock); 2184 } 2185 2186 struct execlists_capture { 2187 struct work_struct work; 2188 struct i915_request *rq; 2189 struct i915_gpu_coredump *error; 2190 }; 2191 2192 static void execlists_capture_work(struct work_struct *work) 2193 { 2194 struct execlists_capture *cap = container_of(work, typeof(*cap), work); 2195 const gfp_t gfp = __GFP_KSWAPD_RECLAIM | __GFP_RETRY_MAYFAIL | 2196 __GFP_NOWARN; 2197 struct intel_engine_cs *engine = cap->rq->engine; 2198 struct intel_gt_coredump *gt = cap->error->gt; 2199 struct intel_engine_capture_vma *vma; 2200 2201 /* Compress all the objects attached to the request, slow! 
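 * This runs from a workqueue, i.e. process context, so it is free to
 * sleep and allocate while the guilty request sits parked on the
 * engine's hold list (see execlists_capture() below).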
*/ 2202 vma = intel_engine_coredump_add_request(gt->engine, cap->rq, gfp); 2203 if (vma) { 2204 struct i915_vma_compress *compress = 2205 i915_vma_capture_prepare(gt); 2206 2207 intel_engine_coredump_add_vma(gt->engine, vma, compress); 2208 i915_vma_capture_finish(gt, compress); 2209 } 2210 2211 gt->simulated = gt->engine->simulated; 2212 cap->error->simulated = gt->simulated; 2213 2214 /* Publish the error state, and announce it to the world */ 2215 i915_error_state_store(cap->error); 2216 i915_gpu_coredump_put(cap->error); 2217 2218 /* Return this request and all that depend upon it for signaling */ 2219 execlists_unhold(engine, cap->rq); 2220 i915_request_put(cap->rq); 2221 2222 kfree(cap); 2223 } 2224 2225 static struct execlists_capture *capture_regs(struct intel_engine_cs *engine) 2226 { 2227 const gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN; 2228 struct execlists_capture *cap; 2229 2230 cap = kmalloc(sizeof(*cap), gfp); 2231 if (!cap) 2232 return NULL; 2233 2234 cap->error = i915_gpu_coredump_alloc(engine->i915, gfp); 2235 if (!cap->error) 2236 goto err_cap; 2237 2238 cap->error->gt = intel_gt_coredump_alloc(engine->gt, gfp); 2239 if (!cap->error->gt) 2240 goto err_gpu; 2241 2242 cap->error->gt->engine = intel_engine_coredump_alloc(engine, gfp); 2243 if (!cap->error->gt->engine) 2244 goto err_gt; 2245 2246 cap->error->gt->engine->hung = true; 2247 2248 return cap; 2249 2250 err_gt: 2251 kfree(cap->error->gt); 2252 err_gpu: 2253 kfree(cap->error); 2254 err_cap: 2255 kfree(cap); 2256 return NULL; 2257 } 2258 2259 static struct i915_request * 2260 active_context(struct intel_engine_cs *engine, u32 ccid) 2261 { 2262 const struct intel_engine_execlists * const el = &engine->execlists; 2263 struct i915_request * const *port, *rq; 2264 2265 /* 2266 * Use the most recent result from process_csb(), but just in case 2267 * we trigger an error (via interrupt) before the first CS event has 2268 * been written, peek at the next submission. 2269 */ 2270 2271 for (port = el->active; (rq = *port); port++) { 2272 if (rq->context->lrc.ccid == ccid) { 2273 ENGINE_TRACE(engine, 2274 "ccid:%x found at active:%zd\n", 2275 ccid, port - el->active); 2276 return rq; 2277 } 2278 } 2279 2280 for (port = el->pending; (rq = *port); port++) { 2281 if (rq->context->lrc.ccid == ccid) { 2282 ENGINE_TRACE(engine, 2283 "ccid:%x found at pending:%zd\n", 2284 ccid, port - el->pending); 2285 return rq; 2286 } 2287 } 2288 2289 ENGINE_TRACE(engine, "ccid:%x not found\n", ccid); 2290 return NULL; 2291 } 2292 2293 static u32 active_ccid(struct intel_engine_cs *engine) 2294 { 2295 return ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI); 2296 } 2297 2298 static void execlists_capture(struct intel_engine_cs *engine) 2299 { 2300 struct execlists_capture *cap; 2301 2302 if (!IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)) 2303 return; 2304 2305 /* 2306 * We need to _quickly_ capture the engine state before we reset. 2307 * We are inside an atomic section (softirq) here and we are delaying 2308 * the forced preemption event. 2309 */ 2310 cap = capture_regs(engine); 2311 if (!cap) 2312 return; 2313 2314 spin_lock_irq(&engine->sched_engine->lock); 2315 cap->rq = active_context(engine, active_ccid(engine)); 2316 if (cap->rq) { 2317 cap->rq = active_request(cap->rq->context->timeline, cap->rq); 2318 cap->rq = i915_request_get_rcu(cap->rq); 2319 } 2320 spin_unlock_irq(&engine->sched_engine->lock); 2321 if (!cap->rq) 2322 goto err_free; 2323 2324 /* 2325 * Remove the request from the execlists queue, and take ownership 2326 * of the request. 
We pass it to our worker who will _slowly_ compress 2327 * all the pages the _user_ requested for debugging their batch, after 2328 * which we return it to the queue for signaling. 2329 * 2330 * By removing them from the execlists queue, we also remove the 2331 * requests from being processed by __unwind_incomplete_requests() 2332 * during the intel_engine_reset(), and so they will *not* be replayed 2333 * afterwards. 2334 * 2335 * Note that because we have not yet reset the engine at this point, 2336 * it is possible for the request that we have identified as being 2337 * guilty, did in fact complete and we will then hit an arbitration 2338 * point allowing the outstanding preemption to succeed. The likelihood 2339 * of that is very low (as capturing of the engine registers should be 2340 * fast enough to run inside an irq-off atomic section!), so we will 2341 * simply hold that request accountable for being non-preemptible 2342 * long enough to force the reset. 2343 */ 2344 if (!execlists_hold(engine, cap->rq)) 2345 goto err_rq; 2346 2347 INIT_WORK(&cap->work, execlists_capture_work); 2348 schedule_work(&cap->work); 2349 return; 2350 2351 err_rq: 2352 i915_request_put(cap->rq); 2353 err_free: 2354 i915_gpu_coredump_put(cap->error); 2355 kfree(cap); 2356 } 2357 2358 static void execlists_reset(struct intel_engine_cs *engine, const char *msg) 2359 { 2360 const unsigned int bit = I915_RESET_ENGINE + engine->id; 2361 unsigned long *lock = &engine->gt->reset.flags; 2362 2363 if (!intel_has_reset_engine(engine->gt)) 2364 return; 2365 2366 if (test_and_set_bit(bit, lock)) 2367 return; 2368 2369 ENGINE_TRACE(engine, "reset for %s\n", msg); 2370 2371 /* Mark this tasklet as disabled to avoid waiting for it to complete */ 2372 tasklet_disable_nosync(&engine->sched_engine->tasklet); 2373 2374 ring_set_paused(engine, 1); /* Freeze the current request in place */ 2375 execlists_capture(engine); 2376 intel_engine_reset(engine, msg); 2377 2378 tasklet_enable(&engine->sched_engine->tasklet); 2379 clear_and_wake_up_bit(bit, lock); 2380 } 2381 2382 static bool preempt_timeout(const struct intel_engine_cs *const engine) 2383 { 2384 const struct timer_list *t = &engine->execlists.preempt; 2385 2386 if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT) 2387 return false; 2388 2389 if (!timer_expired(t)) 2390 return false; 2391 2392 return engine->execlists.pending[0]; 2393 } 2394 2395 /* 2396 * Check the unread Context Status Buffers and manage the submission of new 2397 * contexts to the ELSP accordingly. 2398 */ 2399 static void execlists_submission_tasklet(struct tasklet_struct *t) 2400 { 2401 struct i915_sched_engine *sched_engine = 2402 from_tasklet(sched_engine, t, tasklet); 2403 struct intel_engine_cs * const engine = sched_engine->private_data; 2404 struct i915_request *post[2 * EXECLIST_MAX_PORTS]; 2405 struct i915_request **inactive; 2406 2407 rcu_read_lock(); 2408 inactive = process_csb(engine, post); 2409 GEM_BUG_ON(inactive - post > ARRAY_SIZE(post)); 2410 2411 if (unlikely(preempt_timeout(engine))) { 2412 cancel_timer(&engine->execlists.preempt); 2413 engine->execlists.error_interrupt |= ERROR_PREEMPT; 2414 } 2415 2416 if (unlikely(READ_ONCE(engine->execlists.error_interrupt))) { 2417 const char *msg; 2418 2419 /* Generate the error message in priority wrt to the user! 
*/ 2420 if (engine->execlists.error_interrupt & GENMASK(15, 0)) 2421 msg = "CS error"; /* thrown by a user payload */ 2422 else if (engine->execlists.error_interrupt & ERROR_CSB) 2423 msg = "invalid CSB event"; 2424 else if (engine->execlists.error_interrupt & ERROR_PREEMPT) 2425 msg = "preemption time out"; 2426 else 2427 msg = "internal error"; 2428 2429 engine->execlists.error_interrupt = 0; 2430 execlists_reset(engine, msg); 2431 } 2432 2433 if (!engine->execlists.pending[0]) { 2434 execlists_dequeue_irq(engine); 2435 start_timeslice(engine); 2436 } 2437 2438 post_process_csb(post, inactive); 2439 rcu_read_unlock(); 2440 } 2441 2442 static void execlists_irq_handler(struct intel_engine_cs *engine, u16 iir) 2443 { 2444 bool tasklet = false; 2445 2446 if (unlikely(iir & GT_CS_MASTER_ERROR_INTERRUPT)) { 2447 u32 eir; 2448 2449 /* Upper 16b are the enabling mask, rsvd for internal errors */ 2450 eir = ENGINE_READ(engine, RING_EIR) & GENMASK(15, 0); 2451 ENGINE_TRACE(engine, "CS error: %x\n", eir); 2452 2453 /* Disable the error interrupt until after the reset */ 2454 if (likely(eir)) { 2455 ENGINE_WRITE(engine, RING_EMR, ~0u); 2456 ENGINE_WRITE(engine, RING_EIR, eir); 2457 WRITE_ONCE(engine->execlists.error_interrupt, eir); 2458 tasklet = true; 2459 } 2460 } 2461 2462 if (iir & GT_WAIT_SEMAPHORE_INTERRUPT) { 2463 WRITE_ONCE(engine->execlists.yield, 2464 ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI)); 2465 ENGINE_TRACE(engine, "semaphore yield: %08x\n", 2466 engine->execlists.yield); 2467 if (del_timer(&engine->execlists.timer)) 2468 tasklet = true; 2469 } 2470 2471 if (iir & GT_CONTEXT_SWITCH_INTERRUPT) 2472 tasklet = true; 2473 2474 if (iir & GT_RENDER_USER_INTERRUPT) 2475 intel_engine_signal_breadcrumbs(engine); 2476 2477 if (tasklet) 2478 tasklet_hi_schedule(&engine->sched_engine->tasklet); 2479 } 2480 2481 static void __execlists_kick(struct intel_engine_execlists *execlists) 2482 { 2483 struct intel_engine_cs *engine = 2484 container_of(execlists, typeof(*engine), execlists); 2485 2486 /* Kick the tasklet for some interrupt coalescing and reset handling */ 2487 tasklet_hi_schedule(&engine->sched_engine->tasklet); 2488 } 2489 2490 #define execlists_kick(t, member) \ 2491 __execlists_kick(container_of(t, struct intel_engine_execlists, member)) 2492 2493 static void execlists_timeslice(struct timer_list *timer) 2494 { 2495 execlists_kick(timer, timer); 2496 } 2497 2498 static void execlists_preempt(struct timer_list *timer) 2499 { 2500 execlists_kick(timer, preempt); 2501 } 2502 2503 static void queue_request(struct intel_engine_cs *engine, 2504 struct i915_request *rq) 2505 { 2506 GEM_BUG_ON(!list_empty(&rq->sched.link)); 2507 list_add_tail(&rq->sched.link, 2508 i915_sched_lookup_priolist(engine->sched_engine, 2509 rq_prio(rq))); 2510 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 2511 } 2512 2513 static bool submit_queue(struct intel_engine_cs *engine, 2514 const struct i915_request *rq) 2515 { 2516 struct i915_sched_engine *sched_engine = engine->sched_engine; 2517 2518 if (rq_prio(rq) <= sched_engine->queue_priority_hint) 2519 return false; 2520 2521 sched_engine->queue_priority_hint = rq_prio(rq); 2522 return true; 2523 } 2524 2525 static bool ancestor_on_hold(const struct intel_engine_cs *engine, 2526 const struct i915_request *rq) 2527 { 2528 GEM_BUG_ON(i915_request_on_hold(rq)); 2529 return !list_empty(&engine->sched_engine->hold) && hold_request(rq); 2530 } 2531 2532 static void execlists_submit_request(struct i915_request *request) 2533 { 2534 struct intel_engine_cs 
*engine = request->engine; 2535 unsigned long flags; 2536 2537 /* Will be called from irq-context when using foreign fences. */ 2538 spin_lock_irqsave(&engine->sched_engine->lock, flags); 2539 2540 if (unlikely(ancestor_on_hold(engine, request))) { 2541 RQ_TRACE(request, "ancestor on hold\n"); 2542 list_add_tail(&request->sched.link, 2543 &engine->sched_engine->hold); 2544 i915_request_set_hold(request); 2545 } else { 2546 queue_request(engine, request); 2547 2548 GEM_BUG_ON(i915_sched_engine_is_empty(engine->sched_engine)); 2549 GEM_BUG_ON(list_empty(&request->sched.link)); 2550 2551 if (submit_queue(engine, request)) 2552 __execlists_kick(&engine->execlists); 2553 } 2554 2555 spin_unlock_irqrestore(&engine->sched_engine->lock, flags); 2556 } 2557 2558 static int 2559 __execlists_context_pre_pin(struct intel_context *ce, 2560 struct intel_engine_cs *engine, 2561 struct i915_gem_ww_ctx *ww, void **vaddr) 2562 { 2563 int err; 2564 2565 err = lrc_pre_pin(ce, engine, ww, vaddr); 2566 if (err) 2567 return err; 2568 2569 if (!__test_and_set_bit(CONTEXT_INIT_BIT, &ce->flags)) { 2570 lrc_init_state(ce, engine, *vaddr); 2571 2572 __i915_gem_object_flush_map(ce->state->obj, 0, engine->context_size); 2573 } 2574 2575 return 0; 2576 } 2577 2578 static int execlists_context_pre_pin(struct intel_context *ce, 2579 struct i915_gem_ww_ctx *ww, 2580 void **vaddr) 2581 { 2582 return __execlists_context_pre_pin(ce, ce->engine, ww, vaddr); 2583 } 2584 2585 static int execlists_context_pin(struct intel_context *ce, void *vaddr) 2586 { 2587 return lrc_pin(ce, ce->engine, vaddr); 2588 } 2589 2590 static int execlists_context_alloc(struct intel_context *ce) 2591 { 2592 return lrc_alloc(ce, ce->engine); 2593 } 2594 2595 static void execlists_context_cancel_request(struct intel_context *ce, 2596 struct i915_request *rq) 2597 { 2598 struct intel_engine_cs *engine = NULL; 2599 2600 i915_request_active_engine(rq, &engine); 2601 2602 if (engine && intel_engine_pulse(engine)) 2603 intel_gt_handle_error(engine->gt, engine->mask, 0, 2604 "request cancellation by %s", 2605 current->comm); 2606 } 2607 2608 static struct intel_context * 2609 execlists_create_parallel(struct intel_engine_cs **engines, 2610 unsigned int num_siblings, 2611 unsigned int width) 2612 { 2613 struct intel_context *parent = NULL, *ce, *err; 2614 int i; 2615 2616 GEM_BUG_ON(num_siblings != 1); 2617 2618 for (i = 0; i < width; ++i) { 2619 ce = intel_context_create(engines[i]); 2620 if (IS_ERR(ce)) { 2621 err = ce; 2622 goto unwind; 2623 } 2624 2625 if (i == 0) 2626 parent = ce; 2627 else 2628 intel_context_bind_parent_child(parent, ce); 2629 } 2630 2631 parent->parallel.fence_context = dma_fence_context_alloc(1); 2632 2633 intel_context_set_nopreempt(parent); 2634 for_each_child(parent, ce) 2635 intel_context_set_nopreempt(ce); 2636 2637 return parent; 2638 2639 unwind: 2640 if (parent) 2641 intel_context_put(parent); 2642 return err; 2643 } 2644 2645 static const struct intel_context_ops execlists_context_ops = { 2646 .flags = COPS_HAS_INFLIGHT, 2647 2648 .alloc = execlists_context_alloc, 2649 2650 .cancel_request = execlists_context_cancel_request, 2651 2652 .pre_pin = execlists_context_pre_pin, 2653 .pin = execlists_context_pin, 2654 .unpin = lrc_unpin, 2655 .post_unpin = lrc_post_unpin, 2656 2657 .enter = intel_context_enter_engine, 2658 .exit = intel_context_exit_engine, 2659 2660 .reset = lrc_reset, 2661 .destroy = lrc_destroy, 2662 2663 .create_parallel = execlists_create_parallel, 2664 .create_virtual = execlists_create_virtual, 2665 }; 2666 
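/*
 * For a 32b (3-level) ppgtt, rather than relying on the PDP registers in
 * the context image, each request reloads them from the ring: arbitration
 * is disabled, residual operations are flushed, and the pointers are
 * written with a posted MI_LOAD_REGISTER_IMM before arbitration is
 * re-enabled (see execlists_request_alloc()).
 */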
2667 static int emit_pdps(struct i915_request *rq) 2668 { 2669 const struct intel_engine_cs * const engine = rq->engine; 2670 struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(rq->context->vm); 2671 int err, i; 2672 u32 *cs; 2673 2674 GEM_BUG_ON(intel_vgpu_active(rq->engine->i915)); 2675 2676 /* 2677 * Beware ye of the dragons, this sequence is magic! 2678 * 2679 * Small changes to this sequence can cause anything from 2680 * GPU hangs to forcewake errors and machine lockups! 2681 */ 2682 2683 cs = intel_ring_begin(rq, 2); 2684 if (IS_ERR(cs)) 2685 return PTR_ERR(cs); 2686 2687 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 2688 *cs++ = MI_NOOP; 2689 intel_ring_advance(rq, cs); 2690 2691 /* Flush any residual operations from the context load */ 2692 err = engine->emit_flush(rq, EMIT_FLUSH); 2693 if (err) 2694 return err; 2695 2696 /* Magic required to prevent forcewake errors! */ 2697 err = engine->emit_flush(rq, EMIT_INVALIDATE); 2698 if (err) 2699 return err; 2700 2701 cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2); 2702 if (IS_ERR(cs)) 2703 return PTR_ERR(cs); 2704 2705 /* Ensure the LRI have landed before we invalidate & continue */ 2706 *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED; 2707 for (i = GEN8_3LVL_PDPES; i--; ) { 2708 const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i); 2709 u32 base = engine->mmio_base; 2710 2711 *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i)); 2712 *cs++ = upper_32_bits(pd_daddr); 2713 *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i)); 2714 *cs++ = lower_32_bits(pd_daddr); 2715 } 2716 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 2717 intel_ring_advance(rq, cs); 2718 2721 return 0; 2722 } 2723 2724 static int execlists_request_alloc(struct i915_request *request) 2725 { 2726 int ret; 2727 2728 GEM_BUG_ON(!intel_context_is_pinned(request->context)); 2729 2730 /* 2731 * Flush enough space to reduce the likelihood of waiting after 2732 * we start building the request - in which case we will just 2733 * have to repeat work. 2734 */ 2735 request->reserved_space += EXECLISTS_REQUEST_SIZE; 2736 2737 /* 2738 * Note that after this point, we have committed to using 2739 * this request as it is being used to both track the 2740 * state of engine initialisation and liveness of the 2741 * golden renderstate above. Think twice before you try 2742 * to cancel/unwind this request now. 2743 */ 2744 2745 if (!i915_vm_is_4lvl(request->context->vm)) { 2746 ret = emit_pdps(request); 2747 if (ret) 2748 return ret; 2749 } 2750 2751 /* Unconditionally invalidate GPU caches and TLBs. */ 2752 ret = request->engine->emit_flush(request, EMIT_INVALIDATE); 2753 if (ret) 2754 return ret; 2755 2756 request->reserved_space -= EXECLISTS_REQUEST_SIZE; 2757 return 0; 2758 } 2759 2760 static void reset_csb_pointers(struct intel_engine_cs *engine) 2761 { 2762 struct intel_engine_execlists * const execlists = &engine->execlists; 2763 const unsigned int reset_value = execlists->csb_size - 1; 2764 2765 ring_set_paused(engine, 0); 2766 2767 /* 2768 * Sometimes Icelake forgets to reset its pointers on a GPU reset. 2769 * Bludgeon them with a mmio update to be sure. 2770 */ 2771 ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR, 2772 0xffff << 16 | reset_value << 8 | reset_value); 2773 ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR); 2774 2775 /* 2776 * After a reset, the HW starts writing into CSB entry [0].
We 2777 * therefore have to set our HEAD pointer back one entry so that 2778 * the *first* entry we check is entry 0. To complicate this further, 2779 * as we don't wait for the first interrupt after reset, we have to 2780 * fake the HW write to point back to the last entry so that our 2781 * inline comparison of our cached head position against the last HW 2782 * write works even before the first interrupt. 2783 */ 2784 execlists->csb_head = reset_value; 2785 WRITE_ONCE(*execlists->csb_write, reset_value); 2786 wmb(); /* Make sure this is visible to HW (paranoia?) */ 2787 2788 /* Check that the GPU does indeed update the CSB entries! */ 2789 memset(execlists->csb_status, -1, (reset_value + 1) * sizeof(u64)); 2790 invalidate_csb_entries(&execlists->csb_status[0], 2791 &execlists->csb_status[reset_value]); 2792 2793 /* Once more for luck and our trusty paranoia */ 2794 ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR, 2795 0xffff << 16 | reset_value << 8 | reset_value); 2796 ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR); 2797 2798 GEM_BUG_ON(READ_ONCE(*execlists->csb_write) != reset_value); 2799 } 2800 2801 static void sanitize_hwsp(struct intel_engine_cs *engine) 2802 { 2803 struct intel_timeline *tl; 2804 2805 list_for_each_entry(tl, &engine->status_page.timelines, engine_link) 2806 intel_timeline_reset_seqno(tl); 2807 } 2808 2809 static void execlists_sanitize(struct intel_engine_cs *engine) 2810 { 2811 GEM_BUG_ON(execlists_active(&engine->execlists)); 2812 2813 /* 2814 * Poison residual state on resume, in case the suspend didn't! 2815 * 2816 * We have to assume that, across suspend/resume (or other loss 2817 * of control), the contents of our pinned buffers have been 2818 * lost, replaced by garbage. Since this doesn't always happen, 2819 * let's poison such state so that we more quickly spot when 2820 * we falsely assume it has been preserved. 2821 */ 2822 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) 2823 memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE); 2824 2825 reset_csb_pointers(engine); 2826 2827 /* 2828 * The kernel_context HWSP is stored in the status_page. As above, 2829 * that may be lost on resume/initialisation, and so we need to 2830 * reset the value in the HWSP. 2831 */ 2832 sanitize_hwsp(engine); 2833 2834 /* And scrub the dirty cachelines for the HWSP */ 2835 clflush_cache_range(engine->status_page.addr, PAGE_SIZE); 2836 2837 intel_engine_reset_pinned_contexts(engine); 2838 } 2839 2840 static void enable_error_interrupt(struct intel_engine_cs *engine) 2841 { 2842 u32 status; 2843 2844 engine->execlists.error_interrupt = 0; 2845 ENGINE_WRITE(engine, RING_EMR, ~0u); 2846 ENGINE_WRITE(engine, RING_EIR, ~0u); /* clear all existing errors */ 2847 2848 status = ENGINE_READ(engine, RING_ESR); 2849 if (unlikely(status)) { 2850 drm_err(&engine->i915->drm, 2851 "engine '%s' resumed still in error: %08x\n", 2852 engine->name, status); 2853 __intel_gt_reset(engine->gt, engine->mask); 2854 } 2855 2856 /* 2857 * On current gen8+, we have 2 signals to play with 2858 * 2859 * - I915_ERROR_INSTRUCTION (bit 0) 2860 * 2861 * Generate an error if the command parser encounters an invalid 2862 * instruction 2863 * 2864 * This is a fatal error. 2865 * 2866 * - CP_PRIV (bit 2) 2867 * 2868 * Generate an error on privilege violation (where the CP replaces 2869 * the instruction with a no-op). This also fires for writes into 2870 * read-only scratch pages. 2871 * 2872 * This is a non-fatal error, parsing continues.
2873 * 2874 * * there are a few others defined for odd HW that we do not use 2875 * 2876 * Since CP_PRIV fires for cases where we have chosen to ignore the 2877 * error (as the HW is validating and suppressing the mistakes), we 2878 * only unmask the instruction error bit. 2879 */ 2880 ENGINE_WRITE(engine, RING_EMR, ~I915_ERROR_INSTRUCTION); 2881 } 2882 2883 static void enable_execlists(struct intel_engine_cs *engine) 2884 { 2885 u32 mode; 2886 2887 assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL); 2888 2889 intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */ 2890 2891 if (GRAPHICS_VER(engine->i915) >= 11) 2892 mode = _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE); 2893 else 2894 mode = _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE); 2895 ENGINE_WRITE_FW(engine, RING_MODE_GEN7, mode); 2896 2897 ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING)); 2898 2899 ENGINE_WRITE_FW(engine, 2900 RING_HWS_PGA, 2901 i915_ggtt_offset(engine->status_page.vma)); 2902 ENGINE_POSTING_READ(engine, RING_HWS_PGA); 2903 2904 enable_error_interrupt(engine); 2905 } 2906 2907 static int execlists_resume(struct intel_engine_cs *engine) 2908 { 2909 intel_mocs_init_engine(engine); 2910 intel_breadcrumbs_reset(engine->breadcrumbs); 2911 2912 enable_execlists(engine); 2913 2914 if (engine->class == RENDER_CLASS) 2915 xehp_enable_ccs_engines(engine); 2916 2917 return 0; 2918 } 2919 2920 static void execlists_reset_prepare(struct intel_engine_cs *engine) 2921 { 2922 ENGINE_TRACE(engine, "depth<-%d\n", 2923 atomic_read(&engine->sched_engine->tasklet.count)); 2924 2925 /* 2926 * Prevent request submission to the hardware until we have 2927 * completed the reset in i915_gem_reset_finish(). If a request 2928 * is completed by one engine, it may then queue a request 2929 * to a second via its execlists->tasklet *just* as we are 2930 * calling engine->resume() and also writing the ELSP. 2931 * Turning off the execlists->tasklet until the reset is over 2932 * prevents the race. 2933 */ 2934 __tasklet_disable_sync_once(&engine->sched_engine->tasklet); 2935 GEM_BUG_ON(!reset_in_progress(engine)); 2936 2937 /* 2938 * We stop engines, otherwise we might get failed reset and a 2939 * dead gpu (on elk). Also as modern gpu as kbl can suffer 2940 * from system hang if batchbuffer is progressing when 2941 * the reset is issued, regardless of READY_TO_RESET ack. 2942 * Thus assume it is best to stop engines on all gens 2943 * where we have a gpu reset. 
2944 * 2945 * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES) 2946 * 2947 * FIXME: Wa for more modern gens needs to be validated 2948 */ 2949 ring_set_paused(engine, 1); 2950 intel_engine_stop_cs(engine); 2951 2952 engine->execlists.reset_ccid = active_ccid(engine); 2953 } 2954 2955 static struct i915_request ** 2956 reset_csb(struct intel_engine_cs *engine, struct i915_request **inactive) 2957 { 2958 struct intel_engine_execlists * const execlists = &engine->execlists; 2959 2960 mb(); /* paranoia: read the CSB pointers from after the reset */ 2961 clflush(execlists->csb_write); 2962 mb(); 2963 2964 inactive = process_csb(engine, inactive); /* drain preemption events */ 2965 2966 /* Following the reset, we need to reload the CSB read/write pointers */ 2967 reset_csb_pointers(engine); 2968 2969 return inactive; 2970 } 2971 2972 static void 2973 execlists_reset_active(struct intel_engine_cs *engine, bool stalled) 2974 { 2975 struct intel_context *ce; 2976 struct i915_request *rq; 2977 u32 head; 2978 2979 /* 2980 * Save the currently executing context, even if we completed 2981 * its request, it was still running at the time of the 2982 * reset and will have been clobbered. 2983 */ 2984 rq = active_context(engine, engine->execlists.reset_ccid); 2985 if (!rq) 2986 return; 2987 2988 ce = rq->context; 2989 GEM_BUG_ON(!i915_vma_is_pinned(ce->state)); 2990 2991 if (__i915_request_is_complete(rq)) { 2992 /* Idle context; tidy up the ring so we can restart afresh */ 2993 head = intel_ring_wrap(ce->ring, rq->tail); 2994 goto out_replay; 2995 } 2996 2997 /* We still have requests in-flight; the engine should be active */ 2998 GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); 2999 3000 /* Context has requests still in-flight; it should not be idle! */ 3001 GEM_BUG_ON(i915_active_is_idle(&ce->active)); 3002 3003 rq = active_request(ce->timeline, rq); 3004 head = intel_ring_wrap(ce->ring, rq->head); 3005 GEM_BUG_ON(head == ce->ring->tail); 3006 3007 /* 3008 * If this request hasn't started yet, e.g. it is waiting on a 3009 * semaphore, we need to avoid skipping the request or else we 3010 * break the signaling chain. However, if the context is corrupt 3011 * the request will not restart and we will be stuck with a wedged 3012 * device. It is quite often the case that if we issue a reset 3013 * while the GPU is loading the context image, that the context 3014 * image becomes corrupt. 3015 * 3016 * Otherwise, if we have not started yet, the request should replay 3017 * perfectly and we do not need to flag the result as being erroneous. 3018 */ 3019 if (!__i915_request_has_started(rq)) 3020 goto out_replay; 3021 3022 /* 3023 * If the request was innocent, we leave the request in the ELSP 3024 * and will try to replay it on restarting. The context image may 3025 * have been corrupted by the reset, in which case we may have 3026 * to service a new GPU hang, but more likely we can continue on 3027 * without impact. 3028 * 3029 * If the request was guilty, we presume the context is corrupt 3030 * and have to at least restore the RING register in the context 3031 * image back to the expected values to skip over the guilty request. 3032 */ 3033 __i915_request_reset(rq, stalled); 3034 3035 /* 3036 * We want a simple context + ring to execute the breadcrumb update. 3037 * We cannot rely on the context being intact across the GPU hang, 3038 * so clear it and rebuild just what we need for the breadcrumb. 
3039 * All pending requests for this context will be zapped, and any 3040 * future request will be after userspace has had the opportunity 3041 * to recreate its own state. 3042 */ 3043 out_replay: 3044 ENGINE_TRACE(engine, "replay {head:%04x, tail:%04x}\n", 3045 head, ce->ring->tail); 3046 lrc_reset_regs(ce, engine); 3047 ce->lrc.lrca = lrc_update_regs(ce, engine, head); 3048 } 3049 3050 static void execlists_reset_csb(struct intel_engine_cs *engine, bool stalled) 3051 { 3052 struct intel_engine_execlists * const execlists = &engine->execlists; 3053 struct i915_request *post[2 * EXECLIST_MAX_PORTS]; 3054 struct i915_request **inactive; 3055 3056 rcu_read_lock(); 3057 inactive = reset_csb(engine, post); 3058 3059 execlists_reset_active(engine, true); 3060 3061 inactive = cancel_port_requests(execlists, inactive); 3062 post_process_csb(post, inactive); 3063 rcu_read_unlock(); 3064 } 3065 3066 static void execlists_reset_rewind(struct intel_engine_cs *engine, bool stalled) 3067 { 3068 unsigned long flags; 3069 3070 ENGINE_TRACE(engine, "\n"); 3071 3072 /* Process the csb, find the guilty context and throw away */ 3073 execlists_reset_csb(engine, stalled); 3074 3075 /* Push back any incomplete requests for replay after the reset. */ 3076 rcu_read_lock(); 3077 spin_lock_irqsave(&engine->sched_engine->lock, flags); 3078 __unwind_incomplete_requests(engine); 3079 spin_unlock_irqrestore(&engine->sched_engine->lock, flags); 3080 rcu_read_unlock(); 3081 } 3082 3083 static void nop_submission_tasklet(struct tasklet_struct *t) 3084 { 3085 struct i915_sched_engine *sched_engine = 3086 from_tasklet(sched_engine, t, tasklet); 3087 struct intel_engine_cs * const engine = sched_engine->private_data; 3088 3089 /* The driver is wedged; don't process any more events. */ 3090 WRITE_ONCE(engine->sched_engine->queue_priority_hint, INT_MIN); 3091 } 3092 3093 static void execlists_reset_cancel(struct intel_engine_cs *engine) 3094 { 3095 struct intel_engine_execlists * const execlists = &engine->execlists; 3096 struct i915_sched_engine * const sched_engine = engine->sched_engine; 3097 struct i915_request *rq, *rn; 3098 struct rb_node *rb; 3099 unsigned long flags; 3100 3101 ENGINE_TRACE(engine, "\n"); 3102 3103 /* 3104 * Before we call engine->cancel_requests(), we should have exclusive 3105 * access to the submission state. This is arranged for us by the 3106 * caller disabling the interrupt generation, the tasklet and other 3107 * threads that may then access the same state, giving us a free hand 3108 * to reset state. However, we still need to let lockdep be aware that 3109 * we know this state may be accessed in hardirq context, so we 3110 * disable the irq around this manipulation and we want to keep 3111 * the spinlock focused on its duties and not accidentally conflate 3112 * coverage to the submission's irq state. (Similarly, although we 3113 * shouldn't need to disable irq around the manipulation of the 3114 * submission's irq state, we also wish to remind ourselves that 3115 * it is irq state.) 3116 */ 3117 execlists_reset_csb(engine, true); 3118 3119 rcu_read_lock(); 3120 spin_lock_irqsave(&engine->sched_engine->lock, flags); 3121 3122 /* Mark all executing requests as skipped. */ 3123 list_for_each_entry(rq, &engine->sched_engine->requests, sched.link) 3124 i915_request_put(i915_request_mark_eio(rq)); 3125 intel_engine_signal_breadcrumbs(engine); 3126 3127 /* Flush the queued requests to the timeline list (for retiring). 
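 * Each queued request is marked with -EIO and then submitted, so that
 * it moves onto the engine timeline and can be retired as an error.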
*/ 3128 while ((rb = rb_first_cached(&sched_engine->queue))) { 3129 struct i915_priolist *p = to_priolist(rb); 3130 3131 priolist_for_each_request_consume(rq, rn, p) { 3132 if (i915_request_mark_eio(rq)) { 3133 __i915_request_submit(rq); 3134 i915_request_put(rq); 3135 } 3136 } 3137 3138 rb_erase_cached(&p->node, &sched_engine->queue); 3139 i915_priolist_free(p); 3140 } 3141 3142 /* On-hold requests will be flushed to timeline upon their release */ 3143 list_for_each_entry(rq, &sched_engine->hold, sched.link) 3144 i915_request_put(i915_request_mark_eio(rq)); 3145 3146 /* Cancel all attached virtual engines */ 3147 while ((rb = rb_first_cached(&execlists->virtual))) { 3148 struct virtual_engine *ve = 3149 rb_entry(rb, typeof(*ve), nodes[engine->id].rb); 3150 3151 rb_erase_cached(rb, &execlists->virtual); 3152 RB_CLEAR_NODE(rb); 3153 3154 spin_lock(&ve->base.sched_engine->lock); 3155 rq = fetch_and_zero(&ve->request); 3156 if (rq) { 3157 if (i915_request_mark_eio(rq)) { 3158 rq->engine = engine; 3159 __i915_request_submit(rq); 3160 i915_request_put(rq); 3161 } 3162 i915_request_put(rq); 3163 3164 ve->base.sched_engine->queue_priority_hint = INT_MIN; 3165 } 3166 spin_unlock(&ve->base.sched_engine->lock); 3167 } 3168 3169 /* Remaining _unready_ requests will be nop'ed when submitted */ 3170 3171 sched_engine->queue_priority_hint = INT_MIN; 3172 sched_engine->queue = RB_ROOT_CACHED; 3173 3174 GEM_BUG_ON(__tasklet_is_enabled(&engine->sched_engine->tasklet)); 3175 engine->sched_engine->tasklet.callback = nop_submission_tasklet; 3176 3177 spin_unlock_irqrestore(&engine->sched_engine->lock, flags); 3178 rcu_read_unlock(); 3179 } 3180 3181 static void execlists_reset_finish(struct intel_engine_cs *engine) 3182 { 3183 struct intel_engine_execlists * const execlists = &engine->execlists; 3184 3185 /* 3186 * After a GPU reset, we may have requests to replay. Do so now while 3187 * we still have the forcewake to be sure that the GPU is not allowed 3188 * to sleep before we restart and reload a context. 3189 * 3190 * If the GPU reset fails, the engine may still be alive with requests 3191 * inflight. We expect those to complete, or for the device to be 3192 * reset as the next level of recovery, and as a final resort we 3193 * will declare the device wedged. 3194 */ 3195 GEM_BUG_ON(!reset_in_progress(engine)); 3196 3197 /* And kick in case we missed a new request submission. 
*/ 3198 if (__tasklet_enable(&engine->sched_engine->tasklet)) 3199 __execlists_kick(execlists); 3200 3201 ENGINE_TRACE(engine, "depth->%d\n", 3202 atomic_read(&engine->sched_engine->tasklet.count)); 3203 } 3204 3205 static void gen8_logical_ring_enable_irq(struct intel_engine_cs *engine) 3206 { 3207 ENGINE_WRITE(engine, RING_IMR, 3208 ~(engine->irq_enable_mask | engine->irq_keep_mask)); 3209 ENGINE_POSTING_READ(engine, RING_IMR); 3210 } 3211 3212 static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine) 3213 { 3214 ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask); 3215 } 3216 3217 static void execlists_park(struct intel_engine_cs *engine) 3218 { 3219 cancel_timer(&engine->execlists.timer); 3220 cancel_timer(&engine->execlists.preempt); 3221 } 3222 3223 static void add_to_engine(struct i915_request *rq) 3224 { 3225 lockdep_assert_held(&rq->engine->sched_engine->lock); 3226 list_move_tail(&rq->sched.link, &rq->engine->sched_engine->requests); 3227 } 3228 3229 static void remove_from_engine(struct i915_request *rq) 3230 { 3231 struct intel_engine_cs *engine, *locked; 3232 3233 /* 3234 * Virtual engines complicate acquiring the engine timeline lock, 3235 * as their rq->engine pointer is not stable until under that 3236 * engine lock. The simple ploy we use is to take the lock then 3237 * check that the rq still belongs to the newly locked engine. 3238 */ 3239 locked = READ_ONCE(rq->engine); 3240 spin_lock_irq(&locked->sched_engine->lock); 3241 while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) { 3242 spin_unlock(&locked->sched_engine->lock); 3243 spin_lock(&engine->sched_engine->lock); 3244 locked = engine; 3245 } 3246 list_del_init(&rq->sched.link); 3247 3248 clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 3249 clear_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags); 3250 3251 /* Prevent further __await_execution() registering a cb, then flush */ 3252 set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); 3253 3254 spin_unlock_irq(&locked->sched_engine->lock); 3255 3256 i915_request_notify_execute_cb_imm(rq); 3257 } 3258 3259 static bool can_preempt(struct intel_engine_cs *engine) 3260 { 3261 if (GRAPHICS_VER(engine->i915) > 8) 3262 return true; 3263 3264 /* GPGPU on bdw requires extra w/a; not implemented */ 3265 return engine->class != RENDER_CLASS; 3266 } 3267 3268 static void kick_execlists(const struct i915_request *rq, int prio) 3269 { 3270 struct intel_engine_cs *engine = rq->engine; 3271 struct i915_sched_engine *sched_engine = engine->sched_engine; 3272 const struct i915_request *inflight; 3273 3274 /* 3275 * We only need to kick the tasklet once for the high priority 3276 * new context we add into the queue. 3277 */ 3278 if (prio <= sched_engine->queue_priority_hint) 3279 return; 3280 3281 rcu_read_lock(); 3282 3283 /* Nothing currently active? We're overdue for a submission! */ 3284 inflight = execlists_active(&engine->execlists); 3285 if (!inflight) 3286 goto unlock; 3287 3288 /* 3289 * If we are already the currently executing context, don't 3290 * bother evaluating if we should preempt ourselves. 
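 * A request cannot usefully preempt its own context; it will simply be
 * executed in turn on the already-running ring (at most causing a
 * lite-restore when it is next submitted).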
3291 */ 3292 if (inflight->context == rq->context) 3293 goto unlock; 3294 3295 ENGINE_TRACE(engine, 3296 "bumping queue-priority-hint:%d for rq:%llx:%lld, inflight:%llx:%lld prio %d\n", 3297 prio, 3298 rq->fence.context, rq->fence.seqno, 3299 inflight->fence.context, inflight->fence.seqno, 3300 inflight->sched.attr.priority); 3301 3302 sched_engine->queue_priority_hint = prio; 3303 3304 /* 3305 * Allow preemption of low -> normal -> high, but we do 3306 * not allow low priority tasks to preempt other low priority 3307 * tasks under the impression that latency for low priority 3308 * tasks does not matter (as much as background throughput), 3309 * so kiss. 3310 */ 3311 if (prio >= max(I915_PRIORITY_NORMAL, rq_prio(inflight))) 3312 tasklet_hi_schedule(&sched_engine->tasklet); 3313 3314 unlock: 3315 rcu_read_unlock(); 3316 } 3317 3318 static void execlists_set_default_submission(struct intel_engine_cs *engine) 3319 { 3320 engine->submit_request = execlists_submit_request; 3321 engine->sched_engine->schedule = i915_schedule; 3322 engine->sched_engine->kick_backend = kick_execlists; 3323 engine->sched_engine->tasklet.callback = execlists_submission_tasklet; 3324 } 3325 3326 static void execlists_shutdown(struct intel_engine_cs *engine) 3327 { 3328 /* Synchronise with residual timers and any softirq they raise */ 3329 del_timer_sync(&engine->execlists.timer); 3330 del_timer_sync(&engine->execlists.preempt); 3331 tasklet_kill(&engine->sched_engine->tasklet); 3332 } 3333 3334 static void execlists_release(struct intel_engine_cs *engine) 3335 { 3336 engine->sanitize = NULL; /* no longer in control, nothing to sanitize */ 3337 3338 execlists_shutdown(engine); 3339 3340 intel_engine_cleanup_common(engine); 3341 lrc_fini_wa_ctx(engine); 3342 } 3343 3344 static ktime_t __execlists_engine_busyness(struct intel_engine_cs *engine, 3345 ktime_t *now) 3346 { 3347 struct intel_engine_execlists_stats *stats = &engine->stats.execlists; 3348 ktime_t total = stats->total; 3349 3350 /* 3351 * If the engine is executing something at the moment 3352 * add it to the total. 3353 */ 3354 *now = ktime_get(); 3355 if (READ_ONCE(stats->active)) 3356 total = ktime_add(total, ktime_sub(*now, stats->start)); 3357 3358 return total; 3359 } 3360 3361 static ktime_t execlists_engine_busyness(struct intel_engine_cs *engine, 3362 ktime_t *now) 3363 { 3364 struct intel_engine_execlists_stats *stats = &engine->stats.execlists; 3365 unsigned int seq; 3366 ktime_t total; 3367 3368 do { 3369 seq = read_seqcount_begin(&stats->lock); 3370 total = __execlists_engine_busyness(engine, now); 3371 } while (read_seqcount_retry(&stats->lock, seq)); 3372 3373 return total; 3374 } 3375 3376 static void 3377 logical_ring_default_vfuncs(struct intel_engine_cs *engine) 3378 { 3379 /* Default vfuncs which can be overridden by each engine. 
*/ 3380 3381 engine->resume = execlists_resume; 3382 3383 engine->cops = &execlists_context_ops; 3384 engine->request_alloc = execlists_request_alloc; 3385 engine->add_active_request = add_to_engine; 3386 engine->remove_active_request = remove_from_engine; 3387 3388 engine->reset.prepare = execlists_reset_prepare; 3389 engine->reset.rewind = execlists_reset_rewind; 3390 engine->reset.cancel = execlists_reset_cancel; 3391 engine->reset.finish = execlists_reset_finish; 3392 3393 engine->park = execlists_park; 3394 engine->unpark = NULL; 3395 3396 engine->emit_flush = gen8_emit_flush_xcs; 3397 engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb; 3398 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_xcs; 3399 if (GRAPHICS_VER(engine->i915) >= 12) { 3400 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_xcs; 3401 engine->emit_flush = gen12_emit_flush_xcs; 3402 } 3403 engine->set_default_submission = execlists_set_default_submission; 3404 3405 if (GRAPHICS_VER(engine->i915) < 11) { 3406 engine->irq_enable = gen8_logical_ring_enable_irq; 3407 engine->irq_disable = gen8_logical_ring_disable_irq; 3408 } else { 3409 /* 3410 * TODO: On Gen11 interrupt masks need to be clear 3411 * to allow C6 entry. Keep interrupts enabled at 3412 * all times and take the hit of generating extra interrupts 3413 * until a more refined solution exists. 3414 */ 3415 } 3416 intel_engine_set_irq_handler(engine, execlists_irq_handler); 3417 3418 engine->flags |= I915_ENGINE_SUPPORTS_STATS; 3419 if (!intel_vgpu_active(engine->i915)) { 3420 engine->flags |= I915_ENGINE_HAS_SEMAPHORES; 3421 if (can_preempt(engine)) { 3422 engine->flags |= I915_ENGINE_HAS_PREEMPTION; 3423 if (CONFIG_DRM_I915_TIMESLICE_DURATION) 3424 engine->flags |= I915_ENGINE_HAS_TIMESLICES; 3425 } 3426 } 3427 3428 if (intel_engine_has_preemption(engine)) 3429 engine->emit_bb_start = gen8_emit_bb_start; 3430 else 3431 engine->emit_bb_start = gen8_emit_bb_start_noarb; 3432 3433 engine->busyness = execlists_engine_busyness; 3434 } 3435 3436 static void logical_ring_default_irqs(struct intel_engine_cs *engine) 3437 { 3438 unsigned int shift = 0; 3439 3440 if (GRAPHICS_VER(engine->i915) < 11) { 3441 const u8 irq_shifts[] = { 3442 [RCS0] = GEN8_RCS_IRQ_SHIFT, 3443 [BCS0] = GEN8_BCS_IRQ_SHIFT, 3444 [VCS0] = GEN8_VCS0_IRQ_SHIFT, 3445 [VCS1] = GEN8_VCS1_IRQ_SHIFT, 3446 [VECS0] = GEN8_VECS_IRQ_SHIFT, 3447 }; 3448 3449 shift = irq_shifts[engine->id]; 3450 } 3451 3452 engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift; 3453 engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift; 3454 engine->irq_keep_mask |= GT_CS_MASTER_ERROR_INTERRUPT << shift; 3455 engine->irq_keep_mask |= GT_WAIT_SEMAPHORE_INTERRUPT << shift; 3456 } 3457 3458 static void rcs_submission_override(struct intel_engine_cs *engine) 3459 { 3460 switch (GRAPHICS_VER(engine->i915)) { 3461 case 12: 3462 engine->emit_flush = gen12_emit_flush_rcs; 3463 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs; 3464 break; 3465 case 11: 3466 engine->emit_flush = gen11_emit_flush_rcs; 3467 engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs; 3468 break; 3469 default: 3470 engine->emit_flush = gen8_emit_flush_rcs; 3471 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs; 3472 break; 3473 } 3474 } 3475 3476 int intel_execlists_submission_setup(struct intel_engine_cs *engine) 3477 { 3478 struct intel_engine_execlists * const execlists = &engine->execlists; 3479 struct drm_i915_private *i915 = engine->i915; 3480 struct intel_uncore *uncore = engine->uncore; 3481 u32

static void rcs_submission_override(struct intel_engine_cs *engine)
{
	switch (GRAPHICS_VER(engine->i915)) {
	case 12:
		engine->emit_flush = gen12_emit_flush_rcs;
		engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs;
		break;
	case 11:
		engine->emit_flush = gen11_emit_flush_rcs;
		engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs;
		break;
	default:
		engine->emit_flush = gen8_emit_flush_rcs;
		engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
		break;
	}
}

int intel_execlists_submission_setup(struct intel_engine_cs *engine)
{
	struct intel_engine_execlists * const execlists = &engine->execlists;
	struct drm_i915_private *i915 = engine->i915;
	struct intel_uncore *uncore = engine->uncore;
	u32 base = engine->mmio_base;

	tasklet_setup(&engine->sched_engine->tasklet, execlists_submission_tasklet);
	timer_setup(&engine->execlists.timer, execlists_timeslice, 0);
	timer_setup(&engine->execlists.preempt, execlists_preempt, 0);

	logical_ring_default_vfuncs(engine);
	logical_ring_default_irqs(engine);

	if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE)
		rcs_submission_override(engine);

	lrc_init_wa_ctx(engine);

	if (HAS_LOGICAL_RING_ELSQ(i915)) {
		execlists->submit_reg = uncore->regs +
			i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(base));
		execlists->ctrl_reg = uncore->regs +
			i915_mmio_reg_offset(RING_EXECLIST_CONTROL(base));

		engine->fw_domain = intel_uncore_forcewake_for_reg(engine->uncore,
				    RING_EXECLIST_CONTROL(engine->mmio_base),
				    FW_REG_WRITE);
	} else {
		execlists->submit_reg = uncore->regs +
			i915_mmio_reg_offset(RING_ELSP(base));
	}

	execlists->csb_status =
		(u64 *)&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];

	execlists->csb_write =
		&engine->status_page.addr[INTEL_HWS_CSB_WRITE_INDEX(i915)];

	if (GRAPHICS_VER(i915) < 11)
		execlists->csb_size = GEN8_CSB_ENTRIES;
	else
		execlists->csb_size = GEN11_CSB_ENTRIES;

	engine->context_tag = GENMASK(BITS_PER_LONG - 2, 0);
	if (GRAPHICS_VER(engine->i915) >= 11 &&
	    GRAPHICS_VER_FULL(engine->i915) < IP_VER(12, 50)) {
		execlists->ccid |= engine->instance << (GEN11_ENGINE_INSTANCE_SHIFT - 32);
		execlists->ccid |= engine->class << (GEN11_ENGINE_CLASS_SHIFT - 32);
	}

	/* Finally, take ownership and responsibility for cleanup! */
	engine->sanitize = execlists_sanitize;
	engine->release = execlists_release;

	return 0;
}
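
/*
 * Summary note (no new behaviour): on parts with HAS_LOGICAL_RING_ELSQ the
 * submission tasklet writes the execlist descriptors into the submit-queue
 * contents registers mapped above and then latches them with a single write
 * to the control register, while older parts take descriptor pairs written
 * directly to ELSP. Either way the hardware reports progress back through
 * the CSB entries pointed at by csb_status/csb_write.
 */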

static struct list_head *virtual_queue(struct virtual_engine *ve)
{
	return &ve->base.sched_engine->default_priolist.requests;
}

static void rcu_virtual_context_destroy(struct work_struct *wrk)
{
	struct virtual_engine *ve =
		container_of(wrk, typeof(*ve), rcu.work);
	unsigned int n;

	GEM_BUG_ON(ve->context.inflight);

	/* Preempt-to-busy may leave a stale request behind. */
	if (unlikely(ve->request)) {
		struct i915_request *old;

		spin_lock_irq(&ve->base.sched_engine->lock);

		old = fetch_and_zero(&ve->request);
		if (old) {
			GEM_BUG_ON(!__i915_request_is_complete(old));
			__i915_request_submit(old);
			i915_request_put(old);
		}

		spin_unlock_irq(&ve->base.sched_engine->lock);
	}

	/*
	 * Flush the tasklet in case it is still running on another core.
	 *
	 * This needs to be done before we remove ourselves from the siblings'
	 * rbtrees because, if the tasklet is still running in parallel, it
	 * may reinsert the rb_node into a sibling.
	 */
	tasklet_kill(&ve->base.sched_engine->tasklet);

	/* Decouple ourselves from the siblings, no more access allowed. */
	for (n = 0; n < ve->num_siblings; n++) {
		struct intel_engine_cs *sibling = ve->siblings[n];
		struct rb_node *node = &ve->nodes[sibling->id].rb;

		if (RB_EMPTY_NODE(node))
			continue;

		spin_lock_irq(&sibling->sched_engine->lock);

		/* Detachment is lazily performed in the sched_engine->tasklet */
		if (!RB_EMPTY_NODE(node))
			rb_erase_cached(node, &sibling->execlists.virtual);

		spin_unlock_irq(&sibling->sched_engine->lock);
	}
	GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.sched_engine->tasklet));
	GEM_BUG_ON(!list_empty(virtual_queue(ve)));

	lrc_fini(&ve->context);
	intel_context_fini(&ve->context);

	if (ve->base.breadcrumbs)
		intel_breadcrumbs_put(ve->base.breadcrumbs);
	if (ve->base.sched_engine)
		i915_sched_engine_put(ve->base.sched_engine);
	intel_engine_free_request_pool(&ve->base);

	kfree(ve);
}

static void virtual_context_destroy(struct kref *kref)
{
	struct virtual_engine *ve =
		container_of(kref, typeof(*ve), context.ref);

	GEM_BUG_ON(!list_empty(&ve->context.signals));

	/*
	 * When destroying the virtual engine, we have to be aware that
	 * it may still be in use from a hardirq/softirq context causing
	 * the resubmission of a completed request (background completion
	 * due to preempt-to-busy). Before we can free the engine, we need
	 * to flush the submission code and tasklets that are still potentially
	 * accessing the engine. Flushing the tasklets requires process context,
	 * and since we can guard the resubmit onto the engine with an RCU read
	 * lock, we can delegate the free of the engine to an RCU worker.
	 */
	INIT_RCU_WORK(&ve->rcu, rcu_virtual_context_destroy);
	queue_rcu_work(system_wq, &ve->rcu);
}
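
/*
 * For orientation (a sketch of the existing flow, not new code): the final
 * intel_context_put() on a virtual context is what lands here, roughly
 *
 *	intel_context_put(ce)
 *	  -> kref_put(&ce->ref, ce->ops->destroy)
 *	    -> virtual_context_destroy()
 *	      -> queue_rcu_work()	(teardown deferred to process context)
 *
 * so no caller ever observes the teardown happening synchronously.
 */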

static void virtual_engine_initial_hint(struct virtual_engine *ve)
{
	int swp;

	/*
	 * Pick a random sibling on starting to help spread the load around.
	 *
	 * New contexts are typically created with exactly the same order
	 * of siblings, and often started in batches. Due to the way we iterate
	 * the array of siblings when submitting requests, sibling[0] is
	 * prioritised for dequeuing. If we make sure that sibling[0] is fairly
	 * randomised across the system, we also help spread the load by the
	 * first engine we inspect being different each time.
	 *
	 * NB This does not force us to execute on this engine, it will just
	 * typically be the first we inspect for submission.
	 */
	swp = prandom_u32_max(ve->num_siblings);
	if (swp)
		swap(ve->siblings[swp], ve->siblings[0]);
}

static int virtual_context_alloc(struct intel_context *ce)
{
	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);

	return lrc_alloc(ce, ve->siblings[0]);
}

static int virtual_context_pre_pin(struct intel_context *ce,
				   struct i915_gem_ww_ctx *ww,
				   void **vaddr)
{
	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);

	/* Note: we must use a real engine class for setting up reg state */
	return __execlists_context_pre_pin(ce, ve->siblings[0], ww, vaddr);
}

static int virtual_context_pin(struct intel_context *ce, void *vaddr)
{
	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);

	return lrc_pin(ce, ve->siblings[0], vaddr);
}

static void virtual_context_enter(struct intel_context *ce)
{
	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
	unsigned int n;

	for (n = 0; n < ve->num_siblings; n++)
		intel_engine_pm_get(ve->siblings[n]);

	intel_timeline_enter(ce->timeline);
}

static void virtual_context_exit(struct intel_context *ce)
{
	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
	unsigned int n;

	intel_timeline_exit(ce->timeline);

	for (n = 0; n < ve->num_siblings; n++)
		intel_engine_pm_put(ve->siblings[n]);
}
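
/*
 * Why every sibling above: until a request is actually picked up by one of
 * the physical engines we cannot know where it will run, so enter/exit
 * conservatively take and release an engine-pm wakeref on all siblings
 * rather than guessing a single one.
 */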

static struct intel_engine_cs *
virtual_get_sibling(struct intel_engine_cs *engine, unsigned int sibling)
{
	struct virtual_engine *ve = to_virtual_engine(engine);

	if (sibling >= ve->num_siblings)
		return NULL;

	return ve->siblings[sibling];
}

static const struct intel_context_ops virtual_context_ops = {
	.flags = COPS_HAS_INFLIGHT,

	.alloc = virtual_context_alloc,

	.cancel_request = execlists_context_cancel_request,

	.pre_pin = virtual_context_pre_pin,
	.pin = virtual_context_pin,
	.unpin = lrc_unpin,
	.post_unpin = lrc_post_unpin,

	.enter = virtual_context_enter,
	.exit = virtual_context_exit,

	.destroy = virtual_context_destroy,

	.get_sibling = virtual_get_sibling,
};

static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
{
	struct i915_request *rq;
	intel_engine_mask_t mask;

	rq = READ_ONCE(ve->request);
	if (!rq)
		return 0;

	/* The rq is ready for submission; rq->execution_mask is now stable. */
	mask = rq->execution_mask;
	if (unlikely(!mask)) {
		/* Invalid selection, submit to a random engine in error */
		i915_request_set_error_once(rq, -ENODEV);
		mask = ve->siblings[0]->mask;
	}

	ENGINE_TRACE(&ve->base, "rq=%llx:%lld, mask=%x, prio=%d\n",
		     rq->fence.context, rq->fence.seqno,
		     mask, ve->base.sched_engine->queue_priority_hint);

	return mask;
}

static void virtual_submission_tasklet(struct tasklet_struct *t)
{
	struct i915_sched_engine *sched_engine =
		from_tasklet(sched_engine, t, tasklet);
	struct virtual_engine * const ve =
		(struct virtual_engine *)sched_engine->private_data;
	const int prio = READ_ONCE(sched_engine->queue_priority_hint);
	intel_engine_mask_t mask;
	unsigned int n;

	rcu_read_lock();
	mask = virtual_submission_mask(ve);
	rcu_read_unlock();
	if (unlikely(!mask))
		return;

	for (n = 0; n < ve->num_siblings; n++) {
		struct intel_engine_cs *sibling = READ_ONCE(ve->siblings[n]);
		struct ve_node * const node = &ve->nodes[sibling->id];
		struct rb_node **parent, *rb;
		bool first;

		if (!READ_ONCE(ve->request))
			break; /* already handled by a sibling's tasklet */

		spin_lock_irq(&sibling->sched_engine->lock);

		if (unlikely(!(mask & sibling->mask))) {
			if (!RB_EMPTY_NODE(&node->rb)) {
				rb_erase_cached(&node->rb,
						&sibling->execlists.virtual);
				RB_CLEAR_NODE(&node->rb);
			}

			goto unlock_engine;
		}

		if (unlikely(!RB_EMPTY_NODE(&node->rb))) {
			/*
			 * Cheat and avoid rebalancing the tree if we can
			 * reuse this node in situ.
			 */
			first = rb_first_cached(&sibling->execlists.virtual) ==
				&node->rb;
			if (prio == node->prio || (prio > node->prio && first))
				goto submit_engine;

			rb_erase_cached(&node->rb, &sibling->execlists.virtual);
		}

		rb = NULL;
		first = true;
		parent = &sibling->execlists.virtual.rb_root.rb_node;
		while (*parent) {
			struct ve_node *other;

			rb = *parent;
			other = rb_entry(rb, typeof(*other), rb);
			if (prio > other->prio) {
				parent = &rb->rb_left;
			} else {
				parent = &rb->rb_right;
				first = false;
			}
		}

		rb_link_node(&node->rb, rb, parent);
		rb_insert_color_cached(&node->rb,
				       &sibling->execlists.virtual,
				       first);

submit_engine:
		GEM_BUG_ON(RB_EMPTY_NODE(&node->rb));
		node->prio = prio;
		if (first && prio > sibling->sched_engine->queue_priority_hint)
			tasklet_hi_schedule(&sibling->sched_engine->tasklet);

unlock_engine:
		spin_unlock_irq(&sibling->sched_engine->lock);

		if (intel_context_inflight(&ve->context))
			break;
	}
}
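
/*
 * Illustrative picture of what the tasklet above maintains: every sibling
 * owns a priority-sorted rbtree (sibling->execlists.virtual) holding at most
 * one ve_node per virtual engine with a ready request. The sibling's own
 * dequeue path then compares rb_first_cached() of that tree against its
 * local queue, so whichever physical engine gets there first claims the
 * request.
 */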

static void virtual_submit_request(struct i915_request *rq)
{
	struct virtual_engine *ve = to_virtual_engine(rq->engine);
	unsigned long flags;

	ENGINE_TRACE(&ve->base, "rq=%llx:%lld\n",
		     rq->fence.context,
		     rq->fence.seqno);

	GEM_BUG_ON(ve->base.submit_request != virtual_submit_request);

	spin_lock_irqsave(&ve->base.sched_engine->lock, flags);

	/* By the time we resubmit a request, it may be completed */
	if (__i915_request_is_complete(rq)) {
		__i915_request_submit(rq);
		goto unlock;
	}

	if (ve->request) { /* background completion from preempt-to-busy */
		GEM_BUG_ON(!__i915_request_is_complete(ve->request));
		__i915_request_submit(ve->request);
		i915_request_put(ve->request);
	}

	ve->base.sched_engine->queue_priority_hint = rq_prio(rq);
	ve->request = i915_request_get(rq);

	GEM_BUG_ON(!list_empty(virtual_queue(ve)));
	list_move_tail(&rq->sched.link, virtual_queue(ve));

	tasklet_hi_schedule(&ve->base.sched_engine->tasklet);

unlock:
	spin_unlock_irqrestore(&ve->base.sched_engine->lock, flags);
}
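
/*
 * Note (summary of the above, no new behaviour): ve->request is a single
 * slot. Requests on a virtual context are submitted in order, so if the slot
 * is still occupied here it can only contain an already-completed request
 * left behind by preempt-to-busy, which is why the GEM_BUG_ON above only
 * checks for completion before flushing it out.
 */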

static struct intel_context *
execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
			 unsigned long flags)
{
	struct virtual_engine *ve;
	unsigned int n;
	int err;

	ve = kzalloc(struct_size(ve, siblings, count), GFP_KERNEL);
	if (!ve)
		return ERR_PTR(-ENOMEM);

	ve->base.i915 = siblings[0]->i915;
	ve->base.gt = siblings[0]->gt;
	ve->base.uncore = siblings[0]->uncore;
	ve->base.id = -1;

	ve->base.class = OTHER_CLASS;
	ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
	ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
	ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;

	/*
	 * The decision on whether to submit a request using semaphores
	 * depends on the saturated state of the engine. We only compute
	 * this during HW submission of the request, and we need this
	 * state to be applied globally to all requests being submitted
	 * to this engine. Virtual engines encompass more than one physical
	 * engine and so we cannot accurately tell in advance if one of those
	 * engines is already saturated and so cannot afford to use a semaphore
	 * and be pessimized in priority for doing so -- if we are the only
	 * context using semaphores after all other clients have stopped, we
	 * will be starved on the saturated system. Such a global switch for
	 * semaphores is less than ideal, but alas is the current compromise.
	 */
	ve->base.saturated = ALL_ENGINES;

	snprintf(ve->base.name, sizeof(ve->base.name), "virtual");

	intel_engine_init_execlists(&ve->base);

	ve->base.sched_engine = i915_sched_engine_create(ENGINE_VIRTUAL);
	if (!ve->base.sched_engine) {
		err = -ENOMEM;
		goto err_put;
	}
	ve->base.sched_engine->private_data = &ve->base;

	ve->base.cops = &virtual_context_ops;
	ve->base.request_alloc = execlists_request_alloc;

	ve->base.sched_engine->schedule = i915_schedule;
	ve->base.sched_engine->kick_backend = kick_execlists;
	ve->base.submit_request = virtual_submit_request;

	INIT_LIST_HEAD(virtual_queue(ve));
	tasklet_setup(&ve->base.sched_engine->tasklet, virtual_submission_tasklet);

	intel_context_init(&ve->context, &ve->base);

	ve->base.breadcrumbs = intel_breadcrumbs_create(NULL);
	if (!ve->base.breadcrumbs) {
		err = -ENOMEM;
		goto err_put;
	}

	for (n = 0; n < count; n++) {
		struct intel_engine_cs *sibling = siblings[n];

		GEM_BUG_ON(!is_power_of_2(sibling->mask));
		if (sibling->mask & ve->base.mask) {
			DRM_DEBUG("duplicate %s entry in load balancer\n",
				  sibling->name);
			err = -EINVAL;
			goto err_put;
		}

		/*
		 * The virtual engine implementation is tightly coupled to
		 * the execlists backend -- we push requests out directly
		 * into a tree inside each physical engine. We could support
		 * layering if we handle cloning of the requests and
		 * submitting a copy into each backend.
		 */
		if (sibling->sched_engine->tasklet.callback !=
		    execlists_submission_tasklet) {
			err = -ENODEV;
			goto err_put;
		}

		GEM_BUG_ON(RB_EMPTY_NODE(&ve->nodes[sibling->id].rb));
		RB_CLEAR_NODE(&ve->nodes[sibling->id].rb);

		ve->siblings[ve->num_siblings++] = sibling;
		ve->base.mask |= sibling->mask;
		ve->base.logical_mask |= sibling->logical_mask;

		/*
		 * All physical engines must be compatible for their emission
		 * functions (as we build the instructions during request
		 * construction and do not alter them before submission
		 * on the physical engine). We use the engine class as a guide
		 * here, although that could be refined.
		 */
		if (ve->base.class != OTHER_CLASS) {
			if (ve->base.class != sibling->class) {
				DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
					  sibling->class, ve->base.class);
				err = -EINVAL;
				goto err_put;
			}
			continue;
		}

		ve->base.class = sibling->class;
		ve->base.uabi_class = sibling->uabi_class;
		snprintf(ve->base.name, sizeof(ve->base.name),
			 "v%dx%d", ve->base.class, count);
		ve->base.context_size = sibling->context_size;

		ve->base.add_active_request = sibling->add_active_request;
		ve->base.remove_active_request = sibling->remove_active_request;
		ve->base.emit_bb_start = sibling->emit_bb_start;
		ve->base.emit_flush = sibling->emit_flush;
		ve->base.emit_init_breadcrumb = sibling->emit_init_breadcrumb;
		ve->base.emit_fini_breadcrumb = sibling->emit_fini_breadcrumb;
		ve->base.emit_fini_breadcrumb_dw =
			sibling->emit_fini_breadcrumb_dw;

		ve->base.flags = sibling->flags;
	}

	ve->base.flags |= I915_ENGINE_IS_VIRTUAL;

	virtual_engine_initial_hint(ve);
	return &ve->context;

err_put:
	intel_context_put(&ve->context);
	return ERR_PTR(err);
}
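
/*
 * Hypothetical usage sketch (callers actually reach this via the engine/uAPI
 * layer; "vcs0"/"vcs1" below are placeholders, not real symbols):
 *
 *	struct intel_engine_cs *siblings[] = { vcs0, vcs1 };
 *	struct intel_context *ce;
 *
 *	ce = execlists_create_virtual(siblings, ARRAY_SIZE(siblings), 0);
 *	if (IS_ERR(ce))
 *		return PTR_ERR(ce);
 *
 * The returned intel_context behaves like any other context, except that its
 * requests may execute on either sibling.
 */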

void intel_execlists_show_requests(struct intel_engine_cs *engine,
				   struct drm_printer *m,
				   void (*show_request)(struct drm_printer *m,
							const struct i915_request *rq,
							const char *prefix,
							int indent),
				   unsigned int max)
{
	const struct intel_engine_execlists *execlists = &engine->execlists;
	struct i915_sched_engine *sched_engine = engine->sched_engine;
	struct i915_request *rq, *last;
	unsigned long flags;
	unsigned int count;
	struct rb_node *rb;

	spin_lock_irqsave(&sched_engine->lock, flags);

	last = NULL;
	count = 0;
	list_for_each_entry(rq, &sched_engine->requests, sched.link) {
		if (count++ < max - 1)
			show_request(m, rq, "\t\t", 0);
		else
			last = rq;
	}
	if (last) {
		if (count > max) {
			drm_printf(m,
				   "\t\t...skipping %d executing requests...\n",
				   count - max);
		}
		show_request(m, last, "\t\t", 0);
	}

	if (sched_engine->queue_priority_hint != INT_MIN)
		drm_printf(m, "\t\tQueue priority hint: %d\n",
			   READ_ONCE(sched_engine->queue_priority_hint));

	last = NULL;
	count = 0;
	for (rb = rb_first_cached(&sched_engine->queue); rb; rb = rb_next(rb)) {
		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);

		priolist_for_each_request(rq, p) {
			if (count++ < max - 1)
				show_request(m, rq, "\t\t", 0);
			else
				last = rq;
		}
	}
	if (last) {
		if (count > max) {
			drm_printf(m,
				   "\t\t...skipping %d queued requests...\n",
				   count - max);
		}
		show_request(m, last, "\t\t", 0);
	}

	last = NULL;
	count = 0;
	for (rb = rb_first_cached(&execlists->virtual); rb; rb = rb_next(rb)) {
		struct virtual_engine *ve =
			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
		struct i915_request *rq = READ_ONCE(ve->request);

		if (rq) {
			if (count++ < max - 1)
				show_request(m, rq, "\t\t", 0);
			else
				last = rq;
		}
	}
	if (last) {
		if (count > max) {
			drm_printf(m,
				   "\t\t...skipping %d virtual requests...\n",
				   count - max);
		}
		show_request(m, last, "\t\t", 0);
	}

	spin_unlock_irqrestore(&sched_engine->lock, flags);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_execlists.c"
#endif