// SPDX-License-Identifier: MIT
/*
 * Copyright © 2014 Intel Corporation
 */

#include <linux/circ_buf.h>

#include "gem/i915_gem_context.h"
#include "gt/gen8_engine_cs.h"
#include "gt/intel_breadcrumbs.h"
#include "gt/intel_context.h"
#include "gt/intel_engine_heartbeat.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_engine_regs.h"
#include "gt/intel_gpu_commands.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_clock_utils.h"
#include "gt/intel_gt_irq.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_gt_regs.h"
#include "gt/intel_gt_requests.h"
#include "gt/intel_lrc.h"
#include "gt/intel_lrc_reg.h"
#include "gt/intel_mocs.h"
#include "gt/intel_ring.h"

#include "intel_guc_ads.h"
#include "intel_guc_capture.h"
#include "intel_guc_submission.h"

#include "i915_drv.h"
#include "i915_trace.h"

/**
 * DOC: GuC-based command submission
 *
 * The Scratch registers:
 * There are 16 MMIO-based registers starting at 0xC180. The kernel driver
 * writes a value to the action register (SOFT_SCRATCH_0) along with any data.
 * It then triggers an interrupt on the GuC via another register write (0xC4C8).
 * Firmware writes a success/fail code back to the action register after
 * processing the request. The kernel driver polls waiting for this update and
 * then proceeds.
 *
 * Command Transport buffers (CTBs):
 * Covered in detail in other sections but CTBs (Host to GuC - H2G, GuC to Host
 * - G2H) are a message interface between the i915 and GuC.
 *
 * Context registration:
 * Before a context can be submitted it must be registered with the GuC via a
 * H2G. A unique guc_id is associated with each context. The context is either
 * registered at request creation time (normal operation) or at submission time
 * (abnormal operation, e.g. after a reset).
 *
 * Context submission:
 * The i915 updates the LRC tail value in memory. The i915 must enable the
 * scheduling of the context within the GuC for the GuC to actually consider it.
 * Therefore, the first time a disabled context is submitted we use a schedule
 * enable H2G, while follow up submissions are done via the context submit H2G,
 * which informs the GuC that a previously enabled context has new work
 * available.
 *
 * Context unpin:
 * To unpin a context a H2G is used to disable scheduling. When the
 * corresponding G2H returns indicating the scheduling disable operation has
 * completed it is safe to unpin the context. While a disable is in flight it
 * isn't safe to resubmit the context so a fence is used to stall all future
 * requests of that context until the G2H is returned.
 *
 * Context deregistration:
 * Before a context can be destroyed or if we steal its guc_id we must
 * deregister the context with the GuC via H2G. If stealing the guc_id it isn't
 * safe to submit anything to this guc_id until the deregister completes so a
 * fence is used to stall all requests associated with this guc_id until the
 * corresponding G2H returns indicating the guc_id has been deregistered.
 *
 * submission_state.guc_ids:
 * Unique number associated with private GuC context data passed in during
 * context registration / submission / deregistration. 64k available. A simple
 * ida is used for allocation.
 *
 * Stealing guc_ids:
 * If no guc_ids are available they can be stolen from another context at
 * request creation time if that context is unpinned. If a guc_id can't be found
 * we punt this problem to the user as we believe this is near impossible to hit
 * during normal use cases.
 *
 * Locking:
 * In the GuC submission code we have 3 basic spin locks which protect
 * everything. Details about each below.
 *
 * sched_engine->lock
 * This is the submission lock for all contexts that share an i915 schedule
 * engine (sched_engine), thus only one of the contexts which share a
 * sched_engine can be submitting at a time. Currently only one sched_engine is
 * used for all of GuC submission but that could change in the future.
 *
 * guc->submission_state.lock
 * Global lock for GuC submission state. Protects guc_ids and the destroyed
 * contexts list.
 *
 * ce->guc_state.lock
 * Protects everything under ce->guc_state. Ensures that a context is in the
 * correct state before issuing a H2G. e.g. We don't issue a schedule disable
 * on a disabled context (bad idea), we don't issue a schedule enable when a
 * schedule disable is in flight, etc... Also protects the list of inflight
 * requests on the context and the priority management state. The lock is
 * individual to each context.
 *
 * Lock ordering rules:
 * sched_engine->lock -> ce->guc_state.lock
 * guc->submission_state.lock -> ce->guc_state.lock
 *
 * Reset races:
 * When a full GT reset is triggered it is assumed that some G2H responses to
 * H2Gs can be lost as the GuC is also reset. Losing these G2H can prove to be
 * fatal as we do certain operations upon receiving a G2H (e.g. destroy
 * contexts, release guc_ids, etc...). When this occurs we can scrub the
 * context state and clean up appropriately, however this is quite racy.
 * To avoid races, the reset code must disable submission before scrubbing for
 * the missing G2H, while the submission code must check for submission being
 * disabled and skip sending H2Gs and updating context states when it is. Both
 * sides must also make sure to hold the relevant locks.
 */

/* GuC Virtual Engine */
struct guc_virtual_engine {
	struct intel_engine_cs base;
	struct intel_context context;
};

static struct intel_context *
guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
		   unsigned long flags);

static struct intel_context *
guc_create_parallel(struct intel_engine_cs **engines,
		    unsigned int num_siblings,
		    unsigned int width);

#define GUC_REQUEST_SIZE 64 /* bytes */

/*
 * We reserve 1/16 of the guc_ids for multi-lrc as these need to be contiguous
 * per the GuC submission interface. A different allocation algorithm is used
 * (bitmap vs. ida) between multi-lrc and single-lrc hence the reason to
 * partition the guc_id space. We believe the number of multi-lrc contexts in
 * use should be low and 1/16 should be sufficient. Minimum of 32 guc_ids for
 * multi-lrc.
 */
#define NUMBER_MULTI_LRC_GUC_ID(guc)	\
	((guc)->submission_state.num_guc_ids / 16)

/*
 * Below is a set of functions which control the GuC scheduling state and
 * require ce->guc_state.lock to be held while modifying that state.
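 *
 * As an illustrative sketch only (this is not an actual call site in this
 * file, and the locals are made up for the example), the setters and
 * clearers below are meant to be used with that lock held:
 *
 *	spin_lock_irqsave(&ce->guc_state.lock, flags);
 *	if (!context_enabled(ce) && !context_pending_enable(ce))
 *		set_context_pending_enable(ce);
 *	spin_unlock_irqrestore(&ce->guc_state.lock, flags);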
157 */ 158 #define SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER BIT(0) 159 #define SCHED_STATE_DESTROYED BIT(1) 160 #define SCHED_STATE_PENDING_DISABLE BIT(2) 161 #define SCHED_STATE_BANNED BIT(3) 162 #define SCHED_STATE_ENABLED BIT(4) 163 #define SCHED_STATE_PENDING_ENABLE BIT(5) 164 #define SCHED_STATE_REGISTERED BIT(6) 165 #define SCHED_STATE_POLICY_REQUIRED BIT(7) 166 #define SCHED_STATE_BLOCKED_SHIFT 8 167 #define SCHED_STATE_BLOCKED BIT(SCHED_STATE_BLOCKED_SHIFT) 168 #define SCHED_STATE_BLOCKED_MASK (0xfff << SCHED_STATE_BLOCKED_SHIFT) 169 170 static inline void init_sched_state(struct intel_context *ce) 171 { 172 lockdep_assert_held(&ce->guc_state.lock); 173 ce->guc_state.sched_state &= SCHED_STATE_BLOCKED_MASK; 174 } 175 176 __maybe_unused 177 static bool sched_state_is_init(struct intel_context *ce) 178 { 179 /* Kernel contexts can have SCHED_STATE_REGISTERED after suspend. */ 180 return !(ce->guc_state.sched_state & 181 ~(SCHED_STATE_BLOCKED_MASK | SCHED_STATE_REGISTERED)); 182 } 183 184 static inline bool 185 context_wait_for_deregister_to_register(struct intel_context *ce) 186 { 187 return ce->guc_state.sched_state & 188 SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER; 189 } 190 191 static inline void 192 set_context_wait_for_deregister_to_register(struct intel_context *ce) 193 { 194 lockdep_assert_held(&ce->guc_state.lock); 195 ce->guc_state.sched_state |= 196 SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER; 197 } 198 199 static inline void 200 clr_context_wait_for_deregister_to_register(struct intel_context *ce) 201 { 202 lockdep_assert_held(&ce->guc_state.lock); 203 ce->guc_state.sched_state &= 204 ~SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER; 205 } 206 207 static inline bool 208 context_destroyed(struct intel_context *ce) 209 { 210 return ce->guc_state.sched_state & SCHED_STATE_DESTROYED; 211 } 212 213 static inline void 214 set_context_destroyed(struct intel_context *ce) 215 { 216 lockdep_assert_held(&ce->guc_state.lock); 217 ce->guc_state.sched_state |= SCHED_STATE_DESTROYED; 218 } 219 220 static inline bool context_pending_disable(struct intel_context *ce) 221 { 222 return ce->guc_state.sched_state & SCHED_STATE_PENDING_DISABLE; 223 } 224 225 static inline void set_context_pending_disable(struct intel_context *ce) 226 { 227 lockdep_assert_held(&ce->guc_state.lock); 228 ce->guc_state.sched_state |= SCHED_STATE_PENDING_DISABLE; 229 } 230 231 static inline void clr_context_pending_disable(struct intel_context *ce) 232 { 233 lockdep_assert_held(&ce->guc_state.lock); 234 ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_DISABLE; 235 } 236 237 static inline bool context_banned(struct intel_context *ce) 238 { 239 return ce->guc_state.sched_state & SCHED_STATE_BANNED; 240 } 241 242 static inline void set_context_banned(struct intel_context *ce) 243 { 244 lockdep_assert_held(&ce->guc_state.lock); 245 ce->guc_state.sched_state |= SCHED_STATE_BANNED; 246 } 247 248 static inline void clr_context_banned(struct intel_context *ce) 249 { 250 lockdep_assert_held(&ce->guc_state.lock); 251 ce->guc_state.sched_state &= ~SCHED_STATE_BANNED; 252 } 253 254 static inline bool context_enabled(struct intel_context *ce) 255 { 256 return ce->guc_state.sched_state & SCHED_STATE_ENABLED; 257 } 258 259 static inline void set_context_enabled(struct intel_context *ce) 260 { 261 lockdep_assert_held(&ce->guc_state.lock); 262 ce->guc_state.sched_state |= SCHED_STATE_ENABLED; 263 } 264 265 static inline void clr_context_enabled(struct intel_context *ce) 266 { 267 lockdep_assert_held(&ce->guc_state.lock); 268 
ce->guc_state.sched_state &= ~SCHED_STATE_ENABLED; 269 } 270 271 static inline bool context_pending_enable(struct intel_context *ce) 272 { 273 return ce->guc_state.sched_state & SCHED_STATE_PENDING_ENABLE; 274 } 275 276 static inline void set_context_pending_enable(struct intel_context *ce) 277 { 278 lockdep_assert_held(&ce->guc_state.lock); 279 ce->guc_state.sched_state |= SCHED_STATE_PENDING_ENABLE; 280 } 281 282 static inline void clr_context_pending_enable(struct intel_context *ce) 283 { 284 lockdep_assert_held(&ce->guc_state.lock); 285 ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_ENABLE; 286 } 287 288 static inline bool context_registered(struct intel_context *ce) 289 { 290 return ce->guc_state.sched_state & SCHED_STATE_REGISTERED; 291 } 292 293 static inline void set_context_registered(struct intel_context *ce) 294 { 295 lockdep_assert_held(&ce->guc_state.lock); 296 ce->guc_state.sched_state |= SCHED_STATE_REGISTERED; 297 } 298 299 static inline void clr_context_registered(struct intel_context *ce) 300 { 301 lockdep_assert_held(&ce->guc_state.lock); 302 ce->guc_state.sched_state &= ~SCHED_STATE_REGISTERED; 303 } 304 305 static inline bool context_policy_required(struct intel_context *ce) 306 { 307 return ce->guc_state.sched_state & SCHED_STATE_POLICY_REQUIRED; 308 } 309 310 static inline void set_context_policy_required(struct intel_context *ce) 311 { 312 lockdep_assert_held(&ce->guc_state.lock); 313 ce->guc_state.sched_state |= SCHED_STATE_POLICY_REQUIRED; 314 } 315 316 static inline void clr_context_policy_required(struct intel_context *ce) 317 { 318 lockdep_assert_held(&ce->guc_state.lock); 319 ce->guc_state.sched_state &= ~SCHED_STATE_POLICY_REQUIRED; 320 } 321 322 static inline u32 context_blocked(struct intel_context *ce) 323 { 324 return (ce->guc_state.sched_state & SCHED_STATE_BLOCKED_MASK) >> 325 SCHED_STATE_BLOCKED_SHIFT; 326 } 327 328 static inline void incr_context_blocked(struct intel_context *ce) 329 { 330 lockdep_assert_held(&ce->guc_state.lock); 331 332 ce->guc_state.sched_state += SCHED_STATE_BLOCKED; 333 334 GEM_BUG_ON(!context_blocked(ce)); /* Overflow check */ 335 } 336 337 static inline void decr_context_blocked(struct intel_context *ce) 338 { 339 lockdep_assert_held(&ce->guc_state.lock); 340 341 GEM_BUG_ON(!context_blocked(ce)); /* Underflow check */ 342 343 ce->guc_state.sched_state -= SCHED_STATE_BLOCKED; 344 } 345 346 static inline bool context_has_committed_requests(struct intel_context *ce) 347 { 348 return !!ce->guc_state.number_committed_requests; 349 } 350 351 static inline void incr_context_committed_requests(struct intel_context *ce) 352 { 353 lockdep_assert_held(&ce->guc_state.lock); 354 ++ce->guc_state.number_committed_requests; 355 GEM_BUG_ON(ce->guc_state.number_committed_requests < 0); 356 } 357 358 static inline void decr_context_committed_requests(struct intel_context *ce) 359 { 360 lockdep_assert_held(&ce->guc_state.lock); 361 --ce->guc_state.number_committed_requests; 362 GEM_BUG_ON(ce->guc_state.number_committed_requests < 0); 363 } 364 365 static struct intel_context * 366 request_to_scheduling_context(struct i915_request *rq) 367 { 368 return intel_context_to_parent(rq->context); 369 } 370 371 static inline bool context_guc_id_invalid(struct intel_context *ce) 372 { 373 return ce->guc_id.id == GUC_INVALID_CONTEXT_ID; 374 } 375 376 static inline void set_context_guc_id_invalid(struct intel_context *ce) 377 { 378 ce->guc_id.id = GUC_INVALID_CONTEXT_ID; 379 } 380 381 static inline struct intel_guc *ce_to_guc(struct intel_context *ce) 382 
{
	return &ce->engine->gt->uc.guc;
}

static inline struct i915_priolist *to_priolist(struct rb_node *rb)
{
	return rb_entry(rb, struct i915_priolist, node);
}

/*
 * When using multi-lrc submission a scratch memory area is reserved in the
 * parent's context state for the process descriptor, work queue, and handshake
 * between the parent + children contexts to insert safe preemption points
 * between each of the BBs. Currently the scratch area is sized to a page.
 *
 * The layout of this scratch area is below:
 * 0					guc_process_desc
 * + sizeof(struct guc_process_desc)	child go
 * + CACHELINE_BYTES			child join[0]
 * ...
 * + CACHELINE_BYTES			child join[n - 1]
 * ...					unused
 * PARENT_SCRATCH_SIZE / 2		work queue start
 * ...					work queue
 * PARENT_SCRATCH_SIZE - 1		work queue end
 */
#define WQ_SIZE			(PARENT_SCRATCH_SIZE / 2)
#define WQ_OFFSET		(PARENT_SCRATCH_SIZE - WQ_SIZE)

struct sync_semaphore {
	u32 semaphore;
	u8 unused[CACHELINE_BYTES - sizeof(u32)];
};

struct parent_scratch {
	union guc_descs {
		struct guc_sched_wq_desc wq_desc;
		struct guc_process_desc_v69 pdesc;
	} descs;

	struct sync_semaphore go;
	struct sync_semaphore join[MAX_ENGINE_INSTANCE + 1];

	u8 unused[WQ_OFFSET - sizeof(union guc_descs) -
		sizeof(struct sync_semaphore) * (MAX_ENGINE_INSTANCE + 2)];

	u32 wq[WQ_SIZE / sizeof(u32)];
};

static u32 __get_parent_scratch_offset(struct intel_context *ce)
{
	GEM_BUG_ON(!ce->parallel.guc.parent_page);

	return ce->parallel.guc.parent_page * PAGE_SIZE;
}

static u32 __get_wq_offset(struct intel_context *ce)
{
	BUILD_BUG_ON(offsetof(struct parent_scratch, wq) != WQ_OFFSET);

	return __get_parent_scratch_offset(ce) + WQ_OFFSET;
}

static struct parent_scratch *
__get_parent_scratch(struct intel_context *ce)
{
	BUILD_BUG_ON(sizeof(struct parent_scratch) != PARENT_SCRATCH_SIZE);
	BUILD_BUG_ON(sizeof(struct sync_semaphore) != CACHELINE_BYTES);

	/*
	 * Need to subtract LRC_STATE_OFFSET here as the
	 * parallel.guc.parent_page is the offset into ce->state while
	 * ce->lrc_reg_state is ce->state + LRC_STATE_OFFSET.
	 */
	return (struct parent_scratch *)
		(ce->lrc_reg_state +
		 ((__get_parent_scratch_offset(ce) -
		   LRC_STATE_OFFSET) / sizeof(u32)));
}

static struct guc_process_desc_v69 *
__get_process_desc_v69(struct intel_context *ce)
{
	struct parent_scratch *ps = __get_parent_scratch(ce);

	return &ps->descs.pdesc;
}

static struct guc_sched_wq_desc *
__get_wq_desc_v70(struct intel_context *ce)
{
	struct parent_scratch *ps = __get_parent_scratch(ce);

	return &ps->descs.wq_desc;
}

static u32 *get_wq_pointer(struct intel_context *ce, u32 wqi_size)
{
	/*
	 * Check for space in the work queue. A copy of the head pointer is
	 * cached in the intel_context structure in order to reduce the number
	 * of accesses to shared GPU memory, which may be across a PCIe bus.
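	 *
	 * As a note added for clarity (not from the GuC interface docs): the
	 * AVAILABLE_SPACE macro below uses CIRC_SPACE() from <linux/circ_buf.h>
	 * as CIRC_SPACE(wqi_tail, wqi_head, WQ_SIZE), i.e. the number of bytes
	 * that can be written at the cached tail before running into the head.
	 * One byte is always kept free so a full queue can be told apart from
	 * an empty one, e.g. with wqi_head == wqi_tail it returns WQ_SIZE - 1
	 * rather than WQ_SIZE.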
484 */ 485 #define AVAILABLE_SPACE \ 486 CIRC_SPACE(ce->parallel.guc.wqi_tail, ce->parallel.guc.wqi_head, WQ_SIZE) 487 if (wqi_size > AVAILABLE_SPACE) { 488 ce->parallel.guc.wqi_head = READ_ONCE(*ce->parallel.guc.wq_head); 489 490 if (wqi_size > AVAILABLE_SPACE) 491 return NULL; 492 } 493 #undef AVAILABLE_SPACE 494 495 return &__get_parent_scratch(ce)->wq[ce->parallel.guc.wqi_tail / sizeof(u32)]; 496 } 497 498 static inline struct intel_context *__get_context(struct intel_guc *guc, u32 id) 499 { 500 struct intel_context *ce = xa_load(&guc->context_lookup, id); 501 502 GEM_BUG_ON(id >= GUC_MAX_CONTEXT_ID); 503 504 return ce; 505 } 506 507 static struct guc_lrc_desc_v69 *__get_lrc_desc_v69(struct intel_guc *guc, u32 index) 508 { 509 struct guc_lrc_desc_v69 *base = guc->lrc_desc_pool_vaddr_v69; 510 511 if (!base) 512 return NULL; 513 514 GEM_BUG_ON(index >= GUC_MAX_CONTEXT_ID); 515 516 return &base[index]; 517 } 518 519 static int guc_lrc_desc_pool_create_v69(struct intel_guc *guc) 520 { 521 u32 size; 522 int ret; 523 524 size = PAGE_ALIGN(sizeof(struct guc_lrc_desc_v69) * 525 GUC_MAX_CONTEXT_ID); 526 ret = intel_guc_allocate_and_map_vma(guc, size, &guc->lrc_desc_pool_v69, 527 (void **)&guc->lrc_desc_pool_vaddr_v69); 528 if (ret) 529 return ret; 530 531 return 0; 532 } 533 534 static void guc_lrc_desc_pool_destroy_v69(struct intel_guc *guc) 535 { 536 if (!guc->lrc_desc_pool_vaddr_v69) 537 return; 538 539 guc->lrc_desc_pool_vaddr_v69 = NULL; 540 i915_vma_unpin_and_release(&guc->lrc_desc_pool_v69, I915_VMA_RELEASE_MAP); 541 } 542 543 static inline bool guc_submission_initialized(struct intel_guc *guc) 544 { 545 return guc->submission_initialized; 546 } 547 548 static inline void _reset_lrc_desc_v69(struct intel_guc *guc, u32 id) 549 { 550 struct guc_lrc_desc_v69 *desc = __get_lrc_desc_v69(guc, id); 551 552 if (desc) 553 memset(desc, 0, sizeof(*desc)); 554 } 555 556 static inline bool ctx_id_mapped(struct intel_guc *guc, u32 id) 557 { 558 return __get_context(guc, id); 559 } 560 561 static inline void set_ctx_id_mapping(struct intel_guc *guc, u32 id, 562 struct intel_context *ce) 563 { 564 unsigned long flags; 565 566 /* 567 * xarray API doesn't have xa_save_irqsave wrapper, so calling the 568 * lower level functions directly. 569 */ 570 xa_lock_irqsave(&guc->context_lookup, flags); 571 __xa_store(&guc->context_lookup, id, ce, GFP_ATOMIC); 572 xa_unlock_irqrestore(&guc->context_lookup, flags); 573 } 574 575 static inline void clr_ctx_id_mapping(struct intel_guc *guc, u32 id) 576 { 577 unsigned long flags; 578 579 if (unlikely(!guc_submission_initialized(guc))) 580 return; 581 582 _reset_lrc_desc_v69(guc, id); 583 584 /* 585 * xarray API doesn't have xa_erase_irqsave wrapper, so calling 586 * the lower level functions directly. 587 */ 588 xa_lock_irqsave(&guc->context_lookup, flags); 589 __xa_erase(&guc->context_lookup, id); 590 xa_unlock_irqrestore(&guc->context_lookup, flags); 591 } 592 593 static void decr_outstanding_submission_g2h(struct intel_guc *guc) 594 { 595 if (atomic_dec_and_test(&guc->outstanding_submission_g2h)) 596 wake_up_all(&guc->ct.wq); 597 } 598 599 static int guc_submission_send_busy_loop(struct intel_guc *guc, 600 const u32 *action, 601 u32 len, 602 u32 g2h_len_dw, 603 bool loop) 604 { 605 /* 606 * We always loop when a send requires a reply (i.e. g2h_len_dw > 0), 607 * so we don't handle the case where we don't get a reply because we 608 * aborted the send due to the channel being busy. 
609 */ 610 GEM_BUG_ON(g2h_len_dw && !loop); 611 612 if (g2h_len_dw) 613 atomic_inc(&guc->outstanding_submission_g2h); 614 615 return intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop); 616 } 617 618 int intel_guc_wait_for_pending_msg(struct intel_guc *guc, 619 atomic_t *wait_var, 620 bool interruptible, 621 long timeout) 622 { 623 const int state = interruptible ? 624 TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; 625 DEFINE_WAIT(wait); 626 627 might_sleep(); 628 GEM_BUG_ON(timeout < 0); 629 630 if (!atomic_read(wait_var)) 631 return 0; 632 633 if (!timeout) 634 return -ETIME; 635 636 for (;;) { 637 prepare_to_wait(&guc->ct.wq, &wait, state); 638 639 if (!atomic_read(wait_var)) 640 break; 641 642 if (signal_pending_state(state, current)) { 643 timeout = -EINTR; 644 break; 645 } 646 647 if (!timeout) { 648 timeout = -ETIME; 649 break; 650 } 651 652 timeout = io_schedule_timeout(timeout); 653 } 654 finish_wait(&guc->ct.wq, &wait); 655 656 return (timeout < 0) ? timeout : 0; 657 } 658 659 int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout) 660 { 661 if (!intel_uc_uses_guc_submission(&guc_to_gt(guc)->uc)) 662 return 0; 663 664 return intel_guc_wait_for_pending_msg(guc, 665 &guc->outstanding_submission_g2h, 666 true, timeout); 667 } 668 669 static int guc_context_policy_init_v70(struct intel_context *ce, bool loop); 670 static int try_context_registration(struct intel_context *ce, bool loop); 671 672 static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq) 673 { 674 int err = 0; 675 struct intel_context *ce = request_to_scheduling_context(rq); 676 u32 action[3]; 677 int len = 0; 678 u32 g2h_len_dw = 0; 679 bool enabled; 680 681 lockdep_assert_held(&rq->engine->sched_engine->lock); 682 683 /* 684 * Corner case where requests were sitting in the priority list or a 685 * request resubmitted after the context was banned. 686 */ 687 if (unlikely(!intel_context_is_schedulable(ce))) { 688 i915_request_put(i915_request_mark_eio(rq)); 689 intel_engine_signal_breadcrumbs(ce->engine); 690 return 0; 691 } 692 693 GEM_BUG_ON(!atomic_read(&ce->guc_id.ref)); 694 GEM_BUG_ON(context_guc_id_invalid(ce)); 695 696 if (context_policy_required(ce)) { 697 err = guc_context_policy_init_v70(ce, false); 698 if (err) 699 return err; 700 } 701 702 spin_lock(&ce->guc_state.lock); 703 704 /* 705 * The request / context will be run on the hardware when scheduling 706 * gets enabled in the unblock. For multi-lrc we still submit the 707 * context to move the LRC tails. 708 */ 709 if (unlikely(context_blocked(ce) && !intel_context_is_parent(ce))) 710 goto out; 711 712 enabled = context_enabled(ce) || context_blocked(ce); 713 714 if (!enabled) { 715 action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET; 716 action[len++] = ce->guc_id.id; 717 action[len++] = GUC_CONTEXT_ENABLE; 718 set_context_pending_enable(ce); 719 intel_context_get(ce); 720 g2h_len_dw = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET; 721 } else { 722 action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT; 723 action[len++] = ce->guc_id.id; 724 } 725 726 err = intel_guc_send_nb(guc, action, len, g2h_len_dw); 727 if (!enabled && !err) { 728 trace_intel_context_sched_enable(ce); 729 atomic_inc(&guc->outstanding_submission_g2h); 730 set_context_enabled(ce); 731 732 /* 733 * Without multi-lrc KMD does the submission step (moving the 734 * lrc tail) so enabling scheduling is sufficient to submit the 735 * context. 
This isn't the case in multi-lrc submission as the 736 * GuC needs to move the tails, hence the need for another H2G 737 * to submit a multi-lrc context after enabling scheduling. 738 */ 739 if (intel_context_is_parent(ce)) { 740 action[0] = INTEL_GUC_ACTION_SCHED_CONTEXT; 741 err = intel_guc_send_nb(guc, action, len - 1, 0); 742 } 743 } else if (!enabled) { 744 clr_context_pending_enable(ce); 745 intel_context_put(ce); 746 } 747 if (likely(!err)) 748 trace_i915_request_guc_submit(rq); 749 750 out: 751 spin_unlock(&ce->guc_state.lock); 752 return err; 753 } 754 755 static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) 756 { 757 int ret = __guc_add_request(guc, rq); 758 759 if (unlikely(ret == -EBUSY)) { 760 guc->stalled_request = rq; 761 guc->submission_stall_reason = STALL_ADD_REQUEST; 762 } 763 764 return ret; 765 } 766 767 static inline void guc_set_lrc_tail(struct i915_request *rq) 768 { 769 rq->context->lrc_reg_state[CTX_RING_TAIL] = 770 intel_ring_set_tail(rq->ring, rq->tail); 771 } 772 773 static inline int rq_prio(const struct i915_request *rq) 774 { 775 return rq->sched.attr.priority; 776 } 777 778 static bool is_multi_lrc_rq(struct i915_request *rq) 779 { 780 return intel_context_is_parallel(rq->context); 781 } 782 783 static bool can_merge_rq(struct i915_request *rq, 784 struct i915_request *last) 785 { 786 return request_to_scheduling_context(rq) == 787 request_to_scheduling_context(last); 788 } 789 790 static u32 wq_space_until_wrap(struct intel_context *ce) 791 { 792 return (WQ_SIZE - ce->parallel.guc.wqi_tail); 793 } 794 795 static void write_wqi(struct intel_context *ce, u32 wqi_size) 796 { 797 BUILD_BUG_ON(!is_power_of_2(WQ_SIZE)); 798 799 /* 800 * Ensure WQI are visible before updating tail 801 */ 802 intel_guc_write_barrier(ce_to_guc(ce)); 803 804 ce->parallel.guc.wqi_tail = (ce->parallel.guc.wqi_tail + wqi_size) & 805 (WQ_SIZE - 1); 806 WRITE_ONCE(*ce->parallel.guc.wq_tail, ce->parallel.guc.wqi_tail); 807 } 808 809 static int guc_wq_noop_append(struct intel_context *ce) 810 { 811 u32 *wqi = get_wq_pointer(ce, wq_space_until_wrap(ce)); 812 u32 len_dw = wq_space_until_wrap(ce) / sizeof(u32) - 1; 813 814 if (!wqi) 815 return -EBUSY; 816 817 GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw)); 818 819 *wqi = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) | 820 FIELD_PREP(WQ_LEN_MASK, len_dw); 821 ce->parallel.guc.wqi_tail = 0; 822 823 return 0; 824 } 825 826 static int __guc_wq_item_append(struct i915_request *rq) 827 { 828 struct intel_context *ce = request_to_scheduling_context(rq); 829 struct intel_context *child; 830 unsigned int wqi_size = (ce->parallel.number_children + 4) * 831 sizeof(u32); 832 u32 *wqi; 833 u32 len_dw = (wqi_size / sizeof(u32)) - 1; 834 int ret; 835 836 /* Ensure context is in correct state updating work queue */ 837 GEM_BUG_ON(!atomic_read(&ce->guc_id.ref)); 838 GEM_BUG_ON(context_guc_id_invalid(ce)); 839 GEM_BUG_ON(context_wait_for_deregister_to_register(ce)); 840 GEM_BUG_ON(!ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id)); 841 842 /* Insert NOOP if this work queue item will wrap the tail pointer. 
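	 * As a sketch of what guc_wq_noop_append() above emits (this just
	 * restates that code for clarity, it is not a separate interface):
	 * the NOOP item is a single header dword,
	 *
	 *	FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
	 *	FIELD_PREP(WQ_LEN_MASK, wq_space_until_wrap(ce) / sizeof(u32) - 1)
	 *
	 * whose length field covers the rest of the buffer, so the GuC skips
	 * the unused space and the next real item is written from offset 0
	 * (wqi_tail is reset to 0).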
*/ 843 if (wqi_size > wq_space_until_wrap(ce)) { 844 ret = guc_wq_noop_append(ce); 845 if (ret) 846 return ret; 847 } 848 849 wqi = get_wq_pointer(ce, wqi_size); 850 if (!wqi) 851 return -EBUSY; 852 853 GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw)); 854 855 *wqi++ = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) | 856 FIELD_PREP(WQ_LEN_MASK, len_dw); 857 *wqi++ = ce->lrc.lrca; 858 *wqi++ = FIELD_PREP(WQ_GUC_ID_MASK, ce->guc_id.id) | 859 FIELD_PREP(WQ_RING_TAIL_MASK, ce->ring->tail / sizeof(u64)); 860 *wqi++ = 0; /* fence_id */ 861 for_each_child(ce, child) 862 *wqi++ = child->ring->tail / sizeof(u64); 863 864 write_wqi(ce, wqi_size); 865 866 return 0; 867 } 868 869 static int guc_wq_item_append(struct intel_guc *guc, 870 struct i915_request *rq) 871 { 872 struct intel_context *ce = request_to_scheduling_context(rq); 873 int ret; 874 875 if (unlikely(!intel_context_is_schedulable(ce))) 876 return 0; 877 878 ret = __guc_wq_item_append(rq); 879 if (unlikely(ret == -EBUSY)) { 880 guc->stalled_request = rq; 881 guc->submission_stall_reason = STALL_MOVE_LRC_TAIL; 882 } 883 884 return ret; 885 } 886 887 static bool multi_lrc_submit(struct i915_request *rq) 888 { 889 struct intel_context *ce = request_to_scheduling_context(rq); 890 891 intel_ring_set_tail(rq->ring, rq->tail); 892 893 /* 894 * We expect the front end (execbuf IOCTL) to set this flag on the last 895 * request generated from a multi-BB submission. This indicates to the 896 * backend (GuC interface) that we should submit this context thus 897 * submitting all the requests generated in parallel. 898 */ 899 return test_bit(I915_FENCE_FLAG_SUBMIT_PARALLEL, &rq->fence.flags) || 900 !intel_context_is_schedulable(ce); 901 } 902 903 static int guc_dequeue_one_context(struct intel_guc *guc) 904 { 905 struct i915_sched_engine * const sched_engine = guc->sched_engine; 906 struct i915_request *last = NULL; 907 bool submit = false; 908 struct rb_node *rb; 909 int ret; 910 911 lockdep_assert_held(&sched_engine->lock); 912 913 if (guc->stalled_request) { 914 submit = true; 915 last = guc->stalled_request; 916 917 switch (guc->submission_stall_reason) { 918 case STALL_REGISTER_CONTEXT: 919 goto register_context; 920 case STALL_MOVE_LRC_TAIL: 921 goto move_lrc_tail; 922 case STALL_ADD_REQUEST: 923 goto add_request; 924 default: 925 MISSING_CASE(guc->submission_stall_reason); 926 } 927 } 928 929 while ((rb = rb_first_cached(&sched_engine->queue))) { 930 struct i915_priolist *p = to_priolist(rb); 931 struct i915_request *rq, *rn; 932 933 priolist_for_each_request_consume(rq, rn, p) { 934 if (last && !can_merge_rq(rq, last)) 935 goto register_context; 936 937 list_del_init(&rq->sched.link); 938 939 __i915_request_submit(rq); 940 941 trace_i915_request_in(rq, 0); 942 last = rq; 943 944 if (is_multi_lrc_rq(rq)) { 945 /* 946 * We need to coalesce all multi-lrc requests in 947 * a relationship into a single H2G. We are 948 * guaranteed that all of these requests will be 949 * submitted sequentially. 
950 */ 951 if (multi_lrc_submit(rq)) { 952 submit = true; 953 goto register_context; 954 } 955 } else { 956 submit = true; 957 } 958 } 959 960 rb_erase_cached(&p->node, &sched_engine->queue); 961 i915_priolist_free(p); 962 } 963 964 register_context: 965 if (submit) { 966 struct intel_context *ce = request_to_scheduling_context(last); 967 968 if (unlikely(!ctx_id_mapped(guc, ce->guc_id.id) && 969 intel_context_is_schedulable(ce))) { 970 ret = try_context_registration(ce, false); 971 if (unlikely(ret == -EPIPE)) { 972 goto deadlk; 973 } else if (ret == -EBUSY) { 974 guc->stalled_request = last; 975 guc->submission_stall_reason = 976 STALL_REGISTER_CONTEXT; 977 goto schedule_tasklet; 978 } else if (ret != 0) { 979 GEM_WARN_ON(ret); /* Unexpected */ 980 goto deadlk; 981 } 982 } 983 984 move_lrc_tail: 985 if (is_multi_lrc_rq(last)) { 986 ret = guc_wq_item_append(guc, last); 987 if (ret == -EBUSY) { 988 goto schedule_tasklet; 989 } else if (ret != 0) { 990 GEM_WARN_ON(ret); /* Unexpected */ 991 goto deadlk; 992 } 993 } else { 994 guc_set_lrc_tail(last); 995 } 996 997 add_request: 998 ret = guc_add_request(guc, last); 999 if (unlikely(ret == -EPIPE)) { 1000 goto deadlk; 1001 } else if (ret == -EBUSY) { 1002 goto schedule_tasklet; 1003 } else if (ret != 0) { 1004 GEM_WARN_ON(ret); /* Unexpected */ 1005 goto deadlk; 1006 } 1007 } 1008 1009 guc->stalled_request = NULL; 1010 guc->submission_stall_reason = STALL_NONE; 1011 return submit; 1012 1013 deadlk: 1014 sched_engine->tasklet.callback = NULL; 1015 tasklet_disable_nosync(&sched_engine->tasklet); 1016 return false; 1017 1018 schedule_tasklet: 1019 tasklet_schedule(&sched_engine->tasklet); 1020 return false; 1021 } 1022 1023 static void guc_submission_tasklet(struct tasklet_struct *t) 1024 { 1025 struct i915_sched_engine *sched_engine = 1026 from_tasklet(sched_engine, t, tasklet); 1027 unsigned long flags; 1028 bool loop; 1029 1030 spin_lock_irqsave(&sched_engine->lock, flags); 1031 1032 do { 1033 loop = guc_dequeue_one_context(sched_engine->private_data); 1034 } while (loop); 1035 1036 i915_sched_engine_reset_on_empty(sched_engine); 1037 1038 spin_unlock_irqrestore(&sched_engine->lock, flags); 1039 } 1040 1041 static void cs_irq_handler(struct intel_engine_cs *engine, u16 iir) 1042 { 1043 if (iir & GT_RENDER_USER_INTERRUPT) 1044 intel_engine_signal_breadcrumbs(engine); 1045 } 1046 1047 static void __guc_context_destroy(struct intel_context *ce); 1048 static void release_guc_id(struct intel_guc *guc, struct intel_context *ce); 1049 static void guc_signal_context_fence(struct intel_context *ce); 1050 static void guc_cancel_context_requests(struct intel_context *ce); 1051 static void guc_blocked_fence_complete(struct intel_context *ce); 1052 1053 static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc) 1054 { 1055 struct intel_context *ce; 1056 unsigned long index, flags; 1057 bool pending_disable, pending_enable, deregister, destroyed, banned; 1058 1059 xa_lock_irqsave(&guc->context_lookup, flags); 1060 xa_for_each(&guc->context_lookup, index, ce) { 1061 /* 1062 * Corner case where the ref count on the object is zero but and 1063 * deregister G2H was lost. In this case we don't touch the ref 1064 * count and finish the destroy of the context. 
1065 */ 1066 bool do_put = kref_get_unless_zero(&ce->ref); 1067 1068 xa_unlock(&guc->context_lookup); 1069 1070 spin_lock(&ce->guc_state.lock); 1071 1072 /* 1073 * Once we are at this point submission_disabled() is guaranteed 1074 * to be visible to all callers who set the below flags (see above 1075 * flush and flushes in reset_prepare). If submission_disabled() 1076 * is set, the caller shouldn't set these flags. 1077 */ 1078 1079 destroyed = context_destroyed(ce); 1080 pending_enable = context_pending_enable(ce); 1081 pending_disable = context_pending_disable(ce); 1082 deregister = context_wait_for_deregister_to_register(ce); 1083 banned = context_banned(ce); 1084 init_sched_state(ce); 1085 1086 spin_unlock(&ce->guc_state.lock); 1087 1088 if (pending_enable || destroyed || deregister) { 1089 decr_outstanding_submission_g2h(guc); 1090 if (deregister) 1091 guc_signal_context_fence(ce); 1092 if (destroyed) { 1093 intel_gt_pm_put_async(guc_to_gt(guc)); 1094 release_guc_id(guc, ce); 1095 __guc_context_destroy(ce); 1096 } 1097 if (pending_enable || deregister) 1098 intel_context_put(ce); 1099 } 1100 1101 /* Not mutualy exclusive with above if statement. */ 1102 if (pending_disable) { 1103 guc_signal_context_fence(ce); 1104 if (banned) { 1105 guc_cancel_context_requests(ce); 1106 intel_engine_signal_breadcrumbs(ce->engine); 1107 } 1108 intel_context_sched_disable_unpin(ce); 1109 decr_outstanding_submission_g2h(guc); 1110 1111 spin_lock(&ce->guc_state.lock); 1112 guc_blocked_fence_complete(ce); 1113 spin_unlock(&ce->guc_state.lock); 1114 1115 intel_context_put(ce); 1116 } 1117 1118 if (do_put) 1119 intel_context_put(ce); 1120 xa_lock(&guc->context_lookup); 1121 } 1122 xa_unlock_irqrestore(&guc->context_lookup, flags); 1123 } 1124 1125 /* 1126 * GuC stores busyness stats for each engine at context in/out boundaries. A 1127 * context 'in' logs execution start time, 'out' adds in -> out delta to total. 1128 * i915/kmd accesses 'start', 'total' and 'context id' from memory shared with 1129 * GuC. 1130 * 1131 * __i915_pmu_event_read samples engine busyness. When sampling, if context id 1132 * is valid (!= ~0) and start is non-zero, the engine is considered to be 1133 * active. For an active engine total busyness = total + (now - start), where 1134 * 'now' is the time at which the busyness is sampled. For inactive engine, 1135 * total busyness = total. 1136 * 1137 * All times are captured from GUCPMTIMESTAMP reg and are in gt clock domain. 1138 * 1139 * The start and total values provided by GuC are 32 bits and wrap around in a 1140 * few minutes. Since perf pmu provides busyness as 64 bit monotonically 1141 * increasing ns values, there is a need for this implementation to account for 1142 * overflows and extend the GuC provided values to 64 bits before returning 1143 * busyness to the user. In order to do that, a worker runs periodically at 1144 * frequency = 1/8th the time it takes for the timestamp to wrap (i.e. once in 1145 * 27 seconds for a gt clock frequency of 19.2 MHz). 
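 *
 * As a worked example of the arithmetic above (an explanatory note, not a
 * requirement of the interface): POLL_TIME_CLKS below is U32_MAX >> 3, i.e.
 * roughly 5.4e8 gt clocks. At a 19.2 MHz gt clock the 32-bit timestamp wraps
 * after about 2^32 / 19.2e6 ~= 224 seconds, so the 1/8th poll period comes to
 * just under 28 seconds, which is where the "27 seconds" figure above comes
 * from. intel_guc_submission_init() turns this into the worker period as
 * (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ jiffies.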
1146 */ 1147 1148 #define WRAP_TIME_CLKS U32_MAX 1149 #define POLL_TIME_CLKS (WRAP_TIME_CLKS >> 3) 1150 1151 static void 1152 __extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start) 1153 { 1154 u32 gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp); 1155 u32 gt_stamp_last = lower_32_bits(guc->timestamp.gt_stamp); 1156 1157 if (new_start == lower_32_bits(*prev_start)) 1158 return; 1159 1160 /* 1161 * When gt is unparked, we update the gt timestamp and start the ping 1162 * worker that updates the gt_stamp every POLL_TIME_CLKS. As long as gt 1163 * is unparked, all switched in contexts will have a start time that is 1164 * within +/- POLL_TIME_CLKS of the most recent gt_stamp. 1165 * 1166 * If neither gt_stamp nor new_start has rolled over, then the 1167 * gt_stamp_hi does not need to be adjusted, however if one of them has 1168 * rolled over, we need to adjust gt_stamp_hi accordingly. 1169 * 1170 * The below conditions address the cases of new_start rollover and 1171 * gt_stamp_last rollover respectively. 1172 */ 1173 if (new_start < gt_stamp_last && 1174 (new_start - gt_stamp_last) <= POLL_TIME_CLKS) 1175 gt_stamp_hi++; 1176 1177 if (new_start > gt_stamp_last && 1178 (gt_stamp_last - new_start) <= POLL_TIME_CLKS && gt_stamp_hi) 1179 gt_stamp_hi--; 1180 1181 *prev_start = ((u64)gt_stamp_hi << 32) | new_start; 1182 } 1183 1184 #define record_read(map_, field_) \ 1185 iosys_map_rd_field(map_, 0, struct guc_engine_usage_record, field_) 1186 1187 /* 1188 * GuC updates shared memory and KMD reads it. Since this is not synchronized, 1189 * we run into a race where the value read is inconsistent. Sometimes the 1190 * inconsistency is in reading the upper MSB bytes of the last_in value when 1191 * this race occurs. 2 types of cases are seen - upper 8 bits are zero and upper 1192 * 24 bits are zero. Since these are non-zero values, it is non-trivial to 1193 * determine validity of these values. Instead we read the values multiple times 1194 * until they are consistent. In test runs, 3 attempts results in consistent 1195 * values. The upper bound is set to 6 attempts and may need to be tuned as per 1196 * any new occurences. 
1197 */ 1198 static void __get_engine_usage_record(struct intel_engine_cs *engine, 1199 u32 *last_in, u32 *id, u32 *total) 1200 { 1201 STUB(); 1202 #ifdef notyet 1203 struct iosys_map rec_map = intel_guc_engine_usage_record_map(engine); 1204 int i = 0; 1205 1206 do { 1207 *last_in = record_read(&rec_map, last_switch_in_stamp); 1208 *id = record_read(&rec_map, current_context_index); 1209 *total = record_read(&rec_map, total_runtime); 1210 1211 if (record_read(&rec_map, last_switch_in_stamp) == *last_in && 1212 record_read(&rec_map, current_context_index) == *id && 1213 record_read(&rec_map, total_runtime) == *total) 1214 break; 1215 } while (++i < 6); 1216 #endif 1217 } 1218 1219 static void guc_update_engine_gt_clks(struct intel_engine_cs *engine) 1220 { 1221 struct intel_engine_guc_stats *stats = &engine->stats.guc; 1222 struct intel_guc *guc = &engine->gt->uc.guc; 1223 u32 last_switch, ctx_id, total; 1224 1225 lockdep_assert_held(&guc->timestamp.lock); 1226 1227 __get_engine_usage_record(engine, &last_switch, &ctx_id, &total); 1228 1229 stats->running = ctx_id != ~0U && last_switch; 1230 if (stats->running) 1231 __extend_last_switch(guc, &stats->start_gt_clk, last_switch); 1232 1233 /* 1234 * Instead of adjusting the total for overflow, just add the 1235 * difference from previous sample stats->total_gt_clks 1236 */ 1237 if (total && total != ~0U) { 1238 stats->total_gt_clks += (u32)(total - stats->prev_total); 1239 stats->prev_total = total; 1240 } 1241 } 1242 1243 static u32 gpm_timestamp_shift(struct intel_gt *gt) 1244 { 1245 intel_wakeref_t wakeref; 1246 u32 reg, shift; 1247 1248 with_intel_runtime_pm(gt->uncore->rpm, wakeref) 1249 reg = intel_uncore_read(gt->uncore, RPM_CONFIG0); 1250 1251 shift = (reg & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >> 1252 GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT; 1253 1254 return 3 - shift; 1255 } 1256 1257 static void guc_update_pm_timestamp(struct intel_guc *guc, ktime_t *now) 1258 { 1259 struct intel_gt *gt = guc_to_gt(guc); 1260 u32 gt_stamp_lo, gt_stamp_hi; 1261 u64 gpm_ts; 1262 1263 lockdep_assert_held(&guc->timestamp.lock); 1264 1265 gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp); 1266 gpm_ts = intel_uncore_read64_2x32(gt->uncore, MISC_STATUS0, 1267 MISC_STATUS1) >> guc->timestamp.shift; 1268 gt_stamp_lo = lower_32_bits(gpm_ts); 1269 *now = ktime_get(); 1270 1271 if (gt_stamp_lo < lower_32_bits(guc->timestamp.gt_stamp)) 1272 gt_stamp_hi++; 1273 1274 guc->timestamp.gt_stamp = ((u64)gt_stamp_hi << 32) | gt_stamp_lo; 1275 } 1276 1277 /* 1278 * Unlike the execlist mode of submission total and active times are in terms of 1279 * gt clocks. The *now parameter is retained to return the cpu time at which the 1280 * busyness was sampled. 1281 */ 1282 static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now) 1283 { 1284 struct intel_engine_guc_stats stats_saved, *stats = &engine->stats.guc; 1285 struct i915_gpu_error *gpu_error = &engine->i915->gpu_error; 1286 struct intel_gt *gt = engine->gt; 1287 struct intel_guc *guc = >->uc.guc; 1288 u64 total, gt_stamp_saved; 1289 unsigned long flags; 1290 u32 reset_count; 1291 bool in_reset; 1292 1293 spin_lock_irqsave(&guc->timestamp.lock, flags); 1294 1295 /* 1296 * If a reset happened, we risk reading partially updated engine 1297 * busyness from GuC, so we just use the driver stored copy of busyness. 1298 * Synchronize with gt reset using reset_count and the 1299 * I915_RESET_BACKOFF flag. 
Note that reset flow updates the reset_count 1300 * after I915_RESET_BACKOFF flag, so ensure that the reset_count is 1301 * usable by checking the flag afterwards. 1302 */ 1303 reset_count = i915_reset_count(gpu_error); 1304 in_reset = test_bit(I915_RESET_BACKOFF, >->reset.flags); 1305 1306 *now = ktime_get(); 1307 1308 /* 1309 * The active busyness depends on start_gt_clk and gt_stamp. 1310 * gt_stamp is updated by i915 only when gt is awake and the 1311 * start_gt_clk is derived from GuC state. To get a consistent 1312 * view of activity, we query the GuC state only if gt is awake. 1313 */ 1314 if (!in_reset && intel_gt_pm_get_if_awake(gt)) { 1315 stats_saved = *stats; 1316 gt_stamp_saved = guc->timestamp.gt_stamp; 1317 /* 1318 * Update gt_clks, then gt timestamp to simplify the 'gt_stamp - 1319 * start_gt_clk' calculation below for active engines. 1320 */ 1321 guc_update_engine_gt_clks(engine); 1322 guc_update_pm_timestamp(guc, now); 1323 intel_gt_pm_put_async(gt); 1324 if (i915_reset_count(gpu_error) != reset_count) { 1325 *stats = stats_saved; 1326 guc->timestamp.gt_stamp = gt_stamp_saved; 1327 } 1328 } 1329 1330 total = intel_gt_clock_interval_to_ns(gt, stats->total_gt_clks); 1331 if (stats->running) { 1332 u64 clk = guc->timestamp.gt_stamp - stats->start_gt_clk; 1333 1334 total += intel_gt_clock_interval_to_ns(gt, clk); 1335 } 1336 1337 spin_unlock_irqrestore(&guc->timestamp.lock, flags); 1338 1339 return ns_to_ktime(total); 1340 } 1341 1342 static void __reset_guc_busyness_stats(struct intel_guc *guc) 1343 { 1344 struct intel_gt *gt = guc_to_gt(guc); 1345 struct intel_engine_cs *engine; 1346 enum intel_engine_id id; 1347 unsigned long flags; 1348 ktime_t unused; 1349 1350 cancel_delayed_work_sync(&guc->timestamp.work); 1351 1352 spin_lock_irqsave(&guc->timestamp.lock, flags); 1353 1354 guc_update_pm_timestamp(guc, &unused); 1355 for_each_engine(engine, gt, id) { 1356 guc_update_engine_gt_clks(engine); 1357 engine->stats.guc.prev_total = 0; 1358 } 1359 1360 spin_unlock_irqrestore(&guc->timestamp.lock, flags); 1361 } 1362 1363 static void __update_guc_busyness_stats(struct intel_guc *guc) 1364 { 1365 struct intel_gt *gt = guc_to_gt(guc); 1366 struct intel_engine_cs *engine; 1367 enum intel_engine_id id; 1368 unsigned long flags; 1369 ktime_t unused; 1370 1371 guc->timestamp.last_stat_jiffies = jiffies; 1372 1373 spin_lock_irqsave(&guc->timestamp.lock, flags); 1374 1375 guc_update_pm_timestamp(guc, &unused); 1376 for_each_engine(engine, gt, id) 1377 guc_update_engine_gt_clks(engine); 1378 1379 spin_unlock_irqrestore(&guc->timestamp.lock, flags); 1380 } 1381 1382 static void guc_timestamp_ping(struct work_struct *wrk) 1383 { 1384 struct intel_guc *guc = container_of(wrk, typeof(*guc), 1385 timestamp.work.work); 1386 struct intel_uc *uc = container_of(guc, typeof(*uc), guc); 1387 struct intel_gt *gt = guc_to_gt(guc); 1388 intel_wakeref_t wakeref; 1389 int srcu, ret; 1390 1391 /* 1392 * Synchronize with gt reset to make sure the worker does not 1393 * corrupt the engine/guc stats. 
1394 */ 1395 ret = intel_gt_reset_trylock(gt, &srcu); 1396 if (ret) 1397 return; 1398 1399 with_intel_runtime_pm(>->i915->runtime_pm, wakeref) 1400 __update_guc_busyness_stats(guc); 1401 1402 intel_gt_reset_unlock(gt, srcu); 1403 1404 mod_delayed_work(system_highpri_wq, &guc->timestamp.work, 1405 guc->timestamp.ping_delay); 1406 } 1407 1408 static int guc_action_enable_usage_stats(struct intel_guc *guc) 1409 { 1410 u32 offset = intel_guc_engine_usage_offset(guc); 1411 u32 action[] = { 1412 INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF, 1413 offset, 1414 0, 1415 }; 1416 1417 return intel_guc_send(guc, action, ARRAY_SIZE(action)); 1418 } 1419 1420 static void guc_init_engine_stats(struct intel_guc *guc) 1421 { 1422 struct intel_gt *gt = guc_to_gt(guc); 1423 intel_wakeref_t wakeref; 1424 1425 mod_delayed_work(system_highpri_wq, &guc->timestamp.work, 1426 guc->timestamp.ping_delay); 1427 1428 with_intel_runtime_pm(>->i915->runtime_pm, wakeref) { 1429 int ret = guc_action_enable_usage_stats(guc); 1430 1431 if (ret) 1432 drm_err(>->i915->drm, 1433 "Failed to enable usage stats: %d!\n", ret); 1434 } 1435 } 1436 1437 void intel_guc_busyness_park(struct intel_gt *gt) 1438 { 1439 struct intel_guc *guc = >->uc.guc; 1440 1441 if (!guc_submission_initialized(guc)) 1442 return; 1443 1444 /* 1445 * There is a race with suspend flow where the worker runs after suspend 1446 * and causes an unclaimed register access warning. Cancel the worker 1447 * synchronously here. 1448 */ 1449 cancel_delayed_work_sync(&guc->timestamp.work); 1450 1451 /* 1452 * Before parking, we should sample engine busyness stats if we need to. 1453 * We can skip it if we are less than half a ping from the last time we 1454 * sampled the busyness stats. 1455 */ 1456 if (guc->timestamp.last_stat_jiffies && 1457 !time_after(jiffies, guc->timestamp.last_stat_jiffies + 1458 (guc->timestamp.ping_delay / 2))) 1459 return; 1460 1461 __update_guc_busyness_stats(guc); 1462 } 1463 1464 void intel_guc_busyness_unpark(struct intel_gt *gt) 1465 { 1466 struct intel_guc *guc = >->uc.guc; 1467 unsigned long flags; 1468 ktime_t unused; 1469 1470 if (!guc_submission_initialized(guc)) 1471 return; 1472 1473 spin_lock_irqsave(&guc->timestamp.lock, flags); 1474 guc_update_pm_timestamp(guc, &unused); 1475 spin_unlock_irqrestore(&guc->timestamp.lock, flags); 1476 mod_delayed_work(system_highpri_wq, &guc->timestamp.work, 1477 guc->timestamp.ping_delay); 1478 } 1479 1480 static inline bool 1481 submission_disabled(struct intel_guc *guc) 1482 { 1483 struct i915_sched_engine * const sched_engine = guc->sched_engine; 1484 1485 return unlikely(!sched_engine || 1486 !__tasklet_is_enabled(&sched_engine->tasklet) || 1487 intel_gt_is_wedged(guc_to_gt(guc))); 1488 } 1489 1490 static void disable_submission(struct intel_guc *guc) 1491 { 1492 struct i915_sched_engine * const sched_engine = guc->sched_engine; 1493 1494 if (__tasklet_is_enabled(&sched_engine->tasklet)) { 1495 GEM_BUG_ON(!guc->ct.enabled); 1496 __tasklet_disable_sync_once(&sched_engine->tasklet); 1497 sched_engine->tasklet.callback = NULL; 1498 } 1499 } 1500 1501 static void enable_submission(struct intel_guc *guc) 1502 { 1503 struct i915_sched_engine * const sched_engine = guc->sched_engine; 1504 unsigned long flags; 1505 1506 spin_lock_irqsave(&guc->sched_engine->lock, flags); 1507 sched_engine->tasklet.callback = guc_submission_tasklet; 1508 wmb(); /* Make sure callback visible */ 1509 if (!__tasklet_is_enabled(&sched_engine->tasklet) && 1510 __tasklet_enable(&sched_engine->tasklet)) { 1511 
GEM_BUG_ON(!guc->ct.enabled); 1512 1513 /* And kick in case we missed a new request submission. */ 1514 tasklet_hi_schedule(&sched_engine->tasklet); 1515 } 1516 spin_unlock_irqrestore(&guc->sched_engine->lock, flags); 1517 } 1518 1519 static void guc_flush_submissions(struct intel_guc *guc) 1520 { 1521 struct i915_sched_engine * const sched_engine = guc->sched_engine; 1522 unsigned long flags; 1523 1524 spin_lock_irqsave(&sched_engine->lock, flags); 1525 spin_unlock_irqrestore(&sched_engine->lock, flags); 1526 } 1527 1528 static void guc_flush_destroyed_contexts(struct intel_guc *guc); 1529 1530 void intel_guc_submission_reset_prepare(struct intel_guc *guc) 1531 { 1532 if (unlikely(!guc_submission_initialized(guc))) { 1533 /* Reset called during driver load? GuC not yet initialised! */ 1534 return; 1535 } 1536 1537 intel_gt_park_heartbeats(guc_to_gt(guc)); 1538 disable_submission(guc); 1539 guc->interrupts.disable(guc); 1540 __reset_guc_busyness_stats(guc); 1541 1542 /* Flush IRQ handler */ 1543 spin_lock_irq(guc_to_gt(guc)->irq_lock); 1544 spin_unlock_irq(guc_to_gt(guc)->irq_lock); 1545 1546 guc_flush_submissions(guc); 1547 guc_flush_destroyed_contexts(guc); 1548 flush_work(&guc->ct.requests.worker); 1549 1550 scrub_guc_desc_for_outstanding_g2h(guc); 1551 } 1552 1553 static struct intel_engine_cs * 1554 guc_virtual_get_sibling(struct intel_engine_cs *ve, unsigned int sibling) 1555 { 1556 struct intel_engine_cs *engine; 1557 intel_engine_mask_t tmp, mask = ve->mask; 1558 unsigned int num_siblings = 0; 1559 1560 for_each_engine_masked(engine, ve->gt, mask, tmp) 1561 if (num_siblings++ == sibling) 1562 return engine; 1563 1564 return NULL; 1565 } 1566 1567 static inline struct intel_engine_cs * 1568 __context_to_physical_engine(struct intel_context *ce) 1569 { 1570 struct intel_engine_cs *engine = ce->engine; 1571 1572 if (intel_engine_is_virtual(engine)) 1573 engine = guc_virtual_get_sibling(engine, 0); 1574 1575 return engine; 1576 } 1577 1578 static void guc_reset_state(struct intel_context *ce, u32 head, bool scrub) 1579 { 1580 struct intel_engine_cs *engine = __context_to_physical_engine(ce); 1581 1582 if (!intel_context_is_schedulable(ce)) 1583 return; 1584 1585 GEM_BUG_ON(!intel_context_is_pinned(ce)); 1586 1587 /* 1588 * We want a simple context + ring to execute the breadcrumb update. 1589 * We cannot rely on the context being intact across the GPU hang, 1590 * so clear it and rebuild just what we need for the breadcrumb. 1591 * All pending requests for this context will be zapped, and any 1592 * future request will be after userspace has had the opportunity 1593 * to recreate its own state. 1594 */ 1595 if (scrub) 1596 lrc_init_regs(ce, engine, true); 1597 1598 /* Rerun the request; its payload has been neutered (if guilty). 
*/ 1599 lrc_update_regs(ce, engine, head); 1600 } 1601 1602 static void guc_engine_reset_prepare(struct intel_engine_cs *engine) 1603 { 1604 if (!IS_GRAPHICS_VER(engine->i915, 11, 12)) 1605 return; 1606 1607 intel_engine_stop_cs(engine); 1608 1609 /* 1610 * Wa_22011802037:gen11/gen12: In addition to stopping the cs, we need 1611 * to wait for any pending mi force wakeups 1612 */ 1613 intel_engine_wait_for_pending_mi_fw(engine); 1614 } 1615 1616 static void guc_reset_nop(struct intel_engine_cs *engine) 1617 { 1618 } 1619 1620 static void guc_rewind_nop(struct intel_engine_cs *engine, bool stalled) 1621 { 1622 } 1623 1624 static void 1625 __unwind_incomplete_requests(struct intel_context *ce) 1626 { 1627 struct i915_request *rq, *rn; 1628 struct list_head *pl; 1629 int prio = I915_PRIORITY_INVALID; 1630 struct i915_sched_engine * const sched_engine = 1631 ce->engine->sched_engine; 1632 unsigned long flags; 1633 1634 spin_lock_irqsave(&sched_engine->lock, flags); 1635 spin_lock(&ce->guc_state.lock); 1636 list_for_each_entry_safe_reverse(rq, rn, 1637 &ce->guc_state.requests, 1638 sched.link) { 1639 if (i915_request_completed(rq)) 1640 continue; 1641 1642 list_del_init(&rq->sched.link); 1643 __i915_request_unsubmit(rq); 1644 1645 /* Push the request back into the queue for later resubmission. */ 1646 GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID); 1647 if (rq_prio(rq) != prio) { 1648 prio = rq_prio(rq); 1649 pl = i915_sched_lookup_priolist(sched_engine, prio); 1650 } 1651 GEM_BUG_ON(i915_sched_engine_is_empty(sched_engine)); 1652 1653 list_add(&rq->sched.link, pl); 1654 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 1655 } 1656 spin_unlock(&ce->guc_state.lock); 1657 spin_unlock_irqrestore(&sched_engine->lock, flags); 1658 } 1659 1660 static void __guc_reset_context(struct intel_context *ce, intel_engine_mask_t stalled) 1661 { 1662 bool guilty; 1663 struct i915_request *rq; 1664 unsigned long flags; 1665 u32 head; 1666 int i, number_children = ce->parallel.number_children; 1667 struct intel_context *parent = ce; 1668 1669 GEM_BUG_ON(intel_context_is_child(ce)); 1670 1671 intel_context_get(ce); 1672 1673 /* 1674 * GuC will implicitly mark the context as non-schedulable when it sends 1675 * the reset notification. Make sure our state reflects this change. The 1676 * context will be marked enabled on resubmission. 
1677 */ 1678 spin_lock_irqsave(&ce->guc_state.lock, flags); 1679 clr_context_enabled(ce); 1680 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 1681 1682 /* 1683 * For each context in the relationship find the hanging request 1684 * resetting each context / request as needed 1685 */ 1686 for (i = 0; i < number_children + 1; ++i) { 1687 if (!intel_context_is_pinned(ce)) 1688 goto next_context; 1689 1690 guilty = false; 1691 rq = intel_context_get_active_request(ce); 1692 if (!rq) { 1693 head = ce->ring->tail; 1694 goto out_replay; 1695 } 1696 1697 if (i915_request_started(rq)) 1698 guilty = stalled & ce->engine->mask; 1699 1700 GEM_BUG_ON(i915_active_is_idle(&ce->active)); 1701 head = intel_ring_wrap(ce->ring, rq->head); 1702 1703 __i915_request_reset(rq, guilty); 1704 i915_request_put(rq); 1705 out_replay: 1706 guc_reset_state(ce, head, guilty); 1707 next_context: 1708 if (i != number_children) 1709 ce = list_next_entry(ce, parallel.child_link); 1710 } 1711 1712 __unwind_incomplete_requests(parent); 1713 intel_context_put(parent); 1714 } 1715 1716 void intel_guc_submission_reset(struct intel_guc *guc, intel_engine_mask_t stalled) 1717 { 1718 struct intel_context *ce; 1719 unsigned long index; 1720 unsigned long flags; 1721 1722 if (unlikely(!guc_submission_initialized(guc))) { 1723 /* Reset called during driver load? GuC not yet initialised! */ 1724 return; 1725 } 1726 1727 xa_lock_irqsave(&guc->context_lookup, flags); 1728 xa_for_each(&guc->context_lookup, index, ce) { 1729 if (!kref_get_unless_zero(&ce->ref)) 1730 continue; 1731 1732 xa_unlock(&guc->context_lookup); 1733 1734 if (intel_context_is_pinned(ce) && 1735 !intel_context_is_child(ce)) 1736 __guc_reset_context(ce, stalled); 1737 1738 intel_context_put(ce); 1739 1740 xa_lock(&guc->context_lookup); 1741 } 1742 xa_unlock_irqrestore(&guc->context_lookup, flags); 1743 1744 /* GuC is blown away, drop all references to contexts */ 1745 xa_destroy(&guc->context_lookup); 1746 } 1747 1748 static void guc_cancel_context_requests(struct intel_context *ce) 1749 { 1750 struct i915_sched_engine *sched_engine = ce_to_guc(ce)->sched_engine; 1751 struct i915_request *rq; 1752 unsigned long flags; 1753 1754 /* Mark all executing requests as skipped. */ 1755 spin_lock_irqsave(&sched_engine->lock, flags); 1756 spin_lock(&ce->guc_state.lock); 1757 list_for_each_entry(rq, &ce->guc_state.requests, sched.link) 1758 i915_request_put(i915_request_mark_eio(rq)); 1759 spin_unlock(&ce->guc_state.lock); 1760 spin_unlock_irqrestore(&sched_engine->lock, flags); 1761 } 1762 1763 static void 1764 guc_cancel_sched_engine_requests(struct i915_sched_engine *sched_engine) 1765 { 1766 struct i915_request *rq, *rn; 1767 struct rb_node *rb; 1768 unsigned long flags; 1769 1770 /* Can be called during boot if GuC fails to load */ 1771 if (!sched_engine) 1772 return; 1773 1774 /* 1775 * Before we call engine->cancel_requests(), we should have exclusive 1776 * access to the submission state. This is arranged for us by the 1777 * caller disabling the interrupt generation, the tasklet and other 1778 * threads that may then access the same state, giving us a free hand 1779 * to reset state. However, we still need to let lockdep be aware that 1780 * we know this state may be accessed in hardirq context, so we 1781 * disable the irq around this manipulation and we want to keep 1782 * the spinlock focused on its duties and not accidentally conflate 1783 * coverage to the submission's irq state. 
(Similarly, although we 1784 * shouldn't need to disable irq around the manipulation of the 1785 * submission's irq state, we also wish to remind ourselves that 1786 * it is irq state.) 1787 */ 1788 spin_lock_irqsave(&sched_engine->lock, flags); 1789 1790 /* Flush the queued requests to the timeline list (for retiring). */ 1791 while ((rb = rb_first_cached(&sched_engine->queue))) { 1792 struct i915_priolist *p = to_priolist(rb); 1793 1794 priolist_for_each_request_consume(rq, rn, p) { 1795 list_del_init(&rq->sched.link); 1796 1797 __i915_request_submit(rq); 1798 1799 i915_request_put(i915_request_mark_eio(rq)); 1800 } 1801 1802 rb_erase_cached(&p->node, &sched_engine->queue); 1803 i915_priolist_free(p); 1804 } 1805 1806 /* Remaining _unready_ requests will be nop'ed when submitted */ 1807 1808 sched_engine->queue_priority_hint = INT_MIN; 1809 sched_engine->queue = RB_ROOT_CACHED; 1810 1811 spin_unlock_irqrestore(&sched_engine->lock, flags); 1812 } 1813 1814 void intel_guc_submission_cancel_requests(struct intel_guc *guc) 1815 { 1816 struct intel_context *ce; 1817 unsigned long index; 1818 unsigned long flags; 1819 1820 xa_lock_irqsave(&guc->context_lookup, flags); 1821 xa_for_each(&guc->context_lookup, index, ce) { 1822 if (!kref_get_unless_zero(&ce->ref)) 1823 continue; 1824 1825 xa_unlock(&guc->context_lookup); 1826 1827 if (intel_context_is_pinned(ce) && 1828 !intel_context_is_child(ce)) 1829 guc_cancel_context_requests(ce); 1830 1831 intel_context_put(ce); 1832 1833 xa_lock(&guc->context_lookup); 1834 } 1835 xa_unlock_irqrestore(&guc->context_lookup, flags); 1836 1837 guc_cancel_sched_engine_requests(guc->sched_engine); 1838 1839 /* GuC is blown away, drop all references to contexts */ 1840 xa_destroy(&guc->context_lookup); 1841 } 1842 1843 void intel_guc_submission_reset_finish(struct intel_guc *guc) 1844 { 1845 /* Reset called during driver load or during wedge? */ 1846 if (unlikely(!guc_submission_initialized(guc) || 1847 intel_gt_is_wedged(guc_to_gt(guc)))) { 1848 return; 1849 } 1850 1851 /* 1852 * Technically possible for either of these values to be non-zero here, 1853 * but very unlikely + harmless. Regardless let's add a warn so we can 1854 * see in CI if this happens frequently / a precursor to taking down the 1855 * machine. 1856 */ 1857 GEM_WARN_ON(atomic_read(&guc->outstanding_submission_g2h)); 1858 atomic_set(&guc->outstanding_submission_g2h, 0); 1859 1860 intel_guc_global_policies_update(guc); 1861 enable_submission(guc); 1862 intel_gt_unpark_heartbeats(guc_to_gt(guc)); 1863 } 1864 1865 static void destroyed_worker_func(struct work_struct *w); 1866 static void reset_fail_worker_func(struct work_struct *w); 1867 1868 /* 1869 * Set up the memory resources to be shared with the GuC (via the GGTT) 1870 * at firmware loading time. 
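* For GuC firmware older than v70 this also creates the v69 LRC
 * descriptor pool; in all cases it allocates the bitmap used for
 * multi-LRC guc_ids, derives the timestamp ping delay from the GT
 * clock frequency and records the GPM timestamp shift.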
1871 */ 1872 int intel_guc_submission_init(struct intel_guc *guc) 1873 { 1874 struct intel_gt *gt = guc_to_gt(guc); 1875 int ret; 1876 1877 if (guc->submission_initialized) 1878 return 0; 1879 1880 if (GET_UC_VER(guc) < MAKE_UC_VER(70, 0, 0)) { 1881 ret = guc_lrc_desc_pool_create_v69(guc); 1882 if (ret) 1883 return ret; 1884 } 1885 1886 guc->submission_state.guc_ids_bitmap = 1887 bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL); 1888 if (!guc->submission_state.guc_ids_bitmap) { 1889 ret = -ENOMEM; 1890 goto destroy_pool; 1891 } 1892 1893 guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ; 1894 guc->timestamp.shift = gpm_timestamp_shift(gt); 1895 guc->submission_initialized = true; 1896 1897 return 0; 1898 1899 destroy_pool: 1900 guc_lrc_desc_pool_destroy_v69(guc); 1901 1902 return ret; 1903 } 1904 1905 void intel_guc_submission_fini(struct intel_guc *guc) 1906 { 1907 if (!guc->submission_initialized) 1908 return; 1909 1910 guc_flush_destroyed_contexts(guc); 1911 guc_lrc_desc_pool_destroy_v69(guc); 1912 i915_sched_engine_put(guc->sched_engine); 1913 bitmap_free(guc->submission_state.guc_ids_bitmap); 1914 guc->submission_initialized = false; 1915 } 1916 1917 static inline void queue_request(struct i915_sched_engine *sched_engine, 1918 struct i915_request *rq, 1919 int prio) 1920 { 1921 GEM_BUG_ON(!list_empty(&rq->sched.link)); 1922 list_add_tail(&rq->sched.link, 1923 i915_sched_lookup_priolist(sched_engine, prio)); 1924 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 1925 tasklet_hi_schedule(&sched_engine->tasklet); 1926 } 1927 1928 static int guc_bypass_tasklet_submit(struct intel_guc *guc, 1929 struct i915_request *rq) 1930 { 1931 int ret = 0; 1932 1933 __i915_request_submit(rq); 1934 1935 trace_i915_request_in(rq, 0); 1936 1937 if (is_multi_lrc_rq(rq)) { 1938 if (multi_lrc_submit(rq)) { 1939 ret = guc_wq_item_append(guc, rq); 1940 if (!ret) 1941 ret = guc_add_request(guc, rq); 1942 } 1943 } else { 1944 guc_set_lrc_tail(rq); 1945 ret = guc_add_request(guc, rq); 1946 } 1947 1948 if (unlikely(ret == -EPIPE)) 1949 disable_submission(guc); 1950 1951 return ret; 1952 } 1953 1954 static bool need_tasklet(struct intel_guc *guc, struct i915_request *rq) 1955 { 1956 struct i915_sched_engine *sched_engine = rq->engine->sched_engine; 1957 struct intel_context *ce = request_to_scheduling_context(rq); 1958 1959 return submission_disabled(guc) || guc->stalled_request || 1960 !i915_sched_engine_is_empty(sched_engine) || 1961 !ctx_id_mapped(guc, ce->guc_id.id); 1962 } 1963 1964 static void guc_submit_request(struct i915_request *rq) 1965 { 1966 struct i915_sched_engine *sched_engine = rq->engine->sched_engine; 1967 struct intel_guc *guc = &rq->engine->gt->uc.guc; 1968 unsigned long flags; 1969 1970 /* Will be called from irq-context when using foreign fences. 
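* Hence the irqsave lock below. A request either goes onto the
 * tasklet's priority list (when need_tasklet() sees disabled
 * submission, a stalled request, a non-empty sched_engine or an
 * unmapped ctx_id) or is handed straight to the GuC via
 * guc_bypass_tasklet_submit().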
*/ 1971 spin_lock_irqsave(&sched_engine->lock, flags); 1972 1973 if (need_tasklet(guc, rq)) 1974 queue_request(sched_engine, rq, rq_prio(rq)); 1975 else if (guc_bypass_tasklet_submit(guc, rq) == -EBUSY) 1976 tasklet_hi_schedule(&sched_engine->tasklet); 1977 1978 spin_unlock_irqrestore(&sched_engine->lock, flags); 1979 } 1980 1981 static int new_guc_id(struct intel_guc *guc, struct intel_context *ce) 1982 { 1983 STUB(); 1984 return -ENOSYS; 1985 #ifdef notyet 1986 int ret; 1987 1988 GEM_BUG_ON(intel_context_is_child(ce)); 1989 1990 if (intel_context_is_parent(ce)) 1991 ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap, 1992 NUMBER_MULTI_LRC_GUC_ID(guc), 1993 order_base_2(ce->parallel.number_children 1994 + 1)); 1995 else 1996 ret = ida_simple_get(&guc->submission_state.guc_ids, 1997 NUMBER_MULTI_LRC_GUC_ID(guc), 1998 guc->submission_state.num_guc_ids, 1999 GFP_KERNEL | __GFP_RETRY_MAYFAIL | 2000 __GFP_NOWARN); 2001 if (unlikely(ret < 0)) 2002 return ret; 2003 2004 ce->guc_id.id = ret; 2005 return 0; 2006 #endif 2007 } 2008 2009 static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce) 2010 { 2011 STUB(); 2012 #ifdef notyet 2013 GEM_BUG_ON(intel_context_is_child(ce)); 2014 2015 if (!context_guc_id_invalid(ce)) { 2016 if (intel_context_is_parent(ce)) 2017 bitmap_release_region(guc->submission_state.guc_ids_bitmap, 2018 ce->guc_id.id, 2019 order_base_2(ce->parallel.number_children 2020 + 1)); 2021 else 2022 ida_simple_remove(&guc->submission_state.guc_ids, 2023 ce->guc_id.id); 2024 clr_ctx_id_mapping(guc, ce->guc_id.id); 2025 set_context_guc_id_invalid(ce); 2026 } 2027 if (!list_empty(&ce->guc_id.link)) 2028 list_del_init(&ce->guc_id.link); 2029 #endif 2030 } 2031 2032 static void release_guc_id(struct intel_guc *guc, struct intel_context *ce) 2033 { 2034 unsigned long flags; 2035 2036 spin_lock_irqsave(&guc->submission_state.lock, flags); 2037 __release_guc_id(guc, ce); 2038 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 2039 } 2040 2041 static int steal_guc_id(struct intel_guc *guc, struct intel_context *ce) 2042 { 2043 struct intel_context *cn; 2044 2045 lockdep_assert_held(&guc->submission_state.lock); 2046 GEM_BUG_ON(intel_context_is_child(ce)); 2047 GEM_BUG_ON(intel_context_is_parent(ce)); 2048 2049 if (!list_empty(&guc->submission_state.guc_id_list)) { 2050 cn = list_first_entry(&guc->submission_state.guc_id_list, 2051 struct intel_context, 2052 guc_id.link); 2053 2054 GEM_BUG_ON(atomic_read(&cn->guc_id.ref)); 2055 GEM_BUG_ON(context_guc_id_invalid(cn)); 2056 GEM_BUG_ON(intel_context_is_child(cn)); 2057 GEM_BUG_ON(intel_context_is_parent(cn)); 2058 2059 list_del_init(&cn->guc_id.link); 2060 ce->guc_id.id = cn->guc_id.id; 2061 2062 spin_lock(&cn->guc_state.lock); 2063 clr_context_registered(cn); 2064 spin_unlock(&cn->guc_state.lock); 2065 2066 set_context_guc_id_invalid(cn); 2067 2068 #ifdef CONFIG_DRM_I915_SELFTEST 2069 guc->number_guc_id_stolen++; 2070 #endif 2071 2072 return 0; 2073 } else { 2074 return -EAGAIN; 2075 } 2076 } 2077 2078 static int assign_guc_id(struct intel_guc *guc, struct intel_context *ce) 2079 { 2080 int ret; 2081 2082 lockdep_assert_held(&guc->submission_state.lock); 2083 GEM_BUG_ON(intel_context_is_child(ce)); 2084 2085 ret = new_guc_id(guc, ce); 2086 if (unlikely(ret < 0)) { 2087 if (intel_context_is_parent(ce)) 2088 return -ENOSPC; 2089 2090 ret = steal_guc_id(guc, ce); 2091 if (ret < 0) 2092 return ret; 2093 } 2094 2095 if (intel_context_is_parent(ce)) { 2096 struct intel_context *child; 2097 int i = 1; 2098 
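/*
 * Children are handed consecutive guc_ids directly after the parent's;
 * this relies on new_guc_id() having reserved a contiguous bitmap
 * region of order_base_2(number_children + 1) ids for parent contexts.
 */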
2099 for_each_child(ce, child) 2100 child->guc_id.id = ce->guc_id.id + i++; 2101 } 2102 2103 return 0; 2104 } 2105 2106 #define PIN_GUC_ID_TRIES 4 2107 static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce) 2108 { 2109 int ret = 0; 2110 unsigned long flags, tries = PIN_GUC_ID_TRIES; 2111 2112 GEM_BUG_ON(atomic_read(&ce->guc_id.ref)); 2113 2114 try_again: 2115 spin_lock_irqsave(&guc->submission_state.lock, flags); 2116 2117 might_lock(&ce->guc_state.lock); 2118 2119 if (context_guc_id_invalid(ce)) { 2120 ret = assign_guc_id(guc, ce); 2121 if (ret) 2122 goto out_unlock; 2123 ret = 1; /* Indidcates newly assigned guc_id */ 2124 } 2125 if (!list_empty(&ce->guc_id.link)) 2126 list_del_init(&ce->guc_id.link); 2127 atomic_inc(&ce->guc_id.ref); 2128 2129 out_unlock: 2130 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 2131 2132 /* 2133 * -EAGAIN indicates no guc_id are available, let's retire any 2134 * outstanding requests to see if that frees up a guc_id. If the first 2135 * retire didn't help, insert a sleep with the timeslice duration before 2136 * attempting to retire more requests. Double the sleep period each 2137 * subsequent pass before finally giving up. The sleep period has max of 2138 * 100ms and minimum of 1ms. 2139 */ 2140 if (ret == -EAGAIN && --tries) { 2141 if (PIN_GUC_ID_TRIES - tries > 1) { 2142 unsigned int timeslice_shifted = 2143 ce->engine->props.timeslice_duration_ms << 2144 (PIN_GUC_ID_TRIES - tries - 2); 2145 unsigned int max = min_t(unsigned int, 100, 2146 timeslice_shifted); 2147 2148 drm_msleep(max_t(unsigned int, max, 1)); 2149 } 2150 intel_gt_retire_requests(guc_to_gt(guc)); 2151 goto try_again; 2152 } 2153 2154 return ret; 2155 } 2156 2157 static void unpin_guc_id(struct intel_guc *guc, struct intel_context *ce) 2158 { 2159 unsigned long flags; 2160 2161 GEM_BUG_ON(atomic_read(&ce->guc_id.ref) < 0); 2162 GEM_BUG_ON(intel_context_is_child(ce)); 2163 2164 if (unlikely(context_guc_id_invalid(ce) || 2165 intel_context_is_parent(ce))) 2166 return; 2167 2168 spin_lock_irqsave(&guc->submission_state.lock, flags); 2169 if (!context_guc_id_invalid(ce) && list_empty(&ce->guc_id.link) && 2170 !atomic_read(&ce->guc_id.ref)) 2171 list_add_tail(&ce->guc_id.link, 2172 &guc->submission_state.guc_id_list); 2173 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 2174 } 2175 2176 static int __guc_action_register_multi_lrc_v69(struct intel_guc *guc, 2177 struct intel_context *ce, 2178 u32 guc_id, 2179 u32 offset, 2180 bool loop) 2181 { 2182 struct intel_context *child; 2183 u32 action[4 + MAX_ENGINE_INSTANCE]; 2184 int len = 0; 2185 2186 GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE); 2187 2188 action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC; 2189 action[len++] = guc_id; 2190 action[len++] = ce->parallel.number_children + 1; 2191 action[len++] = offset; 2192 for_each_child(ce, child) { 2193 offset += sizeof(struct guc_lrc_desc_v69); 2194 action[len++] = offset; 2195 } 2196 2197 return guc_submission_send_busy_loop(guc, action, len, 0, loop); 2198 } 2199 2200 static int __guc_action_register_multi_lrc_v70(struct intel_guc *guc, 2201 struct intel_context *ce, 2202 struct guc_ctxt_registration_info *info, 2203 bool loop) 2204 { 2205 struct intel_context *child; 2206 u32 action[13 + (MAX_ENGINE_INSTANCE * 2)]; 2207 int len = 0; 2208 u32 next_id; 2209 2210 GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE); 2211 2212 action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC; 2213 action[len++] = info->flags; 2214 
action[len++] = info->context_idx; 2215 action[len++] = info->engine_class; 2216 action[len++] = info->engine_submit_mask; 2217 action[len++] = info->wq_desc_lo; 2218 action[len++] = info->wq_desc_hi; 2219 action[len++] = info->wq_base_lo; 2220 action[len++] = info->wq_base_hi; 2221 action[len++] = info->wq_size; 2222 action[len++] = ce->parallel.number_children + 1; 2223 action[len++] = info->hwlrca_lo; 2224 action[len++] = info->hwlrca_hi; 2225 2226 next_id = info->context_idx + 1; 2227 for_each_child(ce, child) { 2228 GEM_BUG_ON(next_id++ != child->guc_id.id); 2229 2230 /* 2231 * NB: GuC interface supports 64 bit LRCA even though i915/HW 2232 * only supports 32 bit currently. 2233 */ 2234 action[len++] = lower_32_bits(child->lrc.lrca); 2235 action[len++] = upper_32_bits(child->lrc.lrca); 2236 } 2237 2238 GEM_BUG_ON(len > ARRAY_SIZE(action)); 2239 2240 return guc_submission_send_busy_loop(guc, action, len, 0, loop); 2241 } 2242 2243 static int __guc_action_register_context_v69(struct intel_guc *guc, 2244 u32 guc_id, 2245 u32 offset, 2246 bool loop) 2247 { 2248 u32 action[] = { 2249 INTEL_GUC_ACTION_REGISTER_CONTEXT, 2250 guc_id, 2251 offset, 2252 }; 2253 2254 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2255 0, loop); 2256 } 2257 2258 static int __guc_action_register_context_v70(struct intel_guc *guc, 2259 struct guc_ctxt_registration_info *info, 2260 bool loop) 2261 { 2262 u32 action[] = { 2263 INTEL_GUC_ACTION_REGISTER_CONTEXT, 2264 info->flags, 2265 info->context_idx, 2266 info->engine_class, 2267 info->engine_submit_mask, 2268 info->wq_desc_lo, 2269 info->wq_desc_hi, 2270 info->wq_base_lo, 2271 info->wq_base_hi, 2272 info->wq_size, 2273 info->hwlrca_lo, 2274 info->hwlrca_hi, 2275 }; 2276 2277 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2278 0, loop); 2279 } 2280 2281 static void prepare_context_registration_info_v69(struct intel_context *ce); 2282 static void prepare_context_registration_info_v70(struct intel_context *ce, 2283 struct guc_ctxt_registration_info *info); 2284 2285 static int 2286 register_context_v69(struct intel_guc *guc, struct intel_context *ce, bool loop) 2287 { 2288 u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool_v69) + 2289 ce->guc_id.id * sizeof(struct guc_lrc_desc_v69); 2290 2291 prepare_context_registration_info_v69(ce); 2292 2293 if (intel_context_is_parent(ce)) 2294 return __guc_action_register_multi_lrc_v69(guc, ce, ce->guc_id.id, 2295 offset, loop); 2296 else 2297 return __guc_action_register_context_v69(guc, ce->guc_id.id, 2298 offset, loop); 2299 } 2300 2301 static int 2302 register_context_v70(struct intel_guc *guc, struct intel_context *ce, bool loop) 2303 { 2304 struct guc_ctxt_registration_info info; 2305 2306 prepare_context_registration_info_v70(ce, &info); 2307 2308 if (intel_context_is_parent(ce)) 2309 return __guc_action_register_multi_lrc_v70(guc, ce, &info, loop); 2310 else 2311 return __guc_action_register_context_v70(guc, &info, loop); 2312 } 2313 2314 static int register_context(struct intel_context *ce, bool loop) 2315 { 2316 struct intel_guc *guc = ce_to_guc(ce); 2317 int ret; 2318 2319 GEM_BUG_ON(intel_context_is_child(ce)); 2320 trace_intel_context_register(ce); 2321 2322 if (GET_UC_VER(guc) >= MAKE_UC_VER(70, 0, 0)) 2323 ret = register_context_v70(guc, ce, loop); 2324 else 2325 ret = register_context_v69(guc, ce, loop); 2326 2327 if (likely(!ret)) { 2328 unsigned long flags; 2329 2330 spin_lock_irqsave(&ce->guc_state.lock, flags); 2331 set_context_registered(ce); 2332 
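/*
 * Registration succeeded; for v70+ firmware the scheduling policy
 * KLVs are pushed right after the lock is dropped.
 */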
spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2333 2334 if (GET_UC_VER(guc) >= MAKE_UC_VER(70, 0, 0)) 2335 guc_context_policy_init_v70(ce, loop); 2336 } 2337 2338 return ret; 2339 } 2340 2341 static int __guc_action_deregister_context(struct intel_guc *guc, 2342 u32 guc_id) 2343 { 2344 u32 action[] = { 2345 INTEL_GUC_ACTION_DEREGISTER_CONTEXT, 2346 guc_id, 2347 }; 2348 2349 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2350 G2H_LEN_DW_DEREGISTER_CONTEXT, 2351 true); 2352 } 2353 2354 static int deregister_context(struct intel_context *ce, u32 guc_id) 2355 { 2356 struct intel_guc *guc = ce_to_guc(ce); 2357 2358 GEM_BUG_ON(intel_context_is_child(ce)); 2359 trace_intel_context_deregister(ce); 2360 2361 return __guc_action_deregister_context(guc, guc_id); 2362 } 2363 2364 static inline void clear_children_join_go_memory(struct intel_context *ce) 2365 { 2366 struct parent_scratch *ps = __get_parent_scratch(ce); 2367 int i; 2368 2369 ps->go.semaphore = 0; 2370 for (i = 0; i < ce->parallel.number_children + 1; ++i) 2371 ps->join[i].semaphore = 0; 2372 } 2373 2374 static inline u32 get_children_go_value(struct intel_context *ce) 2375 { 2376 return __get_parent_scratch(ce)->go.semaphore; 2377 } 2378 2379 static inline u32 get_children_join_value(struct intel_context *ce, 2380 u8 child_index) 2381 { 2382 return __get_parent_scratch(ce)->join[child_index].semaphore; 2383 } 2384 2385 struct context_policy { 2386 u32 count; 2387 struct guc_update_context_policy h2g; 2388 }; 2389 2390 static u32 __guc_context_policy_action_size(struct context_policy *policy) 2391 { 2392 size_t bytes = sizeof(policy->h2g.header) + 2393 (sizeof(policy->h2g.klv[0]) * policy->count); 2394 2395 return bytes / sizeof(u32); 2396 } 2397 2398 static void __guc_context_policy_start_klv(struct context_policy *policy, u16 guc_id) 2399 { 2400 policy->h2g.header.action = INTEL_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES; 2401 policy->h2g.header.ctx_id = guc_id; 2402 policy->count = 0; 2403 } 2404 2405 #define MAKE_CONTEXT_POLICY_ADD(func, id) \ 2406 static void __guc_context_policy_add_##func(struct context_policy *policy, u32 data) \ 2407 { \ 2408 GEM_BUG_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \ 2409 policy->h2g.klv[policy->count].kl = \ 2410 FIELD_PREP(GUC_KLV_0_KEY, GUC_CONTEXT_POLICIES_KLV_ID_##id) | \ 2411 FIELD_PREP(GUC_KLV_0_LEN, 1); \ 2412 policy->h2g.klv[policy->count].value = data; \ 2413 policy->count++; \ 2414 } 2415 2416 MAKE_CONTEXT_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM) 2417 MAKE_CONTEXT_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT) 2418 MAKE_CONTEXT_POLICY_ADD(priority, SCHEDULING_PRIORITY) 2419 MAKE_CONTEXT_POLICY_ADD(preempt_to_idle, PREEMPT_TO_IDLE_ON_QUANTUM_EXPIRY) 2420 2421 #undef MAKE_CONTEXT_POLICY_ADD 2422 2423 static int __guc_context_set_context_policies(struct intel_guc *guc, 2424 struct context_policy *policy, 2425 bool loop) 2426 { 2427 return guc_submission_send_busy_loop(guc, (u32 *)&policy->h2g, 2428 __guc_context_policy_action_size(policy), 2429 0, loop); 2430 } 2431 2432 static int guc_context_policy_init_v70(struct intel_context *ce, bool loop) 2433 { 2434 struct intel_engine_cs *engine = ce->engine; 2435 struct intel_guc *guc = &engine->gt->uc.guc; 2436 struct context_policy policy; 2437 u32 execution_quantum; 2438 u32 preemption_timeout; 2439 unsigned long flags; 2440 int ret; 2441 2442 /* NB: For both of these, zero means disabled. 
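* The engine properties are in milliseconds and are scaled by 1000
 * below, presumably to the microsecond units the GuC policy interface
 * expects; the overflow checks guard that conversion.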
*/ 2443 GEM_BUG_ON(overflows_type(engine->props.timeslice_duration_ms * 1000, 2444 execution_quantum)); 2445 GEM_BUG_ON(overflows_type(engine->props.preempt_timeout_ms * 1000, 2446 preemption_timeout)); 2447 execution_quantum = engine->props.timeslice_duration_ms * 1000; 2448 preemption_timeout = engine->props.preempt_timeout_ms * 1000; 2449 2450 __guc_context_policy_start_klv(&policy, ce->guc_id.id); 2451 2452 __guc_context_policy_add_priority(&policy, ce->guc_state.prio); 2453 __guc_context_policy_add_execution_quantum(&policy, execution_quantum); 2454 __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout); 2455 2456 if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION) 2457 __guc_context_policy_add_preempt_to_idle(&policy, 1); 2458 2459 ret = __guc_context_set_context_policies(guc, &policy, loop); 2460 2461 spin_lock_irqsave(&ce->guc_state.lock, flags); 2462 if (ret != 0) 2463 set_context_policy_required(ce); 2464 else 2465 clr_context_policy_required(ce); 2466 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2467 2468 return ret; 2469 } 2470 2471 static void guc_context_policy_init_v69(struct intel_engine_cs *engine, 2472 struct guc_lrc_desc_v69 *desc) 2473 { 2474 desc->policy_flags = 0; 2475 2476 if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION) 2477 desc->policy_flags |= CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE_V69; 2478 2479 /* NB: For both of these, zero means disabled. */ 2480 GEM_BUG_ON(overflows_type(engine->props.timeslice_duration_ms * 1000, 2481 desc->execution_quantum)); 2482 GEM_BUG_ON(overflows_type(engine->props.preempt_timeout_ms * 1000, 2483 desc->preemption_timeout)); 2484 desc->execution_quantum = engine->props.timeslice_duration_ms * 1000; 2485 desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000; 2486 } 2487 2488 static u32 map_guc_prio_to_lrc_desc_prio(u8 prio) 2489 { 2490 /* 2491 * this matches the mapping we do in map_i915_prio_to_guc_prio() 2492 * (e.g. prio < I915_PRIORITY_NORMAL maps to GUC_CLIENT_PRIORITY_NORMAL) 2493 */ 2494 switch (prio) { 2495 default: 2496 MISSING_CASE(prio); 2497 fallthrough; 2498 case GUC_CLIENT_PRIORITY_KMD_NORMAL: 2499 return GEN12_CTX_PRIORITY_NORMAL; 2500 case GUC_CLIENT_PRIORITY_NORMAL: 2501 return GEN12_CTX_PRIORITY_LOW; 2502 case GUC_CLIENT_PRIORITY_HIGH: 2503 case GUC_CLIENT_PRIORITY_KMD_HIGH: 2504 return GEN12_CTX_PRIORITY_HIGH; 2505 } 2506 } 2507 2508 static void prepare_context_registration_info_v69(struct intel_context *ce) 2509 { 2510 struct intel_engine_cs *engine = ce->engine; 2511 struct intel_guc *guc = &engine->gt->uc.guc; 2512 u32 ctx_id = ce->guc_id.id; 2513 struct guc_lrc_desc_v69 *desc; 2514 struct intel_context *child; 2515 2516 GEM_BUG_ON(!engine->mask); 2517 2518 /* 2519 * Ensure LRC + CT vmas are is same region as write barrier is done 2520 * based on CT vma region. 2521 */ 2522 GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) != 2523 i915_gem_object_is_lmem(ce->ring->vma->obj)); 2524 2525 desc = __get_lrc_desc_v69(guc, ctx_id); 2526 desc->engine_class = engine_class_to_guc_class(engine->class); 2527 desc->engine_submit_mask = engine->logical_mask; 2528 desc->hw_context_desc = ce->lrc.lrca; 2529 desc->priority = ce->guc_state.prio; 2530 desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD; 2531 guc_context_policy_init_v69(engine, desc); 2532 2533 /* 2534 * If context is a parent, we need to register a process descriptor 2535 * describing a work queue and register all child contexts. 
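* For v69 that means filling process_desc, wq_addr and wq_size in the
 * parent's descriptor, pointing the parallel bookkeeping at the v69
 * process descriptor, giving each child its own guc_lrc_desc_v69 with
 * the parent's priority, and finally clearing the join/go scratch
 * memory.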
2536 */ 2537 if (intel_context_is_parent(ce)) { 2538 struct guc_process_desc_v69 *pdesc; 2539 2540 ce->parallel.guc.wqi_tail = 0; 2541 ce->parallel.guc.wqi_head = 0; 2542 2543 desc->process_desc = i915_ggtt_offset(ce->state) + 2544 __get_parent_scratch_offset(ce); 2545 desc->wq_addr = i915_ggtt_offset(ce->state) + 2546 __get_wq_offset(ce); 2547 desc->wq_size = WQ_SIZE; 2548 2549 pdesc = __get_process_desc_v69(ce); 2550 memset(pdesc, 0, sizeof(*(pdesc))); 2551 pdesc->stage_id = ce->guc_id.id; 2552 pdesc->wq_base_addr = desc->wq_addr; 2553 pdesc->wq_size_bytes = desc->wq_size; 2554 pdesc->wq_status = WQ_STATUS_ACTIVE; 2555 2556 ce->parallel.guc.wq_head = &pdesc->head; 2557 ce->parallel.guc.wq_tail = &pdesc->tail; 2558 ce->parallel.guc.wq_status = &pdesc->wq_status; 2559 2560 for_each_child(ce, child) { 2561 desc = __get_lrc_desc_v69(guc, child->guc_id.id); 2562 2563 desc->engine_class = 2564 engine_class_to_guc_class(engine->class); 2565 desc->hw_context_desc = child->lrc.lrca; 2566 desc->priority = ce->guc_state.prio; 2567 desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD; 2568 guc_context_policy_init_v69(engine, desc); 2569 } 2570 2571 clear_children_join_go_memory(ce); 2572 } 2573 } 2574 2575 static void prepare_context_registration_info_v70(struct intel_context *ce, 2576 struct guc_ctxt_registration_info *info) 2577 { 2578 struct intel_engine_cs *engine = ce->engine; 2579 struct intel_guc *guc = &engine->gt->uc.guc; 2580 u32 ctx_id = ce->guc_id.id; 2581 2582 GEM_BUG_ON(!engine->mask); 2583 2584 /* 2585 * Ensure LRC + CT vmas are is same region as write barrier is done 2586 * based on CT vma region. 2587 */ 2588 GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) != 2589 i915_gem_object_is_lmem(ce->ring->vma->obj)); 2590 2591 memset(info, 0, sizeof(*info)); 2592 info->context_idx = ctx_id; 2593 info->engine_class = engine_class_to_guc_class(engine->class); 2594 info->engine_submit_mask = engine->logical_mask; 2595 /* 2596 * NB: GuC interface supports 64 bit LRCA even though i915/HW 2597 * only supports 32 bit currently. 2598 */ 2599 info->hwlrca_lo = lower_32_bits(ce->lrc.lrca); 2600 info->hwlrca_hi = upper_32_bits(ce->lrc.lrca); 2601 if (engine->flags & I915_ENGINE_HAS_EU_PRIORITY) 2602 info->hwlrca_lo |= map_guc_prio_to_lrc_desc_prio(ce->guc_state.prio); 2603 info->flags = CONTEXT_REGISTRATION_FLAG_KMD; 2604 2605 /* 2606 * If context is a parent, we need to register a process descriptor 2607 * describing a work queue and register all child contexts. 
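* For v70 the work queue descriptor and base are instead passed as
 * 64-bit GGTT offsets split into the wq_desc and wq_base lo+hi fields
 * of guc_ctxt_registration_info rather than through a descriptor pool
 * entry.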
2608 */ 2609 if (intel_context_is_parent(ce)) { 2610 struct guc_sched_wq_desc *wq_desc; 2611 u64 wq_desc_offset, wq_base_offset; 2612 2613 ce->parallel.guc.wqi_tail = 0; 2614 ce->parallel.guc.wqi_head = 0; 2615 2616 wq_desc_offset = i915_ggtt_offset(ce->state) + 2617 __get_parent_scratch_offset(ce); 2618 wq_base_offset = i915_ggtt_offset(ce->state) + 2619 __get_wq_offset(ce); 2620 info->wq_desc_lo = lower_32_bits(wq_desc_offset); 2621 info->wq_desc_hi = upper_32_bits(wq_desc_offset); 2622 info->wq_base_lo = lower_32_bits(wq_base_offset); 2623 info->wq_base_hi = upper_32_bits(wq_base_offset); 2624 info->wq_size = WQ_SIZE; 2625 2626 wq_desc = __get_wq_desc_v70(ce); 2627 memset(wq_desc, 0, sizeof(*wq_desc)); 2628 wq_desc->wq_status = WQ_STATUS_ACTIVE; 2629 2630 ce->parallel.guc.wq_head = &wq_desc->head; 2631 ce->parallel.guc.wq_tail = &wq_desc->tail; 2632 ce->parallel.guc.wq_status = &wq_desc->wq_status; 2633 2634 clear_children_join_go_memory(ce); 2635 } 2636 } 2637 2638 static int try_context_registration(struct intel_context *ce, bool loop) 2639 { 2640 struct intel_engine_cs *engine = ce->engine; 2641 struct intel_runtime_pm *runtime_pm = engine->uncore->rpm; 2642 struct intel_guc *guc = &engine->gt->uc.guc; 2643 intel_wakeref_t wakeref; 2644 u32 ctx_id = ce->guc_id.id; 2645 bool context_registered; 2646 int ret = 0; 2647 2648 GEM_BUG_ON(!sched_state_is_init(ce)); 2649 2650 context_registered = ctx_id_mapped(guc, ctx_id); 2651 2652 clr_ctx_id_mapping(guc, ctx_id); 2653 set_ctx_id_mapping(guc, ctx_id, ce); 2654 2655 /* 2656 * The context_lookup xarray is used to determine if the hardware 2657 * context is currently registered. There are two cases in which it 2658 * could be registered either the guc_id has been stolen from another 2659 * context or the lrc descriptor address of this context has changed. In 2660 * either case the context needs to be deregistered with the GuC before 2661 * registering this context. 2662 */ 2663 if (context_registered) { 2664 bool disabled; 2665 unsigned long flags; 2666 2667 trace_intel_context_steal_guc_id(ce); 2668 GEM_BUG_ON(!loop); 2669 2670 /* Seal race with Reset */ 2671 spin_lock_irqsave(&ce->guc_state.lock, flags); 2672 disabled = submission_disabled(guc); 2673 if (likely(!disabled)) { 2674 set_context_wait_for_deregister_to_register(ce); 2675 intel_context_get(ce); 2676 } 2677 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2678 if (unlikely(disabled)) { 2679 clr_ctx_id_mapping(guc, ctx_id); 2680 return 0; /* Will get registered later */ 2681 } 2682 2683 /* 2684 * If stealing the guc_id, this ce has the same guc_id as the 2685 * context whose guc_id was stolen. 
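* Deregistering by ce->guc_id.id therefore evicts the stale
 * registration first; as with the disabled-submission path above,
 * -ENODEV from that H2G is treated as 'will get registered later'.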
2686 */ 2687 with_intel_runtime_pm(runtime_pm, wakeref) 2688 ret = deregister_context(ce, ce->guc_id.id); 2689 if (unlikely(ret == -ENODEV)) 2690 ret = 0; /* Will get registered later */ 2691 } else { 2692 with_intel_runtime_pm(runtime_pm, wakeref) 2693 ret = register_context(ce, loop); 2694 if (unlikely(ret == -EBUSY)) { 2695 clr_ctx_id_mapping(guc, ctx_id); 2696 } else if (unlikely(ret == -ENODEV)) { 2697 clr_ctx_id_mapping(guc, ctx_id); 2698 ret = 0; /* Will get registered later */ 2699 } 2700 } 2701 2702 return ret; 2703 } 2704 2705 static int __guc_context_pre_pin(struct intel_context *ce, 2706 struct intel_engine_cs *engine, 2707 struct i915_gem_ww_ctx *ww, 2708 void **vaddr) 2709 { 2710 return lrc_pre_pin(ce, engine, ww, vaddr); 2711 } 2712 2713 static int __guc_context_pin(struct intel_context *ce, 2714 struct intel_engine_cs *engine, 2715 void *vaddr) 2716 { 2717 if (i915_ggtt_offset(ce->state) != 2718 (ce->lrc.lrca & CTX_GTT_ADDRESS_MASK)) 2719 set_bit(CONTEXT_LRCA_DIRTY, &ce->flags); 2720 2721 /* 2722 * GuC context gets pinned in guc_request_alloc. See that function for 2723 * explaination of why. 2724 */ 2725 2726 return lrc_pin(ce, engine, vaddr); 2727 } 2728 2729 static int guc_context_pre_pin(struct intel_context *ce, 2730 struct i915_gem_ww_ctx *ww, 2731 void **vaddr) 2732 { 2733 return __guc_context_pre_pin(ce, ce->engine, ww, vaddr); 2734 } 2735 2736 static int guc_context_pin(struct intel_context *ce, void *vaddr) 2737 { 2738 int ret = __guc_context_pin(ce, ce->engine, vaddr); 2739 2740 if (likely(!ret && !intel_context_is_barrier(ce))) 2741 intel_engine_pm_get(ce->engine); 2742 2743 return ret; 2744 } 2745 2746 static void guc_context_unpin(struct intel_context *ce) 2747 { 2748 struct intel_guc *guc = ce_to_guc(ce); 2749 2750 unpin_guc_id(guc, ce); 2751 lrc_unpin(ce); 2752 2753 if (likely(!intel_context_is_barrier(ce))) 2754 intel_engine_pm_put_async(ce->engine); 2755 } 2756 2757 static void guc_context_post_unpin(struct intel_context *ce) 2758 { 2759 lrc_post_unpin(ce); 2760 } 2761 2762 static void __guc_context_sched_enable(struct intel_guc *guc, 2763 struct intel_context *ce) 2764 { 2765 u32 action[] = { 2766 INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET, 2767 ce->guc_id.id, 2768 GUC_CONTEXT_ENABLE 2769 }; 2770 2771 trace_intel_context_sched_enable(ce); 2772 2773 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2774 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true); 2775 } 2776 2777 static void __guc_context_sched_disable(struct intel_guc *guc, 2778 struct intel_context *ce, 2779 u16 guc_id) 2780 { 2781 u32 action[] = { 2782 INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET, 2783 guc_id, /* ce->guc_id.id not stable */ 2784 GUC_CONTEXT_DISABLE 2785 }; 2786 2787 GEM_BUG_ON(guc_id == GUC_INVALID_CONTEXT_ID); 2788 2789 GEM_BUG_ON(intel_context_is_child(ce)); 2790 trace_intel_context_sched_disable(ce); 2791 2792 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2793 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true); 2794 } 2795 2796 static void guc_blocked_fence_complete(struct intel_context *ce) 2797 { 2798 lockdep_assert_held(&ce->guc_state.lock); 2799 2800 if (!i915_sw_fence_done(&ce->guc_state.blocked)) 2801 i915_sw_fence_complete(&ce->guc_state.blocked); 2802 } 2803 2804 static void guc_blocked_fence_reinit(struct intel_context *ce) 2805 { 2806 lockdep_assert_held(&ce->guc_state.lock); 2807 GEM_BUG_ON(!i915_sw_fence_done(&ce->guc_state.blocked)); 2808 2809 /* 2810 * This fence is always complete unless a pending schedule disable is 2811 * outstanding. 
We arm the fence here and complete it when we receive 2812 * the pending schedule disable complete message. 2813 */ 2814 i915_sw_fence_fini(&ce->guc_state.blocked); 2815 i915_sw_fence_reinit(&ce->guc_state.blocked); 2816 i915_sw_fence_await(&ce->guc_state.blocked); 2817 i915_sw_fence_commit(&ce->guc_state.blocked); 2818 } 2819 2820 static u16 prep_context_pending_disable(struct intel_context *ce) 2821 { 2822 lockdep_assert_held(&ce->guc_state.lock); 2823 2824 set_context_pending_disable(ce); 2825 clr_context_enabled(ce); 2826 guc_blocked_fence_reinit(ce); 2827 intel_context_get(ce); 2828 2829 return ce->guc_id.id; 2830 } 2831 2832 static struct i915_sw_fence *guc_context_block(struct intel_context *ce) 2833 { 2834 struct intel_guc *guc = ce_to_guc(ce); 2835 unsigned long flags; 2836 struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm; 2837 intel_wakeref_t wakeref; 2838 u16 guc_id; 2839 bool enabled; 2840 2841 GEM_BUG_ON(intel_context_is_child(ce)); 2842 2843 spin_lock_irqsave(&ce->guc_state.lock, flags); 2844 2845 incr_context_blocked(ce); 2846 2847 enabled = context_enabled(ce); 2848 if (unlikely(!enabled || submission_disabled(guc))) { 2849 if (enabled) 2850 clr_context_enabled(ce); 2851 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2852 return &ce->guc_state.blocked; 2853 } 2854 2855 /* 2856 * We add +2 here as the schedule disable complete CTB handler calls 2857 * intel_context_sched_disable_unpin (-2 to pin_count). 2858 */ 2859 atomic_add(2, &ce->pin_count); 2860 2861 guc_id = prep_context_pending_disable(ce); 2862 2863 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2864 2865 with_intel_runtime_pm(runtime_pm, wakeref) 2866 __guc_context_sched_disable(guc, ce, guc_id); 2867 2868 return &ce->guc_state.blocked; 2869 } 2870 2871 #define SCHED_STATE_MULTI_BLOCKED_MASK \ 2872 (SCHED_STATE_BLOCKED_MASK & ~SCHED_STATE_BLOCKED) 2873 #define SCHED_STATE_NO_UNBLOCK \ 2874 (SCHED_STATE_MULTI_BLOCKED_MASK | \ 2875 SCHED_STATE_PENDING_DISABLE | \ 2876 SCHED_STATE_BANNED) 2877 2878 static bool context_cant_unblock(struct intel_context *ce) 2879 { 2880 lockdep_assert_held(&ce->guc_state.lock); 2881 2882 return (ce->guc_state.sched_state & SCHED_STATE_NO_UNBLOCK) || 2883 context_guc_id_invalid(ce) || 2884 !ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id) || 2885 !intel_context_is_pinned(ce); 2886 } 2887 2888 static void guc_context_unblock(struct intel_context *ce) 2889 { 2890 struct intel_guc *guc = ce_to_guc(ce); 2891 unsigned long flags; 2892 struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm; 2893 intel_wakeref_t wakeref; 2894 bool enable; 2895 2896 GEM_BUG_ON(context_enabled(ce)); 2897 GEM_BUG_ON(intel_context_is_child(ce)); 2898 2899 spin_lock_irqsave(&ce->guc_state.lock, flags); 2900 2901 if (unlikely(submission_disabled(guc) || 2902 context_cant_unblock(ce))) { 2903 enable = false; 2904 } else { 2905 enable = true; 2906 set_context_pending_enable(ce); 2907 set_context_enabled(ce); 2908 intel_context_get(ce); 2909 } 2910 2911 decr_context_blocked(ce); 2912 2913 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2914 2915 if (enable) { 2916 with_intel_runtime_pm(runtime_pm, wakeref) 2917 __guc_context_sched_enable(guc, ce); 2918 } 2919 } 2920 2921 static void guc_context_cancel_request(struct intel_context *ce, 2922 struct i915_request *rq) 2923 { 2924 struct intel_context *block_context = 2925 request_to_scheduling_context(rq); 2926 2927 if (i915_sw_fence_signaled(&rq->submit)) { 2928 struct i915_sw_fence *fence; 2929 2930 intel_context_get(ce); 2931 fence = 
guc_context_block(block_context); 2932 i915_sw_fence_wait(fence); 2933 if (!i915_request_completed(rq)) { 2934 __i915_request_skip(rq); 2935 guc_reset_state(ce, intel_ring_wrap(ce->ring, rq->head), 2936 true); 2937 } 2938 2939 guc_context_unblock(block_context); 2940 intel_context_put(ce); 2941 } 2942 } 2943 2944 static void __guc_context_set_preemption_timeout(struct intel_guc *guc, 2945 u16 guc_id, 2946 u32 preemption_timeout) 2947 { 2948 if (GET_UC_VER(guc) >= MAKE_UC_VER(70, 0, 0)) { 2949 struct context_policy policy; 2950 2951 __guc_context_policy_start_klv(&policy, guc_id); 2952 __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout); 2953 __guc_context_set_context_policies(guc, &policy, true); 2954 } else { 2955 u32 action[] = { 2956 INTEL_GUC_ACTION_V69_SET_CONTEXT_PREEMPTION_TIMEOUT, 2957 guc_id, 2958 preemption_timeout 2959 }; 2960 2961 intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); 2962 } 2963 } 2964 2965 static void 2966 guc_context_revoke(struct intel_context *ce, struct i915_request *rq, 2967 unsigned int preempt_timeout_ms) 2968 { 2969 struct intel_guc *guc = ce_to_guc(ce); 2970 struct intel_runtime_pm *runtime_pm = 2971 &ce->engine->gt->i915->runtime_pm; 2972 intel_wakeref_t wakeref; 2973 unsigned long flags; 2974 2975 GEM_BUG_ON(intel_context_is_child(ce)); 2976 2977 guc_flush_submissions(guc); 2978 2979 spin_lock_irqsave(&ce->guc_state.lock, flags); 2980 set_context_banned(ce); 2981 2982 if (submission_disabled(guc) || 2983 (!context_enabled(ce) && !context_pending_disable(ce))) { 2984 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2985 2986 guc_cancel_context_requests(ce); 2987 intel_engine_signal_breadcrumbs(ce->engine); 2988 } else if (!context_pending_disable(ce)) { 2989 u16 guc_id; 2990 2991 /* 2992 * We add +2 here as the schedule disable complete CTB handler 2993 * calls intel_context_sched_disable_unpin (-2 to pin_count). 2994 */ 2995 atomic_add(2, &ce->pin_count); 2996 2997 guc_id = prep_context_pending_disable(ce); 2998 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2999 3000 /* 3001 * In addition to disabling scheduling, set the preemption 3002 * timeout to the minimum value (1 us) so the banned context 3003 * gets kicked off the HW ASAP. 3004 */ 3005 with_intel_runtime_pm(runtime_pm, wakeref) { 3006 __guc_context_set_preemption_timeout(guc, guc_id, 3007 preempt_timeout_ms); 3008 __guc_context_sched_disable(guc, ce, guc_id); 3009 } 3010 } else { 3011 if (!context_guc_id_invalid(ce)) 3012 with_intel_runtime_pm(runtime_pm, wakeref) 3013 __guc_context_set_preemption_timeout(guc, 3014 ce->guc_id.id, 3015 preempt_timeout_ms); 3016 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3017 } 3018 } 3019 3020 static void guc_context_sched_disable(struct intel_context *ce) 3021 { 3022 struct intel_guc *guc = ce_to_guc(ce); 3023 unsigned long flags; 3024 struct intel_runtime_pm *runtime_pm = &ce->engine->gt->i915->runtime_pm; 3025 intel_wakeref_t wakeref; 3026 u16 guc_id; 3027 3028 GEM_BUG_ON(intel_context_is_child(ce)); 3029 3030 spin_lock_irqsave(&ce->guc_state.lock, flags); 3031 3032 /* 3033 * We have to check if the context has been disabled by another thread, 3034 * check if submssion has been disabled to seal a race with reset and 3035 * finally check if any more requests have been committed to the 3036 * context ensursing that a request doesn't slip through the 3037 * 'context_pending_disable' fence. 
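* In any of those cases the enable bit is simply dropped and we fall
 * through to intel_context_sched_disable_unpin(); otherwise a schedule
 * disable H2G is sent outside the lock with a runtime PM wakeref held.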
3038 */ 3039 if (unlikely(!context_enabled(ce) || submission_disabled(guc) || 3040 context_has_committed_requests(ce))) { 3041 clr_context_enabled(ce); 3042 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3043 goto unpin; 3044 } 3045 guc_id = prep_context_pending_disable(ce); 3046 3047 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3048 3049 with_intel_runtime_pm(runtime_pm, wakeref) 3050 __guc_context_sched_disable(guc, ce, guc_id); 3051 3052 return; 3053 unpin: 3054 intel_context_sched_disable_unpin(ce); 3055 } 3056 3057 static inline void guc_lrc_desc_unpin(struct intel_context *ce) 3058 { 3059 struct intel_guc *guc = ce_to_guc(ce); 3060 struct intel_gt *gt = guc_to_gt(guc); 3061 unsigned long flags; 3062 bool disabled; 3063 3064 GEM_BUG_ON(!intel_gt_pm_is_awake(gt)); 3065 GEM_BUG_ON(!ctx_id_mapped(guc, ce->guc_id.id)); 3066 GEM_BUG_ON(ce != __get_context(guc, ce->guc_id.id)); 3067 GEM_BUG_ON(context_enabled(ce)); 3068 3069 /* Seal race with Reset */ 3070 spin_lock_irqsave(&ce->guc_state.lock, flags); 3071 disabled = submission_disabled(guc); 3072 if (likely(!disabled)) { 3073 __intel_gt_pm_get(gt); 3074 set_context_destroyed(ce); 3075 clr_context_registered(ce); 3076 } 3077 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3078 if (unlikely(disabled)) { 3079 release_guc_id(guc, ce); 3080 __guc_context_destroy(ce); 3081 return; 3082 } 3083 3084 deregister_context(ce, ce->guc_id.id); 3085 } 3086 3087 static void __guc_context_destroy(struct intel_context *ce) 3088 { 3089 GEM_BUG_ON(ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_HIGH] || 3090 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_HIGH] || 3091 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_NORMAL] || 3092 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_NORMAL]); 3093 GEM_BUG_ON(ce->guc_state.number_committed_requests); 3094 3095 lrc_fini(ce); 3096 intel_context_fini(ce); 3097 3098 if (intel_engine_is_virtual(ce->engine)) { 3099 struct guc_virtual_engine *ve = 3100 container_of(ce, typeof(*ve), context); 3101 3102 if (ve->base.breadcrumbs) 3103 intel_breadcrumbs_put(ve->base.breadcrumbs); 3104 3105 kfree(ve); 3106 } else { 3107 intel_context_free(ce); 3108 } 3109 } 3110 3111 static void guc_flush_destroyed_contexts(struct intel_guc *guc) 3112 { 3113 struct intel_context *ce; 3114 unsigned long flags; 3115 3116 GEM_BUG_ON(!submission_disabled(guc) && 3117 guc_submission_initialized(guc)); 3118 3119 while (!list_empty(&guc->submission_state.destroyed_contexts)) { 3120 spin_lock_irqsave(&guc->submission_state.lock, flags); 3121 ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts, 3122 struct intel_context, 3123 destroyed_link); 3124 if (ce) 3125 list_del_init(&ce->destroyed_link); 3126 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 3127 3128 if (!ce) 3129 break; 3130 3131 release_guc_id(guc, ce); 3132 __guc_context_destroy(ce); 3133 } 3134 } 3135 3136 static void deregister_destroyed_contexts(struct intel_guc *guc) 3137 { 3138 struct intel_context *ce; 3139 unsigned long flags; 3140 3141 while (!list_empty(&guc->submission_state.destroyed_contexts)) { 3142 spin_lock_irqsave(&guc->submission_state.lock, flags); 3143 ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts, 3144 struct intel_context, 3145 destroyed_link); 3146 if (ce) 3147 list_del_init(&ce->destroyed_link); 3148 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 3149 3150 if (!ce) 3151 break; 3152 3153 guc_lrc_desc_unpin(ce); 3154 } 3155 } 3156 3157 static void destroyed_worker_func(struct work_struct 
*w) 3158 { 3159 struct intel_guc *guc = container_of(w, struct intel_guc, 3160 submission_state.destroyed_worker); 3161 struct intel_gt *gt = guc_to_gt(guc); 3162 int tmp; 3163 3164 with_intel_gt_pm(gt, tmp) 3165 deregister_destroyed_contexts(guc); 3166 } 3167 3168 static void guc_context_destroy(struct kref *kref) 3169 { 3170 struct intel_context *ce = container_of(kref, typeof(*ce), ref); 3171 struct intel_guc *guc = ce_to_guc(ce); 3172 unsigned long flags; 3173 bool destroy; 3174 3175 /* 3176 * If the guc_id is invalid this context has been stolen and we can free 3177 * it immediately. Also can be freed immediately if the context is not 3178 * registered with the GuC or the GuC is in the middle of a reset. 3179 */ 3180 spin_lock_irqsave(&guc->submission_state.lock, flags); 3181 destroy = submission_disabled(guc) || context_guc_id_invalid(ce) || 3182 !ctx_id_mapped(guc, ce->guc_id.id); 3183 if (likely(!destroy)) { 3184 if (!list_empty(&ce->guc_id.link)) 3185 list_del_init(&ce->guc_id.link); 3186 list_add_tail(&ce->destroyed_link, 3187 &guc->submission_state.destroyed_contexts); 3188 } else { 3189 __release_guc_id(guc, ce); 3190 } 3191 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 3192 if (unlikely(destroy)) { 3193 __guc_context_destroy(ce); 3194 return; 3195 } 3196 3197 /* 3198 * We use a worker to issue the H2G to deregister the context as we can 3199 * take the GT PM for the first time which isn't allowed from an atomic 3200 * context. 3201 */ 3202 queue_work(system_unbound_wq, &guc->submission_state.destroyed_worker); 3203 } 3204 3205 static int guc_context_alloc(struct intel_context *ce) 3206 { 3207 return lrc_alloc(ce, ce->engine); 3208 } 3209 3210 static void __guc_context_set_prio(struct intel_guc *guc, 3211 struct intel_context *ce) 3212 { 3213 if (GET_UC_VER(guc) >= MAKE_UC_VER(70, 0, 0)) { 3214 struct context_policy policy; 3215 3216 __guc_context_policy_start_klv(&policy, ce->guc_id.id); 3217 __guc_context_policy_add_priority(&policy, ce->guc_state.prio); 3218 __guc_context_set_context_policies(guc, &policy, true); 3219 } else { 3220 u32 action[] = { 3221 INTEL_GUC_ACTION_V69_SET_CONTEXT_PRIORITY, 3222 ce->guc_id.id, 3223 ce->guc_state.prio, 3224 }; 3225 3226 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); 3227 } 3228 } 3229 3230 static void guc_context_set_prio(struct intel_guc *guc, 3231 struct intel_context *ce, 3232 u8 prio) 3233 { 3234 GEM_BUG_ON(prio < GUC_CLIENT_PRIORITY_KMD_HIGH || 3235 prio > GUC_CLIENT_PRIORITY_NORMAL); 3236 lockdep_assert_held(&ce->guc_state.lock); 3237 3238 if (ce->guc_state.prio == prio || submission_disabled(guc) || 3239 !context_registered(ce)) { 3240 ce->guc_state.prio = prio; 3241 return; 3242 } 3243 3244 ce->guc_state.prio = prio; 3245 __guc_context_set_prio(guc, ce); 3246 3247 trace_intel_context_set_prio(ce); 3248 } 3249 3250 static inline u8 map_i915_prio_to_guc_prio(int prio) 3251 { 3252 if (prio == I915_PRIORITY_NORMAL) 3253 return GUC_CLIENT_PRIORITY_KMD_NORMAL; 3254 else if (prio < I915_PRIORITY_NORMAL) 3255 return GUC_CLIENT_PRIORITY_NORMAL; 3256 else if (prio < I915_PRIORITY_DISPLAY) 3257 return GUC_CLIENT_PRIORITY_HIGH; 3258 else 3259 return GUC_CLIENT_PRIORITY_KMD_HIGH; 3260 } 3261 3262 static inline void add_context_inflight_prio(struct intel_context *ce, 3263 u8 guc_prio) 3264 { 3265 lockdep_assert_held(&ce->guc_state.lock); 3266 GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count)); 3267 3268 ++ce->guc_state.prio_count[guc_prio]; 3269 3270 /* Overflow protection */ 3271 
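/* A counter that wrapped to zero on the increment above would trip this warning. */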
GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]); 3272 } 3273 3274 static inline void sub_context_inflight_prio(struct intel_context *ce, 3275 u8 guc_prio) 3276 { 3277 lockdep_assert_held(&ce->guc_state.lock); 3278 GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count)); 3279 3280 /* Underflow protection */ 3281 GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]); 3282 3283 --ce->guc_state.prio_count[guc_prio]; 3284 } 3285 3286 static inline void update_context_prio(struct intel_context *ce) 3287 { 3288 struct intel_guc *guc = &ce->engine->gt->uc.guc; 3289 int i; 3290 3291 BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH != 0); 3292 BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH > GUC_CLIENT_PRIORITY_NORMAL); 3293 3294 lockdep_assert_held(&ce->guc_state.lock); 3295 3296 for (i = 0; i < ARRAY_SIZE(ce->guc_state.prio_count); ++i) { 3297 if (ce->guc_state.prio_count[i]) { 3298 guc_context_set_prio(guc, ce, i); 3299 break; 3300 } 3301 } 3302 } 3303 3304 static inline bool new_guc_prio_higher(u8 old_guc_prio, u8 new_guc_prio) 3305 { 3306 /* Lower value is higher priority */ 3307 return new_guc_prio < old_guc_prio; 3308 } 3309 3310 static void add_to_context(struct i915_request *rq) 3311 { 3312 struct intel_context *ce = request_to_scheduling_context(rq); 3313 u8 new_guc_prio = map_i915_prio_to_guc_prio(rq_prio(rq)); 3314 3315 GEM_BUG_ON(intel_context_is_child(ce)); 3316 GEM_BUG_ON(rq->guc_prio == GUC_PRIO_FINI); 3317 3318 spin_lock(&ce->guc_state.lock); 3319 list_move_tail(&rq->sched.link, &ce->guc_state.requests); 3320 3321 if (rq->guc_prio == GUC_PRIO_INIT) { 3322 rq->guc_prio = new_guc_prio; 3323 add_context_inflight_prio(ce, rq->guc_prio); 3324 } else if (new_guc_prio_higher(rq->guc_prio, new_guc_prio)) { 3325 sub_context_inflight_prio(ce, rq->guc_prio); 3326 rq->guc_prio = new_guc_prio; 3327 add_context_inflight_prio(ce, rq->guc_prio); 3328 } 3329 update_context_prio(ce); 3330 3331 spin_unlock(&ce->guc_state.lock); 3332 } 3333 3334 static void guc_prio_fini(struct i915_request *rq, struct intel_context *ce) 3335 { 3336 lockdep_assert_held(&ce->guc_state.lock); 3337 3338 if (rq->guc_prio != GUC_PRIO_INIT && 3339 rq->guc_prio != GUC_PRIO_FINI) { 3340 sub_context_inflight_prio(ce, rq->guc_prio); 3341 update_context_prio(ce); 3342 } 3343 rq->guc_prio = GUC_PRIO_FINI; 3344 } 3345 3346 static void remove_from_context(struct i915_request *rq) 3347 { 3348 struct intel_context *ce = request_to_scheduling_context(rq); 3349 3350 GEM_BUG_ON(intel_context_is_child(ce)); 3351 3352 spin_lock_irq(&ce->guc_state.lock); 3353 3354 list_del_init(&rq->sched.link); 3355 clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 3356 3357 /* Prevent further __await_execution() registering a cb, then flush */ 3358 set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); 3359 3360 guc_prio_fini(rq, ce); 3361 3362 decr_context_committed_requests(ce); 3363 3364 spin_unlock_irq(&ce->guc_state.lock); 3365 3366 atomic_dec(&ce->guc_id.ref); 3367 i915_request_notify_execute_cb_imm(rq); 3368 } 3369 3370 static const struct intel_context_ops guc_context_ops = { 3371 .alloc = guc_context_alloc, 3372 3373 .pre_pin = guc_context_pre_pin, 3374 .pin = guc_context_pin, 3375 .unpin = guc_context_unpin, 3376 .post_unpin = guc_context_post_unpin, 3377 3378 .revoke = guc_context_revoke, 3379 3380 .cancel_request = guc_context_cancel_request, 3381 3382 .enter = intel_context_enter_engine, 3383 .exit = intel_context_exit_engine, 3384 3385 .sched_disable = guc_context_sched_disable, 3386 3387 .reset = lrc_reset, 3388 .destroy = guc_context_destroy, 3389 
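/*
 * Only this ops table exposes virtual / parallel engine creation; the
 * virtual and parent/child ops tables further below do not.
 */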
3390 .create_virtual = guc_create_virtual, 3391 .create_parallel = guc_create_parallel, 3392 }; 3393 3394 static void submit_work_cb(struct irq_work *wrk) 3395 { 3396 struct i915_request *rq = container_of(wrk, typeof(*rq), submit_work); 3397 3398 might_lock(&rq->engine->sched_engine->lock); 3399 i915_sw_fence_complete(&rq->submit); 3400 } 3401 3402 static void __guc_signal_context_fence(struct intel_context *ce) 3403 { 3404 struct i915_request *rq, *rn; 3405 3406 lockdep_assert_held(&ce->guc_state.lock); 3407 3408 if (!list_empty(&ce->guc_state.fences)) 3409 trace_intel_context_fence_release(ce); 3410 3411 /* 3412 * Use an IRQ to ensure locking order of sched_engine->lock -> 3413 * ce->guc_state.lock is preserved. 3414 */ 3415 list_for_each_entry_safe(rq, rn, &ce->guc_state.fences, 3416 guc_fence_link) { 3417 list_del(&rq->guc_fence_link); 3418 irq_work_queue(&rq->submit_work); 3419 } 3420 3421 INIT_LIST_HEAD(&ce->guc_state.fences); 3422 } 3423 3424 static void guc_signal_context_fence(struct intel_context *ce) 3425 { 3426 unsigned long flags; 3427 3428 GEM_BUG_ON(intel_context_is_child(ce)); 3429 3430 spin_lock_irqsave(&ce->guc_state.lock, flags); 3431 clr_context_wait_for_deregister_to_register(ce); 3432 __guc_signal_context_fence(ce); 3433 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3434 } 3435 3436 static bool context_needs_register(struct intel_context *ce, bool new_guc_id) 3437 { 3438 return (new_guc_id || test_bit(CONTEXT_LRCA_DIRTY, &ce->flags) || 3439 !ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id)) && 3440 !submission_disabled(ce_to_guc(ce)); 3441 } 3442 3443 static void guc_context_init(struct intel_context *ce) 3444 { 3445 const struct i915_gem_context *ctx; 3446 int prio = I915_CONTEXT_DEFAULT_PRIORITY; 3447 3448 rcu_read_lock(); 3449 ctx = rcu_dereference(ce->gem_context); 3450 if (ctx) 3451 prio = ctx->sched.priority; 3452 rcu_read_unlock(); 3453 3454 ce->guc_state.prio = map_i915_prio_to_guc_prio(prio); 3455 set_bit(CONTEXT_GUC_INIT, &ce->flags); 3456 } 3457 3458 static int guc_request_alloc(struct i915_request *rq) 3459 { 3460 struct intel_context *ce = request_to_scheduling_context(rq); 3461 struct intel_guc *guc = ce_to_guc(ce); 3462 unsigned long flags; 3463 int ret; 3464 3465 GEM_BUG_ON(!intel_context_is_pinned(rq->context)); 3466 3467 /* 3468 * Flush enough space to reduce the likelihood of waiting after 3469 * we start building the request - in which case we will just 3470 * have to repeat work. 3471 */ 3472 rq->reserved_space += GUC_REQUEST_SIZE; 3473 3474 /* 3475 * Note that after this point, we have committed to using 3476 * this request as it is being used to both track the 3477 * state of engine initialisation and liveness of the 3478 * golden renderstate above. Think twice before you try 3479 * to cancel/unwind this request now. 3480 */ 3481 3482 /* Unconditionally invalidate GPU caches and TLBs. */ 3483 ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE); 3484 if (ret) 3485 return ret; 3486 3487 rq->reserved_space -= GUC_REQUEST_SIZE; 3488 3489 if (unlikely(!test_bit(CONTEXT_GUC_INIT, &ce->flags))) 3490 guc_context_init(ce); 3491 3492 /* 3493 * Call pin_guc_id here rather than in the pinning step as with 3494 * dma_resv, contexts can be repeatedly pinned / unpinned trashing the 3495 * guc_id and creating horrible race conditions. This is especially bad 3496 * when guc_id are being stolen due to over subscription. By the time 3497 * this function is reached, it is guaranteed that the guc_id will be 3498 * persistent until the generated request is retired. 
Thus, sealing these 3499 * race conditions. It is still safe to fail here if guc_id are 3500 * exhausted and return -EAGAIN to the user indicating that they can try 3501 * again in the future. 3502 * 3503 * There is no need for a lock here as the timeline mutex ensures at 3504 * most one context can be executing this code path at once. The 3505 * guc_id_ref is incremented once for every request in flight and 3506 * decremented on each retire. When it is zero, a lock around the 3507 * increment (in pin_guc_id) is needed to seal a race with unpin_guc_id. 3508 */ 3509 if (atomic_add_unless(&ce->guc_id.ref, 1, 0)) 3510 goto out; 3511 3512 ret = pin_guc_id(guc, ce); /* returns 1 if new guc_id assigned */ 3513 if (unlikely(ret < 0)) 3514 return ret; 3515 if (context_needs_register(ce, !!ret)) { 3516 ret = try_context_registration(ce, true); 3517 if (unlikely(ret)) { /* unwind */ 3518 if (ret == -EPIPE) { 3519 disable_submission(guc); 3520 goto out; /* GPU will be reset */ 3521 } 3522 atomic_dec(&ce->guc_id.ref); 3523 unpin_guc_id(guc, ce); 3524 return ret; 3525 } 3526 } 3527 3528 clear_bit(CONTEXT_LRCA_DIRTY, &ce->flags); 3529 3530 out: 3531 /* 3532 * We block all requests on this context if a G2H is pending for a 3533 * schedule disable or context deregistration as the GuC will fail a 3534 * schedule enable or context registration if either G2H is pending 3535 * respectfully. Once a G2H returns, the fence is released that is 3536 * blocking these requests (see guc_signal_context_fence). 3537 */ 3538 spin_lock_irqsave(&ce->guc_state.lock, flags); 3539 if (context_wait_for_deregister_to_register(ce) || 3540 context_pending_disable(ce)) { 3541 init_irq_work(&rq->submit_work, submit_work_cb); 3542 i915_sw_fence_await(&rq->submit); 3543 3544 list_add_tail(&rq->guc_fence_link, &ce->guc_state.fences); 3545 } 3546 incr_context_committed_requests(ce); 3547 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3548 3549 return 0; 3550 } 3551 3552 static int guc_virtual_context_pre_pin(struct intel_context *ce, 3553 struct i915_gem_ww_ctx *ww, 3554 void **vaddr) 3555 { 3556 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3557 3558 return __guc_context_pre_pin(ce, engine, ww, vaddr); 3559 } 3560 3561 static int guc_virtual_context_pin(struct intel_context *ce, void *vaddr) 3562 { 3563 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3564 int ret = __guc_context_pin(ce, engine, vaddr); 3565 intel_engine_mask_t tmp, mask = ce->engine->mask; 3566 3567 if (likely(!ret)) 3568 for_each_engine_masked(engine, ce->engine->gt, mask, tmp) 3569 intel_engine_pm_get(engine); 3570 3571 return ret; 3572 } 3573 3574 static void guc_virtual_context_unpin(struct intel_context *ce) 3575 { 3576 intel_engine_mask_t tmp, mask = ce->engine->mask; 3577 struct intel_engine_cs *engine; 3578 struct intel_guc *guc = ce_to_guc(ce); 3579 3580 GEM_BUG_ON(context_enabled(ce)); 3581 GEM_BUG_ON(intel_context_is_barrier(ce)); 3582 3583 unpin_guc_id(guc, ce); 3584 lrc_unpin(ce); 3585 3586 for_each_engine_masked(engine, ce->engine->gt, mask, tmp) 3587 intel_engine_pm_put_async(engine); 3588 } 3589 3590 static void guc_virtual_context_enter(struct intel_context *ce) 3591 { 3592 intel_engine_mask_t tmp, mask = ce->engine->mask; 3593 struct intel_engine_cs *engine; 3594 3595 for_each_engine_masked(engine, ce->engine->gt, mask, tmp) 3596 intel_engine_pm_get(engine); 3597 3598 intel_timeline_enter(ce->timeline); 3599 } 3600 3601 static void guc_virtual_context_exit(struct intel_context *ce) 3602 { 3603 
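/*
 * Mirror of guc_virtual_context_enter(): drop the PM reference taken
 * on each engine in the context's mask and leave the timeline.
 */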
intel_engine_mask_t tmp, mask = ce->engine->mask; 3604 struct intel_engine_cs *engine; 3605 3606 for_each_engine_masked(engine, ce->engine->gt, mask, tmp) 3607 intel_engine_pm_put(engine); 3608 3609 intel_timeline_exit(ce->timeline); 3610 } 3611 3612 static int guc_virtual_context_alloc(struct intel_context *ce) 3613 { 3614 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3615 3616 return lrc_alloc(ce, engine); 3617 } 3618 3619 static const struct intel_context_ops virtual_guc_context_ops = { 3620 .alloc = guc_virtual_context_alloc, 3621 3622 .pre_pin = guc_virtual_context_pre_pin, 3623 .pin = guc_virtual_context_pin, 3624 .unpin = guc_virtual_context_unpin, 3625 .post_unpin = guc_context_post_unpin, 3626 3627 .revoke = guc_context_revoke, 3628 3629 .cancel_request = guc_context_cancel_request, 3630 3631 .enter = guc_virtual_context_enter, 3632 .exit = guc_virtual_context_exit, 3633 3634 .sched_disable = guc_context_sched_disable, 3635 3636 .destroy = guc_context_destroy, 3637 3638 .get_sibling = guc_virtual_get_sibling, 3639 }; 3640 3641 static int guc_parent_context_pin(struct intel_context *ce, void *vaddr) 3642 { 3643 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3644 struct intel_guc *guc = ce_to_guc(ce); 3645 int ret; 3646 3647 GEM_BUG_ON(!intel_context_is_parent(ce)); 3648 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3649 3650 ret = pin_guc_id(guc, ce); 3651 if (unlikely(ret < 0)) 3652 return ret; 3653 3654 return __guc_context_pin(ce, engine, vaddr); 3655 } 3656 3657 static int guc_child_context_pin(struct intel_context *ce, void *vaddr) 3658 { 3659 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3660 3661 GEM_BUG_ON(!intel_context_is_child(ce)); 3662 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3663 3664 __intel_context_pin(ce->parallel.parent); 3665 return __guc_context_pin(ce, engine, vaddr); 3666 } 3667 3668 static void guc_parent_context_unpin(struct intel_context *ce) 3669 { 3670 struct intel_guc *guc = ce_to_guc(ce); 3671 3672 GEM_BUG_ON(context_enabled(ce)); 3673 GEM_BUG_ON(intel_context_is_barrier(ce)); 3674 GEM_BUG_ON(!intel_context_is_parent(ce)); 3675 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3676 3677 unpin_guc_id(guc, ce); 3678 lrc_unpin(ce); 3679 } 3680 3681 static void guc_child_context_unpin(struct intel_context *ce) 3682 { 3683 GEM_BUG_ON(context_enabled(ce)); 3684 GEM_BUG_ON(intel_context_is_barrier(ce)); 3685 GEM_BUG_ON(!intel_context_is_child(ce)); 3686 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3687 3688 lrc_unpin(ce); 3689 } 3690 3691 static void guc_child_context_post_unpin(struct intel_context *ce) 3692 { 3693 GEM_BUG_ON(!intel_context_is_child(ce)); 3694 GEM_BUG_ON(!intel_context_is_pinned(ce->parallel.parent)); 3695 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3696 3697 lrc_post_unpin(ce); 3698 intel_context_unpin(ce->parallel.parent); 3699 } 3700 3701 static void guc_child_context_destroy(struct kref *kref) 3702 { 3703 struct intel_context *ce = container_of(kref, typeof(*ce), ref); 3704 3705 __guc_context_destroy(ce); 3706 } 3707 3708 static const struct intel_context_ops virtual_parent_context_ops = { 3709 .alloc = guc_virtual_context_alloc, 3710 3711 .pre_pin = guc_context_pre_pin, 3712 .pin = guc_parent_context_pin, 3713 .unpin = guc_parent_context_unpin, 3714 .post_unpin = guc_context_post_unpin, 3715 3716 .revoke = guc_context_revoke, 3717 3718 .cancel_request = guc_context_cancel_request, 3719 3720 .enter = guc_virtual_context_enter, 3721 
.exit = guc_virtual_context_exit, 3722 3723 .sched_disable = guc_context_sched_disable, 3724 3725 .destroy = guc_context_destroy, 3726 3727 .get_sibling = guc_virtual_get_sibling, 3728 }; 3729 3730 static const struct intel_context_ops virtual_child_context_ops = { 3731 .alloc = guc_virtual_context_alloc, 3732 3733 .pre_pin = guc_context_pre_pin, 3734 .pin = guc_child_context_pin, 3735 .unpin = guc_child_context_unpin, 3736 .post_unpin = guc_child_context_post_unpin, 3737 3738 .cancel_request = guc_context_cancel_request, 3739 3740 .enter = guc_virtual_context_enter, 3741 .exit = guc_virtual_context_exit, 3742 3743 .destroy = guc_child_context_destroy, 3744 3745 .get_sibling = guc_virtual_get_sibling, 3746 }; 3747 3748 /* 3749 * The below override of the breadcrumbs is enabled when the user configures a 3750 * context for parallel submission (multi-lrc, parent-child). 3751 * 3752 * The overridden breadcrumbs implements an algorithm which allows the GuC to 3753 * safely preempt all the hw contexts configured for parallel submission 3754 * between each BB. The contract between the i915 and GuC is if the parent 3755 * context can be preempted, all the children can be preempted, and the GuC will 3756 * always try to preempt the parent before the children. A handshake between the 3757 * parent / children breadcrumbs ensures the i915 holds up its end of the deal 3758 * creating a window to preempt between each set of BBs. 3759 */ 3760 static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq, 3761 u64 offset, u32 len, 3762 const unsigned int flags); 3763 static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq, 3764 u64 offset, u32 len, 3765 const unsigned int flags); 3766 static u32 * 3767 emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq, 3768 u32 *cs); 3769 static u32 * 3770 emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq, 3771 u32 *cs); 3772 3773 static struct intel_context * 3774 guc_create_parallel(struct intel_engine_cs **engines, 3775 unsigned int num_siblings, 3776 unsigned int width) 3777 { 3778 struct intel_engine_cs **siblings = NULL; 3779 struct intel_context *parent = NULL, *ce, *err; 3780 int i, j; 3781 3782 siblings = kmalloc_array(num_siblings, 3783 sizeof(*siblings), 3784 GFP_KERNEL); 3785 if (!siblings) 3786 return ERR_PTR(-ENOMEM); 3787 3788 for (i = 0; i < width; ++i) { 3789 for (j = 0; j < num_siblings; ++j) 3790 siblings[j] = engines[i * num_siblings + j]; 3791 3792 ce = intel_engine_create_virtual(siblings, num_siblings, 3793 FORCE_VIRTUAL); 3794 if (IS_ERR(ce)) { 3795 err = ERR_CAST(ce); 3796 goto unwind; 3797 } 3798 3799 if (i == 0) { 3800 parent = ce; 3801 parent->ops = &virtual_parent_context_ops; 3802 } else { 3803 ce->ops = &virtual_child_context_ops; 3804 intel_context_bind_parent_child(parent, ce); 3805 } 3806 } 3807 3808 parent->parallel.fence_context = dma_fence_context_alloc(1); 3809 3810 parent->engine->emit_bb_start = 3811 emit_bb_start_parent_no_preempt_mid_batch; 3812 parent->engine->emit_fini_breadcrumb = 3813 emit_fini_breadcrumb_parent_no_preempt_mid_batch; 3814 parent->engine->emit_fini_breadcrumb_dw = 3815 12 + 4 * parent->parallel.number_children; 3816 for_each_child(parent, ce) { 3817 ce->engine->emit_bb_start = 3818 emit_bb_start_child_no_preempt_mid_batch; 3819 ce->engine->emit_fini_breadcrumb = 3820 emit_fini_breadcrumb_child_no_preempt_mid_batch; 3821 ce->engine->emit_fini_breadcrumb_dw = 16; 3822 } 3823 3824 kfree(siblings); 3825 return parent; 3826 3827 unwind: 
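/* Creation failed part way: drop the parent reference (any children already bound to it are released along with it) and free the temporary siblings array. */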
3828 if (parent) 3829 intel_context_put(parent); 3830 kfree(siblings); 3831 return err; 3832 } 3833 3834 static bool 3835 guc_irq_enable_breadcrumbs(struct intel_breadcrumbs *b) 3836 { 3837 struct intel_engine_cs *sibling; 3838 intel_engine_mask_t tmp, mask = b->engine_mask; 3839 bool result = false; 3840 3841 for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp) 3842 result |= intel_engine_irq_enable(sibling); 3843 3844 return result; 3845 } 3846 3847 static void 3848 guc_irq_disable_breadcrumbs(struct intel_breadcrumbs *b) 3849 { 3850 struct intel_engine_cs *sibling; 3851 intel_engine_mask_t tmp, mask = b->engine_mask; 3852 3853 for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp) 3854 intel_engine_irq_disable(sibling); 3855 } 3856 3857 static void guc_init_breadcrumbs(struct intel_engine_cs *engine) 3858 { 3859 int i; 3860 3861 /* 3862 * In GuC submission mode we do not know which physical engine a request 3863 * will be scheduled on, this creates a problem because the breadcrumb 3864 * interrupt is per physical engine. To work around this we attach 3865 * requests and direct all breadcrumb interrupts to the first instance 3866 * of an engine per class. In addition all breadcrumb interrupts are 3867 * enabled / disabled across an engine class in unison. 3868 */ 3869 for (i = 0; i < MAX_ENGINE_INSTANCE; ++i) { 3870 struct intel_engine_cs *sibling = 3871 engine->gt->engine_class[engine->class][i]; 3872 3873 if (sibling) { 3874 if (engine->breadcrumbs != sibling->breadcrumbs) { 3875 intel_breadcrumbs_put(engine->breadcrumbs); 3876 engine->breadcrumbs = 3877 intel_breadcrumbs_get(sibling->breadcrumbs); 3878 } 3879 break; 3880 } 3881 } 3882 3883 if (engine->breadcrumbs) { 3884 engine->breadcrumbs->engine_mask |= engine->mask; 3885 engine->breadcrumbs->irq_enable = guc_irq_enable_breadcrumbs; 3886 engine->breadcrumbs->irq_disable = guc_irq_disable_breadcrumbs; 3887 } 3888 } 3889 3890 static void guc_bump_inflight_request_prio(struct i915_request *rq, 3891 int prio) 3892 { 3893 struct intel_context *ce = request_to_scheduling_context(rq); 3894 u8 new_guc_prio = map_i915_prio_to_guc_prio(prio); 3895 3896 /* Short circuit function */ 3897 if (prio < I915_PRIORITY_NORMAL || 3898 rq->guc_prio == GUC_PRIO_FINI || 3899 (rq->guc_prio != GUC_PRIO_INIT && 3900 !new_guc_prio_higher(rq->guc_prio, new_guc_prio))) 3901 return; 3902 3903 spin_lock(&ce->guc_state.lock); 3904 if (rq->guc_prio != GUC_PRIO_FINI) { 3905 if (rq->guc_prio != GUC_PRIO_INIT) 3906 sub_context_inflight_prio(ce, rq->guc_prio); 3907 rq->guc_prio = new_guc_prio; 3908 add_context_inflight_prio(ce, rq->guc_prio); 3909 update_context_prio(ce); 3910 } 3911 spin_unlock(&ce->guc_state.lock); 3912 } 3913 3914 static void guc_retire_inflight_request_prio(struct i915_request *rq) 3915 { 3916 struct intel_context *ce = request_to_scheduling_context(rq); 3917 3918 spin_lock(&ce->guc_state.lock); 3919 guc_prio_fini(rq, ce); 3920 spin_unlock(&ce->guc_state.lock); 3921 } 3922 3923 static void sanitize_hwsp(struct intel_engine_cs *engine) 3924 { 3925 struct intel_timeline *tl; 3926 3927 list_for_each_entry(tl, &engine->status_page.timelines, engine_link) 3928 intel_timeline_reset_seqno(tl); 3929 } 3930 3931 static void guc_sanitize(struct intel_engine_cs *engine) 3932 { 3933 /* 3934 * Poison residual state on resume, in case the suspend didn't! 3935 * 3936 * We have to assume that across suspend/resume (or other loss 3937 * of control) that the contents of our pinned buffers has been 3938 * lost, replaced by garbage. 
Since this doesn't always happen, 3939 * let's poison such state so that we more quickly spot when 3940 * we falsely assume it has been preserved. 3941 */ 3942 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) 3943 memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE); 3944 3945 /* 3946 * The kernel_context HWSP is stored in the status_page. As above, 3947 * that may be lost on resume/initialisation, and so we need to 3948 * reset the value in the HWSP. 3949 */ 3950 sanitize_hwsp(engine); 3951 3952 /* And scrub the dirty cachelines for the HWSP */ 3953 drm_clflush_virt_range(engine->status_page.addr, PAGE_SIZE); 3954 3955 intel_engine_reset_pinned_contexts(engine); 3956 } 3957 3958 static void setup_hwsp(struct intel_engine_cs *engine) 3959 { 3960 intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */ 3961 3962 ENGINE_WRITE_FW(engine, 3963 RING_HWS_PGA, 3964 i915_ggtt_offset(engine->status_page.vma)); 3965 } 3966 3967 static void start_engine(struct intel_engine_cs *engine) 3968 { 3969 ENGINE_WRITE_FW(engine, 3970 RING_MODE_GEN7, 3971 _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE)); 3972 3973 ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING)); 3974 ENGINE_POSTING_READ(engine, RING_MI_MODE); 3975 } 3976 3977 static int guc_resume(struct intel_engine_cs *engine) 3978 { 3979 assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL); 3980 3981 intel_mocs_init_engine(engine); 3982 3983 intel_breadcrumbs_reset(engine->breadcrumbs); 3984 3985 setup_hwsp(engine); 3986 start_engine(engine); 3987 3988 if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE) 3989 xehp_enable_ccs_engines(engine); 3990 3991 return 0; 3992 } 3993 3994 static bool guc_sched_engine_disabled(struct i915_sched_engine *sched_engine) 3995 { 3996 return !sched_engine->tasklet.callback; 3997 } 3998 3999 static void guc_set_default_submission(struct intel_engine_cs *engine) 4000 { 4001 engine->submit_request = guc_submit_request; 4002 } 4003 4004 static inline void guc_kernel_context_pin(struct intel_guc *guc, 4005 struct intel_context *ce) 4006 { 4007 /* 4008 * Note: we purposefully do not check the returns below because 4009 * the registration can only fail if a reset is just starting. 4010 * This is called at the end of reset so presumably another reset 4011 * isn't happening and even if it did, this code would be run again. 4012 */ 4013 4014 if (context_guc_id_invalid(ce)) 4015 pin_guc_id(guc, ce); 4016 4017 try_context_registration(ce, true); 4018 } 4019 4020 static inline void guc_init_lrc_mapping(struct intel_guc *guc) 4021 { 4022 struct intel_gt *gt = guc_to_gt(guc); 4023 struct intel_engine_cs *engine; 4024 enum intel_engine_id id; 4025 4026 /* make sure all descriptors are clean... */ 4027 xa_destroy(&guc->context_lookup); 4028 4029 /* 4030 * A reset might have occurred while we had a pending stalled request, 4031 * so make sure we clean that up. 4032 */ 4033 guc->stalled_request = NULL; 4034 guc->submission_stall_reason = STALL_NONE; 4035 4036 /* 4037 * Some contexts might have been pinned before we enabled GuC 4038 * submission, so we need to add them to the GuC bookkeeping. 4039 * Also, after a reset of the GuC we want to make sure that the 4040 * information shared with GuC is properly reset. The kernel LRCs are 4041 * not attached to the gem_context, so they need to be added separately.
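* The loop below therefore walks each engine's pinned_contexts_list and re-registers those kernel contexts with the GuC via guc_kernel_context_pin().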
4042 */ 4043 for_each_engine(engine, gt, id) { 4044 struct intel_context *ce; 4045 4046 list_for_each_entry(ce, &engine->pinned_contexts_list, 4047 pinned_contexts_link) 4048 guc_kernel_context_pin(guc, ce); 4049 } 4050 } 4051 4052 static void guc_release(struct intel_engine_cs *engine) 4053 { 4054 engine->sanitize = NULL; /* no longer in control, nothing to sanitize */ 4055 4056 intel_engine_cleanup_common(engine); 4057 lrc_fini_wa_ctx(engine); 4058 } 4059 4060 static void virtual_guc_bump_serial(struct intel_engine_cs *engine) 4061 { 4062 struct intel_engine_cs *e; 4063 intel_engine_mask_t tmp, mask = engine->mask; 4064 4065 for_each_engine_masked(e, engine->gt, mask, tmp) 4066 e->serial++; 4067 } 4068 4069 static void guc_default_vfuncs(struct intel_engine_cs *engine) 4070 { 4071 /* Default vfuncs which can be overridden by each engine. */ 4072 4073 engine->resume = guc_resume; 4074 4075 engine->cops = &guc_context_ops; 4076 engine->request_alloc = guc_request_alloc; 4077 engine->add_active_request = add_to_context; 4078 engine->remove_active_request = remove_from_context; 4079 4080 engine->sched_engine->schedule = i915_schedule; 4081 4082 engine->reset.prepare = guc_engine_reset_prepare; 4083 engine->reset.rewind = guc_rewind_nop; 4084 engine->reset.cancel = guc_reset_nop; 4085 engine->reset.finish = guc_reset_nop; 4086 4087 engine->emit_flush = gen8_emit_flush_xcs; 4088 engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb; 4089 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_xcs; 4090 if (GRAPHICS_VER(engine->i915) >= 12) { 4091 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_xcs; 4092 engine->emit_flush = gen12_emit_flush_xcs; 4093 } 4094 engine->set_default_submission = guc_set_default_submission; 4095 engine->busyness = guc_engine_busyness; 4096 4097 engine->flags |= I915_ENGINE_SUPPORTS_STATS; 4098 engine->flags |= I915_ENGINE_HAS_PREEMPTION; 4099 engine->flags |= I915_ENGINE_HAS_TIMESLICES; 4100 4101 /* Wa_14014475959:dg2 */ 4102 if (IS_DG2(engine->i915) && engine->class == COMPUTE_CLASS) 4103 engine->flags |= I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT; 4104 4105 /* 4106 * TODO: GuC supports timeslicing and semaphores as well, but they're 4107 * handled by the firmware so some minor tweaks are required before 4108 * enabling. 
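* The commented-out flag below is left as a reminder of what enabling would look like: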
4109 * 4110 * engine->flags |= I915_ENGINE_HAS_SEMAPHORES; 4111 */ 4112 4113 engine->emit_bb_start = gen8_emit_bb_start; 4114 if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) 4115 engine->emit_bb_start = gen125_emit_bb_start; 4116 } 4117 4118 static void rcs_submission_override(struct intel_engine_cs *engine) 4119 { 4120 switch (GRAPHICS_VER(engine->i915)) { 4121 case 12: 4122 engine->emit_flush = gen12_emit_flush_rcs; 4123 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs; 4124 break; 4125 case 11: 4126 engine->emit_flush = gen11_emit_flush_rcs; 4127 engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs; 4128 break; 4129 default: 4130 engine->emit_flush = gen8_emit_flush_rcs; 4131 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs; 4132 break; 4133 } 4134 } 4135 4136 static inline void guc_default_irqs(struct intel_engine_cs *engine) 4137 { 4138 engine->irq_keep_mask = GT_RENDER_USER_INTERRUPT; 4139 intel_engine_set_irq_handler(engine, cs_irq_handler); 4140 } 4141 4142 static void guc_sched_engine_destroy(struct kref *kref) 4143 { 4144 struct i915_sched_engine *sched_engine = 4145 container_of(kref, typeof(*sched_engine), ref); 4146 struct intel_guc *guc = sched_engine->private_data; 4147 4148 guc->sched_engine = NULL; 4149 tasklet_kill(&sched_engine->tasklet); /* flush the callback */ 4150 kfree(sched_engine); 4151 } 4152 4153 int intel_guc_submission_setup(struct intel_engine_cs *engine) 4154 { 4155 struct drm_i915_private *i915 = engine->i915; 4156 struct intel_guc *guc = &engine->gt->uc.guc; 4157 4158 /* 4159 * The setup relies on several assumptions (e.g. irqs always enabled) 4160 * that are only valid on gen11+ 4161 */ 4162 GEM_BUG_ON(GRAPHICS_VER(i915) < 11); 4163 4164 if (!guc->sched_engine) { 4165 guc->sched_engine = i915_sched_engine_create(ENGINE_VIRTUAL); 4166 if (!guc->sched_engine) 4167 return -ENOMEM; 4168 4169 guc->sched_engine->schedule = i915_schedule; 4170 guc->sched_engine->disabled = guc_sched_engine_disabled; 4171 guc->sched_engine->private_data = guc; 4172 guc->sched_engine->destroy = guc_sched_engine_destroy; 4173 guc->sched_engine->bump_inflight_request_prio = 4174 guc_bump_inflight_request_prio; 4175 guc->sched_engine->retire_inflight_request_prio = 4176 guc_retire_inflight_request_prio; 4177 tasklet_setup(&guc->sched_engine->tasklet, 4178 guc_submission_tasklet); 4179 } 4180 i915_sched_engine_put(engine->sched_engine); 4181 engine->sched_engine = i915_sched_engine_get(guc->sched_engine); 4182 4183 guc_default_vfuncs(engine); 4184 guc_default_irqs(engine); 4185 guc_init_breadcrumbs(engine); 4186 4187 if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE) 4188 rcs_submission_override(engine); 4189 4190 lrc_init_wa_ctx(engine); 4191 4192 /* Finally, take ownership and responsibility for cleanup! 
*/ 4193 engine->sanitize = guc_sanitize; 4194 engine->release = guc_release; 4195 4196 return 0; 4197 } 4198 4199 void intel_guc_submission_enable(struct intel_guc *guc) 4200 { 4201 struct intel_gt *gt = guc_to_gt(guc); 4202 4203 /* Enable and route to GuC */ 4204 if (GRAPHICS_VER(gt->i915) >= 12) 4205 intel_uncore_write(gt->uncore, GEN12_GUC_SEM_INTR_ENABLES, 4206 GUC_SEM_INTR_ROUTE_TO_GUC | 4207 GUC_SEM_INTR_ENABLE_ALL); 4208 4209 guc_init_lrc_mapping(guc); 4210 guc_init_engine_stats(guc); 4211 } 4212 4213 void intel_guc_submission_disable(struct intel_guc *guc) 4214 { 4215 struct intel_gt *gt = guc_to_gt(guc); 4216 4217 /* Note: By the time we're here, GuC may have already been reset */ 4218 4219 /* Disable and route to host */ 4220 if (GRAPHICS_VER(gt->i915) >= 12) 4221 intel_uncore_write(gt->uncore, GEN12_GUC_SEM_INTR_ENABLES, 0x0); 4222 } 4223 4224 static bool __guc_submission_supported(struct intel_guc *guc) 4225 { 4226 /* GuC submission is unavailable for pre-Gen11 */ 4227 return intel_guc_is_supported(guc) && 4228 GRAPHICS_VER(guc_to_gt(guc)->i915) >= 11; 4229 } 4230 4231 static bool __guc_submission_selected(struct intel_guc *guc) 4232 { 4233 struct drm_i915_private *i915 = guc_to_gt(guc)->i915; 4234 4235 if (!intel_guc_submission_is_supported(guc)) 4236 return false; 4237 4238 return i915->params.enable_guc & ENABLE_GUC_SUBMISSION; 4239 } 4240 4241 void intel_guc_submission_init_early(struct intel_guc *guc) 4242 { 4243 xa_init_flags(&guc->context_lookup, XA_FLAGS_LOCK_IRQ); 4244 4245 mtx_init(&guc->submission_state.lock, IPL_TTY); 4246 INIT_LIST_HEAD(&guc->submission_state.guc_id_list); 4247 ida_init(&guc->submission_state.guc_ids); 4248 INIT_LIST_HEAD(&guc->submission_state.destroyed_contexts); 4249 INIT_WORK(&guc->submission_state.destroyed_worker, 4250 destroyed_worker_func); 4251 INIT_WORK(&guc->submission_state.reset_fail_worker, 4252 reset_fail_worker_func); 4253 4254 mtx_init(&guc->timestamp.lock, IPL_TTY); 4255 INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping); 4256 4257 guc->submission_state.num_guc_ids = GUC_MAX_CONTEXT_ID; 4258 guc->submission_supported = __guc_submission_supported(guc); 4259 guc->submission_selected = __guc_submission_selected(guc); 4260 } 4261 4262 static inline struct intel_context * 4263 g2h_context_lookup(struct intel_guc *guc, u32 ctx_id) 4264 { 4265 struct intel_context *ce; 4266 4267 if (unlikely(ctx_id >= GUC_MAX_CONTEXT_ID)) { 4268 drm_err(&guc_to_gt(guc)->i915->drm, 4269 "Invalid ctx_id %u\n", ctx_id); 4270 return NULL; 4271 } 4272 4273 ce = __get_context(guc, ctx_id); 4274 if (unlikely(!ce)) { 4275 drm_err(&guc_to_gt(guc)->i915->drm, 4276 "Context is NULL, ctx_id %u\n", ctx_id); 4277 return NULL; 4278 } 4279 4280 if (unlikely(intel_context_is_child(ce))) { 4281 drm_err(&guc_to_gt(guc)->i915->drm, 4282 "Context is child, ctx_id %u\n", ctx_id); 4283 return NULL; 4284 } 4285 4286 return ce; 4287 } 4288 4289 int intel_guc_deregister_done_process_msg(struct intel_guc *guc, 4290 const u32 *msg, 4291 u32 len) 4292 { 4293 struct intel_context *ce; 4294 u32 ctx_id; 4295 4296 if (unlikely(len < 1)) { 4297 drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u\n", len); 4298 return -EPROTO; 4299 } 4300 ctx_id = msg[0]; 4301 4302 ce = g2h_context_lookup(guc, ctx_id); 4303 if (unlikely(!ce)) 4304 return -EPROTO; 4305 4306 trace_intel_context_deregister_done(ce); 4307 4308 #ifdef CONFIG_DRM_I915_SELFTEST 4309 if (unlikely(ce->drop_deregister)) { 4310 ce->drop_deregister = false; 4311 return 0; 4312 } 4313 #endif 4314 4315 if 
(context_wait_for_deregister_to_register(ce)) { 4316 struct intel_runtime_pm *runtime_pm = 4317 &ce->engine->gt->i915->runtime_pm; 4318 intel_wakeref_t wakeref; 4319 4320 /* 4321 * The previous owner of this guc_id has been deregistered, so it 4322 * is now safe to register this context. 4323 */ 4324 with_intel_runtime_pm(runtime_pm, wakeref) 4325 register_context(ce, true); 4326 guc_signal_context_fence(ce); 4327 intel_context_put(ce); 4328 } else if (context_destroyed(ce)) { 4329 /* Context has been destroyed */ 4330 intel_gt_pm_put_async(guc_to_gt(guc)); 4331 release_guc_id(guc, ce); 4332 __guc_context_destroy(ce); 4333 } 4334 4335 decr_outstanding_submission_g2h(guc); 4336 4337 return 0; 4338 } 4339 4340 int intel_guc_sched_done_process_msg(struct intel_guc *guc, 4341 const u32 *msg, 4342 u32 len) 4343 { 4344 struct intel_context *ce; 4345 unsigned long flags; 4346 u32 ctx_id; 4347 4348 if (unlikely(len < 2)) { 4349 drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u\n", len); 4350 return -EPROTO; 4351 } 4352 ctx_id = msg[0]; 4353 4354 ce = g2h_context_lookup(guc, ctx_id); 4355 if (unlikely(!ce)) 4356 return -EPROTO; 4357 4358 if (unlikely(context_destroyed(ce) || 4359 (!context_pending_enable(ce) && 4360 !context_pending_disable(ce)))) { 4361 drm_err(&guc_to_gt(guc)->i915->drm, 4362 "Bad context sched_state 0x%x, ctx_id %u\n", 4363 ce->guc_state.sched_state, ctx_id); 4364 return -EPROTO; 4365 } 4366 4367 trace_intel_context_sched_done(ce); 4368 4369 if (context_pending_enable(ce)) { 4370 #ifdef CONFIG_DRM_I915_SELFTEST 4371 if (unlikely(ce->drop_schedule_enable)) { 4372 ce->drop_schedule_enable = false; 4373 return 0; 4374 } 4375 #endif 4376 4377 spin_lock_irqsave(&ce->guc_state.lock, flags); 4378 clr_context_pending_enable(ce); 4379 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 4380 } else if (context_pending_disable(ce)) { 4381 bool banned; 4382 4383 #ifdef CONFIG_DRM_I915_SELFTEST 4384 if (unlikely(ce->drop_schedule_disable)) { 4385 ce->drop_schedule_disable = false; 4386 return 0; 4387 } 4388 #endif 4389 4390 /* 4391 * Unpin must be done before __guc_signal_context_fence, 4392 * otherwise a race exists where requests are submitted and 4393 * retired before this unpin completes, resulting in the 4394 * pin_count dropping to zero while the context is still 4395 * enabled.
4396 */ 4397 intel_context_sched_disable_unpin(ce); 4398 4399 spin_lock_irqsave(&ce->guc_state.lock, flags); 4400 banned = context_banned(ce); 4401 clr_context_banned(ce); 4402 clr_context_pending_disable(ce); 4403 __guc_signal_context_fence(ce); 4404 guc_blocked_fence_complete(ce); 4405 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 4406 4407 if (banned) { 4408 guc_cancel_context_requests(ce); 4409 intel_engine_signal_breadcrumbs(ce->engine); 4410 } 4411 } 4412 4413 decr_outstanding_submission_g2h(guc); 4414 intel_context_put(ce); 4415 4416 return 0; 4417 } 4418 4419 static void capture_error_state(struct intel_guc *guc, 4420 struct intel_context *ce) 4421 { 4422 struct intel_gt *gt = guc_to_gt(guc); 4423 struct drm_i915_private *i915 = gt->i915; 4424 struct intel_engine_cs *engine = __context_to_physical_engine(ce); 4425 intel_wakeref_t wakeref; 4426 4427 intel_engine_set_hung_context(engine, ce); 4428 with_intel_runtime_pm(&i915->runtime_pm, wakeref) 4429 i915_capture_error_state(gt, engine->mask, CORE_DUMP_FLAG_IS_GUC_CAPTURE); 4430 atomic_inc(&i915->gpu_error.reset_engine_count[engine->uabi_class]); 4431 } 4432 4433 static void guc_context_replay(struct intel_context *ce) 4434 { 4435 struct i915_sched_engine *sched_engine = ce->engine->sched_engine; 4436 4437 __guc_reset_context(ce, ce->engine->mask); 4438 tasklet_hi_schedule(&sched_engine->tasklet); 4439 } 4440 4441 static void guc_handle_context_reset(struct intel_guc *guc, 4442 struct intel_context *ce) 4443 { 4444 trace_intel_context_reset(ce); 4445 4446 if (likely(intel_context_is_schedulable(ce))) { 4447 capture_error_state(guc, ce); 4448 guc_context_replay(ce); 4449 } else { 4450 drm_info(&guc_to_gt(guc)->i915->drm, 4451 "Ignoring context reset notification of exiting context 0x%04X on %s", 4452 ce->guc_id.id, ce->engine->name); 4453 } 4454 } 4455 4456 int intel_guc_context_reset_process_msg(struct intel_guc *guc, 4457 const u32 *msg, u32 len) 4458 { 4459 struct intel_context *ce; 4460 unsigned long flags; 4461 int ctx_id; 4462 4463 if (unlikely(len != 1)) { 4464 drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len); 4465 return -EPROTO; 4466 } 4467 4468 ctx_id = msg[0]; 4469 4470 /* 4471 * The context lookup uses the xarray but lookups only require an RCU lock 4472 * not the full spinlock. So take the lock explicitly and keep it until the 4473 * context has been reference count locked to ensure it can't be destroyed 4474 * asynchronously until the reset is done. 
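* g2h_context_lookup() only validates the ctx_id and fetches the context; the reference taken below under the lock is what keeps it alive across guc_handle_context_reset().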
4475 */ 4476 xa_lock_irqsave(&guc->context_lookup, flags); 4477 ce = g2h_context_lookup(guc, ctx_id); 4478 if (ce) 4479 intel_context_get(ce); 4480 xa_unlock_irqrestore(&guc->context_lookup, flags); 4481 4482 if (unlikely(!ce)) 4483 return -EPROTO; 4484 4485 guc_handle_context_reset(guc, ce); 4486 intel_context_put(ce); 4487 4488 return 0; 4489 } 4490 4491 int intel_guc_error_capture_process_msg(struct intel_guc *guc, 4492 const u32 *msg, u32 len) 4493 { 4494 u32 status; 4495 4496 if (unlikely(len != 1)) { 4497 drm_dbg(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len); 4498 return -EPROTO; 4499 } 4500 4501 status = msg[0] & INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_MASK; 4502 if (status == INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE) 4503 drm_warn(&guc_to_gt(guc)->i915->drm, "G2H-Error capture no space"); 4504 4505 intel_guc_capture_process(guc); 4506 4507 return 0; 4508 } 4509 4510 struct intel_engine_cs * 4511 intel_guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance) 4512 { 4513 struct intel_gt *gt = guc_to_gt(guc); 4514 u8 engine_class = guc_class_to_engine_class(guc_class); 4515 4516 /* Class index is checked in class converter */ 4517 GEM_BUG_ON(instance > MAX_ENGINE_INSTANCE); 4518 4519 return gt->engine_class[engine_class][instance]; 4520 } 4521 4522 static void reset_fail_worker_func(struct work_struct *w) 4523 { 4524 struct intel_guc *guc = container_of(w, struct intel_guc, 4525 submission_state.reset_fail_worker); 4526 struct intel_gt *gt = guc_to_gt(guc); 4527 intel_engine_mask_t reset_fail_mask; 4528 unsigned long flags; 4529 4530 spin_lock_irqsave(&guc->submission_state.lock, flags); 4531 reset_fail_mask = guc->submission_state.reset_fail_mask; 4532 guc->submission_state.reset_fail_mask = 0; 4533 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 4534 4535 if (likely(reset_fail_mask)) 4536 intel_gt_handle_error(gt, reset_fail_mask, 4537 I915_ERROR_CAPTURE, 4538 "GuC failed to reset engine mask=0x%x\n", 4539 reset_fail_mask); 4540 } 4541 4542 int intel_guc_engine_failure_process_msg(struct intel_guc *guc, 4543 const u32 *msg, u32 len) 4544 { 4545 struct intel_engine_cs *engine; 4546 struct intel_gt *gt = guc_to_gt(guc); 4547 u8 guc_class, instance; 4548 u32 reason; 4549 unsigned long flags; 4550 4551 if (unlikely(len != 3)) { 4552 drm_err(>->i915->drm, "Invalid length %u", len); 4553 return -EPROTO; 4554 } 4555 4556 guc_class = msg[0]; 4557 instance = msg[1]; 4558 reason = msg[2]; 4559 4560 engine = intel_guc_lookup_engine(guc, guc_class, instance); 4561 if (unlikely(!engine)) { 4562 drm_err(>->i915->drm, 4563 "Invalid engine %d:%d", guc_class, instance); 4564 return -EPROTO; 4565 } 4566 4567 /* 4568 * This is an unexpected failure of a hardware feature. So, log a real 4569 * error message not just the informational that comes with the reset. 4570 */ 4571 drm_err(>->i915->drm, "GuC engine reset request failed on %d:%d (%s) because 0x%08X", 4572 guc_class, instance, engine->name, reason); 4573 4574 spin_lock_irqsave(&guc->submission_state.lock, flags); 4575 guc->submission_state.reset_fail_mask |= engine->mask; 4576 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 4577 4578 /* 4579 * A GT reset flushes this worker queue (G2H handler) so we must use 4580 * another worker to trigger a GT reset. 
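* reset_fail_worker_func() runs from system_unbound_wq and reports the accumulated reset_fail_mask via intel_gt_handle_error().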
4581 */ 4582 queue_work(system_unbound_wq, &guc->submission_state.reset_fail_worker); 4583 4584 return 0; 4585 } 4586 4587 void intel_guc_find_hung_context(struct intel_engine_cs *engine) 4588 { 4589 struct intel_guc *guc = &engine->gt->uc.guc; 4590 struct intel_context *ce; 4591 struct i915_request *rq; 4592 unsigned long index; 4593 unsigned long flags; 4594 4595 /* Reset called during driver load? GuC not yet initialised! */ 4596 if (unlikely(!guc_submission_initialized(guc))) 4597 return; 4598 4599 xa_lock_irqsave(&guc->context_lookup, flags); 4600 xa_for_each(&guc->context_lookup, index, ce) { 4601 bool found; 4602 4603 if (!kref_get_unless_zero(&ce->ref)) 4604 continue; 4605 4606 xa_unlock(&guc->context_lookup); 4607 4608 if (!intel_context_is_pinned(ce)) 4609 goto next; 4610 4611 if (intel_engine_is_virtual(ce->engine)) { 4612 if (!(ce->engine->mask & engine->mask)) 4613 goto next; 4614 } else { 4615 if (ce->engine != engine) 4616 goto next; 4617 } 4618 4619 found = false; 4620 spin_lock(&ce->guc_state.lock); 4621 list_for_each_entry(rq, &ce->guc_state.requests, sched.link) { 4622 if (i915_test_request_state(rq) != I915_REQUEST_ACTIVE) 4623 continue; 4624 4625 found = true; 4626 break; 4627 } 4628 spin_unlock(&ce->guc_state.lock); 4629 4630 if (found) { 4631 intel_engine_set_hung_context(engine, ce); 4632 4633 /* Can only cope with one hang at a time... */ 4634 intel_context_put(ce); 4635 xa_lock(&guc->context_lookup); 4636 goto done; 4637 } 4638 4639 next: 4640 intel_context_put(ce); 4641 xa_lock(&guc->context_lookup); 4642 } 4643 done: 4644 xa_unlock_irqrestore(&guc->context_lookup, flags); 4645 } 4646 4647 void intel_guc_dump_active_requests(struct intel_engine_cs *engine, 4648 struct i915_request *hung_rq, 4649 struct drm_printer *m) 4650 { 4651 struct intel_guc *guc = &engine->gt->uc.guc; 4652 struct intel_context *ce; 4653 unsigned long index; 4654 unsigned long flags; 4655 4656 /* Reset called during driver load? GuC not yet initialised! 
*/ 4657 if (unlikely(!guc_submission_initialized(guc))) 4658 return; 4659 4660 xa_lock_irqsave(&guc->context_lookup, flags); 4661 xa_for_each(&guc->context_lookup, index, ce) { 4662 if (!kref_get_unless_zero(&ce->ref)) 4663 continue; 4664 4665 xa_unlock(&guc->context_lookup); 4666 4667 if (!intel_context_is_pinned(ce)) 4668 goto next; 4669 4670 if (intel_engine_is_virtual(ce->engine)) { 4671 if (!(ce->engine->mask & engine->mask)) 4672 goto next; 4673 } else { 4674 if (ce->engine != engine) 4675 goto next; 4676 } 4677 4678 spin_lock(&ce->guc_state.lock); 4679 intel_engine_dump_active_requests(&ce->guc_state.requests, 4680 hung_rq, m); 4681 spin_unlock(&ce->guc_state.lock); 4682 4683 next: 4684 intel_context_put(ce); 4685 xa_lock(&guc->context_lookup); 4686 } 4687 xa_unlock_irqrestore(&guc->context_lookup, flags); 4688 } 4689 4690 void intel_guc_submission_print_info(struct intel_guc *guc, 4691 struct drm_printer *p) 4692 { 4693 struct i915_sched_engine *sched_engine = guc->sched_engine; 4694 struct rb_node *rb; 4695 unsigned long flags; 4696 4697 if (!sched_engine) 4698 return; 4699 4700 drm_printf(p, "GuC Number Outstanding Submission G2H: %u\n", 4701 atomic_read(&guc->outstanding_submission_g2h)); 4702 drm_printf(p, "GuC tasklet count: %u\n\n", 4703 atomic_read(&sched_engine->tasklet.count)); 4704 4705 spin_lock_irqsave(&sched_engine->lock, flags); 4706 drm_printf(p, "Requests in GuC submit tasklet:\n"); 4707 for (rb = rb_first_cached(&sched_engine->queue); rb; rb = rb_next(rb)) { 4708 struct i915_priolist *pl = to_priolist(rb); 4709 struct i915_request *rq; 4710 4711 priolist_for_each_request(rq, pl) 4712 drm_printf(p, "guc_id=%u, seqno=%llu\n", 4713 rq->context->guc_id.id, 4714 rq->fence.seqno); 4715 } 4716 spin_unlock_irqrestore(&sched_engine->lock, flags); 4717 drm_printf(p, "\n"); 4718 } 4719 4720 static inline void guc_log_context_priority(struct drm_printer *p, 4721 struct intel_context *ce) 4722 { 4723 int i; 4724 4725 drm_printf(p, "\t\tPriority: %d\n", ce->guc_state.prio); 4726 drm_printf(p, "\t\tNumber Requests (lower index == higher priority)\n"); 4727 for (i = GUC_CLIENT_PRIORITY_KMD_HIGH; 4728 i < GUC_CLIENT_PRIORITY_NUM; ++i) { 4729 drm_printf(p, "\t\tNumber requests in priority band[%d]: %d\n", 4730 i, ce->guc_state.prio_count[i]); 4731 } 4732 drm_printf(p, "\n"); 4733 } 4734 4735 static inline void guc_log_context(struct drm_printer *p, 4736 struct intel_context *ce) 4737 { 4738 drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id.id); 4739 drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca); 4740 drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n", 4741 ce->ring->head, 4742 ce->lrc_reg_state[CTX_RING_HEAD]); 4743 drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n", 4744 ce->ring->tail, 4745 ce->lrc_reg_state[CTX_RING_TAIL]); 4746 drm_printf(p, "\t\tContext Pin Count: %u\n", 4747 atomic_read(&ce->pin_count)); 4748 drm_printf(p, "\t\tGuC ID Ref Count: %u\n", 4749 atomic_read(&ce->guc_id.ref)); 4750 drm_printf(p, "\t\tSchedule State: 0x%x\n\n", 4751 ce->guc_state.sched_state); 4752 } 4753 4754 void intel_guc_submission_print_context_info(struct intel_guc *guc, 4755 struct drm_printer *p) 4756 { 4757 struct intel_context *ce; 4758 unsigned long index; 4759 unsigned long flags; 4760 4761 xa_lock_irqsave(&guc->context_lookup, flags); 4762 xa_for_each(&guc->context_lookup, index, ce) { 4763 GEM_BUG_ON(intel_context_is_child(ce)); 4764 4765 guc_log_context(p, ce); 4766 guc_log_context_priority(p, ce); 4767 4768 if (intel_context_is_parent(ce)) { 4769 struct 
intel_context *child; 4770 4771 drm_printf(p, "\t\tNumber children: %u\n", 4772 ce->parallel.number_children); 4773 4774 if (ce->parallel.guc.wq_status) { 4775 drm_printf(p, "\t\tWQI Head: %u\n", 4776 READ_ONCE(*ce->parallel.guc.wq_head)); 4777 drm_printf(p, "\t\tWQI Tail: %u\n", 4778 READ_ONCE(*ce->parallel.guc.wq_tail)); 4779 drm_printf(p, "\t\tWQI Status: %u\n\n", 4780 READ_ONCE(*ce->parallel.guc.wq_status)); 4781 } 4782 4783 if (ce->engine->emit_bb_start == 4784 emit_bb_start_parent_no_preempt_mid_batch) { 4785 u8 i; 4786 4787 drm_printf(p, "\t\tChildren Go: %u\n\n", 4788 get_children_go_value(ce)); 4789 for (i = 0; i < ce->parallel.number_children; ++i) 4790 drm_printf(p, "\t\tChildren Join: %u\n", 4791 get_children_join_value(ce, i)); 4792 } 4793 4794 for_each_child(ce, child) 4795 guc_log_context(p, child); 4796 } 4797 } 4798 xa_unlock_irqrestore(&guc->context_lookup, flags); 4799 } 4800 4801 static inline u32 get_children_go_addr(struct intel_context *ce) 4802 { 4803 GEM_BUG_ON(!intel_context_is_parent(ce)); 4804 4805 return i915_ggtt_offset(ce->state) + 4806 __get_parent_scratch_offset(ce) + 4807 offsetof(struct parent_scratch, go.semaphore); 4808 } 4809 4810 static inline u32 get_children_join_addr(struct intel_context *ce, 4811 u8 child_index) 4812 { 4813 GEM_BUG_ON(!intel_context_is_parent(ce)); 4814 4815 return i915_ggtt_offset(ce->state) + 4816 __get_parent_scratch_offset(ce) + 4817 offsetof(struct parent_scratch, join[child_index].semaphore); 4818 } 4819 4820 #define PARENT_GO_BB 1 4821 #define PARENT_GO_FINI_BREADCRUMB 0 4822 #define CHILD_GO_BB 1 4823 #define CHILD_GO_FINI_BREADCRUMB 0 4824 static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq, 4825 u64 offset, u32 len, 4826 const unsigned int flags) 4827 { 4828 struct intel_context *ce = rq->context; 4829 u32 *cs; 4830 u8 i; 4831 4832 GEM_BUG_ON(!intel_context_is_parent(ce)); 4833 4834 cs = intel_ring_begin(rq, 10 + 4 * ce->parallel.number_children); 4835 if (IS_ERR(cs)) 4836 return PTR_ERR(cs); 4837 4838 /* Wait on children */ 4839 for (i = 0; i < ce->parallel.number_children; ++i) { 4840 *cs++ = (MI_SEMAPHORE_WAIT | 4841 MI_SEMAPHORE_GLOBAL_GTT | 4842 MI_SEMAPHORE_POLL | 4843 MI_SEMAPHORE_SAD_EQ_SDD); 4844 *cs++ = PARENT_GO_BB; 4845 *cs++ = get_children_join_addr(ce, i); 4846 *cs++ = 0; 4847 } 4848 4849 /* Turn off preemption */ 4850 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 4851 *cs++ = MI_NOOP; 4852 4853 /* Tell children go */ 4854 cs = gen8_emit_ggtt_write(cs, 4855 CHILD_GO_BB, 4856 get_children_go_addr(ce), 4857 0); 4858 4859 /* Jump to batch */ 4860 *cs++ = MI_BATCH_BUFFER_START_GEN8 | 4861 (flags & I915_DISPATCH_SECURE ? 
0 : BIT(8)); 4862 *cs++ = lower_32_bits(offset); 4863 *cs++ = upper_32_bits(offset); 4864 *cs++ = MI_NOOP; 4865 4866 intel_ring_advance(rq, cs); 4867 4868 return 0; 4869 } 4870 4871 static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq, 4872 u64 offset, u32 len, 4873 const unsigned int flags) 4874 { 4875 struct intel_context *ce = rq->context; 4876 struct intel_context *parent = intel_context_to_parent(ce); 4877 u32 *cs; 4878 4879 GEM_BUG_ON(!intel_context_is_child(ce)); 4880 4881 cs = intel_ring_begin(rq, 12); 4882 if (IS_ERR(cs)) 4883 return PTR_ERR(cs); 4884 4885 /* Signal parent */ 4886 cs = gen8_emit_ggtt_write(cs, 4887 PARENT_GO_BB, 4888 get_children_join_addr(parent, 4889 ce->parallel.child_index), 4890 0); 4891 4892 /* Wait on parent for go */ 4893 *cs++ = (MI_SEMAPHORE_WAIT | 4894 MI_SEMAPHORE_GLOBAL_GTT | 4895 MI_SEMAPHORE_POLL | 4896 MI_SEMAPHORE_SAD_EQ_SDD); 4897 *cs++ = CHILD_GO_BB; 4898 *cs++ = get_children_go_addr(parent); 4899 *cs++ = 0; 4900 4901 /* Turn off preemption */ 4902 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 4903 4904 /* Jump to batch */ 4905 *cs++ = MI_BATCH_BUFFER_START_GEN8 | 4906 (flags & I915_DISPATCH_SECURE ? 0 : BIT(8)); 4907 *cs++ = lower_32_bits(offset); 4908 *cs++ = upper_32_bits(offset); 4909 4910 intel_ring_advance(rq, cs); 4911 4912 return 0; 4913 } 4914 4915 static u32 * 4916 __emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq, 4917 u32 *cs) 4918 { 4919 struct intel_context *ce = rq->context; 4920 u8 i; 4921 4922 GEM_BUG_ON(!intel_context_is_parent(ce)); 4923 4924 /* Wait on children */ 4925 for (i = 0; i < ce->parallel.number_children; ++i) { 4926 *cs++ = (MI_SEMAPHORE_WAIT | 4927 MI_SEMAPHORE_GLOBAL_GTT | 4928 MI_SEMAPHORE_POLL | 4929 MI_SEMAPHORE_SAD_EQ_SDD); 4930 *cs++ = PARENT_GO_FINI_BREADCRUMB; 4931 *cs++ = get_children_join_addr(ce, i); 4932 *cs++ = 0; 4933 } 4934 4935 /* Turn on preemption */ 4936 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 4937 *cs++ = MI_NOOP; 4938 4939 /* Tell children go */ 4940 cs = gen8_emit_ggtt_write(cs, 4941 CHILD_GO_FINI_BREADCRUMB, 4942 get_children_go_addr(ce), 4943 0); 4944 4945 return cs; 4946 } 4947 4948 /* 4949 * If this is true, a submission of multi-lrc requests had an error and the 4950 * requests need to be skipped. The front end (execbuf IOCTL) should've called 4951 * i915_request_skip which squashes the BB but we still need to emit the fini 4952 * breadcrumb seqno write. At this point we don't know how many of the 4953 * requests in the multi-lrc submission were generated so we can't do the 4954 * handshake between the parent and children (e.g. if 4 requests should be 4955 * generated but the 2nd hit an error, only 1 would be seen by the GuC backend). 4956 * Simply skip the handshake, but still emit the breadcrumb seqno, if an error 4957 * has occurred on any of the requests in the submission / relationship.
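* Both the parent and child fini breadcrumb emitters below check this via skip_handshake() and NOP out the handshake dwords, leaving only the seqno write and user interrupt.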
4958 */ 4959 static inline bool skip_handshake(struct i915_request *rq) 4960 { 4961 return test_bit(I915_FENCE_FLAG_SKIP_PARALLEL, &rq->fence.flags); 4962 } 4963 4964 #define NON_SKIP_LEN 6 4965 static u32 * 4966 emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq, 4967 u32 *cs) 4968 { 4969 struct intel_context *ce = rq->context; 4970 __maybe_unused u32 *before_fini_breadcrumb_user_interrupt_cs; 4971 __maybe_unused u32 *start_fini_breadcrumb_cs = cs; 4972 4973 GEM_BUG_ON(!intel_context_is_parent(ce)); 4974 4975 if (unlikely(skip_handshake(rq))) { 4976 /* 4977 * NOP everything in __emit_fini_breadcrumb_parent_no_preempt_mid_batch; 4978 * the NON_SKIP_LEN comes from the length of the emits below. 4979 */ 4980 memset(cs, 0, sizeof(u32) * 4981 (ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN)); 4982 cs += ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN; 4983 } else { 4984 cs = __emit_fini_breadcrumb_parent_no_preempt_mid_batch(rq, cs); 4985 } 4986 4987 /* Emit fini breadcrumb */ 4988 before_fini_breadcrumb_user_interrupt_cs = cs; 4989 cs = gen8_emit_ggtt_write(cs, 4990 rq->fence.seqno, 4991 i915_request_active_timeline(rq)->hwsp_offset, 4992 0); 4993 4994 /* User interrupt */ 4995 *cs++ = MI_USER_INTERRUPT; 4996 *cs++ = MI_NOOP; 4997 4998 /* Ensure our math for skip + emit is correct */ 4999 GEM_BUG_ON(before_fini_breadcrumb_user_interrupt_cs + NON_SKIP_LEN != 5000 cs); 5001 GEM_BUG_ON(start_fini_breadcrumb_cs + 5002 ce->engine->emit_fini_breadcrumb_dw != cs); 5003 5004 rq->tail = intel_ring_offset(rq, cs); 5005 5006 return cs; 5007 } 5008 5009 static u32 * 5010 __emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq, 5011 u32 *cs) 5012 { 5013 struct intel_context *ce = rq->context; 5014 struct intel_context *parent = intel_context_to_parent(ce); 5015 5016 GEM_BUG_ON(!intel_context_is_child(ce)); 5017 5018 /* Turn on preemption */ 5019 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 5020 *cs++ = MI_NOOP; 5021 5022 /* Signal parent */ 5023 cs = gen8_emit_ggtt_write(cs, 5024 PARENT_GO_FINI_BREADCRUMB, 5025 get_children_join_addr(parent, 5026 ce->parallel.child_index), 5027 0); 5028 5029 /* Wait on parent for go */ 5030 *cs++ = (MI_SEMAPHORE_WAIT | 5031 MI_SEMAPHORE_GLOBAL_GTT | 5032 MI_SEMAPHORE_POLL | 5033 MI_SEMAPHORE_SAD_EQ_SDD); 5034 *cs++ = CHILD_GO_FINI_BREADCRUMB; 5035 *cs++ = get_children_go_addr(parent); 5036 *cs++ = 0; 5037 5038 return cs; 5039 } 5040 5041 static u32 * 5042 emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq, 5043 u32 *cs) 5044 { 5045 struct intel_context *ce = rq->context; 5046 __maybe_unused u32 *before_fini_breadcrumb_user_interrupt_cs; 5047 __maybe_unused u32 *start_fini_breadcrumb_cs = cs; 5048 5049 GEM_BUG_ON(!intel_context_is_child(ce)); 5050 5051 if (unlikely(skip_handshake(rq))) { 5052 /* 5053 * NOP everything in __emit_fini_breadcrumb_child_no_preempt_mid_batch; 5054 * the NON_SKIP_LEN comes from the length of the emits below.
5055 */ 5056 memset(cs, 0, sizeof(u32) * 5057 (ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN)); 5058 cs += ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN; 5059 } else { 5060 cs = __emit_fini_breadcrumb_child_no_preempt_mid_batch(rq, cs); 5061 } 5062 5063 /* Emit fini breadcrumb */ 5064 before_fini_breadcrumb_user_interrupt_cs = cs; 5065 cs = gen8_emit_ggtt_write(cs, 5066 rq->fence.seqno, 5067 i915_request_active_timeline(rq)->hwsp_offset, 5068 0); 5069 5070 /* User interrupt */ 5071 *cs++ = MI_USER_INTERRUPT; 5072 *cs++ = MI_NOOP; 5073 5074 /* Ensure our math for skip + emit is correct */ 5075 GEM_BUG_ON(before_fini_breadcrumb_user_interrupt_cs + NON_SKIP_LEN != 5076 cs); 5077 GEM_BUG_ON(start_fini_breadcrumb_cs + 5078 ce->engine->emit_fini_breadcrumb_dw != cs); 5079 5080 rq->tail = intel_ring_offset(rq, cs); 5081 5082 return cs; 5083 } 5084 5085 #undef NON_SKIP_LEN 5086 5087 static struct intel_context * 5088 guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count, 5089 unsigned long flags) 5090 { 5091 struct guc_virtual_engine *ve; 5092 struct intel_guc *guc; 5093 unsigned int n; 5094 int err; 5095 5096 ve = kzalloc(sizeof(*ve), GFP_KERNEL); 5097 if (!ve) 5098 return ERR_PTR(-ENOMEM); 5099 5100 guc = &siblings[0]->gt->uc.guc; 5101 5102 ve->base.i915 = siblings[0]->i915; 5103 ve->base.gt = siblings[0]->gt; 5104 ve->base.uncore = siblings[0]->uncore; 5105 ve->base.id = -1; 5106 5107 ve->base.uabi_class = I915_ENGINE_CLASS_INVALID; 5108 ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL; 5109 ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL; 5110 ve->base.saturated = ALL_ENGINES; 5111 5112 snprintf(ve->base.name, sizeof(ve->base.name), "virtual"); 5113 5114 ve->base.sched_engine = i915_sched_engine_get(guc->sched_engine); 5115 5116 ve->base.cops = &virtual_guc_context_ops; 5117 ve->base.request_alloc = guc_request_alloc; 5118 ve->base.bump_serial = virtual_guc_bump_serial; 5119 5120 ve->base.submit_request = guc_submit_request; 5121 5122 ve->base.flags = I915_ENGINE_IS_VIRTUAL; 5123 5124 #ifdef notyet 5125 BUILD_BUG_ON(ilog2(VIRTUAL_ENGINES) < I915_NUM_ENGINES); 5126 #endif 5127 ve->base.mask = VIRTUAL_ENGINES; 5128 5129 intel_context_init(&ve->context, &ve->base); 5130 5131 for (n = 0; n < count; n++) { 5132 struct intel_engine_cs *sibling = siblings[n]; 5133 5134 GEM_BUG_ON(!is_power_of_2(sibling->mask)); 5135 if (sibling->mask & ve->base.mask) { 5136 DRM_DEBUG("duplicate %s entry in load balancer\n", 5137 sibling->name); 5138 err = -EINVAL; 5139 goto err_put; 5140 } 5141 5142 ve->base.mask |= sibling->mask; 5143 ve->base.logical_mask |= sibling->logical_mask; 5144 5145 if (n != 0 && ve->base.class != sibling->class) { 5146 DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n", 5147 sibling->class, ve->base.class); 5148 err = -EINVAL; 5149 goto err_put; 5150 } else if (n == 0) { 5151 ve->base.class = sibling->class; 5152 ve->base.uabi_class = sibling->uabi_class; 5153 snprintf(ve->base.name, sizeof(ve->base.name), 5154 "v%dx%d", ve->base.class, count); 5155 ve->base.context_size = sibling->context_size; 5156 5157 ve->base.add_active_request = 5158 sibling->add_active_request; 5159 ve->base.remove_active_request = 5160 sibling->remove_active_request; 5161 ve->base.emit_bb_start = sibling->emit_bb_start; 5162 ve->base.emit_flush = sibling->emit_flush; 5163 ve->base.emit_init_breadcrumb = 5164 sibling->emit_init_breadcrumb; 5165 ve->base.emit_fini_breadcrumb = 5166 sibling->emit_fini_breadcrumb; 5167 ve->base.emit_fini_breadcrumb_dw = 
5168 sibling->emit_fini_breadcrumb_dw; 5169 ve->base.breadcrumbs = 5170 intel_breadcrumbs_get(sibling->breadcrumbs); 5171 5172 ve->base.flags |= sibling->flags; 5173 5174 ve->base.props.timeslice_duration_ms = 5175 sibling->props.timeslice_duration_ms; 5176 ve->base.props.preempt_timeout_ms = 5177 sibling->props.preempt_timeout_ms; 5178 } 5179 } 5180 5181 return &ve->context; 5182 5183 err_put: 5184 intel_context_put(&ve->context); 5185 return ERR_PTR(err); 5186 } 5187 5188 bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve) 5189 { 5190 struct intel_engine_cs *engine; 5191 intel_engine_mask_t tmp, mask = ve->mask; 5192 5193 for_each_engine_masked(engine, ve->gt, mask, tmp) 5194 if (READ_ONCE(engine->props.heartbeat_interval_ms)) 5195 return true; 5196 5197 return false; 5198 } 5199 5200 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) 5201 #include "selftest_guc.c" 5202 #include "selftest_guc_multi_lrc.c" 5203 #include "selftest_guc_hangcheck.c" 5204 #endif 5205