/*
 * Copyright © 2011-2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Ben Widawsky <ben@bwidawsk.net>
 *
 */

/*
 * This file implements HW context support. On gen5+ a HW context consists of
 * an opaque GPU object which is referenced at times of context saves and
 * restores. With RC6 enabled, the context is also referenced as the GPU enters
 * and exits from RC6 (the GPU has its own internal power context, except on
 * gen5). Though something like a context does exist for the media ring, the
 * code only supports contexts for the render ring.
 *
 * In software, there is a distinction between contexts created by the user,
 * and the default HW context. The default HW context is used by GPU clients
 * that do not request setup of their own hardware context. The default
 * context's state is never restored to help prevent programming errors. This
 * would happen if a client ran and piggy-backed off another client's GPU
 * state. The default context only exists to give the GPU some offset to load
 * as the current context in order to invoke a save of the context we actually
 * care about. In fact, the code could likely be constructed, albeit in a more
 * complicated fashion, to never use the default context, though that limits
 * the driver's ability to swap out, and/or destroy other contexts.
 *
 * All other contexts are created at the request of a GPU client. These
 * contexts store GPU state, and thus allow GPU clients to avoid re-emitting
 * state (and potentially to query certain state) on every submission. The
 * kernel driver makes certain that the appropriate commands are inserted.
 *
 * The context life cycle is semi-complicated in that context BOs may live
 * longer than the context itself because of the way the hardware, and object
 * tracking works. Below is a very crude representation of the state machine
 * describing the context life.
 *                                         refcount     pincount     active
 * S0: initial state                          0            0           0
 * S1: context created                        1            0           0
 * S2: context is currently running           2            1           X
 * S3: GPU referenced, but not current        2            0           1
 * S4: context is current, but destroyed      1            1           0
 * S5: like S3, but destroyed                 1            0           1
 *
 * The most common (but not all) transitions:
 * S0->S1: client creates a context
 * S1->S2: client submits execbuf with context
 * S2->S3: another client submits an execbuf with a different context
 * S3->S1: context object was retired
 * S3->S2: client submits another execbuf with this context
 * S2->S4: context destroy called with current context
 * S3->S5->S0: destroy path
 * S4->S5->S0: destroy path on current context
 *
 * There are two confusing terms used above:
 *  The "current context" means the context which is currently running on the
 *  GPU. The GPU has loaded its state already and has stored away the gtt
 *  offset of the BO. The GPU is not actively referencing the data at this
 *  offset, but it will on the next context switch. The only way to avoid this
 *  is to do a GPU reset.
 *
 *  An "active context" is one which was previously the "current context" and
 *  is on the active list waiting for the next context switch to occur. Until
 *  this happens, the object must remain at the same gtt offset. It is
 *  therefore possible to destroy a context while it is still active.
 *
 */

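/*
 * A rough illustration of the common transitions above as seen from
 * userspace. This is a hedged sketch, not part of the driver: it assumes an
 * open i915 fd, libdrm's drmIoctl() and a fully populated execbuffer2
 * elsewhere.
 */
#if 0
	struct drm_i915_gem_context_create create = {};
	struct drm_i915_gem_context_destroy destroy = {};
	struct drm_i915_gem_execbuffer2 eb2 = { /* batch setup omitted */ };

	/* S0->S1: create a context; the kernel allocates an i915_gem_context */
	drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create);

	/* S1->S2: first execbuf with this context makes it current on the ring */
	i915_execbuffer2_set_context_id(eb2, create.ctx_id);
	drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &eb2);

	/* S2->S4 (or S3->S5): destroy drops the user reference; the context BO
	 * itself lives on until the GPU has finished with it. */
	destroy.ctx_id = create.ctx_id;
	drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, &destroy);
#endif
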
#include <linux/log2.h>
#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_trace.h"

#define ALL_L3_SLICES(dev) ((1 << NUM_L3_SLICES(dev)) - 1)

/*
 * Free the handle->vma LUT entries owned by this context and drop the object
 * references tracked in the handles_vma radix tree.
 */
static void lut_close(struct i915_gem_context *ctx)
{
	struct i915_lut_handle *lut, *ln;
	struct radix_tree_iter iter;
	void __rcu **slot;

	list_for_each_entry_safe(lut, ln, &ctx->handles_list, ctx_link) {
		list_del(&lut->obj_link);
		kmem_cache_free(ctx->i915->luts, lut);
	}

	rcu_read_lock();
	radix_tree_for_each_slot(slot, &ctx->handles_vma, &iter, 0) {
		struct i915_vma *vma = rcu_dereference_raw(*slot);

		radix_tree_iter_delete(&ctx->handles_vma, &iter, slot);
		__i915_gem_object_release_unless_active(vma->obj);
	}
	rcu_read_unlock();
}

static void i915_gem_context_free(struct i915_gem_context *ctx)
{
	int i;

	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
	GEM_BUG_ON(!i915_gem_context_is_closed(ctx));

	i915_ppgtt_put(ctx->ppgtt);

	for (i = 0; i < I915_NUM_ENGINES; i++) {
		struct intel_context *ce = &ctx->engine[i];

		if (!ce->state)
			continue;

		WARN_ON(ce->pin_count);
		if (ce->ring)
			intel_ring_free(ce->ring);

		__i915_gem_object_release_unless_active(ce->state->obj);
	}

	kfree(ctx->name);
	put_pid(ctx->pid);

	list_del(&ctx->link);

	ida_simple_remove(&ctx->i915->contexts.hw_ida, ctx->hw_id);
	kfree_rcu(ctx, rcu);
}

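/*
 * Context freeing is deferred: the final reference put
 * (i915_gem_context_release) pushes the context onto a lockless free_list and
 * kicks a worker, which reaps the list under struct_mutex below.
 * i915_gem_create_context() also reaps one pending entry directly via
 * contexts_free_first().
 */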
static void contexts_free(struct drm_i915_private *i915)
{
	struct llist_node *freed = llist_del_all(&i915->contexts.free_list);
	struct i915_gem_context *ctx, *cn;

	lockdep_assert_held(&i915->drm.struct_mutex);

	llist_for_each_entry_safe(ctx, cn, freed, free_link)
		i915_gem_context_free(ctx);
}

static void contexts_free_first(struct drm_i915_private *i915)
{
	struct i915_gem_context *ctx;
	struct llist_node *freed;

	lockdep_assert_held(&i915->drm.struct_mutex);

	freed = llist_del_first(&i915->contexts.free_list);
	if (!freed)
		return;

	ctx = container_of(freed, typeof(*ctx), free_link);
	i915_gem_context_free(ctx);
}

static void contexts_free_worker(struct work_struct *work)
{
	struct drm_i915_private *i915 =
		container_of(work, typeof(*i915), contexts.free_work);

	mutex_lock(&i915->drm.struct_mutex);
	contexts_free(i915);
	mutex_unlock(&i915->drm.struct_mutex);
}

void i915_gem_context_release(struct kref *ref)
{
	struct i915_gem_context *ctx = container_of(ref, typeof(*ctx), ref);
	struct drm_i915_private *i915 = ctx->i915;

	trace_i915_context_free(ctx);
	if (llist_add(&ctx->free_link, &i915->contexts.free_list))
		queue_work(i915->wq, &i915->contexts.free_work);
}

static void context_close(struct i915_gem_context *ctx)
{
	i915_gem_context_set_closed(ctx);

	/*
	 * The LUT uses the VMA as a backpointer to unref the object,
	 * so we need to clear the LUT before we close all the VMA (inside
	 * the ppgtt).
	 */
	lut_close(ctx);
	if (ctx->ppgtt)
		i915_ppgtt_close(&ctx->ppgtt->base);

	ctx->file_priv = ERR_PTR(-EBADF);
	i915_gem_context_put(ctx);
}

static int assign_hw_id(struct drm_i915_private *dev_priv, unsigned *out)
{
	int ret;

	ret = ida_simple_get(&dev_priv->contexts.hw_ida,
			     0, MAX_CONTEXT_HW_ID, GFP_KERNEL);
	if (ret < 0) {
		/* Contexts are only released when no longer active.
		 * Flush any pending retires to hopefully release some
		 * stale contexts and try again.
		 */
		i915_gem_retire_requests(dev_priv);
		ret = ida_simple_get(&dev_priv->contexts.hw_ida,
				     0, MAX_CONTEXT_HW_ID, GFP_KERNEL);
		if (ret < 0)
			return ret;
	}

	*out = ret;
	return 0;
}

static u32 default_desc_template(const struct drm_i915_private *i915,
				 const struct i915_hw_ppgtt *ppgtt)
{
	u32 address_mode;
	u32 desc;

	desc = GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;

	address_mode = INTEL_LEGACY_32B_CONTEXT;
	if (ppgtt && i915_vm_is_48bit(&ppgtt->base))
		address_mode = INTEL_LEGACY_64B_CONTEXT;
	desc |= address_mode << GEN8_CTX_ADDRESSING_MODE_SHIFT;

	if (IS_GEN8(i915))
		desc |= GEN8_CTX_L3LLC_COHERENT;

	/* TODO: WaDisableLiteRestore when we start using semaphore
	 * signalling between Command Streamers
	 * ring->ctx_desc_template |= GEN8_CTX_FORCE_RESTORE;
	 */

	return desc;
}

static struct i915_gem_context *
__create_hw_context(struct drm_i915_private *dev_priv,
		    struct drm_i915_file_private *file_priv)
{
	struct i915_gem_context *ctx;
	int ret;

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (ctx == NULL)
		return ERR_PTR(-ENOMEM);

	ret = assign_hw_id(dev_priv, &ctx->hw_id);
	if (ret) {
		kfree(ctx);
		return ERR_PTR(ret);
	}

	kref_init(&ctx->ref);
	list_add_tail(&ctx->link, &dev_priv->contexts.list);
	ctx->i915 = dev_priv;
	ctx->priority = I915_PRIORITY_NORMAL;

	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
	INIT_LIST_HEAD(&ctx->handles_list);

	/* Default context will never have a file_priv */
	ret = DEFAULT_CONTEXT_HANDLE;
	if (file_priv) {
		ret = idr_alloc(&file_priv->context_idr, ctx,
				DEFAULT_CONTEXT_HANDLE, 0, GFP_KERNEL);
		if (ret < 0)
			goto err_lut;
	}
	ctx->user_handle = ret;

	ctx->file_priv = file_priv;
	if (file_priv) {
		ctx->pid = get_task_pid(current, PIDTYPE_PID);
		ctx->name = kasprintf(GFP_KERNEL, "%s[%d]/%x",
				      current->comm,
				      pid_nr(ctx->pid),
				      ctx->user_handle);
		if (!ctx->name) {
			ret = -ENOMEM;
			goto err_pid;
		}
	}

	/* NB: Mark all slices as needing a remap so that when the context first
	 * loads it will restore whatever remap state already exists. If there
	 * is no remap info, it will be a NOP. */
	ctx->remap_slice = ALL_L3_SLICES(dev_priv);

	i915_gem_context_set_bannable(ctx);
	ctx->ring_size = 4 * PAGE_SIZE;
	ctx->desc_template =
		default_desc_template(dev_priv, dev_priv->mm.aliasing_ppgtt);

	/* GuC requires the ring to be placed above GUC_WOPCM_TOP. If GuC is not
	 * present or not in use we still need a small bias as ring wraparound
	 * at offset 0 sometimes hangs. No idea why.
	 */
	if (HAS_GUC(dev_priv) && i915_modparams.enable_guc_loading)
		ctx->ggtt_offset_bias = GUC_WOPCM_TOP;
	else
		ctx->ggtt_offset_bias = I915_GTT_PAGE_SIZE;

	return ctx;

err_pid:
	put_pid(ctx->pid);
	idr_remove(&file_priv->context_idr, ctx->user_handle);
err_lut:
	context_close(ctx);
	return ERR_PTR(ret);
}

static void __destroy_hw_context(struct i915_gem_context *ctx,
				 struct drm_i915_file_private *file_priv)
{
	idr_remove(&file_priv->context_idr, ctx->user_handle);
	context_close(ctx);
}

/**
 * The default context needs to exist per ring that uses contexts. It stores
 * the context state of the GPU for applications that don't utilize HW
 * contexts, as well as an idle case.
 */
static struct i915_gem_context *
i915_gem_create_context(struct drm_i915_private *dev_priv,
			struct drm_i915_file_private *file_priv)
{
	struct i915_gem_context *ctx;

	lockdep_assert_held(&dev_priv->drm.struct_mutex);

	/* Reap the most stale context */
	contexts_free_first(dev_priv);

	ctx = __create_hw_context(dev_priv, file_priv);
	if (IS_ERR(ctx))
		return ctx;

	if (USES_FULL_PPGTT(dev_priv)) {
		struct i915_hw_ppgtt *ppgtt;

		ppgtt = i915_ppgtt_create(dev_priv, file_priv, ctx->name);
		if (IS_ERR(ppgtt)) {
			DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n",
					 PTR_ERR(ppgtt));
			__destroy_hw_context(ctx, file_priv);
			return ERR_CAST(ppgtt);
		}

		ctx->ppgtt = ppgtt;
		ctx->desc_template = default_desc_template(dev_priv, ppgtt);
	}

	trace_i915_context_create(ctx);

	return ctx;
}

/**
 * i915_gem_context_create_gvt - create a GVT GEM context
 * @dev: drm device
 *
 * This function is used to create a GVT specific GEM context.
 *
 * Returns:
 * pointer to i915_gem_context on success, error pointer if failed
 *
 */
struct i915_gem_context *
i915_gem_context_create_gvt(struct drm_device *dev)
{
	struct i915_gem_context *ctx;
	int ret;

	if (!IS_ENABLED(CONFIG_DRM_I915_GVT))
		return ERR_PTR(-ENODEV);

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ERR_PTR(ret);

	ctx = __create_hw_context(to_i915(dev), NULL);
	if (IS_ERR(ctx))
		goto out;

	ctx->file_priv = ERR_PTR(-EBADF);
	i915_gem_context_set_closed(ctx); /* not user accessible */
	i915_gem_context_clear_bannable(ctx);
	i915_gem_context_set_force_single_submission(ctx);
	if (!i915_modparams.enable_guc_submission)
		ctx->ring_size = 512 * PAGE_SIZE; /* Max ring buffer size */

	GEM_BUG_ON(i915_gem_context_is_kernel(ctx));
out:
	mutex_unlock(&dev->struct_mutex);
	return ctx;
}

static struct i915_gem_context *
create_kernel_context(struct drm_i915_private *i915, int prio)
{
	struct i915_gem_context *ctx;

	ctx = i915_gem_create_context(i915, NULL);
	if (IS_ERR(ctx))
		return ctx;

	i915_gem_context_clear_bannable(ctx);
	ctx->priority = prio;
	ctx->ring_size = PAGE_SIZE;

	GEM_BUG_ON(!i915_gem_context_is_kernel(ctx));

	return ctx;
}

static void
destroy_kernel_context(struct i915_gem_context **ctxp)
{
	struct i915_gem_context *ctx;

	/* Keep the context ref so that we can free it immediately ourselves */
	ctx = i915_gem_context_get(fetch_and_zero(ctxp));
	GEM_BUG_ON(!i915_gem_context_is_kernel(ctx));

	context_close(ctx);
	i915_gem_context_free(ctx);
}

int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
{
	struct i915_gem_context *ctx;
	int err;

	GEM_BUG_ON(dev_priv->kernel_context);

	INIT_LIST_HEAD(&dev_priv->contexts.list);
	INIT_WORK(&dev_priv->contexts.free_work, contexts_free_worker);
	init_llist_head(&dev_priv->contexts.free_list);

	if (intel_vgpu_active(dev_priv) &&
	    HAS_LOGICAL_RING_CONTEXTS(dev_priv)) {
		if (!i915_modparams.enable_execlists) {
			DRM_INFO("Only EXECLIST mode is supported in vgpu.\n");
			return -EINVAL;
		}
	}

	/* Using the simple ida interface, the max is limited by sizeof(int) */
	BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX);
	ida_init(&dev_priv->contexts.hw_ida);

	/* lowest priority; idle task */
	ctx = create_kernel_context(dev_priv, I915_PRIORITY_MIN);
	if (IS_ERR(ctx)) {
		DRM_ERROR("Failed to create default global context\n");
		err = PTR_ERR(ctx);
		goto err;
	}
	/*
	 * For easy recognisability, we want the kernel context to be 0 and
	 * then all user contexts will have a non-zero hw_id.
	 */
	GEM_BUG_ON(ctx->hw_id);
	dev_priv->kernel_context = ctx;

	/* highest priority; preempting task */
	ctx = create_kernel_context(dev_priv, INT_MAX);
	if (IS_ERR(ctx)) {
		DRM_ERROR("Failed to create default preempt context\n");
		err = PTR_ERR(ctx);
		goto err_kernel_context;
	}
	dev_priv->preempt_context = ctx;

	DRM_DEBUG_DRIVER("%s context support initialized\n",
			 dev_priv->engine[RCS]->context_size ? "logical" :
			 "fake");
"logical" : 500 "fake"); 501 return 0; 502 503 err_kernel_context: 504 destroy_kernel_context(&dev_priv->kernel_context); 505 err: 506 return err; 507 } 508 509 void i915_gem_contexts_lost(struct drm_i915_private *dev_priv) 510 { 511 struct intel_engine_cs *engine; 512 enum intel_engine_id id; 513 514 lockdep_assert_held(&dev_priv->drm.struct_mutex); 515 516 for_each_engine(engine, dev_priv, id) { 517 engine->legacy_active_context = NULL; 518 519 if (!engine->last_retired_context) 520 continue; 521 522 engine->context_unpin(engine, engine->last_retired_context); 523 engine->last_retired_context = NULL; 524 } 525 526 /* Force the GPU state to be restored on enabling */ 527 if (!i915_modparams.enable_execlists) { 528 struct i915_gem_context *ctx; 529 530 list_for_each_entry(ctx, &dev_priv->contexts.list, link) { 531 if (!i915_gem_context_is_default(ctx)) 532 continue; 533 534 for_each_engine(engine, dev_priv, id) 535 ctx->engine[engine->id].initialised = false; 536 537 ctx->remap_slice = ALL_L3_SLICES(dev_priv); 538 } 539 540 for_each_engine(engine, dev_priv, id) { 541 struct intel_context *kce = 542 &dev_priv->kernel_context->engine[engine->id]; 543 544 kce->initialised = true; 545 } 546 } 547 } 548 549 void i915_gem_contexts_fini(struct drm_i915_private *i915) 550 { 551 lockdep_assert_held(&i915->drm.struct_mutex); 552 553 destroy_kernel_context(&i915->preempt_context); 554 destroy_kernel_context(&i915->kernel_context); 555 556 /* Must free all deferred contexts (via flush_workqueue) first */ 557 ida_destroy(&i915->contexts.hw_ida); 558 } 559 560 static int context_idr_cleanup(int id, void *p, void *data) 561 { 562 struct i915_gem_context *ctx = p; 563 564 context_close(ctx); 565 return 0; 566 } 567 568 int i915_gem_context_open(struct drm_i915_private *i915, 569 struct drm_file *file) 570 { 571 struct drm_i915_file_private *file_priv = file->driver_priv; 572 struct i915_gem_context *ctx; 573 574 idr_init(&file_priv->context_idr); 575 576 mutex_lock(&i915->drm.struct_mutex); 577 ctx = i915_gem_create_context(i915, file_priv); 578 mutex_unlock(&i915->drm.struct_mutex); 579 if (IS_ERR(ctx)) { 580 idr_destroy(&file_priv->context_idr); 581 return PTR_ERR(ctx); 582 } 583 584 GEM_BUG_ON(i915_gem_context_is_kernel(ctx)); 585 586 return 0; 587 } 588 589 void i915_gem_context_close(struct drm_file *file) 590 { 591 struct drm_i915_file_private *file_priv = file->driver_priv; 592 593 lockdep_assert_held(&file_priv->dev_priv->drm.struct_mutex); 594 595 idr_for_each(&file_priv->context_idr, context_idr_cleanup, NULL); 596 idr_destroy(&file_priv->context_idr); 597 } 598 599 static inline int 600 mi_set_context(struct drm_i915_gem_request *req, u32 flags) 601 { 602 struct drm_i915_private *dev_priv = req->i915; 603 struct intel_engine_cs *engine = req->engine; 604 enum intel_engine_id id; 605 const int num_rings = 606 /* Use an extended w/a on gen7 if signalling from other rings */ 607 (i915_modparams.semaphores && INTEL_GEN(dev_priv) == 7) ? 608 INTEL_INFO(dev_priv)->num_rings - 1 : 609 0; 610 int len; 611 u32 *cs; 612 613 flags |= MI_MM_SPACE_GTT; 614 if (IS_HASWELL(dev_priv) || INTEL_GEN(dev_priv) >= 8) 615 /* These flags are for resource streamer on HSW+ */ 616 flags |= HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN; 617 else 618 flags |= MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN; 619 620 len = 4; 621 if (INTEL_GEN(dev_priv) >= 7) 622 len += 2 + (num_rings ? 
static inline int
mi_set_context(struct drm_i915_gem_request *req, u32 flags)
{
	struct drm_i915_private *dev_priv = req->i915;
	struct intel_engine_cs *engine = req->engine;
	enum intel_engine_id id;
	const int num_rings =
		/* Use an extended w/a on gen7 if signalling from other rings */
		(i915_modparams.semaphores && INTEL_GEN(dev_priv) == 7) ?
		INTEL_INFO(dev_priv)->num_rings - 1 :
		0;
	int len;
	u32 *cs;

	flags |= MI_MM_SPACE_GTT;
	if (IS_HASWELL(dev_priv) || INTEL_GEN(dev_priv) >= 8)
		/* These flags are for resource streamer on HSW+ */
		flags |= HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN;
	else
		flags |= MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN;

	len = 4;
	if (INTEL_GEN(dev_priv) >= 7)
		len += 2 + (num_rings ? 4*num_rings + 6 : 0);

	cs = intel_ring_begin(req, len);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */
	if (INTEL_GEN(dev_priv) >= 7) {
		*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
		if (num_rings) {
			struct intel_engine_cs *signaller;

			*cs++ = MI_LOAD_REGISTER_IMM(num_rings);
			for_each_engine(signaller, dev_priv, id) {
				if (signaller == engine)
					continue;

				*cs++ = i915_mmio_reg_offset(
					   RING_PSMI_CTL(signaller->mmio_base));
				*cs++ = _MASKED_BIT_ENABLE(
						GEN6_PSMI_SLEEP_MSG_DISABLE);
			}
		}
	}

	*cs++ = MI_NOOP;
	*cs++ = MI_SET_CONTEXT;
	*cs++ = i915_ggtt_offset(req->ctx->engine[RCS].state) | flags;
	/*
	 * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP
	 * WaMiSetContext_Hang:snb,ivb,vlv
	 */
	*cs++ = MI_NOOP;

	if (INTEL_GEN(dev_priv) >= 7) {
		if (num_rings) {
			struct intel_engine_cs *signaller;
			i915_reg_t last_reg = {}; /* keep gcc quiet */

			*cs++ = MI_LOAD_REGISTER_IMM(num_rings);
			for_each_engine(signaller, dev_priv, id) {
				if (signaller == engine)
					continue;

				last_reg = RING_PSMI_CTL(signaller->mmio_base);
				*cs++ = i915_mmio_reg_offset(last_reg);
				*cs++ = _MASKED_BIT_DISABLE(
						GEN6_PSMI_SLEEP_MSG_DISABLE);
			}

			/* Insert a delay before the next switch! */
			*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
			*cs++ = i915_mmio_reg_offset(last_reg);
			*cs++ = i915_ggtt_offset(engine->scratch);
			*cs++ = MI_NOOP;
		}
		*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	}

	intel_ring_advance(req, cs);

	return 0;
}

static int remap_l3(struct drm_i915_gem_request *req, int slice)
{
	u32 *cs, *remap_info = req->i915->l3_parity.remap_info[slice];
	int i;

	if (!remap_info)
		return 0;

	cs = intel_ring_begin(req, GEN7_L3LOG_SIZE/4 * 2 + 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/*
	 * Note: We do not worry about the concurrent register cacheline hang
	 * here because no other code should access these registers other than
	 * at initialization time.
	 */
	*cs++ = MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4);
	for (i = 0; i < GEN7_L3LOG_SIZE/4; i++) {
		*cs++ = i915_mmio_reg_offset(GEN7_L3LOG(slice, i));
		*cs++ = remap_info[i];
	}
	*cs++ = MI_NOOP;
	intel_ring_advance(req, cs);

	return 0;
}

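/*
 * Helpers deciding how much work a legacy context switch needs: whether the
 * MI_SET_CONTEXT can be skipped entirely, and whether the ppgtt page
 * directories must be (re)loaded before or after the switch.
 */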
static inline bool skip_rcs_switch(struct i915_hw_ppgtt *ppgtt,
				   struct intel_engine_cs *engine,
				   struct i915_gem_context *to)
{
	if (to->remap_slice)
		return false;

	if (!to->engine[RCS].initialised)
		return false;

	if (ppgtt && (intel_engine_flag(engine) & ppgtt->pd_dirty_rings))
		return false;

	return to == engine->legacy_active_context;
}

static bool
needs_pd_load_pre(struct i915_hw_ppgtt *ppgtt, struct intel_engine_cs *engine)
{
	struct i915_gem_context *from = engine->legacy_active_context;

	if (!ppgtt)
		return false;

	/* Always load the ppgtt on first use */
	if (!from)
		return true;

	/* Same context without new entries, skip */
	if ((!from->ppgtt || from->ppgtt == ppgtt) &&
	    !(intel_engine_flag(engine) & ppgtt->pd_dirty_rings))
		return false;

	if (engine->id != RCS)
		return true;

	if (INTEL_GEN(engine->i915) < 8)
		return true;

	return false;
}

static bool
needs_pd_load_post(struct i915_hw_ppgtt *ppgtt,
		   struct i915_gem_context *to,
		   u32 hw_flags)
{
	if (!ppgtt)
		return false;

	if (!IS_GEN8(to->i915))
		return false;

	if (hw_flags & MI_RESTORE_INHIBIT)
		return true;

	return false;
}

static int do_rcs_switch(struct drm_i915_gem_request *req)
{
	struct i915_gem_context *to = req->ctx;
	struct intel_engine_cs *engine = req->engine;
	struct i915_hw_ppgtt *ppgtt = to->ppgtt ?: req->i915->mm.aliasing_ppgtt;
	struct i915_gem_context *from = engine->legacy_active_context;
	u32 hw_flags;
	int ret, i;

	GEM_BUG_ON(engine->id != RCS);

	if (skip_rcs_switch(ppgtt, engine, to))
		return 0;

	if (needs_pd_load_pre(ppgtt, engine)) {
		/* Older GENs and non render rings still want the load first,
		 * "PP_DCLV followed by PP_DIR_BASE register through Load
		 * Register Immediate commands in Ring Buffer before submitting
		 * a context."
		 */
		trace_switch_mm(engine, to);
		ret = ppgtt->switch_mm(ppgtt, req);
		if (ret)
			return ret;
	}

	if (!to->engine[RCS].initialised || i915_gem_context_is_default(to))
		/* NB: If we inhibit the restore, the context is not allowed to
		 * die because future work may end up depending on valid address
		 * space. This means we must enforce that a page table load
		 * occurs in this case.
		 */
		hw_flags = MI_RESTORE_INHIBIT;
	else if (ppgtt && intel_engine_flag(engine) & ppgtt->pd_dirty_rings)
		hw_flags = MI_FORCE_RESTORE;
	else
		hw_flags = 0;

	if (to != from || (hw_flags & MI_FORCE_RESTORE)) {
		ret = mi_set_context(req, hw_flags);
		if (ret)
			return ret;

		engine->legacy_active_context = to;
	}

	/* GEN8 does *not* require an explicit reload if the PDPs have been
	 * setup, and we do not wish to move them.
	 */
	if (needs_pd_load_post(ppgtt, to, hw_flags)) {
		trace_switch_mm(engine, to);
		ret = ppgtt->switch_mm(ppgtt, req);
		/* The hardware context switch is emitted, but we haven't
		 * actually changed the state - so it's probably safe to bail
		 * here. Still, let the user know something dangerous has
		 * happened.
		 */
		if (ret)
			return ret;
	}

	if (ppgtt)
		ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);

	for (i = 0; i < MAX_L3_SLICES; i++) {
		if (!(to->remap_slice & (1 << i)))
			continue;

		ret = remap_l3(req, i);
		if (ret)
			return ret;

		to->remap_slice &= ~(1 << i);
	}

	if (!to->engine[RCS].initialised) {
		if (engine->init_context) {
			ret = engine->init_context(req);
			if (ret)
				return ret;
		}
		to->engine[RCS].initialised = true;
	}

	return 0;
}

/**
 * i915_switch_context() - perform a GPU context switch.
 * @req: request for which we'll execute the context switch
 *
 * The context life cycle is simple. The context refcount is incremented and
 * decremented by 1 on create and destroy. If the context is in use by the GPU,
 * it will have a refcount > 1. This allows us to destroy the context abstract
 * object while letting the normal object tracking destroy the backing BO.
 *
 * This function should not be used in execlists mode. Instead the context is
 * switched by writing to the ELSP and requests keep a reference to their
 * context.
 */
int i915_switch_context(struct drm_i915_gem_request *req)
{
	struct intel_engine_cs *engine = req->engine;

	lockdep_assert_held(&req->i915->drm.struct_mutex);
	if (i915_modparams.enable_execlists)
		return 0;

	if (!req->ctx->engine[engine->id].state) {
		struct i915_gem_context *to = req->ctx;
		struct i915_hw_ppgtt *ppgtt =
			to->ppgtt ?: req->i915->mm.aliasing_ppgtt;

		if (needs_pd_load_pre(ppgtt, engine)) {
			int ret;

			trace_switch_mm(engine, to);
			ret = ppgtt->switch_mm(ppgtt, req);
			if (ret)
				return ret;

			ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);
		}

		engine->legacy_active_context = to;
		return 0;
	}

	return do_rcs_switch(req);
}

static bool engine_has_kernel_context(struct intel_engine_cs *engine)
{
	struct i915_gem_timeline *timeline;

	list_for_each_entry(timeline, &engine->i915->gt.timelines, link) {
		struct intel_timeline *tl;

		if (timeline == &engine->i915->gt.global_timeline)
			continue;

		tl = &timeline->engine[engine->id];
		if (i915_gem_active_peek(&tl->last_request,
					 &engine->i915->drm.struct_mutex))
			return false;
	}

	return (!engine->last_retired_context ||
		i915_gem_context_is_kernel(engine->last_retired_context));
}

int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv)
{
	struct intel_engine_cs *engine;
	struct i915_gem_timeline *timeline;
	enum intel_engine_id id;

	lockdep_assert_held(&dev_priv->drm.struct_mutex);

	i915_gem_retire_requests(dev_priv);

	for_each_engine(engine, dev_priv, id) {
		struct drm_i915_gem_request *req;
		int ret;

		if (engine_has_kernel_context(engine))
			continue;

		req = i915_gem_request_alloc(engine, dev_priv->kernel_context);
		if (IS_ERR(req))
			return PTR_ERR(req);

		/* Queue this switch after all other activity */
		list_for_each_entry(timeline, &dev_priv->gt.timelines, link) {
			struct drm_i915_gem_request *prev;
			struct intel_timeline *tl;

			tl = &timeline->engine[engine->id];
			prev = i915_gem_active_raw(&tl->last_request,
						   &dev_priv->drm.struct_mutex);
			if (prev)
				i915_sw_fence_await_sw_fence_gfp(&req->submit,
								 &prev->submit,
								 GFP_KERNEL);
		}

		ret = i915_switch_context(req);
		i915_add_request(req);
		if (ret)
			return ret;
	}

	return 0;
}

static bool client_is_banned(struct drm_i915_file_private *file_priv)
{
	return atomic_read(&file_priv->context_bans) > I915_MAX_CLIENT_CONTEXT_BANS;
}

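/*
 * The context ioctls below are reached via DRM_IOCTL_I915_GEM_CONTEXT_*.
 * A hedged sketch of a simple query from userspace (not part of the driver;
 * assumes an open i915 fd, libdrm's drmIoctl() and a ctx_id obtained from
 * CONTEXT_CREATE):
 */
#if 0
	struct drm_i915_gem_context_param p = {
		.ctx_id = ctx_id,
		.param = I915_CONTEXT_PARAM_GTT_SIZE,
	};

	/* On success, p.value reports the total size of the context's ppgtt
	 * (or the aliasing ppgtt / ggtt when full ppgtt is not in use). */
	drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM, &p);
#endif
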
int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
				  struct drm_file *file)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct drm_i915_gem_context_create *args = data;
	struct drm_i915_file_private *file_priv = file->driver_priv;
	struct i915_gem_context *ctx;
	int ret;

	if (!dev_priv->engine[RCS]->context_size)
		return -ENODEV;

	if (args->pad != 0)
		return -EINVAL;

	if (client_is_banned(file_priv)) {
		DRM_DEBUG("client %s[%d] banned from creating ctx\n",
			  current->comm,
			  pid_nr(get_task_pid(current, PIDTYPE_PID)));

		return -EIO;
	}

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	ctx = i915_gem_create_context(dev_priv, file_priv);
	mutex_unlock(&dev->struct_mutex);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	GEM_BUG_ON(i915_gem_context_is_kernel(ctx));

	args->ctx_id = ctx->user_handle;
	DRM_DEBUG("HW context %d created\n", args->ctx_id);

	return 0;
}

int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
				   struct drm_file *file)
{
	struct drm_i915_gem_context_destroy *args = data;
	struct drm_i915_file_private *file_priv = file->driver_priv;
	struct i915_gem_context *ctx;
	int ret;

	if (args->pad != 0)
		return -EINVAL;

	if (args->ctx_id == DEFAULT_CONTEXT_HANDLE)
		return -ENOENT;

	ctx = i915_gem_context_lookup(file_priv, args->ctx_id);
	if (!ctx)
		return -ENOENT;

	ret = mutex_lock_interruptible(&dev->struct_mutex);
	if (ret)
		goto out;

	__destroy_hw_context(ctx, file_priv);
	mutex_unlock(&dev->struct_mutex);

out:
	i915_gem_context_put(ctx);
	return 0;
}

int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
				    struct drm_file *file)
{
	struct drm_i915_file_private *file_priv = file->driver_priv;
	struct drm_i915_gem_context_param *args = data;
	struct i915_gem_context *ctx;
	int ret = 0;

	ctx = i915_gem_context_lookup(file_priv, args->ctx_id);
	if (!ctx)
		return -ENOENT;

	args->size = 0;
	switch (args->param) {
	case I915_CONTEXT_PARAM_BAN_PERIOD:
		ret = -EINVAL;
		break;
	case I915_CONTEXT_PARAM_NO_ZEROMAP:
		args->value = ctx->flags & CONTEXT_NO_ZEROMAP;
		break;
	case I915_CONTEXT_PARAM_GTT_SIZE:
		if (ctx->ppgtt)
			args->value = ctx->ppgtt->base.total;
		else if (to_i915(dev)->mm.aliasing_ppgtt)
			args->value = to_i915(dev)->mm.aliasing_ppgtt->base.total;
		else
			args->value = to_i915(dev)->ggtt.base.total;
		break;
	case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
		args->value = i915_gem_context_no_error_capture(ctx);
		break;
	case I915_CONTEXT_PARAM_BANNABLE:
		args->value = i915_gem_context_is_bannable(ctx);
		break;
	case I915_CONTEXT_PARAM_PRIORITY:
		args->value = ctx->priority;
		break;
	default:
		ret = -EINVAL;
		break;
	}

	i915_gem_context_put(ctx);
	return ret;
}

int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
				    struct drm_file *file)
{
	struct drm_i915_file_private *file_priv = file->driver_priv;
	struct drm_i915_gem_context_param *args = data;
	struct i915_gem_context *ctx;
	int ret;

	ctx = i915_gem_context_lookup(file_priv, args->ctx_id);
	if (!ctx)
		return -ENOENT;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		goto out;

	switch (args->param) {
	case I915_CONTEXT_PARAM_BAN_PERIOD:
		ret = -EINVAL;
		break;
	case I915_CONTEXT_PARAM_NO_ZEROMAP:
		if (args->size) {
			ret = -EINVAL;
		} else {
			ctx->flags &= ~CONTEXT_NO_ZEROMAP;
			ctx->flags |= args->value ? CONTEXT_NO_ZEROMAP : 0;
		}
		break;
	case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
		if (args->size)
			ret = -EINVAL;
		else if (args->value)
			i915_gem_context_set_no_error_capture(ctx);
		else
			i915_gem_context_clear_no_error_capture(ctx);
		break;
	case I915_CONTEXT_PARAM_BANNABLE:
		if (args->size)
			ret = -EINVAL;
		else if (!capable(CAP_SYS_ADMIN) && !args->value)
			ret = -EPERM;
		else if (args->value)
			i915_gem_context_set_bannable(ctx);
		else
			i915_gem_context_clear_bannable(ctx);
		break;

	case I915_CONTEXT_PARAM_PRIORITY:
		{
			int priority = args->value;

			if (args->size)
				ret = -EINVAL;
			else if (!to_i915(dev)->engine[RCS]->schedule)
				ret = -ENODEV;
			else if (priority > I915_CONTEXT_MAX_USER_PRIORITY ||
				 priority < I915_CONTEXT_MIN_USER_PRIORITY)
				ret = -EINVAL;
#if 0
			else if (priority > I915_CONTEXT_DEFAULT_PRIORITY &&
				 !capable(CAP_SYS_NICE))
#else
			else if (priority > I915_CONTEXT_DEFAULT_PRIORITY)
#endif
				ret = -EPERM;
			else
				ctx->priority = priority;
		}
		break;

	default:
		ret = -EINVAL;
		break;
	}
	mutex_unlock(&dev->struct_mutex);

out:
	i915_gem_context_put(ctx);
	return ret;
}

int i915_gem_context_reset_stats_ioctl(struct drm_device *dev,
				       void *data, struct drm_file *file)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct drm_i915_reset_stats *args = data;
	struct i915_gem_context *ctx;
	int ret;

	if (args->flags || args->pad)
		return -EINVAL;

	ret = -ENOENT;
	rcu_read_lock();
	ctx = __i915_gem_context_lookup_rcu(file->driver_priv, args->ctx_id);
	if (!ctx)
		goto out;

	/*
	 * We opt for unserialised reads here. This may result in tearing
	 * in the extremely unlikely event of a GPU hang on this context
	 * as we are querying them. If we need that extra layer of protection,
	 * we should wrap the hangstats with a seqlock.
	 */

	if (capable(CAP_SYS_ADMIN))
		args->reset_count = i915_reset_count(&dev_priv->gpu_error);
	else
		args->reset_count = 0;

	args->batch_active = atomic_read(&ctx->guilty_count);
	args->batch_pending = atomic_read(&ctx->active_count);

	ret = 0;
out:
	rcu_read_unlock();
	return ret;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_context.c"
#include "selftests/i915_gem_context.c"
#endif