/*
 * Copyright © 2011-2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Ben Widawsky <ben@bwidawsk.net>
 *
 */

/*
 * This file implements HW context support. On gen5+ a HW context consists of
 * an opaque GPU object which is referenced at times of context saves and
 * restores. With RC6 enabled, the context is also referenced as the GPU enters
 * and exits RC6 (the GPU has its own internal power context, except on gen5).
 * Though something like a context does exist for the media ring, the code only
 * supports contexts for the render ring.
 *
 * In software, there is a distinction between contexts created by the user,
 * and the default HW context. The default HW context is used by GPU clients
 * that do not request setup of their own hardware context. The default
 * context's state is never restored to help prevent programming errors. This
 * would happen if a client ran and piggy-backed off another client's GPU
 * state. The default context only exists to give the GPU some offset to load
 * as the current to invoke a save of the context we actually care about. In
 * fact, the code could likely be constructed, albeit in a more complicated
 * fashion, to never use the default context, though that limits the driver's
 * ability to swap out, and/or destroy other contexts.
 *
 * All other contexts are created as a request by the GPU client. These
 * contexts store GPU state, and thus allow GPU clients to not re-emit state
 * (and potentially query certain state) at any time. The kernel driver makes
 * certain that the appropriate commands are inserted.
 *
 * The context life cycle is semi-complicated in that context BOs may live
 * longer than the context itself because of the way the hardware, and object
 * tracking works. Below is a very crude representation of the state machine
 * describing the context life.
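 * (Roughly: refcount below is the software reference count held on the
 * context, pincount is the pin count on the context's backing object in the
 * GGTT, and active indicates whether that object is still on the GPU's
 * active list.)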
 *                                         refcount     pincount     active
 * S0: initial state                           0            0           0
 * S1: context created                         1            0           0
 * S2: context is currently running            2            1           X
 * S3: GPU referenced, but not current         2            0           1
 * S4: context is current, but destroyed       1            1           0
 * S5: like S3, but destroyed                  1            0           1
 *
 * The most common (but not all) transitions:
 * S0->S1: client creates a context
 * S1->S2: client submits execbuf with context
 * S2->S3: another client submits an execbuf with a different context
 * S3->S1: context object was retired
 * S3->S2: client submits another execbuf
 * S2->S4: context destroy called with current context
 * S3->S5->S0: destroy path
 * S4->S5->S0: destroy path on current context
 *
 * There are two confusing terms used above:
 *  The "current context" means the context which is currently running on the
 *  GPU. The GPU has loaded its state already and has stored away the gtt
 *  offset of the BO. The GPU is not actively referencing the data at this
 *  offset, but it will on the next context switch. The only way to avoid this
 *  is to do a GPU reset.
 *
 *  An "active context" is one which was previously the "current context" and
 *  is on the active list waiting for the next context switch to occur. Until
 *  this happens, the object must remain at the same gtt offset. It is
 *  therefore possible to destroy a context, but it is still active.
 *
 */

#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_trace.h"

#define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1

/* This is a HW constraint. The value below is the largest known requirement
 * I've seen in a spec to date, and that was a workaround for a non-shipping
 * part. It should be safe to decrease this, but it's more future proof as is.
 */
#define GEN6_CONTEXT_ALIGN (64<<10)
#define GEN7_CONTEXT_ALIGN 4096

static size_t get_context_alignment(struct drm_i915_private *dev_priv)
{
	if (IS_GEN6(dev_priv))
		return GEN6_CONTEXT_ALIGN;

	return GEN7_CONTEXT_ALIGN;
}

static int get_context_size(struct drm_i915_private *dev_priv)
{
	int ret;
	u32 reg;

	switch (INTEL_GEN(dev_priv)) {
	case 6:
		reg = I915_READ(CXT_SIZE);
		ret = GEN6_CXT_TOTAL_SIZE(reg) * 64;
		break;
	case 7:
		reg = I915_READ(GEN7_CXT_SIZE);
		if (IS_HASWELL(dev_priv))
			ret = HSW_CXT_TOTAL_SIZE;
		else
			ret = GEN7_CXT_TOTAL_SIZE(reg) * 64;
		break;
	case 8:
		ret = GEN8_CXT_TOTAL_SIZE;
		break;
	default:
		BUG();
	}

	return ret;
}

void i915_gem_context_free(struct kref *ctx_ref)
{
	struct i915_gem_context *ctx = container_of(ctx_ref, typeof(*ctx), ref);
	int i;

	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
	trace_i915_context_free(ctx);
	GEM_BUG_ON(!ctx->closed);

	i915_ppgtt_put(ctx->ppgtt);

	for (i = 0; i < I915_NUM_ENGINES; i++) {
		struct intel_context *ce = &ctx->engine[i];

		if (!ce->state)
			continue;

		WARN_ON(ce->pin_count);
		if (ce->ring)
			intel_ring_free(ce->ring);

		__i915_gem_object_release_unless_active(ce->state->obj);
	}

	kfree(ctx->name);
	put_pid(ctx->pid);
	list_del(&ctx->link);

	ida_simple_remove(&ctx->i915->context_hw_ida, ctx->hw_id);
	kfree(ctx);
}

struct drm_i915_gem_object *
i915_gem_alloc_context_obj(struct drm_device *dev, size_t size)
{
	struct drm_i915_gem_object *obj;
	int ret;

	lockdep_assert_held(&dev->struct_mutex);

	obj = i915_gem_object_create(dev, size);
	if (IS_ERR(obj))
		return obj;

	/*
	 * Try to make the context utilize L3 as well as LLC.
	 *
	 * On VLV we don't have L3 controls in the PTEs so we
	 * shouldn't touch the cache level, especially as that
	 * would make the object snooped which might have a
	 * negative performance impact.
	 *
	 * Snooping is required on non-llc platforms in execlist
	 * mode, but since all GGTT accesses use PAT entry 0 we
	 * get snooping anyway regardless of cache_level.
	 *
	 * This is only applicable for Ivy Bridge devices since
	 * later platforms don't have L3 control bits in the PTE.
	 */
	if (IS_IVYBRIDGE(to_i915(dev))) {
		ret = i915_gem_object_set_cache_level(obj, I915_CACHE_L3_LLC);
		/* Failure shouldn't ever happen this early */
		if (WARN_ON(ret)) {
			i915_gem_object_put(obj);
			return ERR_PTR(ret);
		}
	}

	return obj;
}

static void i915_ppgtt_close(struct i915_address_space *vm)
{
	struct list_head *phases[] = {
		&vm->active_list,
		&vm->inactive_list,
		&vm->unbound_list,
		NULL,
	}, **phase;

	GEM_BUG_ON(vm->closed);
	vm->closed = true;

	for (phase = phases; *phase; phase++) {
		struct i915_vma *vma, *vn;

		list_for_each_entry_safe(vma, vn, *phase, vm_link)
			if (!i915_vma_is_closed(vma))
				i915_vma_close(vma);
	}
}

static void context_close(struct i915_gem_context *ctx)
{
	GEM_BUG_ON(ctx->closed);
	ctx->closed = true;
	if (ctx->ppgtt)
		i915_ppgtt_close(&ctx->ppgtt->base);
	ctx->file_priv = ERR_PTR(-EBADF);
	i915_gem_context_put(ctx);
}

static int assign_hw_id(struct drm_i915_private *dev_priv, unsigned *out)
{
	int ret;

	ret = ida_simple_get(&dev_priv->context_hw_ida,
			     0, MAX_CONTEXT_HW_ID, GFP_KERNEL);
	if (ret < 0) {
		/* Contexts are only released when no longer active.
		 * Flush any pending retires to hopefully release some
		 * stale contexts and try again.
		 */
		i915_gem_retire_requests(dev_priv);
		ret = ida_simple_get(&dev_priv->context_hw_ida,
				     0, MAX_CONTEXT_HW_ID, GFP_KERNEL);
		if (ret < 0)
			return ret;
	}

	*out = ret;
	return 0;
}

static struct i915_gem_context *
__create_hw_context(struct drm_device *dev,
		    struct drm_i915_file_private *file_priv)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct i915_gem_context *ctx;
	int ret;

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (ctx == NULL)
		return ERR_PTR(-ENOMEM);

	ret = assign_hw_id(dev_priv, &ctx->hw_id);
	if (ret) {
		kfree(ctx);
		return ERR_PTR(ret);
	}

	kref_init(&ctx->ref);
	list_add_tail(&ctx->link, &dev_priv->context_list);
	ctx->i915 = dev_priv;

	ctx->ggtt_alignment = get_context_alignment(dev_priv);

	if (dev_priv->hw_context_size) {
		struct drm_i915_gem_object *obj;
		struct i915_vma *vma;

		obj = i915_gem_alloc_context_obj(dev,
						 dev_priv->hw_context_size);
		if (IS_ERR(obj)) {
			ret = PTR_ERR(obj);
			goto err_out;
		}

		vma = i915_vma_create(obj, &dev_priv->ggtt.base, NULL);
		if (IS_ERR(vma)) {
			i915_gem_object_put(obj);
			ret = PTR_ERR(vma);
			goto err_out;
		}

		ctx->engine[RCS].state = vma;
	}

	/* Default context will never have a file_priv */
	ret = DEFAULT_CONTEXT_HANDLE;
	if (file_priv) {
		ret = idr_alloc(&file_priv->context_idr, ctx,
				DEFAULT_CONTEXT_HANDLE, 0, GFP_KERNEL);
		if (ret < 0)
			goto err_out;
	}
	ctx->user_handle = ret;

	ctx->file_priv = file_priv;
	if (file_priv) {
#ifndef __DragonFly__
		ctx->pid = get_task_pid(current, PIDTYPE_PID);
#else
		ctx->pid = DRM_CURRENTPID;
#endif
		ctx->name = kasprintf(GFP_KERNEL, "%s[%d]/%x",
				      "current->comm",
				      pid_nr(ctx->pid),
				      ctx->user_handle);
		if (!ctx->name) {
			ret = -ENOMEM;
			goto err_pid;
		}
	}

	/* NB: Mark all slices as needing a remap so that when the context first
	 * loads it will restore whatever remap state already exists. If there
	 * is no remap info, it will be a NOP.
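	 * (remap_slice is a bitmask with one bit per L3 slice; each bit is
	 * cleared again once that slice's remap table has been re-emitted,
	 * see remap_l3() and do_rcs_switch() below.)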
	 */
	ctx->remap_slice = ALL_L3_SLICES(dev_priv);

	ctx->hang_stats.ban_period_seconds = DRM_I915_CTX_BAN_PERIOD;
	ctx->ring_size = 4 * PAGE_SIZE;
	ctx->desc_template = GEN8_CTX_ADDRESSING_MODE(dev_priv) <<
			     GEN8_CTX_ADDRESSING_MODE_SHIFT;
	ATOMIC_INIT_NOTIFIER_HEAD(&ctx->status_notifier);

	return ctx;

err_pid:
	put_pid(ctx->pid);
	idr_remove(&file_priv->context_idr, ctx->user_handle);
err_out:
	context_close(ctx);
	return ERR_PTR(ret);
}

/**
 * The default context needs to exist per ring that uses contexts. It stores
 * the context state of the GPU for applications that don't utilize HW
 * contexts, as well as an idle case.
 */
static struct i915_gem_context *
i915_gem_create_context(struct drm_device *dev,
			struct drm_i915_file_private *file_priv)
{
	struct i915_gem_context *ctx;

	lockdep_assert_held(&dev->struct_mutex);

	ctx = __create_hw_context(dev, file_priv);
	if (IS_ERR(ctx))
		return ctx;

	if (USES_FULL_PPGTT(dev)) {
		struct i915_hw_ppgtt *ppgtt;

		ppgtt = i915_ppgtt_create(to_i915(dev), file_priv, ctx->name);
		if (IS_ERR(ppgtt)) {
			DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n",
					 PTR_ERR(ppgtt));
			idr_remove(&file_priv->context_idr, ctx->user_handle);
			context_close(ctx);
			return ERR_CAST(ppgtt);
		}

		ctx->ppgtt = ppgtt;
	}

	trace_i915_context_create(ctx);

	return ctx;
}

/**
 * i915_gem_context_create_gvt - create a GVT GEM context
 * @dev: drm device *
 *
 * This function is used to create a GVT specific GEM context.
 *
 * Returns:
 * pointer to i915_gem_context on success, error pointer if failed
 *
 */
struct i915_gem_context *
i915_gem_context_create_gvt(struct drm_device *dev)
{
	struct i915_gem_context *ctx;
	int ret;

	if (!IS_ENABLED(CONFIG_DRM_I915_GVT))
		return ERR_PTR(-ENODEV);

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ERR_PTR(ret);

	ctx = i915_gem_create_context(dev, NULL);
	if (IS_ERR(ctx))
		goto out;

	ctx->execlists_force_single_submission = true;
	ctx->ring_size = 512 * PAGE_SIZE; /* Max ring buffer size */
out:
	mutex_unlock(&dev->struct_mutex);
	return ctx;
}

static void i915_gem_context_unpin(struct i915_gem_context *ctx,
				   struct intel_engine_cs *engine)
{
	if (i915.enable_execlists) {
		intel_lr_context_unpin(ctx, engine);
	} else {
		struct intel_context *ce = &ctx->engine[engine->id];

		if (ce->state)
			i915_vma_unpin(ce->state);

		i915_gem_context_put(ctx);
	}
}

int i915_gem_context_init(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct i915_gem_context *ctx;

	/* Init should only be called once per module load. Eventually the
	 * restriction on the context_disabled check can be loosened. */
	if (WARN_ON(dev_priv->kernel_context))
		return 0;

	if (intel_vgpu_active(dev_priv) &&
	    HAS_LOGICAL_RING_CONTEXTS(dev_priv)) {
		if (!i915.enable_execlists) {
			DRM_INFO("Only EXECLIST mode is supported in vgpu.\n");
			return -EINVAL;
		}
	}

	/* Using the simple ida interface, the max is limited by sizeof(int) */
	BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX);
	ida_init(&dev_priv->context_hw_ida);

	if (i915.enable_execlists) {
		/* NB: intentionally left blank.
		 * We will allocate our own backing objects as we need them,
		 * thank you very much */
		dev_priv->hw_context_size = 0;
	} else if (HAS_HW_CONTEXTS(dev_priv)) {
		dev_priv->hw_context_size =
			round_up(get_context_size(dev_priv), 4096);
		if (dev_priv->hw_context_size > (1<<20)) {
			DRM_DEBUG_DRIVER("Disabling HW Contexts; invalid size %d\n",
					 dev_priv->hw_context_size);
			dev_priv->hw_context_size = 0;
		}
	}

	ctx = i915_gem_create_context(dev, NULL);
	if (IS_ERR(ctx)) {
		DRM_ERROR("Failed to create default global context (error %ld)\n",
			  PTR_ERR(ctx));
		return PTR_ERR(ctx);
	}

	ctx->priority = I915_PRIORITY_MIN; /* lowest priority; idle task */
	dev_priv->kernel_context = ctx;

	DRM_DEBUG_DRIVER("%s context support initialized\n",
			 i915.enable_execlists ? "LR" :
			 dev_priv->hw_context_size ? "HW" : "fake");
	return 0;
}

void i915_gem_context_lost(struct drm_i915_private *dev_priv)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	lockdep_assert_held(&dev_priv->drm.struct_mutex);

	for_each_engine(engine, dev_priv, id) {
		if (engine->last_context) {
			i915_gem_context_unpin(engine->last_context, engine);
			engine->last_context = NULL;
		}
	}

	/* Force the GPU state to be restored on enabling */
	if (!i915.enable_execlists) {
		struct i915_gem_context *ctx;

		list_for_each_entry(ctx, &dev_priv->context_list, link) {
			if (!i915_gem_context_is_default(ctx))
				continue;

			for_each_engine(engine, dev_priv, id)
				ctx->engine[engine->id].initialised = false;

			ctx->remap_slice = ALL_L3_SLICES(dev_priv);
		}

		for_each_engine(engine, dev_priv, id) {
			struct intel_context *kce =
				&dev_priv->kernel_context->engine[engine->id];

			kce->initialised = true;
		}
	}
}

void i915_gem_context_fini(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct i915_gem_context *dctx = dev_priv->kernel_context;

	lockdep_assert_held(&dev->struct_mutex);

	context_close(dctx);
	dev_priv->kernel_context = NULL;

	ida_destroy(&dev_priv->context_hw_ida);
}

static int context_idr_cleanup(int id, void *p, void *data)
{
	struct i915_gem_context *ctx = p;

	context_close(ctx);
	return 0;
}

int i915_gem_context_open(struct drm_device *dev, struct drm_file *file)
{
	struct drm_i915_file_private *file_priv = file->driver_priv;
	struct i915_gem_context *ctx;

	idr_init(&file_priv->context_idr);

	mutex_lock(&dev->struct_mutex);
	ctx = i915_gem_create_context(dev, file_priv);
	mutex_unlock(&dev->struct_mutex);

	if (IS_ERR(ctx)) {
		idr_destroy(&file_priv->context_idr);
		return PTR_ERR(ctx);
	}

	return 0;
}

void i915_gem_context_close(struct drm_device *dev, struct drm_file *file)
{
	struct drm_i915_file_private *file_priv = file->driver_priv;

	lockdep_assert_held(&dev->struct_mutex);

	idr_for_each(&file_priv->context_idr, context_idr_cleanup, NULL);
	idr_destroy(&file_priv->context_idr);
}

static inline int
mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
{
	struct drm_i915_private *dev_priv = req->i915;
	struct intel_ring *ring = req->ring;
	struct intel_engine_cs *engine = req->engine;
	enum intel_engine_id id;
	u32 flags = hw_flags | MI_MM_SPACE_GTT;
	const int num_rings =
		/* Use an extended w/a on ivb+ if signalling from other rings */
		i915.semaphores ?
		INTEL_INFO(dev_priv)->num_rings - 1 :
		0;
	int len, ret;

	/* w/a: If Flush TLB Invalidation Mode is enabled, driver must do a TLB
	 * invalidation prior to MI_SET_CONTEXT. On GEN6 we don't set the value
	 * explicitly, so we rely on the value at ring init, stored in
	 * itlb_before_ctx_switch.
	 */
	if (IS_GEN6(dev_priv)) {
		ret = engine->emit_flush(req, EMIT_INVALIDATE);
		if (ret)
			return ret;
	}

	/* These flags are for resource streamer on HSW+ */
	if (IS_HASWELL(dev_priv) || INTEL_GEN(dev_priv) >= 8)
		flags |= (HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN);
	else if (INTEL_GEN(dev_priv) < 8)
		flags |= (MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN);

	len = 4;
	if (INTEL_GEN(dev_priv) >= 7)
		len += 2 + (num_rings ? 4*num_rings + 6 : 0);

	ret = intel_ring_begin(req, len);
	if (ret)
		return ret;

	/* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */
	if (INTEL_GEN(dev_priv) >= 7) {
		intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_DISABLE);
		if (num_rings) {
			struct intel_engine_cs *signaller;

			intel_ring_emit(ring,
					MI_LOAD_REGISTER_IMM(num_rings));
			for_each_engine(signaller, dev_priv, id) {
				if (signaller == engine)
					continue;

				intel_ring_emit_reg(ring,
						    RING_PSMI_CTL(signaller->mmio_base));
				intel_ring_emit(ring,
						_MASKED_BIT_ENABLE(GEN6_PSMI_SLEEP_MSG_DISABLE));
			}
		}
	}

	intel_ring_emit(ring, MI_NOOP);
	intel_ring_emit(ring, MI_SET_CONTEXT);
	intel_ring_emit(ring,
			i915_ggtt_offset(req->ctx->engine[RCS].state) | flags);
	/*
	 * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP
	 * WaMiSetContext_Hang:snb,ivb,vlv
	 */
	intel_ring_emit(ring, MI_NOOP);

	if (INTEL_GEN(dev_priv) >= 7) {
		if (num_rings) {
			struct intel_engine_cs *signaller;
			i915_reg_t last_reg = {}; /* keep gcc quiet */

			intel_ring_emit(ring,
					MI_LOAD_REGISTER_IMM(num_rings));
			for_each_engine(signaller, dev_priv, id) {
				if (signaller == engine)
					continue;

				last_reg = RING_PSMI_CTL(signaller->mmio_base);
				intel_ring_emit_reg(ring, last_reg);
				intel_ring_emit(ring,
						_MASKED_BIT_DISABLE(GEN6_PSMI_SLEEP_MSG_DISABLE));
			}

			/* Insert a delay before the next switch! */
			intel_ring_emit(ring,
					MI_STORE_REGISTER_MEM |
					MI_SRM_LRM_GLOBAL_GTT);
			intel_ring_emit_reg(ring, last_reg);
			intel_ring_emit(ring,
					i915_ggtt_offset(engine->scratch));
			intel_ring_emit(ring, MI_NOOP);
		}
		intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_ENABLE);
	}

	intel_ring_advance(ring);

	return ret;
}

static int remap_l3(struct drm_i915_gem_request *req, int slice)
{
	u32 *remap_info = req->i915->l3_parity.remap_info[slice];
	struct intel_ring *ring = req->ring;
	int i, ret;

	if (!remap_info)
		return 0;

	ret = intel_ring_begin(req, GEN7_L3LOG_SIZE/4 * 2 + 2);
	if (ret)
		return ret;

	/*
	 * Note: We do not worry about the concurrent register cacheline hang
	 * here because no other code should access these registers other than
	 * at initialization time.
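	 *
	 * (Each slice's remap table is GEN7_L3LOG_SIZE bytes, emitted below as
	 * GEN7_L3LOG_SIZE/4 register writes of two dwords each under a single
	 * MI_LOAD_REGISTER_IMM, which is what the intel_ring_begin()
	 * reservation above accounts for.)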
	 */
	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4));
	for (i = 0; i < GEN7_L3LOG_SIZE/4; i++) {
		intel_ring_emit_reg(ring, GEN7_L3LOG(slice, i));
		intel_ring_emit(ring, remap_info[i]);
	}
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}

static inline bool skip_rcs_switch(struct i915_hw_ppgtt *ppgtt,
				   struct intel_engine_cs *engine,
				   struct i915_gem_context *to)
{
	if (to->remap_slice)
		return false;

	if (!to->engine[RCS].initialised)
		return false;

	if (ppgtt && (intel_engine_flag(engine) & ppgtt->pd_dirty_rings))
		return false;

	return to == engine->last_context;
}

static bool
needs_pd_load_pre(struct i915_hw_ppgtt *ppgtt,
		  struct intel_engine_cs *engine,
		  struct i915_gem_context *to)
{
	if (!ppgtt)
		return false;

	/* Always load the ppgtt on first use */
	if (!engine->last_context)
		return true;

	/* Same context without new entries, skip */
	if (engine->last_context == to &&
	    !(intel_engine_flag(engine) & ppgtt->pd_dirty_rings))
		return false;

	if (engine->id != RCS)
		return true;

	if (INTEL_GEN(engine->i915) < 8)
		return true;

	return false;
}

static bool
needs_pd_load_post(struct i915_hw_ppgtt *ppgtt,
		   struct i915_gem_context *to,
		   u32 hw_flags)
{
	if (!ppgtt)
		return false;

	if (!IS_GEN8(to->i915))
		return false;

	if (hw_flags & MI_RESTORE_INHIBIT)
		return true;

	return false;
}

struct i915_vma *
i915_gem_context_pin_legacy(struct i915_gem_context *ctx,
			    unsigned int flags)
{
	struct i915_vma *vma = ctx->engine[RCS].state;
	int ret;

	/* Clear this page out of any CPU caches for coherent swap-in/out.
	 * We only want to do this on the first bind so that we do not stall
	 * on an active context (which by nature is already on the GPU).
	 */
	if (!(vma->flags & I915_VMA_GLOBAL_BIND)) {
		ret = i915_gem_object_set_to_gtt_domain(vma->obj, false);
		if (ret)
			return ERR_PTR(ret);
	}

	ret = i915_vma_pin(vma, 0, ctx->ggtt_alignment, PIN_GLOBAL | flags);
	if (ret)
		return ERR_PTR(ret);

	return vma;
}

static int do_rcs_switch(struct drm_i915_gem_request *req)
{
	struct i915_gem_context *to = req->ctx;
	struct intel_engine_cs *engine = req->engine;
	struct i915_hw_ppgtt *ppgtt = to->ppgtt ?: req->i915->mm.aliasing_ppgtt;
	struct i915_vma *vma;
	struct i915_gem_context *from;
	u32 hw_flags;
	int ret, i;

	if (skip_rcs_switch(ppgtt, engine, to))
		return 0;

	/* Trying to pin first makes error handling easier. */
	vma = i915_gem_context_pin_legacy(to, 0);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	/*
	 * Pin can switch back to the default context if we end up calling into
	 * evict_everything - as a last ditch gtt defrag effort that also
	 * switches to the default context. Hence we need to reload from here.
	 *
	 * XXX: Doing so is painfully broken!
	 */
	from = engine->last_context;

	if (needs_pd_load_pre(ppgtt, engine, to)) {
		/* Older GENs and non render rings still want the load first,
		 * "PP_DCLV followed by PP_DIR_BASE register through Load
		 * Register Immediate commands in Ring Buffer before submitting
		 * a context." */
		trace_switch_mm(engine, to);
		ret = ppgtt->switch_mm(ppgtt, req);
		if (ret)
			goto err;
	}

	if (!to->engine[RCS].initialised || i915_gem_context_is_default(to))
		/* NB: If we inhibit the restore, the context is not allowed to
		 * die because future work may end up depending on valid address
		 * space. This means we must enforce that a page table load
		 * occur when this occurs. */
		hw_flags = MI_RESTORE_INHIBIT;
	else if (ppgtt && intel_engine_flag(engine) & ppgtt->pd_dirty_rings)
		hw_flags = MI_FORCE_RESTORE;
	else
		hw_flags = 0;

	if (to != from || (hw_flags & MI_FORCE_RESTORE)) {
		ret = mi_set_context(req, hw_flags);
		if (ret)
			goto err;
	}

	/* The backing object for the context is done after switching to the
	 * *next* context. Therefore we cannot retire the previous context until
	 * the next context has already started running. In fact, the below code
	 * is a bit suboptimal because the retiring can occur simply after the
	 * MI_SET_CONTEXT instead of when the next seqno has completed.
	 */
	if (from != NULL) {
		/* As long as MI_SET_CONTEXT is serializing, ie. it flushes the
		 * whole damn pipeline, we don't need to explicitly mark the
		 * object dirty. The only exception is that the context must be
		 * correct in case the object gets swapped out. Ideally we'd be
		 * able to defer doing this until we know the object would be
		 * swapped, but there is no way to do that yet.
		 */
		i915_vma_move_to_active(from->engine[RCS].state, req, 0);
		/* state is kept alive until the next request */
		i915_vma_unpin(from->engine[RCS].state);
		i915_gem_context_put(from);
	}
	engine->last_context = i915_gem_context_get(to);

	/* GEN8 does *not* require an explicit reload if the PDPs have been
	 * setup, and we do not wish to move them.
	 */
	if (needs_pd_load_post(ppgtt, to, hw_flags)) {
		trace_switch_mm(engine, to);
		ret = ppgtt->switch_mm(ppgtt, req);
		/* The hardware context switch is emitted, but we haven't
		 * actually changed the state - so it's probably safe to bail
		 * here. Still, let the user know something dangerous has
		 * happened.
		 */
		if (ret)
			return ret;
	}

	if (ppgtt)
		ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);

	for (i = 0; i < MAX_L3_SLICES; i++) {
		if (!(to->remap_slice & (1<<i)))
			continue;

		ret = remap_l3(req, i);
		if (ret)
			return ret;

		to->remap_slice &= ~(1<<i);
	}

	if (!to->engine[RCS].initialised) {
		if (engine->init_context) {
			ret = engine->init_context(req);
			if (ret)
				return ret;
		}
		to->engine[RCS].initialised = true;
	}

	return 0;

err:
	i915_vma_unpin(vma);
	return ret;
}

/**
 * i915_switch_context() - perform a GPU context switch.
 * @req: request for which we'll execute the context switch
 *
 * The context life cycle is simple. The context refcount is incremented and
 * decremented by 1 on create and destroy. If the context is in use by the GPU,
 * it will have a refcount > 1.
 * This allows us to destroy the context abstract object while letting the
 * normal object tracking destroy the backing BO.
 *
 * This function should not be used in execlists mode. Instead the context is
 * switched by writing to the ELSP and requests keep a reference to their
 * context.
 */
int i915_switch_context(struct drm_i915_gem_request *req)
{
	struct intel_engine_cs *engine = req->engine;

	lockdep_assert_held(&req->i915->drm.struct_mutex);
	if (i915.enable_execlists)
		return 0;

	if (!req->ctx->engine[engine->id].state) {
		struct i915_gem_context *to = req->ctx;
		struct i915_hw_ppgtt *ppgtt =
			to->ppgtt ?: req->i915->mm.aliasing_ppgtt;

		if (needs_pd_load_pre(ppgtt, engine, to)) {
			int ret;

			trace_switch_mm(engine, to);
			ret = ppgtt->switch_mm(ppgtt, req);
			if (ret)
				return ret;

			ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);
		}

		if (to != engine->last_context) {
			if (engine->last_context)
				i915_gem_context_put(engine->last_context);
			engine->last_context = i915_gem_context_get(to);
		}

		return 0;
	}

	return do_rcs_switch(req);
}

int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv)
{
	struct intel_engine_cs *engine;
	struct i915_gem_timeline *timeline;
	enum intel_engine_id id;

	lockdep_assert_held(&dev_priv->drm.struct_mutex);

	for_each_engine(engine, dev_priv, id) {
		struct drm_i915_gem_request *req;
		int ret;

		req = i915_gem_request_alloc(engine, dev_priv->kernel_context);
		if (IS_ERR(req))
			return PTR_ERR(req);

		/* Queue this switch after all other activity */
		list_for_each_entry(timeline, &dev_priv->gt.timelines, link) {
			struct drm_i915_gem_request *prev;
			struct intel_timeline *tl;

			tl = &timeline->engine[engine->id];
			prev = i915_gem_active_raw(&tl->last_request,
						   &dev_priv->drm.struct_mutex);
			if (prev)
				i915_sw_fence_await_sw_fence_gfp(&req->submit,
								 &prev->submit,
								 GFP_KERNEL);
		}

		ret = i915_switch_context(req);
		i915_add_request_no_flush(req);
		if (ret)
			return ret;
	}

	return 0;
}

static bool contexts_enabled(struct drm_device *dev)
{
	return i915.enable_execlists || to_i915(dev)->hw_context_size;
}

int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
				  struct drm_file *file)
{
	struct drm_i915_gem_context_create *args = data;
	struct drm_i915_file_private *file_priv = file->driver_priv;
	struct i915_gem_context *ctx;
	int ret;

	if (!contexts_enabled(dev))
		return -ENODEV;

	if (args->pad != 0)
		return -EINVAL;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	ctx = i915_gem_create_context(dev, file_priv);
	mutex_unlock(&dev->struct_mutex);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	args->ctx_id = ctx->user_handle;
	DRM_DEBUG_DRIVER("HW context %d created\n", args->ctx_id);

	return 0;
}

int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
				   struct drm_file *file)
{
	struct drm_i915_gem_context_destroy *args = data;
	struct drm_i915_file_private *file_priv = file->driver_priv;
	struct i915_gem_context *ctx;
	int ret;

	if (args->pad != 0)
		return -EINVAL;

	if (args->ctx_id == DEFAULT_CONTEXT_HANDLE)
		return -ENOENT;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	ctx = i915_gem_context_lookup(file_priv, args->ctx_id);
	if (IS_ERR(ctx)) {
		mutex_unlock(&dev->struct_mutex);
		return PTR_ERR(ctx);
	}

	idr_remove(&file_priv->context_idr, ctx->user_handle);
	context_close(ctx);
	mutex_unlock(&dev->struct_mutex);

	DRM_DEBUG_DRIVER("HW context %d destroyed\n", args->ctx_id);
	return 0;
}

int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
				    struct drm_file *file)
{
	struct drm_i915_file_private *file_priv = file->driver_priv;
	struct drm_i915_gem_context_param *args = data;
	struct i915_gem_context *ctx;
	int ret;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	ctx = i915_gem_context_lookup(file_priv, args->ctx_id);
	if (IS_ERR(ctx)) {
		mutex_unlock(&dev->struct_mutex);
		return PTR_ERR(ctx);
	}

	args->size = 0;
	switch (args->param) {
	case I915_CONTEXT_PARAM_BAN_PERIOD:
		args->value = ctx->hang_stats.ban_period_seconds;
		break;
	case I915_CONTEXT_PARAM_NO_ZEROMAP:
		args->value = ctx->flags & CONTEXT_NO_ZEROMAP;
		break;
	case I915_CONTEXT_PARAM_GTT_SIZE:
		if (ctx->ppgtt)
			args->value = ctx->ppgtt->base.total;
		else if (to_i915(dev)->mm.aliasing_ppgtt)
			args->value = to_i915(dev)->mm.aliasing_ppgtt->base.total;
		else
			args->value = to_i915(dev)->ggtt.base.total;
		break;
	case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
		args->value = !!(ctx->flags & CONTEXT_NO_ERROR_CAPTURE);
		break;
	default:
		ret = -EINVAL;
		break;
	}
	mutex_unlock(&dev->struct_mutex);

	return ret;
}

int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
				    struct drm_file *file)
{
	struct drm_i915_file_private *file_priv = file->driver_priv;
	struct drm_i915_gem_context_param *args = data;
	struct i915_gem_context *ctx;
	int ret;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	ctx = i915_gem_context_lookup(file_priv, args->ctx_id);
	if (IS_ERR(ctx)) {
		mutex_unlock(&dev->struct_mutex);
		return PTR_ERR(ctx);
	}

	switch (args->param) {
	case I915_CONTEXT_PARAM_BAN_PERIOD:
		if (args->size)
			ret = -EINVAL;
		else if (args->value < ctx->hang_stats.ban_period_seconds &&
			 !capable(CAP_SYS_ADMIN))
			ret = -EPERM;
		else
			ctx->hang_stats.ban_period_seconds = args->value;
		break;
	case I915_CONTEXT_PARAM_NO_ZEROMAP:
		if (args->size) {
			ret = -EINVAL;
		} else {
			ctx->flags &= ~CONTEXT_NO_ZEROMAP;
			ctx->flags |= args->value ? CONTEXT_NO_ZEROMAP : 0;
		}
		break;
	case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
		if (args->size) {
			ret = -EINVAL;
		} else {
			if (args->value)
				ctx->flags |= CONTEXT_NO_ERROR_CAPTURE;
			else
				ctx->flags &= ~CONTEXT_NO_ERROR_CAPTURE;
		}
		break;
	default:
		ret = -EINVAL;
		break;
	}
	mutex_unlock(&dev->struct_mutex);

	return ret;
}

int i915_gem_context_reset_stats_ioctl(struct drm_device *dev,
				       void *data, struct drm_file *file)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct drm_i915_reset_stats *args = data;
	struct i915_ctx_hang_stats *hs;
	struct i915_gem_context *ctx;
	int ret;

	if (args->flags || args->pad)
		return -EINVAL;

	if (args->ctx_id == DEFAULT_CONTEXT_HANDLE && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	ctx = i915_gem_context_lookup(file->driver_priv, args->ctx_id);
	if (IS_ERR(ctx)) {
		mutex_unlock(&dev->struct_mutex);
		return PTR_ERR(ctx);
	}
	hs = &ctx->hang_stats;

	if (capable(CAP_SYS_ADMIN))
		args->reset_count = i915_reset_count(&dev_priv->gpu_error);
	else
		args->reset_count = 0;

	args->batch_active = hs->batch_active;
	args->batch_pending = hs->batch_pending;

	mutex_unlock(&dev->struct_mutex);

	return 0;
}