/*
 * Copyright © 2011-2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Ben Widawsky <ben@bwidawsk.net>
 *
 */

/*
 * This file implements HW context support. On gen5+ a HW context consists of an
 * opaque GPU object which is referenced at times of context saves and restores.
 * With RC6 enabled, the context is also referenced as the GPU enters and exits
 * RC6 (the GPU has its own internal power context, except on gen5). Though
 * something like a context does exist for the media ring, the code only
 * supports contexts for the render ring.
 *
 * In software, there is a distinction between contexts created by the user,
 * and the default HW context. The default HW context is used by GPU clients
 * that do not request setup of their own hardware context. The default
 * context's state is never restored to help prevent programming errors. This
 * would happen if a client ran and piggy-backed off another client's GPU state.
 * The default context only exists to give the GPU some offset to load as the
 * current to invoke a save of the context we actually care about. In fact, the
 * code could likely be constructed, albeit in a more complicated fashion, to
 * never use the default context, though that limits the driver's ability to
 * swap out, and/or destroy other contexts.
 *
 * All other contexts are created as a request by the GPU client. These contexts
 * store GPU state, and thus allow GPU clients to not re-emit state (and
 * potentially query certain state) at any time. The kernel driver makes
 * certain that the appropriate commands are inserted.
 *
 * The context life cycle is semi-complicated in that context BOs may live
 * longer than the context itself because of the way the hardware and object
 * tracking work. Below is a very crude representation of the state machine
 * describing the context life.
 *                                          refcount     pincount     active
 * S0: initial state                            0            0           0
 * S1: context created                          1            0           0
 * S2: context is currently running             2            1           X
 * S3: GPU referenced, but not current          2            0           1
 * S4: context is current, but destroyed        1            1           0
 * S5: like S3, but destroyed                   1            0           1
 *
 * The most common (but not all) transitions:
 * S0->S1: client creates a context
 * S1->S2: client submits execbuf with context
 * S2->S3: another client submits execbuf with context
 * S3->S1: context object was retired
 * S3->S2: client submits another execbuf
 * S2->S4: context destroy called with current context
 * S3->S5->S0: destroy path
 * S4->S5->S0: destroy path on current context
 *
 * There are two confusing terms used above:
 *  The "current context" means the context which is currently running on the
 *  GPU. The GPU has loaded its state already and has stored away the gtt
 *  offset of the BO. The GPU is not actively referencing the data at this
 *  offset, but it will on the next context switch. The only way to avoid this
 *  is to do a GPU reset.
 *
 *  An "active context" is one which was previously the "current context" and is
 *  on the active list waiting for the next context switch to occur. Until this
 *  happens, the object must remain at the same gtt offset. It is therefore
 *  possible to destroy a context, but have it still be active.
 *
 */

#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_trace.h"

#define ALL_L3_SLICES(dev) ((1 << NUM_L3_SLICES(dev)) - 1)
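
/*
 * Worked example (illustrative numbers, not from the specs): on a part where
 * NUM_L3_SLICES() reports 2 slices, ALL_L3_SLICES() evaluates to
 * (1 << 2) - 1 == 0x3, i.e. one bit per L3 slice. __create_hw_context() seeds
 * ctx->remap_slice with this mask, and do_rcs_switch() clears a bit each time
 * remap_l3() has been emitted for the corresponding slice, so L3 remap state
 * is only re-emitted for slices that still need it.
 */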

/* This is a HW constraint. The value below is the largest known requirement
 * I've seen in a spec to date, and that was a workaround for a non-shipping
 * part. It should be safe to decrease this, but it's more future proof as is.
 */
#define GEN6_CONTEXT_ALIGN (64<<10)
#define GEN7_CONTEXT_ALIGN 4096

static size_t get_context_alignment(struct drm_i915_private *dev_priv)
{
	if (IS_GEN6(dev_priv))
		return GEN6_CONTEXT_ALIGN;

	return GEN7_CONTEXT_ALIGN;
}

static int get_context_size(struct drm_i915_private *dev_priv)
{
	int ret;
	u32 reg;

	switch (INTEL_GEN(dev_priv)) {
	case 6:
		reg = I915_READ(CXT_SIZE);
		ret = GEN6_CXT_TOTAL_SIZE(reg) * 64;
		break;
	case 7:
		reg = I915_READ(GEN7_CXT_SIZE);
		if (IS_HASWELL(dev_priv))
			ret = HSW_CXT_TOTAL_SIZE;
		else
			ret = GEN7_CXT_TOTAL_SIZE(reg) * 64;
		break;
	case 8:
		ret = GEN8_CXT_TOTAL_SIZE;
		break;
	default:
		BUG();
	}

	return ret;
}

void i915_gem_context_free(struct kref *ctx_ref)
{
	struct i915_gem_context *ctx = container_of(ctx_ref, typeof(*ctx), ref);
	int i;

	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
	trace_i915_context_free(ctx);
	GEM_BUG_ON(!ctx->closed);

	i915_ppgtt_put(ctx->ppgtt);

	for (i = 0; i < I915_NUM_ENGINES; i++) {
		struct intel_context *ce = &ctx->engine[i];

		if (!ce->state)
			continue;

		WARN_ON(ce->pin_count);
		if (ce->ring)
			intel_ring_free(ce->ring);

		i915_vma_put(ce->state);
	}

#if 0
	put_pid(ctx->pid);
#endif
	list_del(&ctx->link);

	ida_simple_remove(&ctx->i915->context_hw_ida, ctx->hw_id);
	kfree(ctx);
}

struct drm_i915_gem_object *
i915_gem_alloc_context_obj(struct drm_device *dev, size_t size)
{
	struct drm_i915_gem_object *obj;
	int ret;

	lockdep_assert_held(&dev->struct_mutex);

	obj = i915_gem_object_create(dev, size);
	if (IS_ERR(obj))
		return obj;

	/*
	 * Try to make the context utilize L3 as well as LLC.
	 *
	 * On VLV we don't have L3 controls in the PTEs so we
	 * shouldn't touch the cache level, especially as that
	 * would make the object snooped which might have a
	 * negative performance impact.
	 *
	 * Snooping is required on non-llc platforms in execlist
	 * mode, but since all GGTT accesses use PAT entry 0 we
	 * get snooping anyway regardless of cache_level.
	 *
	 * This is only applicable for Ivy Bridge devices since
	 * later platforms don't have L3 control bits in the PTE.
	 */
	if (IS_IVYBRIDGE(to_i915(dev))) {
		ret = i915_gem_object_set_cache_level(obj, I915_CACHE_L3_LLC);
		/* Failure shouldn't ever happen this early */
		if (WARN_ON(ret)) {
			i915_gem_object_put(obj);
			return ERR_PTR(ret);
		}
	}

	return obj;
}

static void i915_ppgtt_close(struct i915_address_space *vm)
{
	struct list_head *phases[] = {
		&vm->active_list,
		&vm->inactive_list,
		&vm->unbound_list,
		NULL,
	}, **phase;

	GEM_BUG_ON(vm->closed);
	vm->closed = true;

	for (phase = phases; *phase; phase++) {
		struct i915_vma *vma, *vn;

		list_for_each_entry_safe(vma, vn, *phase, vm_link)
			if (!i915_vma_is_closed(vma))
				i915_vma_close(vma);
	}
}

static void context_close(struct i915_gem_context *ctx)
{
	GEM_BUG_ON(ctx->closed);
	ctx->closed = true;
	if (ctx->ppgtt)
		i915_ppgtt_close(&ctx->ppgtt->base);
	ctx->file_priv = ERR_PTR(-EBADF);
	i915_gem_context_put(ctx);
}

static int assign_hw_id(struct drm_i915_private *dev_priv, unsigned *out)
{
	int ret;

	ret = ida_simple_get(&dev_priv->context_hw_ida,
			     0, MAX_CONTEXT_HW_ID, GFP_KERNEL);
	if (ret < 0) {
		/* Contexts are only released when no longer active.
		 * Flush any pending retires to hopefully release some
		 * stale contexts and try again.
		 */
		i915_gem_retire_requests(dev_priv);
		ret = ida_simple_get(&dev_priv->context_hw_ida,
				     0, MAX_CONTEXT_HW_ID, GFP_KERNEL);
		if (ret < 0)
			return ret;
	}

	*out = ret;
	return 0;
}

static struct i915_gem_context *
__create_hw_context(struct drm_device *dev,
		    struct drm_i915_file_private *file_priv)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct i915_gem_context *ctx;
	int ret;

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (ctx == NULL)
		return ERR_PTR(-ENOMEM);

	ret = assign_hw_id(dev_priv, &ctx->hw_id);
	if (ret) {
		kfree(ctx);
		return ERR_PTR(ret);
	}

	kref_init(&ctx->ref);
	list_add_tail(&ctx->link, &dev_priv->context_list);
	ctx->i915 = dev_priv;

	ctx->ggtt_alignment = get_context_alignment(dev_priv);

	if (dev_priv->hw_context_size) {
		struct drm_i915_gem_object *obj;
		struct i915_vma *vma;

		obj = i915_gem_alloc_context_obj(dev,
						 dev_priv->hw_context_size);
		if (IS_ERR(obj)) {
			ret = PTR_ERR(obj);
			goto err_out;
		}

		vma = i915_vma_create(obj, &dev_priv->ggtt.base, NULL);
		if (IS_ERR(vma)) {
			i915_gem_object_put(obj);
			ret = PTR_ERR(vma);
			goto err_out;
		}

		ctx->engine[RCS].state = vma;
	}

	/* Default context will never have a file_priv */
	if (file_priv != NULL) {
		ret = idr_alloc(&file_priv->context_idr, ctx,
				DEFAULT_CONTEXT_HANDLE, 0, GFP_KERNEL);
		if (ret < 0)
			goto err_out;
	} else
		ret = DEFAULT_CONTEXT_HANDLE;

	ctx->file_priv = file_priv;
	if (file_priv)
#ifndef __DragonFly__
		ctx->pid = get_task_pid(current, PIDTYPE_PID);
#else
		ctx->pid = DRM_CURRENTPID;
#endif

	ctx->user_handle = ret;
	/* NB: Mark all slices as needing a remap so that when the context first
	 * loads it will restore whatever remap state already exists. If there
	 * is no remap info, it will be a NOP.
	 */
	ctx->remap_slice = ALL_L3_SLICES(dev_priv);

	ctx->hang_stats.ban_period_seconds = DRM_I915_CTX_BAN_PERIOD;
	ctx->ring_size = 4 * PAGE_SIZE;
	ctx->desc_template = GEN8_CTX_ADDRESSING_MODE(dev_priv) <<
			     GEN8_CTX_ADDRESSING_MODE_SHIFT;
	ATOMIC_INIT_NOTIFIER_HEAD(&ctx->status_notifier);

	return ctx;

err_out:
	context_close(ctx);
	return ERR_PTR(ret);
}

/**
 * The default context needs to exist per ring that uses contexts. It stores the
 * context state of the GPU for applications that don't utilize HW contexts, as
 * well as an idle case.
 */
static struct i915_gem_context *
i915_gem_create_context(struct drm_device *dev,
			struct drm_i915_file_private *file_priv)
{
	struct i915_gem_context *ctx;

	lockdep_assert_held(&dev->struct_mutex);

	ctx = __create_hw_context(dev, file_priv);
	if (IS_ERR(ctx))
		return ctx;

	if (USES_FULL_PPGTT(dev)) {
		struct i915_hw_ppgtt *ppgtt =
			i915_ppgtt_create(to_i915(dev), file_priv);

		if (IS_ERR(ppgtt)) {
			DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n",
					 PTR_ERR(ppgtt));
			idr_remove(&file_priv->context_idr, ctx->user_handle);
			context_close(ctx);
			return ERR_CAST(ppgtt);
		}

		ctx->ppgtt = ppgtt;
	}

	trace_i915_context_create(ctx);

	return ctx;
}

/**
 * i915_gem_context_create_gvt - create a GVT GEM context
 * @dev: drm device *
 *
 * This function is used to create a GVT specific GEM context.
 *
 * Returns:
 * pointer to i915_gem_context on success, error pointer if failed
 *
 */
struct i915_gem_context *
i915_gem_context_create_gvt(struct drm_device *dev)
{
	struct i915_gem_context *ctx;
	int ret;

	if (!IS_ENABLED(CONFIG_DRM_I915_GVT))
		return ERR_PTR(-ENODEV);

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ERR_PTR(ret);

	ctx = i915_gem_create_context(dev, NULL);
	if (IS_ERR(ctx))
		goto out;

	ctx->execlists_force_single_submission = true;
	ctx->ring_size = 512 * PAGE_SIZE; /* Max ring buffer size */
out:
	mutex_unlock(&dev->struct_mutex);
	return ctx;
}

static void i915_gem_context_unpin(struct i915_gem_context *ctx,
				   struct intel_engine_cs *engine)
{
	if (i915.enable_execlists) {
		intel_lr_context_unpin(ctx, engine);
	} else {
		struct intel_context *ce = &ctx->engine[engine->id];

		if (ce->state)
			i915_vma_unpin(ce->state);

		i915_gem_context_put(ctx);
	}
}

int i915_gem_context_init(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct i915_gem_context *ctx;

	/* Init should only be called once per module load. Eventually the
	 * restriction on the context_disabled check can be loosened. */
	if (WARN_ON(dev_priv->kernel_context))
		return 0;

	if (intel_vgpu_active(dev_priv) &&
	    HAS_LOGICAL_RING_CONTEXTS(dev_priv)) {
		if (!i915.enable_execlists) {
			DRM_INFO("Only EXECLIST mode is supported in vgpu.\n");
			return -EINVAL;
		}
	}

	/* Using the simple ida interface, the max is limited by sizeof(int) */
	BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX);
	ida_init(&dev_priv->context_hw_ida);

	if (i915.enable_execlists) {
		/* NB: intentionally left blank. We will allocate our own
		 * backing objects as we need them, thank you very much */
		dev_priv->hw_context_size = 0;
	} else if (HAS_HW_CONTEXTS(dev_priv)) {
		dev_priv->hw_context_size =
			round_up(get_context_size(dev_priv), 4096);
		if (dev_priv->hw_context_size > (1<<20)) {
			DRM_DEBUG_DRIVER("Disabling HW Contexts; invalid size %d\n",
					 dev_priv->hw_context_size);
			dev_priv->hw_context_size = 0;
		}
	}

	ctx = i915_gem_create_context(dev, NULL);
	if (IS_ERR(ctx)) {
		DRM_ERROR("Failed to create default global context (error %ld)\n",
			  PTR_ERR(ctx));
		return PTR_ERR(ctx);
	}

	dev_priv->kernel_context = ctx;

	DRM_DEBUG_DRIVER("%s context support initialized\n",
			 i915.enable_execlists ? "LR" :
			 dev_priv->hw_context_size ? "HW" : "fake");
	return 0;
}

void i915_gem_context_lost(struct drm_i915_private *dev_priv)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	lockdep_assert_held(&dev_priv->drm.struct_mutex);

	for_each_engine(engine, dev_priv, id) {
		if (engine->last_context) {
			i915_gem_context_unpin(engine->last_context, engine);
			engine->last_context = NULL;
		}
	}

	/* Force the GPU state to be restored on enabling */
	if (!i915.enable_execlists) {
		struct i915_gem_context *ctx;

		list_for_each_entry(ctx, &dev_priv->context_list, link) {
			if (!i915_gem_context_is_default(ctx))
				continue;

			for_each_engine(engine, dev_priv, id)
				ctx->engine[engine->id].initialised = false;

			ctx->remap_slice = ALL_L3_SLICES(dev_priv);
		}

		for_each_engine(engine, dev_priv, id) {
			struct intel_context *kce =
				&dev_priv->kernel_context->engine[engine->id];

			kce->initialised = true;
		}
	}
}

void i915_gem_context_fini(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct i915_gem_context *dctx = dev_priv->kernel_context;

	lockdep_assert_held(&dev->struct_mutex);

	context_close(dctx);
	dev_priv->kernel_context = NULL;

	ida_destroy(&dev_priv->context_hw_ida);
}

static int context_idr_cleanup(int id, void *p, void *data)
{
	struct i915_gem_context *ctx = p;

	context_close(ctx);
	return 0;
}

int i915_gem_context_open(struct drm_device *dev, struct drm_file *file)
{
	struct drm_i915_file_private *file_priv = file->driver_priv;
	struct i915_gem_context *ctx;

	idr_init(&file_priv->context_idr);

	mutex_lock(&dev->struct_mutex);
	ctx = i915_gem_create_context(dev, file_priv);
	mutex_unlock(&dev->struct_mutex);

	if (IS_ERR(ctx)) {
		idr_destroy(&file_priv->context_idr);
		return PTR_ERR(ctx);
	}

	return 0;
}

void i915_gem_context_close(struct drm_device *dev, struct drm_file *file)
{
	struct drm_i915_file_private *file_priv = file->driver_priv;

	lockdep_assert_held(&dev->struct_mutex);

	idr_for_each(&file_priv->context_idr, context_idr_cleanup, NULL);
	idr_destroy(&file_priv->context_idr);
}

static inline int
mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
{
	struct drm_i915_private *dev_priv = req->i915;
	struct intel_ring *ring = req->ring;
	struct intel_engine_cs *engine = req->engine;
	enum intel_engine_id id;
	u32 flags = hw_flags | MI_MM_SPACE_GTT;
	const int num_rings =
		/* Use an extended w/a on ivb+ if signalling from other rings */
		i915.semaphores ?
		INTEL_INFO(dev_priv)->num_rings - 1 :
		0;
	int len, ret;

	/* w/a: If Flush TLB Invalidation Mode is enabled, driver must do a TLB
	 * invalidation prior to MI_SET_CONTEXT. On GEN6 we don't set the value
	 * explicitly, so we rely on the value at ring init, stored in
	 * itlb_before_ctx_switch.
	 */
	if (IS_GEN6(dev_priv)) {
		ret = engine->emit_flush(req, EMIT_INVALIDATE);
		if (ret)
			return ret;
	}

	/* These flags are for resource streamer on HSW+ */
	if (IS_HASWELL(dev_priv) || INTEL_GEN(dev_priv) >= 8)
		flags |= (HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN);
	else if (INTEL_GEN(dev_priv) < 8)
		flags |= (MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN);

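	/* Rough dword budget for the emission below (descriptive note): the
	 * base of 4 covers MI_NOOP, MI_SET_CONTEXT, the context address and
	 * the trailing workaround MI_NOOP. On gen7+ the MI_ARB_ON_OFF
	 * disable/enable pair adds 2. When signalling other rings, each of
	 * the two MI_LOAD_REGISTER_IMM blocks costs 1 + 2 * num_rings dwords
	 * and the MI_STORE_REGISTER_MEM delay costs 4, hence 4*num_rings + 6.
	 */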
	len = 4;
	if (INTEL_GEN(dev_priv) >= 7)
		len += 2 + (num_rings ? 4*num_rings + 6 : 0);

	ret = intel_ring_begin(req, len);
	if (ret)
		return ret;

	/* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */
	if (INTEL_GEN(dev_priv) >= 7) {
		intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_DISABLE);
		if (num_rings) {
			struct intel_engine_cs *signaller;

			intel_ring_emit(ring,
					MI_LOAD_REGISTER_IMM(num_rings));
			for_each_engine(signaller, dev_priv, id) {
				if (signaller == engine)
					continue;

				intel_ring_emit_reg(ring,
						    RING_PSMI_CTL(signaller->mmio_base));
				intel_ring_emit(ring,
						_MASKED_BIT_ENABLE(GEN6_PSMI_SLEEP_MSG_DISABLE));
			}
		}
	}

	intel_ring_emit(ring, MI_NOOP);
	intel_ring_emit(ring, MI_SET_CONTEXT);
	intel_ring_emit(ring,
			i915_ggtt_offset(req->ctx->engine[RCS].state) | flags);
	/*
	 * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP
	 * WaMiSetContext_Hang:snb,ivb,vlv
	 */
	intel_ring_emit(ring, MI_NOOP);

	if (INTEL_GEN(dev_priv) >= 7) {
		if (num_rings) {
			struct intel_engine_cs *signaller;
			i915_reg_t last_reg = {}; /* keep gcc quiet */

			intel_ring_emit(ring,
					MI_LOAD_REGISTER_IMM(num_rings));
			for_each_engine(signaller, dev_priv, id) {
				if (signaller == engine)
					continue;

				last_reg = RING_PSMI_CTL(signaller->mmio_base);
				intel_ring_emit_reg(ring, last_reg);
				intel_ring_emit(ring,
						_MASKED_BIT_DISABLE(GEN6_PSMI_SLEEP_MSG_DISABLE));
			}

			/* Insert a delay before the next switch! */
			intel_ring_emit(ring,
					MI_STORE_REGISTER_MEM |
					MI_SRM_LRM_GLOBAL_GTT);
			intel_ring_emit_reg(ring, last_reg);
			intel_ring_emit(ring,
					i915_ggtt_offset(engine->scratch));
			intel_ring_emit(ring, MI_NOOP);
		}
		intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_ENABLE);
	}

	intel_ring_advance(ring);

	return ret;
}

static int remap_l3(struct drm_i915_gem_request *req, int slice)
{
	u32 *remap_info = req->i915->l3_parity.remap_info[slice];
	struct intel_ring *ring = req->ring;
	int i, ret;

	if (!remap_info)
		return 0;

	ret = intel_ring_begin(req, GEN7_L3LOG_SIZE/4 * 2 + 2);
	if (ret)
		return ret;

	/*
	 * Note: We do not worry about the concurrent register cacheline hang
	 * here because no other code should access these registers other than
	 * at initialization time.
	 */
	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4));
	for (i = 0; i < GEN7_L3LOG_SIZE/4; i++) {
		intel_ring_emit_reg(ring, GEN7_L3LOG(slice, i));
		intel_ring_emit(ring, remap_info[i]);
	}
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}

static inline bool skip_rcs_switch(struct i915_hw_ppgtt *ppgtt,
				   struct intel_engine_cs *engine,
				   struct i915_gem_context *to)
{
	if (to->remap_slice)
		return false;

	if (!to->engine[RCS].initialised)
		return false;

	if (ppgtt && (intel_engine_flag(engine) & ppgtt->pd_dirty_rings))
		return false;

	return to == engine->last_context;
}

static bool
needs_pd_load_pre(struct i915_hw_ppgtt *ppgtt,
		  struct intel_engine_cs *engine,
		  struct i915_gem_context *to)
{
	if (!ppgtt)
		return false;

	/* Always load the ppgtt on first use */
	if (!engine->last_context)
		return true;

	/* Same context without new entries, skip */
	if (engine->last_context == to &&
	    !(intel_engine_flag(engine) & ppgtt->pd_dirty_rings))
		return false;

	if (engine->id != RCS)
		return true;

	if (INTEL_GEN(engine->i915) < 8)
		return true;

	return false;
}

static bool
needs_pd_load_post(struct i915_hw_ppgtt *ppgtt,
		   struct i915_gem_context *to,
		   u32 hw_flags)
{
	if (!ppgtt)
		return false;

	if (!IS_GEN8(to->i915))
		return false;

	if (hw_flags & MI_RESTORE_INHIBIT)
		return true;

	return false;
}

static int do_rcs_switch(struct drm_i915_gem_request *req)
{
	struct i915_gem_context *to = req->ctx;
	struct intel_engine_cs *engine = req->engine;
	struct i915_hw_ppgtt *ppgtt = to->ppgtt ?: req->i915->mm.aliasing_ppgtt;
	struct i915_vma *vma = to->engine[RCS].state;
	struct i915_gem_context *from;
	u32 hw_flags;
	int ret, i;

	if (skip_rcs_switch(ppgtt, engine, to))
		return 0;

	/* Clear this page out of any CPU caches for coherent swap-in/out. */
	if (!(vma->flags & I915_VMA_GLOBAL_BIND)) {
		ret = i915_gem_object_set_to_gtt_domain(vma->obj, false);
		if (ret)
			return ret;
	}

	/* Trying to pin first makes error handling easier. */
	ret = i915_vma_pin(vma, 0, to->ggtt_alignment, PIN_GLOBAL);
	if (ret)
		return ret;

	/*
	 * Pin can switch back to the default context if we end up calling into
	 * evict_everything - as a last ditch gtt defrag effort that also
	 * switches to the default context. Hence we need to reload from here.
	 *
	 * XXX: Doing so is painfully broken!
	 */
	from = engine->last_context;

	if (needs_pd_load_pre(ppgtt, engine, to)) {
		/* Older GENs and non render rings still want the load first,
		 * "PP_DCLV followed by PP_DIR_BASE register through Load
		 * Register Immediate commands in Ring Buffer before submitting
		 * a context." */
		trace_switch_mm(engine, to);
		ret = ppgtt->switch_mm(ppgtt, req);
		if (ret)
			goto err;
	}

	if (!to->engine[RCS].initialised || i915_gem_context_is_default(to))
		/* NB: If we inhibit the restore, the context is not allowed to
		 * die because future work may end up depending on valid address
		 * space. This means we must enforce that a page table load
		 * occur when this occurs.
		 */
		hw_flags = MI_RESTORE_INHIBIT;
	else if (ppgtt && intel_engine_flag(engine) & ppgtt->pd_dirty_rings)
		hw_flags = MI_FORCE_RESTORE;
	else
		hw_flags = 0;

	if (to != from || (hw_flags & MI_FORCE_RESTORE)) {
		ret = mi_set_context(req, hw_flags);
		if (ret)
			goto err;
	}

	/* The backing object for the context is done after switching to the
	 * *next* context. Therefore we cannot retire the previous context until
	 * the next context has already started running. In fact, the below code
	 * is a bit suboptimal because the retiring can occur simply after the
	 * MI_SET_CONTEXT instead of when the next seqno has completed.
	 */
	if (from != NULL) {
		/* As long as MI_SET_CONTEXT is serializing, ie. it flushes the
		 * whole damn pipeline, we don't need to explicitly mark the
		 * object dirty. The only exception is that the context must be
		 * correct in case the object gets swapped out. Ideally we'd be
		 * able to defer doing this until we know the object would be
		 * swapped, but there is no way to do that yet.
		 */
		i915_vma_move_to_active(from->engine[RCS].state, req, 0);
		/* state is kept alive until the next request */
		i915_vma_unpin(from->engine[RCS].state);
		i915_gem_context_put(from);
	}
	engine->last_context = i915_gem_context_get(to);

	/* GEN8 does *not* require an explicit reload if the PDPs have been
	 * setup, and we do not wish to move them.
	 */
	if (needs_pd_load_post(ppgtt, to, hw_flags)) {
		trace_switch_mm(engine, to);
		ret = ppgtt->switch_mm(ppgtt, req);
		/* The hardware context switch is emitted, but we haven't
		 * actually changed the state - so it's probably safe to bail
		 * here. Still, let the user know something dangerous has
		 * happened.
		 */
		if (ret)
			return ret;
	}

	if (ppgtt)
		ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);

	for (i = 0; i < MAX_L3_SLICES; i++) {
		if (!(to->remap_slice & (1<<i)))
			continue;

		ret = remap_l3(req, i);
		if (ret)
			return ret;

		to->remap_slice &= ~(1<<i);
	}

	if (!to->engine[RCS].initialised) {
		if (engine->init_context) {
			ret = engine->init_context(req);
			if (ret)
				return ret;
		}
		to->engine[RCS].initialised = true;
	}

	return 0;

err:
	i915_vma_unpin(vma);
	return ret;
}

/**
 * i915_switch_context() - perform a GPU context switch.
 * @req: request for which we'll execute the context switch
 *
 * The context life cycle is simple. The context refcount is incremented and
 * decremented by 1 on create and destroy. If the context is in use by the GPU,
 * it will have a refcount > 1. This allows us to destroy the context abstract
 * object while letting the normal object tracking destroy the backing BO.
 *
 * This function should not be used in execlists mode. Instead the context is
 * switched by writing to the ELSP and requests keep a reference to their
 * context.
 */
int i915_switch_context(struct drm_i915_gem_request *req)
{
	struct intel_engine_cs *engine = req->engine;

	lockdep_assert_held(&req->i915->drm.struct_mutex);
	if (i915.enable_execlists)
		return 0;

	if (!req->ctx->engine[engine->id].state) {
		struct i915_gem_context *to = req->ctx;
		struct i915_hw_ppgtt *ppgtt =
			to->ppgtt ?: req->i915->mm.aliasing_ppgtt;

		if (needs_pd_load_pre(ppgtt, engine, to)) {
			int ret;

			trace_switch_mm(engine, to);
			ret = ppgtt->switch_mm(ppgtt, req);
			if (ret)
				return ret;

			ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);
		}

		if (to != engine->last_context) {
			if (engine->last_context)
				i915_gem_context_put(engine->last_context);
			engine->last_context = i915_gem_context_get(to);
		}

		return 0;
	}

	return do_rcs_switch(req);
}

int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	for_each_engine(engine, dev_priv, id) {
		struct drm_i915_gem_request *req;
		int ret;

		if (engine->last_context == NULL)
			continue;

		if (engine->last_context == dev_priv->kernel_context)
			continue;

		req = i915_gem_request_alloc(engine, dev_priv->kernel_context);
		if (IS_ERR(req))
			return PTR_ERR(req);

		ret = i915_switch_context(req);
		i915_add_request_no_flush(req);
		if (ret)
			return ret;
	}

	return 0;
}

static bool contexts_enabled(struct drm_device *dev)
{
	return i915.enable_execlists || to_i915(dev)->hw_context_size;
}

int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
				  struct drm_file *file)
{
	struct drm_i915_gem_context_create *args = data;
	struct drm_i915_file_private *file_priv = file->driver_priv;
	struct i915_gem_context *ctx;
	int ret;

	if (!contexts_enabled(dev))
		return -ENODEV;

	if (args->pad != 0)
		return -EINVAL;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	ctx = i915_gem_create_context(dev, file_priv);
	mutex_unlock(&dev->struct_mutex);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	args->ctx_id = ctx->user_handle;
	DRM_DEBUG_DRIVER("HW context %d created\n", args->ctx_id);

	return 0;
}

int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
				   struct drm_file *file)
{
	struct drm_i915_gem_context_destroy *args = data;
	struct drm_i915_file_private *file_priv = file->driver_priv;
	struct i915_gem_context *ctx;
	int ret;

	if (args->pad != 0)
		return -EINVAL;

	if (args->ctx_id == DEFAULT_CONTEXT_HANDLE)
		return -ENOENT;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	ctx = i915_gem_context_lookup(file_priv, args->ctx_id);
	if (IS_ERR(ctx)) {
		mutex_unlock(&dev->struct_mutex);
		return PTR_ERR(ctx);
	}

	idr_remove(&file_priv->context_idr, ctx->user_handle);
	context_close(ctx);
	mutex_unlock(&dev->struct_mutex);

	DRM_DEBUG_DRIVER("HW context %d destroyed\n", args->ctx_id);
	return 0;
}
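
/*
 * For reference, a rough sketch of how a userspace client drives the two
 * ioctls above through libdrm (illustrative only, error handling omitted):
 *
 *	struct drm_i915_gem_context_create create = {};
 *	struct drm_i915_gem_context_destroy destroy = {};
 *
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create);
 *	... create.ctx_id now names the new context; execbuffer2 submissions
 *	    carry it in rsvd1 (see i915_execbuffer2_set_context_id()) ...
 *	destroy.ctx_id = create.ctx_id;
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, &destroy);
 */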

int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
				    struct drm_file *file)
{
	struct drm_i915_file_private *file_priv = file->driver_priv;
	struct drm_i915_gem_context_param *args = data;
	struct i915_gem_context *ctx;
	int ret;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	ctx = i915_gem_context_lookup(file_priv, args->ctx_id);
	if (IS_ERR(ctx)) {
		mutex_unlock(&dev->struct_mutex);
		return PTR_ERR(ctx);
	}

	args->size = 0;
	switch (args->param) {
	case I915_CONTEXT_PARAM_BAN_PERIOD:
		args->value = ctx->hang_stats.ban_period_seconds;
		break;
	case I915_CONTEXT_PARAM_NO_ZEROMAP:
		args->value = ctx->flags & CONTEXT_NO_ZEROMAP;
		break;
	case I915_CONTEXT_PARAM_GTT_SIZE:
		if (ctx->ppgtt)
			args->value = ctx->ppgtt->base.total;
		else if (to_i915(dev)->mm.aliasing_ppgtt)
			args->value = to_i915(dev)->mm.aliasing_ppgtt->base.total;
		else
			args->value = to_i915(dev)->ggtt.base.total;
		break;
	case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
		args->value = !!(ctx->flags & CONTEXT_NO_ERROR_CAPTURE);
		break;
	default:
		ret = -EINVAL;
		break;
	}
	mutex_unlock(&dev->struct_mutex);

	return ret;
}

int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
				    struct drm_file *file)
{
	struct drm_i915_file_private *file_priv = file->driver_priv;
	struct drm_i915_gem_context_param *args = data;
	struct i915_gem_context *ctx;
	int ret;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	ctx = i915_gem_context_lookup(file_priv, args->ctx_id);
	if (IS_ERR(ctx)) {
		mutex_unlock(&dev->struct_mutex);
		return PTR_ERR(ctx);
	}

	switch (args->param) {
	case I915_CONTEXT_PARAM_BAN_PERIOD:
		if (args->size)
			ret = -EINVAL;
		else if (args->value < ctx->hang_stats.ban_period_seconds &&
			 !capable(CAP_SYS_ADMIN))
			ret = -EPERM;
		else
			ctx->hang_stats.ban_period_seconds = args->value;
		break;
	case I915_CONTEXT_PARAM_NO_ZEROMAP:
		if (args->size) {
			ret = -EINVAL;
		} else {
			ctx->flags &= ~CONTEXT_NO_ZEROMAP;
			ctx->flags |= args->value ? CONTEXT_NO_ZEROMAP : 0;
		}
		break;
	case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
		if (args->size) {
			ret = -EINVAL;
		} else {
			if (args->value)
				ctx->flags |= CONTEXT_NO_ERROR_CAPTURE;
			else
				ctx->flags &= ~CONTEXT_NO_ERROR_CAPTURE;
		}
		break;
	default:
		ret = -EINVAL;
		break;
	}
	mutex_unlock(&dev->struct_mutex);

	return ret;
}

int i915_gem_context_reset_stats_ioctl(struct drm_device *dev,
				       void *data, struct drm_file *file)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct drm_i915_reset_stats *args = data;
	struct i915_ctx_hang_stats *hs;
	struct i915_gem_context *ctx;
	int ret;

	if (args->flags || args->pad)
		return -EINVAL;

	if (args->ctx_id == DEFAULT_CONTEXT_HANDLE && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	ctx = i915_gem_context_lookup(file->driver_priv, args->ctx_id);
	if (IS_ERR(ctx)) {
		mutex_unlock(&dev->struct_mutex);
		return PTR_ERR(ctx);
	}
	hs = &ctx->hang_stats;

	if (capable(CAP_SYS_ADMIN))
		args->reset_count = i915_reset_count(&dev_priv->gpu_error);
	else
		args->reset_count = 0;

	args->batch_active = hs->batch_active;
	args->batch_pending = hs->batch_pending;

	mutex_unlock(&dev->struct_mutex);

	return 0;
}