1 /* 2 * Copyright © 2014 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 */ 24 #include <linux/firmware.h> 25 #include <linux/circ_buf.h> 26 #include "i915_drv.h" 27 #include "intel_guc.h" 28 29 /** 30 * DOC: GuC-based command submission 31 * 32 * i915_guc_client: 33 * We use the term client to avoid confusion with contexts. A i915_guc_client is 34 * equivalent to GuC object guc_context_desc. This context descriptor is 35 * allocated from a pool of 1024 entries. Kernel driver will allocate doorbell 36 * and workqueue for it. Also the process descriptor (guc_process_desc), which 37 * is mapped to client space. So the client can write Work Item then ring the 38 * doorbell. 39 * 40 * To simplify the implementation, we allocate one gem object that contains all 41 * pages for doorbell, process descriptor and workqueue. 42 * 43 * The Scratch registers: 44 * There are 16 MMIO-based registers start from 0xC180. The kernel driver writes 45 * a value to the action register (SOFT_SCRATCH_0) along with any data. It then 46 * triggers an interrupt on the GuC via another register write (0xC4C8). 47 * Firmware writes a success/fail code back to the action register after 48 * processes the request. The kernel driver polls waiting for this update and 49 * then proceeds. 50 * See host2guc_action() 51 * 52 * Doorbells: 53 * Doorbells are interrupts to uKernel. A doorbell is a single cache line (QW) 54 * mapped into process space. 55 * 56 * Work Items: 57 * There are several types of work items that the host may place into a 58 * workqueue, each with its own requirements and limitations. Currently only 59 * WQ_TYPE_INORDER is needed to support legacy submission via GuC, which 60 * represents in-order queue. The kernel driver packs ring tail pointer and an 61 * ELSP context descriptor dword into Work Item. 62 * See guc_add_workqueue_item() 63 * 64 */ 65 66 /* 67 * Read GuC command/status register (SOFT_SCRATCH_0) 68 * Return true if it contains a response rather than a command 69 */ 70 static inline bool host2guc_action_response(struct drm_i915_private *dev_priv, 71 u32 *status) 72 { 73 u32 val = I915_READ(SOFT_SCRATCH(0)); 74 *status = val; 75 return GUC2HOST_IS_RESPONSE(val); 76 } 77 78 static int host2guc_action(struct intel_guc *guc, u32 *data, u32 len) 79 { 80 struct drm_i915_private *dev_priv = guc_to_i915(guc); 81 u32 status; 82 int i; 83 int ret; 84 85 if (WARN_ON(len < 1 || len > 15)) 86 return -EINVAL; 87 88 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 89 90 dev_priv->guc.action_count += 1; 91 dev_priv->guc.action_cmd = data[0]; 92 93 for (i = 0; i < len; i++) 94 I915_WRITE(SOFT_SCRATCH(i), data[i]); 95 96 POSTING_READ(SOFT_SCRATCH(i - 1)); 97 98 I915_WRITE(HOST2GUC_INTERRUPT, HOST2GUC_TRIGGER); 99 100 /* 101 * Fast commands should complete in less than 10us, so sample quickly 102 * up to that length of time, then switch to a slower sleep-wait loop. 103 * No HOST2GUC command should ever take longer than 10ms. 104 */ 105 ret = wait_for_us(host2guc_action_response(dev_priv, &status), 10); 106 if (ret) 107 ret = wait_for(host2guc_action_response(dev_priv, &status), 10); 108 if (status != GUC2HOST_STATUS_SUCCESS) { 109 /* 110 * Either the GuC explicitly returned an error (which 111 * we convert to -EIO here) or no response at all was 112 * received within the timeout limit (-ETIMEDOUT) 113 */ 114 if (ret != -ETIMEDOUT) 115 ret = -EIO; 116 117 DRM_ERROR("GUC: host2guc action 0x%X failed. ret=%d " 118 "status=0x%08X response=0x%08X\n", 119 data[0], ret, status, 120 I915_READ(SOFT_SCRATCH(15))); 121 122 dev_priv->guc.action_fail += 1; 123 dev_priv->guc.action_err = ret; 124 } 125 dev_priv->guc.action_status = status; 126 127 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 128 129 return ret; 130 } 131 132 /* 133 * Tell the GuC to allocate or deallocate a specific doorbell 134 */ 135 136 static int host2guc_allocate_doorbell(struct intel_guc *guc, 137 struct i915_guc_client *client) 138 { 139 u32 data[2]; 140 141 data[0] = HOST2GUC_ACTION_ALLOCATE_DOORBELL; 142 data[1] = client->ctx_index; 143 144 return host2guc_action(guc, data, 2); 145 } 146 147 static int host2guc_release_doorbell(struct intel_guc *guc, 148 struct i915_guc_client *client) 149 { 150 u32 data[2]; 151 152 data[0] = HOST2GUC_ACTION_DEALLOCATE_DOORBELL; 153 data[1] = client->ctx_index; 154 155 return host2guc_action(guc, data, 2); 156 } 157 158 static int host2guc_sample_forcewake(struct intel_guc *guc, 159 struct i915_guc_client *client) 160 { 161 struct drm_i915_private *dev_priv = guc_to_i915(guc); 162 u32 data[2]; 163 164 data[0] = HOST2GUC_ACTION_SAMPLE_FORCEWAKE; 165 /* WaRsDisableCoarsePowerGating:skl,bxt */ 166 if (!intel_enable_rc6() || NEEDS_WaRsDisableCoarsePowerGating(dev_priv)) 167 data[1] = 0; 168 else 169 /* bit 0 and 1 are for Render and Media domain separately */ 170 data[1] = GUC_FORCEWAKE_RENDER | GUC_FORCEWAKE_MEDIA; 171 172 return host2guc_action(guc, data, ARRAY_SIZE(data)); 173 } 174 175 /* 176 * Initialise, update, or clear doorbell data shared with the GuC 177 * 178 * These functions modify shared data and so need access to the mapped 179 * client object which contains the page being used for the doorbell 180 */ 181 182 static int guc_update_doorbell_id(struct intel_guc *guc, 183 struct i915_guc_client *client, 184 u16 new_id) 185 { 186 struct sg_table *sg = guc->ctx_pool_obj->pages; 187 void *doorbell_bitmap = guc->doorbell_bitmap; 188 struct guc_doorbell_info *doorbell; 189 struct guc_context_desc desc; 190 size_t len; 191 192 doorbell = client->client_base + client->doorbell_offset; 193 194 if (client->doorbell_id != GUC_INVALID_DOORBELL_ID && 195 test_bit(client->doorbell_id, doorbell_bitmap)) { 196 /* Deactivate the old doorbell */ 197 doorbell->db_status = GUC_DOORBELL_DISABLED; 198 (void)host2guc_release_doorbell(guc, client); 199 __clear_bit(client->doorbell_id, doorbell_bitmap); 200 } 201 202 /* Update the GuC's idea of the doorbell ID */ 203 len = sg_pcopy_to_buffer(sg->sgl, sg->nents, &desc, sizeof(desc), 204 sizeof(desc) * client->ctx_index); 205 if (len != sizeof(desc)) 206 return -EFAULT; 207 desc.db_id = new_id; 208 len = sg_pcopy_from_buffer(sg->sgl, sg->nents, &desc, sizeof(desc), 209 sizeof(desc) * client->ctx_index); 210 if (len != sizeof(desc)) 211 return -EFAULT; 212 213 client->doorbell_id = new_id; 214 if (new_id == GUC_INVALID_DOORBELL_ID) 215 return 0; 216 217 /* Activate the new doorbell */ 218 __set_bit(new_id, doorbell_bitmap); 219 doorbell->cookie = 0; 220 doorbell->db_status = GUC_DOORBELL_ENABLED; 221 return host2guc_allocate_doorbell(guc, client); 222 } 223 224 static int guc_init_doorbell(struct intel_guc *guc, 225 struct i915_guc_client *client, 226 uint16_t db_id) 227 { 228 return guc_update_doorbell_id(guc, client, db_id); 229 } 230 231 static void guc_disable_doorbell(struct intel_guc *guc, 232 struct i915_guc_client *client) 233 { 234 (void)guc_update_doorbell_id(guc, client, GUC_INVALID_DOORBELL_ID); 235 236 /* XXX: wait for any interrupts */ 237 /* XXX: wait for workqueue to drain */ 238 } 239 240 static uint16_t 241 select_doorbell_register(struct intel_guc *guc, uint32_t priority) 242 { 243 /* 244 * The bitmap tracks which doorbell registers are currently in use. 245 * It is split into two halves; the first half is used for normal 246 * priority contexts, the second half for high-priority ones. 247 * Note that logically higher priorities are numerically less than 248 * normal ones, so the test below means "is it high-priority?" 249 */ 250 const bool hi_pri = (priority <= GUC_CTX_PRIORITY_HIGH); 251 const uint16_t half = GUC_MAX_DOORBELLS / 2; 252 const uint16_t start = hi_pri ? half : 0; 253 const uint16_t end = start + half; 254 uint16_t id; 255 256 id = find_next_zero_bit(guc->doorbell_bitmap, end, start); 257 if (id == end) 258 id = GUC_INVALID_DOORBELL_ID; 259 260 DRM_DEBUG_DRIVER("assigned %s priority doorbell id 0x%x\n", 261 hi_pri ? "high" : "normal", id); 262 263 return id; 264 } 265 266 /* 267 * Select, assign and relase doorbell cachelines 268 * 269 * These functions track which doorbell cachelines are in use. 270 * The data they manipulate is protected by the host2guc lock. 271 */ 272 273 static uint32_t select_doorbell_cacheline(struct intel_guc *guc) 274 { 275 const uint32_t cacheline_size = cache_line_size(); 276 uint32_t offset; 277 278 /* Doorbell uses a single cache line within a page */ 279 offset = offset_in_page(guc->db_cacheline); 280 281 /* Moving to next cache line to reduce contention */ 282 guc->db_cacheline += cacheline_size; 283 284 DRM_DEBUG_DRIVER("selected doorbell cacheline 0x%x, next 0x%x, linesize %u\n", 285 offset, guc->db_cacheline, cacheline_size); 286 287 return offset; 288 } 289 290 /* 291 * Initialise the process descriptor shared with the GuC firmware. 292 */ 293 static void guc_init_proc_desc(struct intel_guc *guc, 294 struct i915_guc_client *client) 295 { 296 struct guc_process_desc *desc; 297 298 desc = client->client_base + client->proc_desc_offset; 299 300 memset(desc, 0, sizeof(*desc)); 301 302 /* 303 * XXX: pDoorbell and WQVBaseAddress are pointers in process address 304 * space for ring3 clients (set them as in mmap_ioctl) or kernel 305 * space for kernel clients (map on demand instead? May make debug 306 * easier to have it mapped). 307 */ 308 desc->wq_base_addr = 0; 309 desc->db_base_addr = 0; 310 311 desc->context_id = client->ctx_index; 312 desc->wq_size_bytes = client->wq_size; 313 desc->wq_status = WQ_STATUS_ACTIVE; 314 desc->priority = client->priority; 315 } 316 317 /* 318 * Initialise/clear the context descriptor shared with the GuC firmware. 319 * 320 * This descriptor tells the GuC where (in GGTT space) to find the important 321 * data structures relating to this client (doorbell, process descriptor, 322 * write queue, etc). 323 */ 324 325 static void guc_init_ctx_desc(struct intel_guc *guc, 326 struct i915_guc_client *client) 327 { 328 struct drm_i915_gem_object *client_obj = client->client_obj; 329 struct drm_i915_private *dev_priv = guc_to_i915(guc); 330 struct intel_engine_cs *engine; 331 struct i915_gem_context *ctx = client->owner; 332 struct guc_context_desc desc; 333 struct sg_table *sg; 334 u32 gfx_addr; 335 336 memset(&desc, 0, sizeof(desc)); 337 338 desc.attribute = GUC_CTX_DESC_ATTR_ACTIVE | GUC_CTX_DESC_ATTR_KERNEL; 339 desc.context_id = client->ctx_index; 340 desc.priority = client->priority; 341 desc.db_id = client->doorbell_id; 342 343 for_each_engine(engine, dev_priv) { 344 struct intel_context *ce = &ctx->engine[engine->id]; 345 struct guc_execlist_context *lrc = &desc.lrc[engine->guc_id]; 346 struct drm_i915_gem_object *obj; 347 348 /* TODO: We have a design issue to be solved here. Only when we 349 * receive the first batch, we know which engine is used by the 350 * user. But here GuC expects the lrc and ring to be pinned. It 351 * is not an issue for default context, which is the only one 352 * for now who owns a GuC client. But for future owner of GuC 353 * client, need to make sure lrc is pinned prior to enter here. 354 */ 355 if (!ce->state) 356 break; /* XXX: continue? */ 357 358 lrc->context_desc = lower_32_bits(ce->lrc_desc); 359 360 /* The state page is after PPHWSP */ 361 gfx_addr = i915_gem_obj_ggtt_offset(ce->state); 362 lrc->ring_lcra = gfx_addr + LRC_STATE_PN * PAGE_SIZE; 363 lrc->context_id = (client->ctx_index << GUC_ELC_CTXID_OFFSET) | 364 (engine->guc_id << GUC_ELC_ENGINE_OFFSET); 365 366 obj = ce->ringbuf->obj; 367 gfx_addr = i915_gem_obj_ggtt_offset(obj); 368 369 lrc->ring_begin = gfx_addr; 370 lrc->ring_end = gfx_addr + obj->base.size - 1; 371 lrc->ring_next_free_location = gfx_addr; 372 lrc->ring_current_tail_pointer_value = 0; 373 374 desc.engines_used |= (1 << engine->guc_id); 375 } 376 377 WARN_ON(desc.engines_used == 0); 378 379 /* 380 * The doorbell, process descriptor, and workqueue are all parts 381 * of the client object, which the GuC will reference via the GGTT 382 */ 383 gfx_addr = i915_gem_obj_ggtt_offset(client_obj); 384 desc.db_trigger_phy = sg_dma_address(client_obj->pages->sgl) + 385 client->doorbell_offset; 386 desc.db_trigger_cpu = (uintptr_t)client->client_base + 387 client->doorbell_offset; 388 desc.db_trigger_uk = gfx_addr + client->doorbell_offset; 389 desc.process_desc = gfx_addr + client->proc_desc_offset; 390 desc.wq_addr = gfx_addr + client->wq_offset; 391 desc.wq_size = client->wq_size; 392 393 /* 394 * XXX: Take LRCs from an existing context if this is not an 395 * IsKMDCreatedContext client 396 */ 397 desc.desc_private = (uintptr_t)client; 398 399 /* Pool context is pinned already */ 400 sg = guc->ctx_pool_obj->pages; 401 sg_pcopy_from_buffer(sg->sgl, sg->nents, &desc, sizeof(desc), 402 sizeof(desc) * client->ctx_index); 403 } 404 405 static void guc_fini_ctx_desc(struct intel_guc *guc, 406 struct i915_guc_client *client) 407 { 408 struct guc_context_desc desc; 409 struct sg_table *sg; 410 411 memset(&desc, 0, sizeof(desc)); 412 413 sg = guc->ctx_pool_obj->pages; 414 sg_pcopy_from_buffer(sg->sgl, sg->nents, &desc, sizeof(desc), 415 sizeof(desc) * client->ctx_index); 416 } 417 418 /** 419 * i915_guc_wq_check_space() - check that the GuC can accept a request 420 * @request: request associated with the commands 421 * 422 * Return: 0 if space is available 423 * -EAGAIN if space is not currently available 424 * 425 * This function must be called (and must return 0) before a request 426 * is submitted to the GuC via i915_guc_submit() below. Once a result 427 * of 0 has been returned, it remains valid until (but only until) 428 * the next call to submit(). 429 * 430 * This precheck allows the caller to determine in advance that space 431 * will be available for the next submission before committing resources 432 * to it, and helps avoid late failures with complicated recovery paths. 433 */ 434 int i915_guc_wq_check_space(struct drm_i915_gem_request *request) 435 { 436 const size_t wqi_size = sizeof(struct guc_wq_item); 437 struct i915_guc_client *gc = request->i915->guc.execbuf_client; 438 struct guc_process_desc *desc; 439 u32 freespace; 440 441 GEM_BUG_ON(gc == NULL); 442 443 desc = gc->client_base + gc->proc_desc_offset; 444 445 freespace = CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size); 446 if (likely(freespace >= wqi_size)) 447 return 0; 448 449 gc->no_wq_space += 1; 450 451 return -EAGAIN; 452 } 453 454 static void guc_add_workqueue_item(struct i915_guc_client *gc, 455 struct drm_i915_gem_request *rq) 456 { 457 /* wqi_len is in DWords, and does not include the one-word header */ 458 const size_t wqi_size = sizeof(struct guc_wq_item); 459 const u32 wqi_len = wqi_size/sizeof(u32) - 1; 460 struct guc_process_desc *desc; 461 struct guc_wq_item *wqi; 462 void *base; 463 u32 freespace, tail, wq_off, wq_page; 464 465 desc = gc->client_base + gc->proc_desc_offset; 466 467 /* Free space is guaranteed, see i915_guc_wq_check_space() above */ 468 freespace = CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size); 469 GEM_BUG_ON(freespace < wqi_size); 470 471 /* The GuC firmware wants the tail index in QWords, not bytes */ 472 tail = rq->tail; 473 GEM_BUG_ON(tail & 7); 474 tail >>= 3; 475 GEM_BUG_ON(tail > WQ_RING_TAIL_MAX); 476 477 /* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we 478 * should not have the case where structure wqi is across page, neither 479 * wrapped to the beginning. This simplifies the implementation below. 480 * 481 * XXX: if not the case, we need save data to a temp wqi and copy it to 482 * workqueue buffer dw by dw. 483 */ 484 BUILD_BUG_ON(wqi_size != 16); 485 486 /* postincrement WQ tail for next time */ 487 wq_off = gc->wq_tail; 488 gc->wq_tail += wqi_size; 489 gc->wq_tail &= gc->wq_size - 1; 490 GEM_BUG_ON(wq_off & (wqi_size - 1)); 491 492 /* WQ starts from the page after doorbell / process_desc */ 493 wq_page = (wq_off + GUC_DB_SIZE) >> PAGE_SHIFT; 494 wq_off &= PAGE_SIZE - 1; 495 base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, wq_page)); 496 wqi = (struct guc_wq_item *)((char *)base + wq_off); 497 498 /* Now fill in the 4-word work queue item */ 499 wqi->header = WQ_TYPE_INORDER | 500 (wqi_len << WQ_LEN_SHIFT) | 501 (rq->engine->guc_id << WQ_TARGET_SHIFT) | 502 WQ_NO_WCFLUSH_WAIT; 503 504 /* The GuC wants only the low-order word of the context descriptor */ 505 wqi->context_desc = (u32)intel_lr_context_descriptor(rq->ctx, 506 rq->engine); 507 508 wqi->ring_tail = tail << WQ_RING_TAIL_SHIFT; 509 wqi->fence_id = rq->seqno; 510 511 kunmap_atomic(base); 512 } 513 514 static int guc_ring_doorbell(struct i915_guc_client *gc) 515 { 516 struct guc_process_desc *desc; 517 union guc_doorbell_qw db_cmp, db_exc, db_ret; 518 union guc_doorbell_qw *db; 519 int attempt = 2, ret = -EAGAIN; 520 521 desc = gc->client_base + gc->proc_desc_offset; 522 523 /* Update the tail so it is visible to GuC */ 524 desc->tail = gc->wq_tail; 525 526 /* current cookie */ 527 db_cmp.db_status = GUC_DOORBELL_ENABLED; 528 db_cmp.cookie = gc->cookie; 529 530 /* cookie to be updated */ 531 db_exc.db_status = GUC_DOORBELL_ENABLED; 532 db_exc.cookie = gc->cookie + 1; 533 if (db_exc.cookie == 0) 534 db_exc.cookie = 1; 535 536 /* pointer of current doorbell cacheline */ 537 db = gc->client_base + gc->doorbell_offset; 538 539 while (attempt--) { 540 /* lets ring the doorbell */ 541 db_ret.value_qw = atomic64_cmpxchg((atomic64_t *)db, 542 db_cmp.value_qw, db_exc.value_qw); 543 544 /* if the exchange was successfully executed */ 545 if (db_ret.value_qw == db_cmp.value_qw) { 546 /* db was successfully rung */ 547 gc->cookie = db_exc.cookie; 548 ret = 0; 549 break; 550 } 551 552 /* XXX: doorbell was lost and need to acquire it again */ 553 if (db_ret.db_status == GUC_DOORBELL_DISABLED) 554 break; 555 556 DRM_ERROR("Cookie mismatch. Expected %d, returned %d\n", 557 db_cmp.cookie, db_ret.cookie); 558 559 /* update the cookie to newly read cookie from GuC */ 560 db_cmp.cookie = db_ret.cookie; 561 db_exc.cookie = db_ret.cookie + 1; 562 if (db_exc.cookie == 0) 563 db_exc.cookie = 1; 564 } 565 566 return ret; 567 } 568 569 /** 570 * i915_guc_submit() - Submit commands through GuC 571 * @rq: request associated with the commands 572 * 573 * Return: 0 on success, otherwise an errno. 574 * (Note: nonzero really shouldn't happen!) 575 * 576 * The caller must have already called i915_guc_wq_check_space() above 577 * with a result of 0 (success) since the last request submission. This 578 * guarantees that there is space in the work queue for the new request, 579 * so enqueuing the item cannot fail. 580 * 581 * Bad Things Will Happen if the caller violates this protocol e.g. calls 582 * submit() when check() says there's no space, or calls submit() multiple 583 * times with no intervening check(). 584 * 585 * The only error here arises if the doorbell hardware isn't functioning 586 * as expected, which really shouln't happen. 587 */ 588 int i915_guc_submit(struct drm_i915_gem_request *rq) 589 { 590 unsigned int engine_id = rq->engine->id; 591 struct intel_guc *guc = &rq->i915->guc; 592 struct i915_guc_client *client = guc->execbuf_client; 593 int b_ret; 594 595 guc_add_workqueue_item(client, rq); 596 b_ret = guc_ring_doorbell(client); 597 598 client->submissions[engine_id] += 1; 599 client->retcode = b_ret; 600 if (b_ret) 601 client->b_fail += 1; 602 603 guc->submissions[engine_id] += 1; 604 guc->last_seqno[engine_id] = rq->seqno; 605 606 return b_ret; 607 } 608 609 /* 610 * Everything below here is concerned with setup & teardown, and is 611 * therefore not part of the somewhat time-critical batch-submission 612 * path of i915_guc_submit() above. 613 */ 614 615 /** 616 * gem_allocate_guc_obj() - Allocate gem object for GuC usage 617 * @dev_priv: driver private data structure 618 * @size: size of object 619 * 620 * This is a wrapper to create a gem obj. In order to use it inside GuC, the 621 * object needs to be pinned lifetime. Also we must pin it to gtt space other 622 * than [0, GUC_WOPCM_TOP) because this range is reserved inside GuC. 623 * 624 * Return: A drm_i915_gem_object if successful, otherwise NULL. 625 */ 626 static struct drm_i915_gem_object * 627 gem_allocate_guc_obj(struct drm_i915_private *dev_priv, u32 size) 628 { 629 struct drm_i915_gem_object *obj; 630 631 obj = i915_gem_object_create(&dev_priv->drm, size); 632 if (IS_ERR(obj)) 633 return NULL; 634 635 if (i915_gem_object_get_pages(obj)) { 636 drm_gem_object_unreference(&obj->base); 637 return NULL; 638 } 639 640 if (i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, 641 PIN_OFFSET_BIAS | GUC_WOPCM_TOP)) { 642 drm_gem_object_unreference(&obj->base); 643 return NULL; 644 } 645 646 /* Invalidate GuC TLB to let GuC take the latest updates to GTT. */ 647 I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE); 648 649 return obj; 650 } 651 652 /** 653 * gem_release_guc_obj() - Release gem object allocated for GuC usage 654 * @obj: gem obj to be released 655 */ 656 static void gem_release_guc_obj(struct drm_i915_gem_object *obj) 657 { 658 if (!obj) 659 return; 660 661 if (i915_gem_obj_is_pinned(obj)) 662 i915_gem_object_ggtt_unpin(obj); 663 664 drm_gem_object_unreference(&obj->base); 665 } 666 667 static void 668 guc_client_free(struct drm_i915_private *dev_priv, 669 struct i915_guc_client *client) 670 { 671 struct intel_guc *guc = &dev_priv->guc; 672 673 if (!client) 674 return; 675 676 /* 677 * XXX: wait for any outstanding submissions before freeing memory. 678 * Be sure to drop any locks 679 */ 680 681 if (client->client_base) { 682 /* 683 * If we got as far as setting up a doorbell, make sure we 684 * shut it down before unmapping & deallocating the memory. 685 */ 686 guc_disable_doorbell(guc, client); 687 688 kunmap(kmap_to_page(client->client_base)); 689 } 690 691 gem_release_guc_obj(client->client_obj); 692 693 if (client->ctx_index != GUC_INVALID_CTX_ID) { 694 guc_fini_ctx_desc(guc, client); 695 ida_simple_remove(&guc->ctx_ids, client->ctx_index); 696 } 697 698 kfree(client); 699 } 700 701 /* 702 * Borrow the first client to set up & tear down every doorbell 703 * in turn, to ensure that all doorbell h/w is (re)initialised. 704 */ 705 static void guc_init_doorbell_hw(struct intel_guc *guc) 706 { 707 struct drm_i915_private *dev_priv = guc_to_i915(guc); 708 struct i915_guc_client *client = guc->execbuf_client; 709 uint16_t db_id, i; 710 int err; 711 712 db_id = client->doorbell_id; 713 714 for (i = 0; i < GUC_MAX_DOORBELLS; ++i) { 715 i915_reg_t drbreg = GEN8_DRBREGL(i); 716 u32 value = I915_READ(drbreg); 717 718 err = guc_update_doorbell_id(guc, client, i); 719 720 /* Report update failure or unexpectedly active doorbell */ 721 if (err || (i != db_id && (value & GUC_DOORBELL_ENABLED))) 722 DRM_DEBUG_DRIVER("Doorbell %d (reg 0x%x) was 0x%x, err %d\n", 723 i, drbreg.reg, value, err); 724 } 725 726 /* Restore to original value */ 727 err = guc_update_doorbell_id(guc, client, db_id); 728 if (err) 729 DRM_ERROR("Failed to restore doorbell to %d, err %d\n", 730 db_id, err); 731 732 for (i = 0; i < GUC_MAX_DOORBELLS; ++i) { 733 i915_reg_t drbreg = GEN8_DRBREGL(i); 734 u32 value = I915_READ(drbreg); 735 736 if (i != db_id && (value & GUC_DOORBELL_ENABLED)) 737 DRM_DEBUG_DRIVER("Doorbell %d (reg 0x%x) finally 0x%x\n", 738 i, drbreg.reg, value); 739 740 } 741 } 742 743 /** 744 * guc_client_alloc() - Allocate an i915_guc_client 745 * @dev_priv: driver private data structure 746 * @priority: four levels priority _CRITICAL, _HIGH, _NORMAL and _LOW 747 * The kernel client to replace ExecList submission is created with 748 * NORMAL priority. Priority of a client for scheduler can be HIGH, 749 * while a preemption context can use CRITICAL. 750 * @ctx: the context that owns the client (we use the default render 751 * context) 752 * 753 * Return: An i915_guc_client object if success, else NULL. 754 */ 755 static struct i915_guc_client * 756 guc_client_alloc(struct drm_i915_private *dev_priv, 757 uint32_t priority, 758 struct i915_gem_context *ctx) 759 { 760 struct i915_guc_client *client; 761 struct intel_guc *guc = &dev_priv->guc; 762 struct drm_i915_gem_object *obj; 763 uint16_t db_id; 764 765 client = kzalloc(sizeof(*client), GFP_KERNEL); 766 if (!client) 767 return NULL; 768 769 client->doorbell_id = GUC_INVALID_DOORBELL_ID; 770 client->priority = priority; 771 client->owner = ctx; 772 client->guc = guc; 773 774 client->ctx_index = (uint32_t)ida_simple_get(&guc->ctx_ids, 0, 775 GUC_MAX_GPU_CONTEXTS, GFP_KERNEL); 776 if (client->ctx_index >= GUC_MAX_GPU_CONTEXTS) { 777 client->ctx_index = GUC_INVALID_CTX_ID; 778 goto err; 779 } 780 781 /* The first page is doorbell/proc_desc. Two followed pages are wq. */ 782 obj = gem_allocate_guc_obj(dev_priv, GUC_DB_SIZE + GUC_WQ_SIZE); 783 if (!obj) 784 goto err; 785 786 /* We'll keep just the first (doorbell/proc) page permanently kmap'd. */ 787 client->client_obj = obj; 788 client->client_base = kmap(i915_gem_object_get_page(obj, 0)); 789 client->wq_offset = GUC_DB_SIZE; 790 client->wq_size = GUC_WQ_SIZE; 791 792 db_id = select_doorbell_register(guc, client->priority); 793 if (db_id == GUC_INVALID_DOORBELL_ID) 794 /* XXX: evict a doorbell instead? */ 795 goto err; 796 797 client->doorbell_offset = select_doorbell_cacheline(guc); 798 799 /* 800 * Since the doorbell only requires a single cacheline, we can save 801 * space by putting the application process descriptor in the same 802 * page. Use the half of the page that doesn't include the doorbell. 803 */ 804 if (client->doorbell_offset >= (GUC_DB_SIZE / 2)) 805 client->proc_desc_offset = 0; 806 else 807 client->proc_desc_offset = (GUC_DB_SIZE / 2); 808 809 guc_init_proc_desc(guc, client); 810 guc_init_ctx_desc(guc, client); 811 if (guc_init_doorbell(guc, client, db_id)) 812 goto err; 813 814 DRM_DEBUG_DRIVER("new priority %u client %p: ctx_index %u\n", 815 priority, client, client->ctx_index); 816 DRM_DEBUG_DRIVER("doorbell id %u, cacheline offset 0x%x\n", 817 client->doorbell_id, client->doorbell_offset); 818 819 return client; 820 821 err: 822 DRM_ERROR("FAILED to create priority %u GuC client!\n", priority); 823 824 guc_client_free(dev_priv, client); 825 return NULL; 826 } 827 828 static void guc_create_log(struct intel_guc *guc) 829 { 830 struct drm_i915_private *dev_priv = guc_to_i915(guc); 831 struct drm_i915_gem_object *obj; 832 unsigned long offset; 833 uint32_t size, flags; 834 835 if (i915.guc_log_level < GUC_LOG_VERBOSITY_MIN) 836 return; 837 838 if (i915.guc_log_level > GUC_LOG_VERBOSITY_MAX) 839 i915.guc_log_level = GUC_LOG_VERBOSITY_MAX; 840 841 /* The first page is to save log buffer state. Allocate one 842 * extra page for others in case for overlap */ 843 size = (1 + GUC_LOG_DPC_PAGES + 1 + 844 GUC_LOG_ISR_PAGES + 1 + 845 GUC_LOG_CRASH_PAGES + 1) << PAGE_SHIFT; 846 847 obj = guc->log_obj; 848 if (!obj) { 849 obj = gem_allocate_guc_obj(dev_priv, size); 850 if (!obj) { 851 /* logging will be off */ 852 i915.guc_log_level = -1; 853 return; 854 } 855 856 guc->log_obj = obj; 857 } 858 859 /* each allocated unit is a page */ 860 flags = GUC_LOG_VALID | GUC_LOG_NOTIFY_ON_HALF_FULL | 861 (GUC_LOG_DPC_PAGES << GUC_LOG_DPC_SHIFT) | 862 (GUC_LOG_ISR_PAGES << GUC_LOG_ISR_SHIFT) | 863 (GUC_LOG_CRASH_PAGES << GUC_LOG_CRASH_SHIFT); 864 865 offset = i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT; /* in pages */ 866 guc->log_flags = (offset << GUC_LOG_BUF_ADDR_SHIFT) | flags; 867 } 868 869 static void init_guc_policies(struct guc_policies *policies) 870 { 871 struct guc_policy *policy; 872 u32 p, i; 873 874 policies->dpc_promote_time = 500000; 875 policies->max_num_work_items = POLICY_MAX_NUM_WI; 876 877 for (p = 0; p < GUC_CTX_PRIORITY_NUM; p++) { 878 for (i = GUC_RENDER_ENGINE; i < GUC_MAX_ENGINES_NUM; i++) { 879 policy = &policies->policy[p][i]; 880 881 policy->execution_quantum = 1000000; 882 policy->preemption_time = 500000; 883 policy->fault_time = 250000; 884 policy->policy_flags = 0; 885 } 886 } 887 888 policies->is_valid = 1; 889 } 890 891 static void guc_create_ads(struct intel_guc *guc) 892 { 893 struct drm_i915_private *dev_priv = guc_to_i915(guc); 894 struct drm_i915_gem_object *obj; 895 struct guc_ads *ads; 896 struct guc_policies *policies; 897 struct guc_mmio_reg_state *reg_state; 898 struct intel_engine_cs *engine; 899 struct page *page; 900 u32 size; 901 902 /* The ads obj includes the struct itself and buffers passed to GuC */ 903 size = sizeof(struct guc_ads) + sizeof(struct guc_policies) + 904 sizeof(struct guc_mmio_reg_state) + 905 GUC_S3_SAVE_SPACE_PAGES * PAGE_SIZE; 906 907 obj = guc->ads_obj; 908 if (!obj) { 909 obj = gem_allocate_guc_obj(dev_priv, PAGE_ALIGN(size)); 910 if (!obj) 911 return; 912 913 guc->ads_obj = obj; 914 } 915 916 page = i915_gem_object_get_page(obj, 0); 917 ads = kmap(page); 918 919 /* 920 * The GuC requires a "Golden Context" when it reinitialises 921 * engines after a reset. Here we use the Render ring default 922 * context, which must already exist and be pinned in the GGTT, 923 * so its address won't change after we've told the GuC where 924 * to find it. 925 */ 926 engine = &dev_priv->engine[RCS]; 927 ads->golden_context_lrca = engine->status_page.gfx_addr; 928 929 for_each_engine(engine, dev_priv) 930 ads->eng_state_size[engine->guc_id] = intel_lr_context_size(engine); 931 932 /* GuC scheduling policies */ 933 policies = (void *)ads + sizeof(struct guc_ads); 934 init_guc_policies(policies); 935 936 ads->scheduler_policies = i915_gem_obj_ggtt_offset(obj) + 937 sizeof(struct guc_ads); 938 939 /* MMIO reg state */ 940 reg_state = (void *)policies + sizeof(struct guc_policies); 941 942 for_each_engine(engine, dev_priv) { 943 reg_state->mmio_white_list[engine->guc_id].mmio_start = 944 engine->mmio_base + GUC_MMIO_WHITE_LIST_START; 945 946 /* Nothing to be saved or restored for now. */ 947 reg_state->mmio_white_list[engine->guc_id].count = 0; 948 } 949 950 ads->reg_state_addr = ads->scheduler_policies + 951 sizeof(struct guc_policies); 952 953 ads->reg_state_buffer = ads->reg_state_addr + 954 sizeof(struct guc_mmio_reg_state); 955 956 kunmap(page); 957 } 958 959 /* 960 * Set up the memory resources to be shared with the GuC. At this point, 961 * we require just one object that can be mapped through the GGTT. 962 */ 963 int i915_guc_submission_init(struct drm_i915_private *dev_priv) 964 { 965 const size_t ctxsize = sizeof(struct guc_context_desc); 966 const size_t poolsize = GUC_MAX_GPU_CONTEXTS * ctxsize; 967 const size_t gemsize = round_up(poolsize, PAGE_SIZE); 968 struct intel_guc *guc = &dev_priv->guc; 969 970 /* Wipe bitmap & delete client in case of reinitialisation */ 971 bitmap_clear(guc->doorbell_bitmap, 0, GUC_MAX_DOORBELLS); 972 i915_guc_submission_disable(dev_priv); 973 974 if (!i915.enable_guc_submission) 975 return 0; /* not enabled */ 976 977 if (guc->ctx_pool_obj) 978 return 0; /* already allocated */ 979 980 guc->ctx_pool_obj = gem_allocate_guc_obj(dev_priv, gemsize); 981 if (!guc->ctx_pool_obj) 982 return -ENOMEM; 983 984 ida_init(&guc->ctx_ids); 985 guc_create_log(guc); 986 guc_create_ads(guc); 987 988 return 0; 989 } 990 991 int i915_guc_submission_enable(struct drm_i915_private *dev_priv) 992 { 993 struct intel_guc *guc = &dev_priv->guc; 994 struct i915_guc_client *client; 995 996 /* client for execbuf submission */ 997 client = guc_client_alloc(dev_priv, 998 GUC_CTX_PRIORITY_KMD_NORMAL, 999 dev_priv->kernel_context); 1000 if (!client) { 1001 DRM_ERROR("Failed to create execbuf guc_client\n"); 1002 return -ENOMEM; 1003 } 1004 1005 guc->execbuf_client = client; 1006 host2guc_sample_forcewake(guc, client); 1007 guc_init_doorbell_hw(guc); 1008 1009 return 0; 1010 } 1011 1012 void i915_guc_submission_disable(struct drm_i915_private *dev_priv) 1013 { 1014 struct intel_guc *guc = &dev_priv->guc; 1015 1016 guc_client_free(dev_priv, guc->execbuf_client); 1017 guc->execbuf_client = NULL; 1018 } 1019 1020 void i915_guc_submission_fini(struct drm_i915_private *dev_priv) 1021 { 1022 struct intel_guc *guc = &dev_priv->guc; 1023 1024 gem_release_guc_obj(dev_priv->guc.ads_obj); 1025 guc->ads_obj = NULL; 1026 1027 gem_release_guc_obj(dev_priv->guc.log_obj); 1028 guc->log_obj = NULL; 1029 1030 if (guc->ctx_pool_obj) 1031 ida_destroy(&guc->ctx_ids); 1032 gem_release_guc_obj(guc->ctx_pool_obj); 1033 guc->ctx_pool_obj = NULL; 1034 } 1035 1036 /** 1037 * intel_guc_suspend() - notify GuC entering suspend state 1038 * @dev: drm device 1039 */ 1040 int intel_guc_suspend(struct drm_device *dev) 1041 { 1042 struct drm_i915_private *dev_priv = to_i915(dev); 1043 struct intel_guc *guc = &dev_priv->guc; 1044 struct i915_gem_context *ctx; 1045 u32 data[3]; 1046 1047 if (guc->guc_fw.guc_fw_load_status != GUC_FIRMWARE_SUCCESS) 1048 return 0; 1049 1050 ctx = dev_priv->kernel_context; 1051 1052 data[0] = HOST2GUC_ACTION_ENTER_S_STATE; 1053 /* any value greater than GUC_POWER_D0 */ 1054 data[1] = GUC_POWER_D1; 1055 /* first page is shared data with GuC */ 1056 data[2] = i915_gem_obj_ggtt_offset(ctx->engine[RCS].state); 1057 1058 return host2guc_action(guc, data, ARRAY_SIZE(data)); 1059 } 1060 1061 1062 /** 1063 * intel_guc_resume() - notify GuC resuming from suspend state 1064 * @dev: drm device 1065 */ 1066 int intel_guc_resume(struct drm_device *dev) 1067 { 1068 struct drm_i915_private *dev_priv = to_i915(dev); 1069 struct intel_guc *guc = &dev_priv->guc; 1070 struct i915_gem_context *ctx; 1071 u32 data[3]; 1072 1073 if (guc->guc_fw.guc_fw_load_status != GUC_FIRMWARE_SUCCESS) 1074 return 0; 1075 1076 ctx = dev_priv->kernel_context; 1077 1078 data[0] = HOST2GUC_ACTION_EXIT_S_STATE; 1079 data[1] = GUC_POWER_D0; 1080 /* first page is shared data with GuC */ 1081 data[2] = i915_gem_obj_ggtt_offset(ctx->engine[RCS].state); 1082 1083 return host2guc_action(guc, data, ARRAY_SIZE(data)); 1084 } 1085