/*
 * Copyright © 2008-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#ifndef I915_GEM_REQUEST_H
#define I915_GEM_REQUEST_H

#include <linux/fence.h>

#include "i915_gem.h"
#include "i915_sw_fence.h"

struct intel_wait {
	struct rb_node node;
	struct task_struct *tsk;
	u32 seqno;
};

struct intel_signal_node {
	struct rb_node node;
	struct intel_wait wait;
};

/**
 * Request queue structure.
 *
 * The request queue allows us to note sequence numbers that have been emitted
 * and may be associated with active buffers to be retired.
 *
 * By keeping this list, we can avoid having to do questionable sequence
 * number comparisons on buffer last_read|write_seqno. It also allows an
 * emission time to be associated with the request for tracking how far ahead
 * of the GPU the submission is.
 *
 * When modifying this structure be very aware that we perform a lockless
 * RCU lookup of it that may race against reallocation of the struct
 * from the slab freelist. We intentionally do not zero the structure on
 * allocation so that the lookup can use the dangling pointers (and is
 * cognisant that those pointers may be wrong). Instead, everything that
 * needs to be initialised must be done so explicitly.
 *
 * The requests are reference counted.
 */
struct drm_i915_gem_request {
	struct fence fence;
	struct lock lock;

	/** On which ring this request was generated */
	struct drm_i915_private *i915;

	/**
	 * Context and ring buffer related to this request.
	 * Contexts are refcounted, so when this request is associated with a
	 * context, we must increment the context's refcount, to guarantee that
	 * it persists while any request is linked to it. Requests themselves
	 * are also refcounted, so the request will only be freed when the last
	 * reference to it is dismissed, and the code in
	 * i915_gem_request_free() will then decrement the refcount on the
	 * context.
	 */
	struct i915_gem_context *ctx;
	struct intel_engine_cs *engine;
	struct intel_ring *ring;
	struct intel_signal_node signaling;

	struct i915_sw_fence submit;
	wait_queue_t submitq;

	/** GEM sequence number associated with the previous request;
	 * when the HWS breadcrumb is equal to this, the GPU is processing
	 * this request.
	 */
	u32 previous_seqno;

	/** Position in the ring of the start of the request */
	u32 head;

	/**
	 * Position in the ring of the start of the postfix.
	 * This is required to calculate the maximum available ring space
	 * without overwriting the postfix.
	 */
	u32 postfix;

	/** Position in the ring of the end of the whole request */
	u32 tail;

	/** Position in the ring of the end of any workarounds after the tail */
	u32 wa_tail;

	/** Preallocated space in the ring for emitting the request */
	u32 reserved_space;

	/**
	 * Context related to the previous request.
	 * As the contexts are accessed by the hardware until the switch is
	 * completed to a new context, the hardware may still be writing
	 * to the context object after the breadcrumb is visible. We must
	 * not unpin/unbind/prune that object whilst still active and so
	 * we keep the previous context pinned until the following (this)
	 * request is retired.
	 */
	struct i915_gem_context *previous_context;

	/** Batch buffer related to this request if any (used for
	 * error state dump only).
	 */
	struct i915_vma *batch;
	struct list_head active_list;

	/** Time at which this request was emitted, in jiffies. */
	unsigned long emitted_jiffies;

	/** engine->request_list entry for this request */
	struct list_head link;

	/** ring->request_list entry for this request */
	struct list_head ring_link;

	struct drm_i915_file_private *file_priv;
	/** file_priv list entry for this request */
	struct list_head client_list;

	/** Link in the execlist submission queue, guarded by execlist_lock. */
	struct list_head execlist_link;
};
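
/*
 * Illustrative sketch only (not part of the original header): because request
 * slabs are reclaimed with SLAB_DESTROY_BY_RCU, a lockless reader may observe
 * a struct drm_i915_gem_request that has already been freed and reused for a
 * new submission. Any such lookup therefore has to follow a "load pointer,
 * take reference, re-check pointer" pattern, roughly (ptr is a generic
 * placeholder for a request pointer published with rcu_assign_pointer()):
 *
 *	rcu_read_lock();
 *	rq = rcu_dereference(ptr);
 *	if (rq)
 *		rq = i915_gem_request_get_rcu(rq);	(NULL if the refcount already hit zero)
 *	if (rq && rq != rcu_access_pointer(ptr)) {
 *		i915_gem_request_put(rq);		(we raced against reuse; retry or bail)
 *		rq = NULL;
 *	}
 *	rcu_read_unlock();
 *
 * The canonical implementation of this pattern is __i915_gem_active_get_rcu()
 * later in this header; the sketch here only motivates why the struct must be
 * explicitly (re)initialised on allocation rather than zeroed.
 */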

extern const struct fence_ops i915_fence_ops;

static inline bool fence_is_i915(struct fence *fence)
{
	return fence->ops == &i915_fence_ops;
}

struct drm_i915_gem_request * __must_check
i915_gem_request_alloc(struct intel_engine_cs *engine,
		       struct i915_gem_context *ctx);
int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
				   struct drm_file *file);
void i915_gem_request_retire_upto(struct drm_i915_gem_request *req);

static inline u32
i915_gem_request_get_seqno(struct drm_i915_gem_request *req)
{
	return req ? req->fence.seqno : 0;
}

static inline struct intel_engine_cs *
i915_gem_request_get_engine(struct drm_i915_gem_request *req)
{
	return req ? req->engine : NULL;
}

static inline struct drm_i915_gem_request *
to_request(struct fence *fence)
{
	/* We assume that NULL fence/request are interoperable */
	BUILD_BUG_ON(offsetof(struct drm_i915_gem_request, fence) != 0);
	GEM_BUG_ON(fence && !fence_is_i915(fence));
	return container_of(fence, struct drm_i915_gem_request, fence);
}

static inline struct drm_i915_gem_request *
i915_gem_request_get(struct drm_i915_gem_request *req)
{
	return to_request(fence_get(&req->fence));
}

static inline struct drm_i915_gem_request *
i915_gem_request_get_rcu(struct drm_i915_gem_request *req)
{
	return to_request(fence_get_rcu(&req->fence));
}

static inline void
i915_gem_request_put(struct drm_i915_gem_request *req)
{
	fence_put(&req->fence);
}

static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst,
					   struct drm_i915_gem_request *src)
{
	if (src)
		i915_gem_request_get(src);

	if (*pdst)
		i915_gem_request_put(*pdst);

	*pdst = src;
}

int
i915_gem_request_await_object(struct drm_i915_gem_request *to,
			      struct drm_i915_gem_object *obj,
			      bool write);

void __i915_add_request(struct drm_i915_gem_request *req, bool flush_caches);
#define i915_add_request(req) \
	__i915_add_request(req, true)
#define i915_add_request_no_flush(req) \
	__i915_add_request(req, false)

struct intel_rps_client;
#define NO_WAITBOOST ERR_PTR(-1)
#define IS_RPS_CLIENT(p) (!IS_ERR(p))
#define IS_RPS_USER(p) (!IS_ERR_OR_NULL(p))

int i915_wait_request(struct drm_i915_gem_request *req,
		      unsigned int flags,
		      s64 *timeout,
		      struct intel_rps_client *rps)
	__attribute__((nonnull(1)));
#define I915_WAIT_INTERRUPTIBLE BIT(0)
#define I915_WAIT_LOCKED BIT(1) /* struct_mutex held, handle GPU reset */

static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine);

/**
 * Returns true if seq1 is later than seq2.
 */
static inline bool i915_seqno_passed(u32 seq1, u32 seq2)
{
	return (s32)(seq1 - seq2) >= 0;
}

static inline bool
i915_gem_request_started(const struct drm_i915_gem_request *req)
{
	return i915_seqno_passed(intel_engine_get_seqno(req->engine),
				 req->previous_seqno);
}

static inline bool
i915_gem_request_completed(const struct drm_i915_gem_request *req)
{
	return i915_seqno_passed(intel_engine_get_seqno(req->engine),
				 req->fence.seqno);
}

bool __i915_spin_request(const struct drm_i915_gem_request *request,
			 int state, unsigned long timeout_us);
static inline bool i915_spin_request(const struct drm_i915_gem_request *request,
				     int state, unsigned long timeout_us)
{
	return (i915_gem_request_started(request) &&
		__i915_spin_request(request, state, timeout_us));
}
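
/*
 * Worked example (illustration only, not part of the original header): seqno
 * ordering is evaluated in wrapping 32-bit arithmetic, so it survives the
 * global seqno counter wrapping past zero. With seq1 = 2 and
 * seq2 = 0xfffffffe, where seq1 was emitted after seq2 but the counter has
 * since wrapped:
 *
 *	seq1 - seq2 = 4 (as u32), and (s32)4 >= 0, so i915_seqno_passed() is true,
 *
 * whereas a plain unsigned comparison (seq1 >= seq2) would wrongly report
 * seq1 as older. i915_gem_request_started() and i915_gem_request_completed()
 * rely on this when comparing the engine's HWS breadcrumb against
 * req->previous_seqno and req->fence.seqno respectively.
 */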

/* We treat requests as fences. This is not to be confused with our
 * "fence registers", but rather with pipeline synchronisation objects akin
 * to GL_ARB_sync. We use the fences to synchronize access from the CPU with
 * activity on the GPU; for example, we should not rewrite an object's PTE
 * whilst the GPU is reading them. We also track fences at a higher level to
 * provide implicit synchronisation around GEM objects, e.g. set-domain will
 * wait for outstanding GPU rendering before marking the object ready for CPU
 * access, or a pageflip will wait until the GPU is complete before showing
 * the frame on the scanout.
 *
 * In order to use a fence, the object must track the fence it needs to
 * serialise with. For example, GEM objects want to track both read and
 * write access so that we can perform concurrent read operations between
 * the CPU and GPU engines, as well as waiting for all rendering to
 * complete, or waiting for the last GPU user of a "fence register". The
 * object then embeds a #i915_gem_active to track the most recent (in
 * retirement order) request relevant for the desired mode of access.
 * The #i915_gem_active is updated with i915_gem_active_set() to track the
 * most recent fence request; typically this is done as part of
 * i915_vma_move_to_active().
 *
 * When the #i915_gem_active completes (is retired), it will
 * signal its completion to the owner through a callback as well as mark
 * itself as idle (i915_gem_active.request == NULL). The owner
 * can then perform any action, such as delayed freeing of an active
 * resource including itself.
 */
struct i915_gem_active;

typedef void (*i915_gem_retire_fn)(struct i915_gem_active *,
				   struct drm_i915_gem_request *);

struct i915_gem_active {
	struct drm_i915_gem_request __rcu *request;
	struct list_head link;
	i915_gem_retire_fn retire;
};

void i915_gem_retire_noop(struct i915_gem_active *,
			  struct drm_i915_gem_request *request);

/**
 * init_request_active - prepares the activity tracker for use
 * @active - the active tracker
 * @retire - a callback invoked when the tracker is retired (becomes idle),
 * can be NULL
 *
 * init_request_active() prepares the embedded @active struct for use as
 * an activity tracker, that is for tracking the last known active request
 * associated with it. When the tracked request becomes idle, i.e. when it is
 * retired after completion, the optional callback @retire is invoked.
 */
static inline void
init_request_active(struct i915_gem_active *active,
		    i915_gem_retire_fn retire)
{
	INIT_LIST_HEAD(&active->link);
	active->retire = retire ?: i915_gem_retire_noop;
}

/**
 * i915_gem_active_set - updates the tracker to watch the current request
 * @active - the active tracker
 * @request - the request to watch
 *
 * i915_gem_active_set() watches the given @request for completion. Whilst
 * that @request is busy, the @active reports busy. When that @request is
 * retired, the @active tracker is updated to report idle.
 */
static inline void
i915_gem_active_set(struct i915_gem_active *active,
		    struct drm_i915_gem_request *request)
{
	list_move(&active->link, &request->active_list);
	rcu_assign_pointer(active->request, request);
}
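
/*
 * Illustrative sketch only (hypothetical structure, not part of the original
 * header): a resource that must not be freed while the GPU may still be using
 * it embeds an i915_gem_active, registers a retirement callback, and points
 * the tracker at the latest request on each submission (under struct_mutex):
 *
 *	struct foo {
 *		struct i915_gem_active last_use;
 *	};
 *
 *	static void foo_retire(struct i915_gem_active *active,
 *			       struct drm_i915_gem_request *rq)
 *	{
 *		struct foo *foo = container_of(active, struct foo, last_use);
 *		(foo is now idle: safe to unpin, free, etc.)
 *	}
 *
 *	init_request_active(&foo->last_use, foo_retire);
 *	...
 *	i915_gem_active_set(&foo->last_use, rq);	(on each new request)
 *
 * When rq is retired, foo_retire() runs and the tracker reports idle again;
 * i915_vma_move_to_active() is the typical in-tree user of this pattern.
 */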

static inline struct drm_i915_gem_request *
__i915_gem_active_peek(const struct i915_gem_active *active)
{
	/* Inside the error capture (running with the driver in an unknown
	 * state), we want to bend the rules slightly (a lot).
	 *
	 * Work is in progress to make it safer, in the meantime this keeps
	 * the known issue from spamming the logs.
	 */
	return rcu_dereference_protected(active->request, 1);
}

/**
 * i915_gem_active_raw - return the active request
 * @active - the active tracker
 *
 * i915_gem_active_raw() returns the current request being tracked, or NULL.
 * It does not obtain a reference on the request for the caller, so the caller
 * must hold struct_mutex.
 */
static inline struct drm_i915_gem_request *
i915_gem_active_raw(const struct i915_gem_active *active, struct lock *mutex)
{
	return rcu_dereference_protected(active->request,
					 lockdep_is_held(mutex));
}

/**
 * i915_gem_active_peek - report the active request being monitored
 * @active - the active tracker
 *
 * i915_gem_active_peek() returns the current request being tracked if
 * still active, or NULL. It does not obtain a reference on the request
 * for the caller, so the caller must hold struct_mutex.
 */
static inline struct drm_i915_gem_request *
i915_gem_active_peek(const struct i915_gem_active *active, struct lock *mutex)
{
	struct drm_i915_gem_request *request;

	request = i915_gem_active_raw(active, mutex);
	if (!request || i915_gem_request_completed(request))
		return NULL;

	return request;
}

/**
 * i915_gem_active_get - return a reference to the active request
 * @active - the active tracker
 *
 * i915_gem_active_get() returns a reference to the active request, or NULL
 * if the active tracker is idle. The caller must hold struct_mutex.
 */
static inline struct drm_i915_gem_request *
i915_gem_active_get(const struct i915_gem_active *active, struct lock *mutex)
{
	return i915_gem_request_get(i915_gem_active_peek(active, mutex));
}
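
/*
 * Illustrative sketch only (hypothetical names, not part of the original
 * header): taking a reference with i915_gem_active_get() under struct_mutex
 * lets the request outlive both the lock and the tracker:
 *
 *	struct drm_i915_gem_request *rq;
 *
 *	rq = i915_gem_active_get(&obj->last_write, &dev->struct_mutex);
 *	if (rq) {
 *		(rq remains valid even after struct_mutex is dropped or the
 *		 tracker moves on to a newer request)
 *		i915_gem_request_put(rq);
 *	}
 */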

/**
 * __i915_gem_active_get_rcu - return a reference to the active request
 * @active - the active tracker
 *
 * __i915_gem_active_get_rcu() returns a reference to the active request,
 * or NULL if the active tracker is idle. The caller must hold the RCU read
 * lock, but the returned pointer is safe to use outside of RCU.
 */
static inline struct drm_i915_gem_request *
__i915_gem_active_get_rcu(const struct i915_gem_active *active)
{
	/* Performing a lockless retrieval of the active request is super
	 * tricky. SLAB_DESTROY_BY_RCU merely guarantees that the backing
	 * slab of request objects will not be freed whilst we hold the
	 * RCU read lock. It does not guarantee that the request itself
	 * will not be freed and then *reused*. Viz,
	 *
	 *	Thread A			Thread B
	 *
	 *	req = active.request
	 *					retire(req) -> free(req);
	 *					(req is now first on the slab freelist)
	 *					active.request = NULL
	 *
	 *					req = new submission on a new object
	 *	ref(req)
	 *
	 * To prevent the request from being reused whilst the caller
	 * uses it, we take a reference like normal. Whilst acquiring
	 * the reference we check that it is not in a destroyed state
	 * (refcnt == 0). That prevents the request being reallocated
	 * whilst the caller holds on to it. To check that the request
	 * was not reallocated as we acquired the reference we have to
	 * check that our request remains the active request across
	 * the lookup, in the same manner as a seqlock. The visibility
	 * of the pointer versus the reference counting is controlled
	 * by using RCU barriers (rcu_dereference and rcu_assign_pointer).
	 *
	 * In the middle of all that, we inspect whether the request is
	 * complete. Retiring is lazy so the request may be completed long
	 * before the active tracker is updated. Querying whether the
	 * request is complete is far cheaper (as it involves no locked
	 * instructions setting cachelines to exclusive) than acquiring
	 * the reference, so we do it first. The RCU read lock ensures the
	 * pointer dereference is valid, but does not ensure that the
	 * seqno nor HWS is the right one! However, if the request was
	 * reallocated, that means the active tracker's request was complete.
	 * If the new request is also complete, then both are and we can
	 * just report the active tracker is idle. If the new request is
	 * incomplete, then we acquire a reference on it and check that
	 * it remained the active request.
	 *
	 * It is then imperative that we do not zero the request on
	 * reallocation, so that we can chase the dangling pointers!
	 * See i915_gem_request_alloc().
	 */
	do {
		struct drm_i915_gem_request *request;

		request = rcu_dereference(active->request);
		if (!request || i915_gem_request_completed(request))
			return NULL;

		/* An especially silly compiler could decide to recompute the
		 * result of i915_gem_request_completed, more specifically
		 * re-emit the load for request->fence.seqno. A race would catch
		 * a later seqno value, which could flip the result from true to
		 * false. Which means part of the instructions below might not
		 * be executed, while later on instructions are executed. Due to
		 * barriers within the refcounting the inconsistency can't reach
		 * past the call to i915_gem_request_get_rcu, but not executing
		 * that while still executing i915_gem_request_put() creates
		 * havoc enough. Prevent this with a compiler barrier.
		 */
		barrier();

		request = i915_gem_request_get_rcu(request);

		/* What stops the following rcu_access_pointer() from occurring
		 * before the above i915_gem_request_get_rcu()? If we were
		 * to read the value before pausing to get the reference to
		 * the request, we may not notice a change in the active
		 * tracker.
		 *
		 * The rcu_access_pointer() is a mere compiler barrier, which
		 * means both the CPU and compiler are free to perform the
		 * memory read without constraint. The compiler only has to
		 * ensure that any operations after the rcu_access_pointer()
		 * occur afterwards in program order. This means the read may
		 * be performed earlier by an out-of-order CPU, or adventurous
		 * compiler.
		 *
		 * The atomic operation at the heart of
		 * i915_gem_request_get_rcu(), see fence_get_rcu(), is
		 * atomic_inc_not_zero() which is only a full memory barrier
		 * when successful. That is, if i915_gem_request_get_rcu()
		 * returns the request (and so with the reference counted
		 * incremented) then the following read for rcu_access_pointer()
		 * must occur after the atomic operation and so confirm
		 * that this request is the one currently being tracked.
		 *
		 * The corresponding write barrier is part of
		 * rcu_assign_pointer().
		 */
		if (!request || request == rcu_access_pointer(active->request))
			return rcu_pointer_handoff(request);

		i915_gem_request_put(request);
	} while (1);
}

/**
 * i915_gem_active_get_unlocked - return a reference to the active request
 * @active - the active tracker
 *
 * i915_gem_active_get_unlocked() returns a reference to the active request,
 * or NULL if the active tracker is idle. The reference is obtained under RCU,
 * so no locking is required by the caller.
 *
 * The reference should be freed with i915_gem_request_put().
 */
static inline struct drm_i915_gem_request *
i915_gem_active_get_unlocked(const struct i915_gem_active *active)
{
	struct drm_i915_gem_request *request;

	rcu_read_lock();
	request = __i915_gem_active_get_rcu(active);
	rcu_read_unlock();

	return request;
}

/**
 * i915_gem_active_isset - report whether the active tracker is assigned
 * @active - the active tracker
 *
 * i915_gem_active_isset() returns true if the active tracker is currently
 * assigned to a request. Due to the lazy retiring, that request may be idle
 * and this may report stale information.
 */
static inline bool
i915_gem_active_isset(const struct i915_gem_active *active)
{
	return rcu_access_pointer(active->request);
}

/**
 * i915_gem_active_is_idle - report whether the active tracker is idle
 * @active - the active tracker
 *
 * i915_gem_active_is_idle() returns true if the active tracker is currently
 * unassigned or if the request is complete (but not yet retired). Requires
 * the caller to hold struct_mutex (but that can be relaxed if desired).
 */
static inline bool
i915_gem_active_is_idle(const struct i915_gem_active *active,
			struct lock *mutex)
{
	return !i915_gem_active_peek(active, mutex);
}

/**
 * i915_gem_active_wait - waits until the request is completed
 * @active - the active request on which to wait
 *
 * i915_gem_active_wait() waits until the request is completed before
 * returning. Note that it does not guarantee that the request is
 * retired first, see i915_gem_active_retire().
 *
 * i915_gem_active_wait() returns immediately if the active
 * request is already complete.
 */
static inline int __must_check
i915_gem_active_wait(const struct i915_gem_active *active, struct lock *mutex)
{
	struct drm_i915_gem_request *request;

	request = i915_gem_active_peek(active, mutex);
	if (!request)
		return 0;

	return i915_wait_request(request,
				 I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
				 NULL, NULL);
}
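
/*
 * Illustrative sketch only (hypothetical names, not part of the original
 * header): a typical wait-before-CPU-access pattern under struct_mutex,
 * blocking until the last tracked GPU write has completed:
 *
 *	ret = i915_gem_active_wait(&obj->last_write, &dev->struct_mutex);
 *	if (ret)
 *		return ret;
 *	(the GPU has finished with the object; CPU access may proceed)
 *
 * Note this only waits for completion; use i915_gem_active_retire() below if
 * the retirement callback must also have run before continuing.
 */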

/**
 * i915_gem_active_wait_unlocked - waits until the request is completed
 * @active - the active request on which to wait
 * @flags - how to wait
 * @timeout - how long to wait at most
 * @rps - userspace client to charge for a waitboost
 *
 * i915_gem_active_wait_unlocked() waits until the request is completed before
 * returning, without requiring any locks to be held. Note that it does not
 * retire any requests before returning.
 *
 * This function relies on RCU in order to acquire the reference to the active
 * request without holding any locks. See __i915_gem_active_get_rcu() for the
 * gory details on how that is managed. Once the reference is acquired, we
 * can then wait upon the request, and afterwards release our reference,
 * free of any locking.
 *
 * This function wraps i915_wait_request(), see it for the full details on
 * the arguments.
 *
 * Returns 0 if successful, or a negative error code.
 */
static inline int
i915_gem_active_wait_unlocked(const struct i915_gem_active *active,
			      unsigned int flags,
			      s64 *timeout,
			      struct intel_rps_client *rps)
{
	struct drm_i915_gem_request *request;
	int ret = 0;

	request = i915_gem_active_get_unlocked(active);
	if (request) {
		ret = i915_wait_request(request, flags, timeout, rps);
		i915_gem_request_put(request);
	}

	return ret;
}

/**
 * i915_gem_active_retire - waits until the request is retired
 * @active - the active request on which to wait
 *
 * i915_gem_active_retire() waits until the request is completed,
 * and then ensures that at least the retirement handler for this
 * @active tracker is called before returning. If the @active
 * tracker is idle, the function returns immediately.
 */
static inline int __must_check
i915_gem_active_retire(struct i915_gem_active *active,
		       struct lock *mutex)
{
	struct drm_i915_gem_request *request;
	int ret;

	request = i915_gem_active_raw(active, mutex);
	if (!request)
		return 0;

	ret = i915_wait_request(request,
				I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
				NULL, NULL);
	if (ret)
		return ret;

	list_del_init(&active->link);
	RCU_INIT_POINTER(active->request, NULL);

	active->retire(active, request);

	return 0;
}

/* Convenience functions for peeking at state inside active's request whilst
 * guarded by the struct_mutex.
 */

static inline uint32_t
i915_gem_active_get_seqno(const struct i915_gem_active *active,
			  struct lock *mutex)
{
	return i915_gem_request_get_seqno(i915_gem_active_peek(active, mutex));
}

static inline struct intel_engine_cs *
i915_gem_active_get_engine(const struct i915_gem_active *active,
			   struct lock *mutex)
{
	return i915_gem_request_get_engine(i915_gem_active_peek(active, mutex));
}

#define for_each_active(mask, idx) \
	for (; mask ? idx = ffs(mask) - 1, 1 : 0; mask &= ~BIT(idx))

#endif /* I915_GEM_REQUEST_H */