/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: monk liu <monk.liu@amd.com>
 */

#include <drm/drm_auth.h>
#include <drm/drm_drv.h>
#include "amdgpu.h"
#include "amdgpu_sched.h"
#include "amdgpu_ras.h"
#include <linux/nospec.h>

#define to_amdgpu_ctx_entity(e)	\
	container_of((e), struct amdgpu_ctx_entity, entity)

const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
	[AMDGPU_HW_IP_GFX]	= 1,
	[AMDGPU_HW_IP_COMPUTE]	= 4,
	[AMDGPU_HW_IP_DMA]	= 2,
	[AMDGPU_HW_IP_UVD]	= 1,
	[AMDGPU_HW_IP_VCE]	= 1,
	[AMDGPU_HW_IP_UVD_ENC]	= 1,
	[AMDGPU_HW_IP_VCN_DEC]	= 1,
	[AMDGPU_HW_IP_VCN_ENC]	= 1,
	[AMDGPU_HW_IP_VCN_JPEG]	= 1,
};

bool amdgpu_ctx_priority_is_valid(int32_t ctx_prio)
{
	switch (ctx_prio) {
	case AMDGPU_CTX_PRIORITY_VERY_LOW:
	case AMDGPU_CTX_PRIORITY_LOW:
	case AMDGPU_CTX_PRIORITY_NORMAL:
	case AMDGPU_CTX_PRIORITY_HIGH:
	case AMDGPU_CTX_PRIORITY_VERY_HIGH:
		return true;
	default:
	case AMDGPU_CTX_PRIORITY_UNSET:
		return false;
	}
}

static enum drm_sched_priority
amdgpu_ctx_to_drm_sched_prio(int32_t ctx_prio)
{
	switch (ctx_prio) {
	case AMDGPU_CTX_PRIORITY_UNSET:
		return DRM_SCHED_PRIORITY_UNSET;

	case AMDGPU_CTX_PRIORITY_VERY_LOW:
		return DRM_SCHED_PRIORITY_MIN;

	case AMDGPU_CTX_PRIORITY_LOW:
		return DRM_SCHED_PRIORITY_MIN;

	case AMDGPU_CTX_PRIORITY_NORMAL:
		return DRM_SCHED_PRIORITY_NORMAL;

	case AMDGPU_CTX_PRIORITY_HIGH:
		return DRM_SCHED_PRIORITY_HIGH;

	case AMDGPU_CTX_PRIORITY_VERY_HIGH:
		return DRM_SCHED_PRIORITY_HIGH;

	/* This should not happen as we sanitized the userspace-provided
	 * priority already, WARN if this happens.
	 */
	default:
		WARN(1, "Invalid context priority %d\n", ctx_prio);
		return DRM_SCHED_PRIORITY_NORMAL;
	}
}

static int amdgpu_ctx_priority_permit(struct drm_file *filp,
				      int32_t priority)
{
	if (!amdgpu_ctx_priority_is_valid(priority))
		return -EINVAL;

	/* NORMAL and below are accessible by everyone */
	if (priority <= AMDGPU_CTX_PRIORITY_NORMAL)
		return 0;

	if (capable(CAP_SYS_NICE))
		return 0;

	if (drm_is_current_master(filp))
		return 0;

	return -EACCES;
}
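
/*
 * Illustrative sketch (not driver code): a client that holds neither
 * CAP_SYS_NICE nor DRM master status is refused anything above NORMAL,
 * so a hypothetical caller could fall back like this:
 *
 *	r = amdgpu_ctx_priority_permit(filp, AMDGPU_CTX_PRIORITY_HIGH);
 *	if (r == -EACCES)
 *		priority = AMDGPU_CTX_PRIORITY_NORMAL;	// hypothetical fallback
 */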

static enum amdgpu_gfx_pipe_priority amdgpu_ctx_prio_to_gfx_pipe_prio(int32_t prio)
{
	switch (prio) {
	case AMDGPU_CTX_PRIORITY_HIGH:
	case AMDGPU_CTX_PRIORITY_VERY_HIGH:
		return AMDGPU_GFX_PIPE_PRIO_HIGH;
	default:
		return AMDGPU_GFX_PIPE_PRIO_NORMAL;
	}
}

static enum amdgpu_ring_priority_level amdgpu_ctx_sched_prio_to_ring_prio(int32_t prio)
{
	switch (prio) {
	case AMDGPU_CTX_PRIORITY_HIGH:
		return AMDGPU_RING_PRIO_1;
	case AMDGPU_CTX_PRIORITY_VERY_HIGH:
		return AMDGPU_RING_PRIO_2;
	default:
		return AMDGPU_RING_PRIO_0;
	}
}

static unsigned int amdgpu_ctx_get_hw_prio(struct amdgpu_ctx *ctx, u32 hw_ip)
{
	struct amdgpu_device *adev = ctx->mgr->adev;
	unsigned int hw_prio;
	int32_t ctx_prio;

	ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
			ctx->init_priority : ctx->override_priority;

	switch (hw_ip) {
	case AMDGPU_HW_IP_GFX:
	case AMDGPU_HW_IP_COMPUTE:
		hw_prio = amdgpu_ctx_prio_to_gfx_pipe_prio(ctx_prio);
		break;
	case AMDGPU_HW_IP_VCE:
	case AMDGPU_HW_IP_VCN_ENC:
		hw_prio = amdgpu_ctx_sched_prio_to_ring_prio(ctx_prio);
		break;
	default:
		hw_prio = AMDGPU_RING_PRIO_DEFAULT;
		break;
	}

	hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM);
	if (adev->gpu_sched[hw_ip][hw_prio].num_scheds == 0)
		hw_prio = AMDGPU_RING_PRIO_DEFAULT;

	return hw_prio;
}

/* Calculate the time spent on the hw */
static ktime_t amdgpu_ctx_fence_time(struct dma_fence *fence)
{
	struct drm_sched_fence *s_fence;

	if (!fence)
		return ns_to_ktime(0);

	/* When the fence is not even scheduled it can't have spent time */
	s_fence = to_drm_sched_fence(fence);
	if (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &s_fence->scheduled.flags))
		return ns_to_ktime(0);

	/* When it is still running, account for the time already spent */
	if (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &s_fence->finished.flags))
		return ktime_sub(ktime_get(), s_fence->scheduled.timestamp);

	return ktime_sub(s_fence->finished.timestamp,
			 s_fence->scheduled.timestamp);
}
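
/*
 * Worked example (assumed timestamps, for illustration only): a fence
 * scheduled at t = 100us and finished at t = 150us accounts 50us. If it
 * is still running when queried at t = 180us, the partial time
 * ktime_get() - scheduled.timestamp = 80us is reported instead, so a
 * long-running job shows up in the totals before it completes.
 */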

static ktime_t amdgpu_ctx_entity_time(struct amdgpu_ctx *ctx,
				      struct amdgpu_ctx_entity *centity)
{
	ktime_t res = ns_to_ktime(0);
	uint32_t i;

	spin_lock(&ctx->ring_lock);
	for (i = 0; i < amdgpu_sched_jobs; i++) {
		res = ktime_add(res, amdgpu_ctx_fence_time(centity->fences[i]));
	}
	spin_unlock(&ctx->ring_lock);
	return res;
}

static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip,
				  const u32 ring)
{
	struct drm_gpu_scheduler **scheds = NULL, *sched = NULL;
	struct amdgpu_device *adev = ctx->mgr->adev;
	struct amdgpu_ctx_entity *entity;
	enum drm_sched_priority drm_prio;
	unsigned int hw_prio, num_scheds;
	int32_t ctx_prio;
	int r;

	entity = kzalloc(struct_size(entity, fences, amdgpu_sched_jobs),
			 GFP_KERNEL);
	if (!entity)
		return -ENOMEM;

	ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
			ctx->init_priority : ctx->override_priority;
	entity->hw_ip = hw_ip;
	entity->sequence = 1;
	hw_prio = amdgpu_ctx_get_hw_prio(ctx, hw_ip);
	drm_prio = amdgpu_ctx_to_drm_sched_prio(ctx_prio);

	hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM);
	scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
	num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds;

	/* disable load balancing if the hw engine retains context among
	 * dependent jobs */
	if (hw_ip == AMDGPU_HW_IP_VCN_ENC ||
	    hw_ip == AMDGPU_HW_IP_VCN_DEC ||
	    hw_ip == AMDGPU_HW_IP_UVD_ENC ||
	    hw_ip == AMDGPU_HW_IP_UVD) {
		sched = drm_sched_pick_best(scheds, num_scheds);
		scheds = &sched;
		num_scheds = 1;
	}

	r = drm_sched_entity_init(&entity->entity, drm_prio, scheds, num_scheds,
				  &ctx->guilty);
	if (r)
		goto error_free_entity;

	/* It's not an error if we fail to install the new entity */
	if (cmpxchg(&ctx->entities[hw_ip][ring], NULL, entity))
		goto cleanup_entity;

	return 0;

cleanup_entity:
	drm_sched_entity_fini(&entity->entity);

error_free_entity:
	kfree(entity);

	return r;
}
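
/*
 * Note on the lockless install above (illustration, not additional driver
 * logic): two threads may race through amdgpu_ctx_get_entity() for the
 * same (hw_ip, ring) slot and both build an entity. cmpxchg() lets
 * exactly one of them publish its pointer; the loser tears its copy down
 * via cleanup_entity and still returns 0, so callers simply reuse
 * whatever entity ended up in the slot.
 */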

static ktime_t amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity)
{
	ktime_t res = ns_to_ktime(0);
	int i;

	if (!entity)
		return res;

	for (i = 0; i < amdgpu_sched_jobs; ++i) {
		res = ktime_add(res, amdgpu_ctx_fence_time(entity->fences[i]));
		dma_fence_put(entity->fences[i]);
	}

	kfree(entity);
	return res;
}

static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx,
					u32 *stable_pstate)
{
	struct amdgpu_device *adev = ctx->mgr->adev;
	enum amd_dpm_forced_level current_level;

	current_level = amdgpu_dpm_get_performance_level(adev);

	switch (current_level) {
	case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD:
		*stable_pstate = AMDGPU_CTX_STABLE_PSTATE_STANDARD;
		break;
	case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK:
		*stable_pstate = AMDGPU_CTX_STABLE_PSTATE_MIN_SCLK;
		break;
	case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK:
		*stable_pstate = AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK;
		break;
	case AMD_DPM_FORCED_LEVEL_PROFILE_PEAK:
		*stable_pstate = AMDGPU_CTX_STABLE_PSTATE_PEAK;
		break;
	default:
		*stable_pstate = AMDGPU_CTX_STABLE_PSTATE_NONE;
		break;
	}
	return 0;
}

static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority,
			   struct drm_file *filp, struct amdgpu_ctx *ctx)
{
	u32 current_stable_pstate;
	int r;

	r = amdgpu_ctx_priority_permit(filp, priority);
	if (r)
		return r;

	memset(ctx, 0, sizeof(*ctx));

	kref_init(&ctx->refcount);
	ctx->mgr = mgr;
	mtx_init(&ctx->ring_lock, IPL_TTY);

	ctx->reset_counter = atomic_read(&mgr->adev->gpu_reset_counter);
	ctx->reset_counter_query = ctx->reset_counter;
	ctx->vram_lost_counter = atomic_read(&mgr->adev->vram_lost_counter);
	ctx->init_priority = priority;
	ctx->override_priority = AMDGPU_CTX_PRIORITY_UNSET;

	r = amdgpu_ctx_get_stable_pstate(ctx, &current_stable_pstate);
	if (r)
		return r;

	if (mgr->adev->pm.stable_pstate_ctx)
		ctx->stable_pstate = mgr->adev->pm.stable_pstate_ctx->stable_pstate;
	else
		ctx->stable_pstate = current_stable_pstate;

	return 0;
}

static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx,
					u32 stable_pstate)
{
	struct amdgpu_device *adev = ctx->mgr->adev;
	enum amd_dpm_forced_level level;
	u32 current_stable_pstate;
	int r;

	mutex_lock(&adev->pm.stable_pstate_ctx_lock);
	if (adev->pm.stable_pstate_ctx && adev->pm.stable_pstate_ctx != ctx) {
		r = -EBUSY;
		goto done;
	}

	r = amdgpu_ctx_get_stable_pstate(ctx, &current_stable_pstate);
	if (r || (stable_pstate == current_stable_pstate))
		goto done;

	switch (stable_pstate) {
	case AMDGPU_CTX_STABLE_PSTATE_NONE:
		level = AMD_DPM_FORCED_LEVEL_AUTO;
		break;
	case AMDGPU_CTX_STABLE_PSTATE_STANDARD:
		level = AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD;
		break;
	case AMDGPU_CTX_STABLE_PSTATE_MIN_SCLK:
		level = AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK;
		break;
	case AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK:
		level = AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK;
		break;
	case AMDGPU_CTX_STABLE_PSTATE_PEAK:
		level = AMD_DPM_FORCED_LEVEL_PROFILE_PEAK;
		break;
	default:
		r = -EINVAL;
		goto done;
	}

	r = amdgpu_dpm_force_performance_level(adev, level);

	if (level == AMD_DPM_FORCED_LEVEL_AUTO)
		adev->pm.stable_pstate_ctx = NULL;
	else
		adev->pm.stable_pstate_ctx = ctx;
done:
	mutex_unlock(&adev->pm.stable_pstate_ctx_lock);

	return r;
}
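
/*
 * Illustrative only: one context at a time can pin the clocks, e.g. for
 * profiling, by setting AMDGPU_CTX_STABLE_PSTATE_PEAK. While that
 * context owns pm.stable_pstate_ctx, a second context attempting any
 * set gets -EBUSY; setting AMDGPU_CTX_STABLE_PSTATE_NONE (which maps to
 * AMD_DPM_FORCED_LEVEL_AUTO) releases the ownership again.
 */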

static void amdgpu_ctx_fini(struct kref *ref)
{
	struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
	struct amdgpu_ctx_mgr *mgr = ctx->mgr;
	struct amdgpu_device *adev = mgr->adev;
	unsigned i, j, idx;

	if (!adev)
		return;

	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
		for (j = 0; j < AMDGPU_MAX_ENTITY_NUM; ++j) {
			ktime_t spend;

			spend = amdgpu_ctx_fini_entity(ctx->entities[i][j]);
			atomic64_add(ktime_to_ns(spend), &mgr->time_spend[i]);
		}
	}

	if (drm_dev_enter(adev_to_drm(adev), &idx)) {
		amdgpu_ctx_set_stable_pstate(ctx, ctx->stable_pstate);
		drm_dev_exit(idx);
	}

	kfree(ctx);
}

int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
			  u32 ring, struct drm_sched_entity **entity)
{
	int r;

	if (hw_ip >= AMDGPU_HW_IP_NUM) {
		DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
		return -EINVAL;
	}

	/* Right now all IPs have only one instance - multiple rings. */
	if (instance != 0) {
		DRM_DEBUG("invalid ip instance: %d\n", instance);
		return -EINVAL;
	}

	if (ring >= amdgpu_ctx_num_entities[hw_ip]) {
		DRM_DEBUG("invalid ring: %d %d\n", hw_ip, ring);
		return -EINVAL;
	}

	if (ctx->entities[hw_ip][ring] == NULL) {
		r = amdgpu_ctx_init_entity(ctx, hw_ip, ring);
		if (r)
			return r;
	}

	*entity = &ctx->entities[hw_ip][ring]->entity;
	return 0;
}
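
/*
 * Typical (hypothetical) caller, e.g. from the command submission path:
 *
 *	struct drm_sched_entity *entity;
 *
 *	r = amdgpu_ctx_get_entity(ctx, AMDGPU_HW_IP_COMPUTE, 0, 2, &entity);
 *
 * Only (hw_ip, ring) pairs within amdgpu_ctx_num_entities[] are valid;
 * ring 2 works here because COMPUTE advertises 4 entities, while the
 * same ring on GFX would be rejected with -EINVAL. Entities are created
 * lazily on first use.
 */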

static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
			    struct amdgpu_fpriv *fpriv,
			    struct drm_file *filp,
			    int32_t priority,
			    uint32_t *id)
{
	struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
	struct amdgpu_ctx *ctx;
	int r;

	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;

	mutex_lock(&mgr->lock);
	r = idr_alloc(&mgr->ctx_handles, ctx, 1, AMDGPU_VM_MAX_NUM_CTX, GFP_KERNEL);
	if (r < 0) {
		mutex_unlock(&mgr->lock);
		kfree(ctx);
		return r;
	}

	*id = (uint32_t)r;
	r = amdgpu_ctx_init(mgr, priority, filp, ctx);
	if (r) {
		idr_remove(&mgr->ctx_handles, *id);
		*id = 0;
		kfree(ctx);
	}
	mutex_unlock(&mgr->lock);
	return r;
}

static void amdgpu_ctx_do_release(struct kref *ref)
{
	struct amdgpu_ctx *ctx;
	u32 i, j;

	ctx = container_of(ref, struct amdgpu_ctx, refcount);
	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
			if (!ctx->entities[i][j])
				continue;

			drm_sched_entity_destroy(&ctx->entities[i][j]->entity);
		}
	}

	amdgpu_ctx_fini(ref);
}

static int amdgpu_ctx_free(struct amdgpu_fpriv *fpriv, uint32_t id)
{
	struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
	struct amdgpu_ctx *ctx;

	mutex_lock(&mgr->lock);
	ctx = idr_remove(&mgr->ctx_handles, id);
	if (ctx)
		kref_put(&ctx->refcount, amdgpu_ctx_do_release);
	mutex_unlock(&mgr->lock);
	return ctx ? 0 : -EINVAL;
}

static int amdgpu_ctx_query(struct amdgpu_device *adev,
			    struct amdgpu_fpriv *fpriv, uint32_t id,
			    union drm_amdgpu_ctx_out *out)
{
	struct amdgpu_ctx *ctx;
	struct amdgpu_ctx_mgr *mgr;
	unsigned reset_counter;

	if (!fpriv)
		return -EINVAL;

	mgr = &fpriv->ctx_mgr;
	mutex_lock(&mgr->lock);
	ctx = idr_find(&mgr->ctx_handles, id);
	if (!ctx) {
		mutex_unlock(&mgr->lock);
		return -EINVAL;
	}

	/* TODO: these two are always zero */
	out->state.flags = 0x0;
	out->state.hangs = 0x0;

	/* determine if a GPU reset has occurred since the last call */
	reset_counter = atomic_read(&adev->gpu_reset_counter);
	/* TODO: this should ideally return NO, GUILTY, or INNOCENT. */
	if (ctx->reset_counter_query == reset_counter)
		out->state.reset_status = AMDGPU_CTX_NO_RESET;
	else
		out->state.reset_status = AMDGPU_CTX_UNKNOWN_RESET;
	ctx->reset_counter_query = reset_counter;

	mutex_unlock(&mgr->lock);
	return 0;
}

#define AMDGPU_RAS_COUNTE_DELAY_MS 3000

static int amdgpu_ctx_query2(struct amdgpu_device *adev,
			     struct amdgpu_fpriv *fpriv, uint32_t id,
			     union drm_amdgpu_ctx_out *out)
{
	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
	struct amdgpu_ctx *ctx;
	struct amdgpu_ctx_mgr *mgr;

	if (!fpriv)
		return -EINVAL;

	mgr = &fpriv->ctx_mgr;
	mutex_lock(&mgr->lock);
	ctx = idr_find(&mgr->ctx_handles, id);
	if (!ctx) {
		mutex_unlock(&mgr->lock);
		return -EINVAL;
	}

	out->state.flags = 0x0;
	out->state.hangs = 0x0;

	if (ctx->reset_counter != atomic_read(&adev->gpu_reset_counter))
		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RESET;

	if (ctx->vram_lost_counter != atomic_read(&adev->vram_lost_counter))
		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST;

	if (atomic_read(&ctx->guilty))
		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_GUILTY;

	if (adev->ras_enabled && con) {
		/* Return the cached values in O(1),
		 * and schedule delayed work to cache
		 * new values.
		 */
		int ce_count, ue_count;

		ce_count = atomic_read(&con->ras_ce_count);
		ue_count = atomic_read(&con->ras_ue_count);

		if (ce_count != ctx->ras_counter_ce) {
			ctx->ras_counter_ce = ce_count;
			out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_CE;
		}

		if (ue_count != ctx->ras_counter_ue) {
			ctx->ras_counter_ue = ue_count;
			out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_UE;
		}

		schedule_delayed_work(&con->ras_counte_delay_work,
				      msecs_to_jiffies(AMDGPU_RAS_COUNTE_DELAY_MS));
	}

	mutex_unlock(&mgr->lock);
	return 0;
}

static int amdgpu_ctx_stable_pstate(struct amdgpu_device *adev,
				    struct amdgpu_fpriv *fpriv, uint32_t id,
				    bool set, u32 *stable_pstate)
{
	struct amdgpu_ctx *ctx;
	struct amdgpu_ctx_mgr *mgr;
	int r;

	if (!fpriv)
		return -EINVAL;

	mgr = &fpriv->ctx_mgr;
	mutex_lock(&mgr->lock);
	ctx = idr_find(&mgr->ctx_handles, id);
	if (!ctx) {
		mutex_unlock(&mgr->lock);
		return -EINVAL;
	}

	if (set)
		r = amdgpu_ctx_set_stable_pstate(ctx, *stable_pstate);
	else
		r = amdgpu_ctx_get_stable_pstate(ctx, stable_pstate);

	mutex_unlock(&mgr->lock);
	return r;
}

int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *filp)
{
	int r;
	uint32_t id, stable_pstate;
	int32_t priority;

	union drm_amdgpu_ctx *args = data;
	struct amdgpu_device *adev = drm_to_adev(dev);
	struct amdgpu_fpriv *fpriv = filp->driver_priv;

	id = args->in.ctx_id;
	priority = args->in.priority;

	/* For backwards compatibility reasons, we need to accept
	 * ioctls with garbage in the priority field */
	if (!amdgpu_ctx_priority_is_valid(priority))
		priority = AMDGPU_CTX_PRIORITY_NORMAL;

	switch (args->in.op) {
	case AMDGPU_CTX_OP_ALLOC_CTX:
		r = amdgpu_ctx_alloc(adev, fpriv, filp, priority, &id);
		args->out.alloc.ctx_id = id;
		break;
	case AMDGPU_CTX_OP_FREE_CTX:
		r = amdgpu_ctx_free(fpriv, id);
		break;
	case AMDGPU_CTX_OP_QUERY_STATE:
		r = amdgpu_ctx_query(adev, fpriv, id, &args->out);
		break;
	case AMDGPU_CTX_OP_QUERY_STATE2:
		r = amdgpu_ctx_query2(adev, fpriv, id, &args->out);
		break;
	case AMDGPU_CTX_OP_GET_STABLE_PSTATE:
		if (args->in.flags)
			return -EINVAL;
		r = amdgpu_ctx_stable_pstate(adev, fpriv, id, false, &stable_pstate);
		if (!r)
			args->out.pstate.flags = stable_pstate;
		break;
	case AMDGPU_CTX_OP_SET_STABLE_PSTATE:
		if (args->in.flags & ~AMDGPU_CTX_STABLE_PSTATE_FLAGS_MASK)
			return -EINVAL;
		stable_pstate = args->in.flags & AMDGPU_CTX_STABLE_PSTATE_FLAGS_MASK;
		if (stable_pstate > AMDGPU_CTX_STABLE_PSTATE_PEAK)
			return -EINVAL;
		r = amdgpu_ctx_stable_pstate(adev, fpriv, id, true, &stable_pstate);
		break;
	default:
		return -EINVAL;
	}

	return r;
}
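
/*
 * Minimal userspace sketch for this ioctl (illustrative only, assuming
 * libdrm's drmCommandWriteRead() and an already-open render-node fd;
 * error handling omitted):
 *
 *	union drm_amdgpu_ctx args = { 0 };
 *	uint32_t ctx_id;
 *
 *	args.in.op = AMDGPU_CTX_OP_ALLOC_CTX;
 *	args.in.priority = AMDGPU_CTX_PRIORITY_NORMAL;
 *	drmCommandWriteRead(fd, DRM_AMDGPU_CTX, &args, sizeof(args));
 *	ctx_id = args.out.alloc.ctx_id;
 *
 *	memset(&args, 0, sizeof(args));
 *	args.in.op = AMDGPU_CTX_OP_FREE_CTX;
 *	args.in.ctx_id = ctx_id;
 *	drmCommandWriteRead(fd, DRM_AMDGPU_CTX, &args, sizeof(args));
 */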

struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id)
{
	struct amdgpu_ctx *ctx;
	struct amdgpu_ctx_mgr *mgr;

	if (!fpriv)
		return NULL;

	mgr = &fpriv->ctx_mgr;

	mutex_lock(&mgr->lock);
	ctx = idr_find(&mgr->ctx_handles, id);
	if (ctx)
		kref_get(&ctx->refcount);
	mutex_unlock(&mgr->lock);
	return ctx;
}

int amdgpu_ctx_put(struct amdgpu_ctx *ctx)
{
	if (ctx == NULL)
		return -EINVAL;

	kref_put(&ctx->refcount, amdgpu_ctx_do_release);
	return 0;
}

uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
			      struct drm_sched_entity *entity,
			      struct dma_fence *fence)
{
	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
	uint64_t seq = centity->sequence;
	struct dma_fence *other = NULL;
	unsigned idx = 0;

	idx = seq & (amdgpu_sched_jobs - 1);
	other = centity->fences[idx];
	WARN_ON(other && !dma_fence_is_signaled(other));

	dma_fence_get(fence);

	spin_lock(&ctx->ring_lock);
	centity->fences[idx] = fence;
	centity->sequence++;
	spin_unlock(&ctx->ring_lock);

	atomic64_add(ktime_to_ns(amdgpu_ctx_fence_time(other)),
		     &ctx->mgr->time_spend[centity->hw_ip]);

	dma_fence_put(other);
	return seq;
}

struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
				       struct drm_sched_entity *entity,
				       uint64_t seq)
{
	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
	struct dma_fence *fence;

	spin_lock(&ctx->ring_lock);

	if (seq == ~0ull)
		seq = centity->sequence - 1;

	if (seq >= centity->sequence) {
		spin_unlock(&ctx->ring_lock);
		return ERR_PTR(-EINVAL);
	}

	if (seq + amdgpu_sched_jobs < centity->sequence) {
		spin_unlock(&ctx->ring_lock);
		return NULL;
	}

	fence = dma_fence_get(centity->fences[seq & (amdgpu_sched_jobs - 1)]);
	spin_unlock(&ctx->ring_lock);

	return fence;
}
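
/*
 * Worked example (assuming the default amdgpu_sched_jobs = 32, a power
 * of two): each entity keeps its last 32 fences in a ring indexed by
 * seq & 31. If centity->sequence is 100, then seq 99 down to 68 can
 * still be looked up, seq >= 100 yields -EINVAL (not emitted yet), and
 * seq 67 or older returns NULL because its slot has been overwritten,
 * which implies the fence signaled long ago.
 */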

static void amdgpu_ctx_set_entity_priority(struct amdgpu_ctx *ctx,
					   struct amdgpu_ctx_entity *aentity,
					   int hw_ip,
					   int32_t priority)
{
	struct amdgpu_device *adev = ctx->mgr->adev;
	unsigned int hw_prio;
	struct drm_gpu_scheduler **scheds = NULL;
	unsigned num_scheds;

	/* set sw priority */
	drm_sched_entity_set_priority(&aentity->entity,
				      amdgpu_ctx_to_drm_sched_prio(priority));

	/* set hw priority */
	if (hw_ip == AMDGPU_HW_IP_COMPUTE || hw_ip == AMDGPU_HW_IP_GFX) {
		hw_prio = amdgpu_ctx_get_hw_prio(ctx, hw_ip);
		hw_prio = array_index_nospec(hw_prio, AMDGPU_RING_PRIO_MAX);
		scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
		num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds;
		drm_sched_entity_modify_sched(&aentity->entity, scheds,
					      num_scheds);
	}
}

void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
				  int32_t priority)
{
	int32_t ctx_prio;
	unsigned i, j;

	ctx->override_priority = priority;

	ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
			ctx->init_priority : ctx->override_priority;
	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
			if (!ctx->entities[i][j])
				continue;

			amdgpu_ctx_set_entity_priority(ctx, ctx->entities[i][j],
						       i, ctx_prio);
		}
	}
}

int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
			       struct drm_sched_entity *entity)
{
	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
	struct dma_fence *other;
	unsigned idx;
	long r;

	spin_lock(&ctx->ring_lock);
	idx = centity->sequence & (amdgpu_sched_jobs - 1);
	other = dma_fence_get(centity->fences[idx]);
	spin_unlock(&ctx->ring_lock);

	if (!other)
		return 0;

	r = dma_fence_wait(other, true);
	if (r < 0 && r != -ERESTARTSYS)
		DRM_ERROR("Error (%ld) waiting for fence!\n", r);

	dma_fence_put(other);
	return r;
}

void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr,
			 struct amdgpu_device *adev)
{
	unsigned int i;

	mgr->adev = adev;
	rw_init(&mgr->lock, "mgrlk");
	idr_init_base(&mgr->ctx_handles, 1);

	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
		atomic64_set(&mgr->time_spend[i], 0);
}

long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
{
	struct amdgpu_ctx *ctx;
	struct idr *idp;
	uint32_t id, i, j;

	idp = &mgr->ctx_handles;

	mutex_lock(&mgr->lock);
	idr_for_each_entry(idp, ctx, id) {
		for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
			for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
				struct drm_sched_entity *entity;

				if (!ctx->entities[i][j])
					continue;

				entity = &ctx->entities[i][j]->entity;
				timeout = drm_sched_entity_flush(entity, timeout);
			}
		}
	}
	mutex_unlock(&mgr->lock);
	return timeout;
}

void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
{
	struct amdgpu_ctx *ctx;
	struct idr *idp;
	uint32_t id, i, j;

	idp = &mgr->ctx_handles;

	idr_for_each_entry(idp, ctx, id) {
		if (kref_read(&ctx->refcount) != 1) {
			DRM_ERROR("ctx %p is still alive\n", ctx);
			continue;
		}

		for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
			for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
				struct drm_sched_entity *entity;

				if (!ctx->entities[i][j])
					continue;

				entity = &ctx->entities[i][j]->entity;
				drm_sched_entity_fini(entity);
			}
		}
	}
}

void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
{
	struct amdgpu_ctx *ctx;
	struct idr *idp;
	uint32_t id;

	amdgpu_ctx_mgr_entity_fini(mgr);

	idp = &mgr->ctx_handles;

	idr_for_each_entry(idp, ctx, id) {
		if (kref_put(&ctx->refcount, amdgpu_ctx_fini) != 1)
			DRM_ERROR("ctx %p is still alive\n", ctx);
	}

	idr_destroy(&mgr->ctx_handles);
	mutex_destroy(&mgr->lock);
}

void amdgpu_ctx_mgr_usage(struct amdgpu_ctx_mgr *mgr,
			  ktime_t usage[AMDGPU_HW_IP_NUM])
{
	struct amdgpu_ctx *ctx;
	unsigned int hw_ip, i;
	uint32_t id;

	/*
	 * This is a little bit racy because it can be that a ctx or a fence
	 * is destroyed just in the moment we try to account it. But that is
	 * ok since exactly that case is explicitly allowed by the interface.
	 */
	mutex_lock(&mgr->lock);
	for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
		uint64_t ns = atomic64_read(&mgr->time_spend[hw_ip]);

		usage[hw_ip] = ns_to_ktime(ns);
	}

	idr_for_each_entry(&mgr->ctx_handles, ctx, id) {
		for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
			for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i) {
				struct amdgpu_ctx_entity *centity;
				ktime_t spend;

				centity = ctx->entities[hw_ip][i];
				if (!centity)
					continue;
				spend = amdgpu_ctx_entity_time(ctx, centity);
				usage[hw_ip] = ktime_add(usage[hw_ip], spend);
			}
		}
	}
	mutex_unlock(&mgr->lock);
}
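
/*
 * Usage note (illustrative): the totals returned here combine the
 * manager-wide time_spend[] counters (fences already retired from the
 * per-entity rings) with the still-live fences walked per context, so a
 * consumer such as an fdinfo-style "time spent per engine" report sees
 * both finished and in-flight work. The consumer-side wiring lives
 * outside this file and is only assumed here.
 */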