1*1bb76ff1Sjsg /* 2*1bb76ff1Sjsg * Copyright 2021 Advanced Micro Devices, Inc. 3*1bb76ff1Sjsg * 4*1bb76ff1Sjsg * Permission is hereby granted, free of charge, to any person obtaining a 5*1bb76ff1Sjsg * copy of this software and associated documentation files (the "Software"), 6*1bb76ff1Sjsg * to deal in the Software without restriction, including without limitation 7*1bb76ff1Sjsg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8*1bb76ff1Sjsg * and/or sell copies of the Software, and to permit persons to whom the 9*1bb76ff1Sjsg * Software is furnished to do so, subject to the following conditions: 10*1bb76ff1Sjsg * 11*1bb76ff1Sjsg * The above copyright notice and this permission notice shall be included in 12*1bb76ff1Sjsg * all copies or substantial portions of the Software. 13*1bb76ff1Sjsg * 14*1bb76ff1Sjsg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15*1bb76ff1Sjsg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16*1bb76ff1Sjsg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17*1bb76ff1Sjsg * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18*1bb76ff1Sjsg * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19*1bb76ff1Sjsg * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20*1bb76ff1Sjsg * OTHER DEALINGS IN THE SOFTWARE. 21*1bb76ff1Sjsg * 22*1bb76ff1Sjsg */ 23*1bb76ff1Sjsg 24*1bb76ff1Sjsg #include "sienna_cichlid.h" 25*1bb76ff1Sjsg #include "amdgpu_reset.h" 26*1bb76ff1Sjsg #include "amdgpu_amdkfd.h" 27*1bb76ff1Sjsg #include "amdgpu_dpm.h" 28*1bb76ff1Sjsg #include "amdgpu_job.h" 29*1bb76ff1Sjsg #include "amdgpu_ring.h" 30*1bb76ff1Sjsg #include "amdgpu_ras.h" 31*1bb76ff1Sjsg #include "amdgpu_psp.h" 32*1bb76ff1Sjsg #include "amdgpu_xgmi.h" 33*1bb76ff1Sjsg 34*1bb76ff1Sjsg static bool sienna_cichlid_is_mode2_default(struct amdgpu_reset_control *reset_ctl) 35*1bb76ff1Sjsg { 36*1bb76ff1Sjsg #if 0 37*1bb76ff1Sjsg struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle; 38*1bb76ff1Sjsg 39*1bb76ff1Sjsg if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7) && 40*1bb76ff1Sjsg adev->pm.fw_version >= 0x3a5500 && !amdgpu_sriov_vf(adev)) 41*1bb76ff1Sjsg return true; 42*1bb76ff1Sjsg #endif 43*1bb76ff1Sjsg return false; 44*1bb76ff1Sjsg } 45*1bb76ff1Sjsg 46*1bb76ff1Sjsg static struct amdgpu_reset_handler * 47*1bb76ff1Sjsg sienna_cichlid_get_reset_handler(struct amdgpu_reset_control *reset_ctl, 48*1bb76ff1Sjsg struct amdgpu_reset_context *reset_context) 49*1bb76ff1Sjsg { 50*1bb76ff1Sjsg struct amdgpu_reset_handler *handler; 51*1bb76ff1Sjsg 52*1bb76ff1Sjsg if (reset_context->method != AMD_RESET_METHOD_NONE) { 53*1bb76ff1Sjsg list_for_each_entry(handler, &reset_ctl->reset_handlers, 54*1bb76ff1Sjsg handler_list) { 55*1bb76ff1Sjsg if (handler->reset_method == reset_context->method) 56*1bb76ff1Sjsg return handler; 57*1bb76ff1Sjsg } 58*1bb76ff1Sjsg } 59*1bb76ff1Sjsg 60*1bb76ff1Sjsg if (sienna_cichlid_is_mode2_default(reset_ctl)) { 61*1bb76ff1Sjsg list_for_each_entry (handler, &reset_ctl->reset_handlers, 62*1bb76ff1Sjsg handler_list) { 63*1bb76ff1Sjsg if (handler->reset_method == AMD_RESET_METHOD_MODE2) 64*1bb76ff1Sjsg return handler; 65*1bb76ff1Sjsg } 66*1bb76ff1Sjsg } 67*1bb76ff1Sjsg 68*1bb76ff1Sjsg return NULL; 69*1bb76ff1Sjsg } 70*1bb76ff1Sjsg 71*1bb76ff1Sjsg static int sienna_cichlid_mode2_suspend_ip(struct amdgpu_device *adev) 72*1bb76ff1Sjsg { 73*1bb76ff1Sjsg int r, i; 74*1bb76ff1Sjsg 75*1bb76ff1Sjsg amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); 76*1bb76ff1Sjsg amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); 77*1bb76ff1Sjsg 78*1bb76ff1Sjsg for (i = adev->num_ip_blocks - 1; i >= 0; i--) { 79*1bb76ff1Sjsg if (!(adev->ip_blocks[i].version->type == 80*1bb76ff1Sjsg AMD_IP_BLOCK_TYPE_GFX || 81*1bb76ff1Sjsg adev->ip_blocks[i].version->type == 82*1bb76ff1Sjsg AMD_IP_BLOCK_TYPE_SDMA)) 83*1bb76ff1Sjsg continue; 84*1bb76ff1Sjsg 85*1bb76ff1Sjsg r = adev->ip_blocks[i].version->funcs->suspend(adev); 86*1bb76ff1Sjsg 87*1bb76ff1Sjsg if (r) { 88*1bb76ff1Sjsg dev_err(adev->dev, 89*1bb76ff1Sjsg "suspend of IP block <%s> failed %d\n", 90*1bb76ff1Sjsg adev->ip_blocks[i].version->funcs->name, r); 91*1bb76ff1Sjsg return r; 92*1bb76ff1Sjsg } 93*1bb76ff1Sjsg adev->ip_blocks[i].status.hw = false; 94*1bb76ff1Sjsg } 95*1bb76ff1Sjsg 96*1bb76ff1Sjsg return r; 97*1bb76ff1Sjsg } 98*1bb76ff1Sjsg 99*1bb76ff1Sjsg static int 100*1bb76ff1Sjsg sienna_cichlid_mode2_prepare_hwcontext(struct amdgpu_reset_control *reset_ctl, 101*1bb76ff1Sjsg struct amdgpu_reset_context *reset_context) 102*1bb76ff1Sjsg { 103*1bb76ff1Sjsg int r = 0; 104*1bb76ff1Sjsg struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle; 105*1bb76ff1Sjsg 106*1bb76ff1Sjsg if (!amdgpu_sriov_vf(adev)) { 107*1bb76ff1Sjsg if (adev->gfxhub.funcs->mode2_save_regs) 108*1bb76ff1Sjsg adev->gfxhub.funcs->mode2_save_regs(adev); 109*1bb76ff1Sjsg if (adev->gfxhub.funcs->halt) 110*1bb76ff1Sjsg adev->gfxhub.funcs->halt(adev); 111*1bb76ff1Sjsg r = sienna_cichlid_mode2_suspend_ip(adev); 112*1bb76ff1Sjsg } 113*1bb76ff1Sjsg 114*1bb76ff1Sjsg return r; 115*1bb76ff1Sjsg } 116*1bb76ff1Sjsg 117*1bb76ff1Sjsg static void sienna_cichlid_async_reset(struct work_struct *work) 118*1bb76ff1Sjsg { 119*1bb76ff1Sjsg struct amdgpu_reset_handler *handler; 120*1bb76ff1Sjsg struct amdgpu_reset_control *reset_ctl = 121*1bb76ff1Sjsg container_of(work, struct amdgpu_reset_control, reset_work); 122*1bb76ff1Sjsg struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle; 123*1bb76ff1Sjsg 124*1bb76ff1Sjsg list_for_each_entry(handler, &reset_ctl->reset_handlers, 125*1bb76ff1Sjsg handler_list) { 126*1bb76ff1Sjsg if (handler->reset_method == reset_ctl->active_reset) { 127*1bb76ff1Sjsg dev_dbg(adev->dev, "Resetting device\n"); 128*1bb76ff1Sjsg handler->do_reset(adev); 129*1bb76ff1Sjsg break; 130*1bb76ff1Sjsg } 131*1bb76ff1Sjsg } 132*1bb76ff1Sjsg } 133*1bb76ff1Sjsg 134*1bb76ff1Sjsg static int sienna_cichlid_mode2_reset(struct amdgpu_device *adev) 135*1bb76ff1Sjsg { 136*1bb76ff1Sjsg /* disable BM */ 137*1bb76ff1Sjsg pci_clear_master(adev->pdev); 138*1bb76ff1Sjsg return amdgpu_dpm_mode2_reset(adev); 139*1bb76ff1Sjsg } 140*1bb76ff1Sjsg 141*1bb76ff1Sjsg static int 142*1bb76ff1Sjsg sienna_cichlid_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl, 143*1bb76ff1Sjsg struct amdgpu_reset_context *reset_context) 144*1bb76ff1Sjsg { 145*1bb76ff1Sjsg struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle; 146*1bb76ff1Sjsg int r; 147*1bb76ff1Sjsg 148*1bb76ff1Sjsg r = sienna_cichlid_mode2_reset(adev); 149*1bb76ff1Sjsg if (r) { 150*1bb76ff1Sjsg dev_err(adev->dev, 151*1bb76ff1Sjsg "ASIC reset failed with error, %d ", r); 152*1bb76ff1Sjsg } 153*1bb76ff1Sjsg return r; 154*1bb76ff1Sjsg } 155*1bb76ff1Sjsg 156*1bb76ff1Sjsg static int sienna_cichlid_mode2_restore_ip(struct amdgpu_device *adev) 157*1bb76ff1Sjsg { 158*1bb76ff1Sjsg int i, r; 159*1bb76ff1Sjsg struct psp_context *psp = &adev->psp; 160*1bb76ff1Sjsg 161*1bb76ff1Sjsg r = psp_rlc_autoload_start(psp); 162*1bb76ff1Sjsg if (r) { 163*1bb76ff1Sjsg dev_err(adev->dev, "Failed to start rlc autoload\n"); 164*1bb76ff1Sjsg return r; 165*1bb76ff1Sjsg } 166*1bb76ff1Sjsg 167*1bb76ff1Sjsg /* Reinit GFXHUB */ 168*1bb76ff1Sjsg if (adev->gfxhub.funcs->mode2_restore_regs) 169*1bb76ff1Sjsg adev->gfxhub.funcs->mode2_restore_regs(adev); 170*1bb76ff1Sjsg adev->gfxhub.funcs->init(adev); 171*1bb76ff1Sjsg r = adev->gfxhub.funcs->gart_enable(adev); 172*1bb76ff1Sjsg if (r) { 173*1bb76ff1Sjsg dev_err(adev->dev, "GFXHUB gart reenable failed after reset\n"); 174*1bb76ff1Sjsg return r; 175*1bb76ff1Sjsg } 176*1bb76ff1Sjsg 177*1bb76ff1Sjsg for (i = 0; i < adev->num_ip_blocks; i++) { 178*1bb76ff1Sjsg if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) { 179*1bb76ff1Sjsg r = adev->ip_blocks[i].version->funcs->resume(adev); 180*1bb76ff1Sjsg if (r) { 181*1bb76ff1Sjsg dev_err(adev->dev, 182*1bb76ff1Sjsg "resume of IP block <%s> failed %d\n", 183*1bb76ff1Sjsg adev->ip_blocks[i].version->funcs->name, r); 184*1bb76ff1Sjsg return r; 185*1bb76ff1Sjsg } 186*1bb76ff1Sjsg 187*1bb76ff1Sjsg adev->ip_blocks[i].status.hw = true; 188*1bb76ff1Sjsg } 189*1bb76ff1Sjsg } 190*1bb76ff1Sjsg 191*1bb76ff1Sjsg for (i = 0; i < adev->num_ip_blocks; i++) { 192*1bb76ff1Sjsg if (!(adev->ip_blocks[i].version->type == 193*1bb76ff1Sjsg AMD_IP_BLOCK_TYPE_GFX || 194*1bb76ff1Sjsg adev->ip_blocks[i].version->type == 195*1bb76ff1Sjsg AMD_IP_BLOCK_TYPE_SDMA)) 196*1bb76ff1Sjsg continue; 197*1bb76ff1Sjsg r = adev->ip_blocks[i].version->funcs->resume(adev); 198*1bb76ff1Sjsg if (r) { 199*1bb76ff1Sjsg dev_err(adev->dev, 200*1bb76ff1Sjsg "resume of IP block <%s> failed %d\n", 201*1bb76ff1Sjsg adev->ip_blocks[i].version->funcs->name, r); 202*1bb76ff1Sjsg return r; 203*1bb76ff1Sjsg } 204*1bb76ff1Sjsg 205*1bb76ff1Sjsg adev->ip_blocks[i].status.hw = true; 206*1bb76ff1Sjsg } 207*1bb76ff1Sjsg 208*1bb76ff1Sjsg for (i = 0; i < adev->num_ip_blocks; i++) { 209*1bb76ff1Sjsg if (!(adev->ip_blocks[i].version->type == 210*1bb76ff1Sjsg AMD_IP_BLOCK_TYPE_GFX || 211*1bb76ff1Sjsg adev->ip_blocks[i].version->type == 212*1bb76ff1Sjsg AMD_IP_BLOCK_TYPE_SDMA)) 213*1bb76ff1Sjsg continue; 214*1bb76ff1Sjsg 215*1bb76ff1Sjsg if (adev->ip_blocks[i].version->funcs->late_init) { 216*1bb76ff1Sjsg r = adev->ip_blocks[i].version->funcs->late_init( 217*1bb76ff1Sjsg (void *)adev); 218*1bb76ff1Sjsg if (r) { 219*1bb76ff1Sjsg dev_err(adev->dev, 220*1bb76ff1Sjsg "late_init of IP block <%s> failed %d after reset\n", 221*1bb76ff1Sjsg adev->ip_blocks[i].version->funcs->name, 222*1bb76ff1Sjsg r); 223*1bb76ff1Sjsg return r; 224*1bb76ff1Sjsg } 225*1bb76ff1Sjsg } 226*1bb76ff1Sjsg adev->ip_blocks[i].status.late_initialized = true; 227*1bb76ff1Sjsg } 228*1bb76ff1Sjsg 229*1bb76ff1Sjsg amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE); 230*1bb76ff1Sjsg amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE); 231*1bb76ff1Sjsg 232*1bb76ff1Sjsg return r; 233*1bb76ff1Sjsg } 234*1bb76ff1Sjsg 235*1bb76ff1Sjsg static int 236*1bb76ff1Sjsg sienna_cichlid_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl, 237*1bb76ff1Sjsg struct amdgpu_reset_context *reset_context) 238*1bb76ff1Sjsg { 239*1bb76ff1Sjsg int r; 240*1bb76ff1Sjsg struct amdgpu_device *tmp_adev = (struct amdgpu_device *)reset_ctl->handle; 241*1bb76ff1Sjsg 242*1bb76ff1Sjsg dev_info(tmp_adev->dev, 243*1bb76ff1Sjsg "GPU reset succeeded, trying to resume\n"); 244*1bb76ff1Sjsg r = sienna_cichlid_mode2_restore_ip(tmp_adev); 245*1bb76ff1Sjsg if (r) 246*1bb76ff1Sjsg goto end; 247*1bb76ff1Sjsg 248*1bb76ff1Sjsg /* 249*1bb76ff1Sjsg * Add this ASIC as tracked as reset was already 250*1bb76ff1Sjsg * complete successfully. 251*1bb76ff1Sjsg */ 252*1bb76ff1Sjsg amdgpu_register_gpu_instance(tmp_adev); 253*1bb76ff1Sjsg 254*1bb76ff1Sjsg /* Resume RAS */ 255*1bb76ff1Sjsg amdgpu_ras_resume(tmp_adev); 256*1bb76ff1Sjsg 257*1bb76ff1Sjsg amdgpu_irq_gpu_reset_resume_helper(tmp_adev); 258*1bb76ff1Sjsg 259*1bb76ff1Sjsg r = amdgpu_ib_ring_tests(tmp_adev); 260*1bb76ff1Sjsg if (r) { 261*1bb76ff1Sjsg dev_err(tmp_adev->dev, 262*1bb76ff1Sjsg "ib ring test failed (%d).\n", r); 263*1bb76ff1Sjsg r = -EAGAIN; 264*1bb76ff1Sjsg goto end; 265*1bb76ff1Sjsg } 266*1bb76ff1Sjsg 267*1bb76ff1Sjsg end: 268*1bb76ff1Sjsg if (r) 269*1bb76ff1Sjsg return -EAGAIN; 270*1bb76ff1Sjsg else 271*1bb76ff1Sjsg return r; 272*1bb76ff1Sjsg } 273*1bb76ff1Sjsg 274*1bb76ff1Sjsg static struct amdgpu_reset_handler sienna_cichlid_mode2_handler = { 275*1bb76ff1Sjsg .reset_method = AMD_RESET_METHOD_MODE2, 276*1bb76ff1Sjsg .prepare_env = NULL, 277*1bb76ff1Sjsg .prepare_hwcontext = sienna_cichlid_mode2_prepare_hwcontext, 278*1bb76ff1Sjsg .perform_reset = sienna_cichlid_mode2_perform_reset, 279*1bb76ff1Sjsg .restore_hwcontext = sienna_cichlid_mode2_restore_hwcontext, 280*1bb76ff1Sjsg .restore_env = NULL, 281*1bb76ff1Sjsg .do_reset = sienna_cichlid_mode2_reset, 282*1bb76ff1Sjsg }; 283*1bb76ff1Sjsg 284*1bb76ff1Sjsg int sienna_cichlid_reset_init(struct amdgpu_device *adev) 285*1bb76ff1Sjsg { 286*1bb76ff1Sjsg struct amdgpu_reset_control *reset_ctl; 287*1bb76ff1Sjsg 288*1bb76ff1Sjsg reset_ctl = kzalloc(sizeof(*reset_ctl), GFP_KERNEL); 289*1bb76ff1Sjsg if (!reset_ctl) 290*1bb76ff1Sjsg return -ENOMEM; 291*1bb76ff1Sjsg 292*1bb76ff1Sjsg reset_ctl->handle = adev; 293*1bb76ff1Sjsg reset_ctl->async_reset = sienna_cichlid_async_reset; 294*1bb76ff1Sjsg reset_ctl->active_reset = AMD_RESET_METHOD_NONE; 295*1bb76ff1Sjsg reset_ctl->get_reset_handler = sienna_cichlid_get_reset_handler; 296*1bb76ff1Sjsg 297*1bb76ff1Sjsg INIT_LIST_HEAD(&reset_ctl->reset_handlers); 298*1bb76ff1Sjsg INIT_WORK(&reset_ctl->reset_work, reset_ctl->async_reset); 299*1bb76ff1Sjsg /* Only mode2 is handled through reset control now */ 300*1bb76ff1Sjsg amdgpu_reset_add_handler(reset_ctl, &sienna_cichlid_mode2_handler); 301*1bb76ff1Sjsg 302*1bb76ff1Sjsg adev->reset_cntl = reset_ctl; 303*1bb76ff1Sjsg 304*1bb76ff1Sjsg return 0; 305*1bb76ff1Sjsg } 306*1bb76ff1Sjsg 307*1bb76ff1Sjsg int sienna_cichlid_reset_fini(struct amdgpu_device *adev) 308*1bb76ff1Sjsg { 309*1bb76ff1Sjsg kfree(adev->reset_cntl); 310*1bb76ff1Sjsg adev->reset_cntl = NULL; 311*1bb76ff1Sjsg return 0; 312*1bb76ff1Sjsg } 313