/*
 * Copyright 2021 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "sienna_cichlid.h"
#include "amdgpu_reset.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_dpm.h"
#include "amdgpu_job.h"
#include "amdgpu_ring.h"
#include "amdgpu_ras.h"
#include "amdgpu_psp.h"
#include "amdgpu_xgmi.h"

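/*
 * Report whether Mode-2 (soft, engine-only) reset should be the default
 * reset method for this device.  The upstream check, an MP1 11.0.7 part
 * with new enough SMU firmware on bare metal, is compiled out here, so
 * Mode-2 is only used when a caller requests it explicitly.
 */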
static bool sienna_cichlid_is_mode2_default(struct amdgpu_reset_control *reset_ctl)
{
#if 0
	struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;

	if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7) &&
	    adev->pm.fw_version >= 0x3a5500 && !amdgpu_sriov_vf(adev))
		return true;
#endif
	return false;
}

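/*
 * Pick the reset handler for this request: an explicitly requested
 * method wins, otherwise fall back to Mode-2 when it is the default.
 * Returns NULL if no registered handler matches.
 */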
static struct amdgpu_reset_handler *
sienna_cichlid_get_reset_handler(struct amdgpu_reset_control *reset_ctl,
				 struct amdgpu_reset_context *reset_context)
{
	struct amdgpu_reset_handler *handler;

	if (reset_context->method != AMD_RESET_METHOD_NONE) {
		list_for_each_entry(handler, &reset_ctl->reset_handlers,
				    handler_list) {
			if (handler->reset_method == reset_context->method)
				return handler;
		}
	}

	if (sienna_cichlid_is_mode2_default(reset_ctl)) {
		list_for_each_entry(handler, &reset_ctl->reset_handlers,
				    handler_list) {
			if (handler->reset_method == AMD_RESET_METHOD_MODE2)
				return handler;
		}
	}

	return NULL;
}

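/*
 * Suspend the IP blocks a Mode-2 reset affects (GFX and SDMA only), in
 * reverse init order, after forcing clock- and powergating off.
 */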
static int sienna_cichlid_mode2_suspend_ip(struct amdgpu_device *adev)
{
	int r, i;

	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);

	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
		if (!(adev->ip_blocks[i].version->type ==
			      AMD_IP_BLOCK_TYPE_GFX ||
		      adev->ip_blocks[i].version->type ==
			      AMD_IP_BLOCK_TYPE_SDMA))
			continue;

		r = adev->ip_blocks[i].version->funcs->suspend(adev);
		if (r) {
			dev_err(adev->dev,
				"suspend of IP block <%s> failed %d\n",
				adev->ip_blocks[i].version->funcs->name, r);
			return r;
		}
		adev->ip_blocks[i].status.hw = false;
	}

	/*
	 * Returning 'r' here could leak an uninitialized value when no
	 * GFX/SDMA block was visited, so return success explicitly.
	 */
	return 0;
}

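/*
 * On bare metal, save the GFXHUB registers, halt the hub and suspend
 * GFX/SDMA before the reset is issued.  SR-IOV VFs skip this preparation.
 */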
static int
sienna_cichlid_mode2_prepare_hwcontext(struct amdgpu_reset_control *reset_ctl,
				       struct amdgpu_reset_context *reset_context)
{
	int r = 0;
	struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;

	if (!amdgpu_sriov_vf(adev)) {
		if (adev->gfxhub.funcs->mode2_save_regs)
			adev->gfxhub.funcs->mode2_save_regs(adev);
		if (adev->gfxhub.funcs->halt)
			adev->gfxhub.funcs->halt(adev);
		r = sienna_cichlid_mode2_suspend_ip(adev);
	}

	return r;
}

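/*
 * Work item entry point: run the do_reset() hook of whichever handler
 * matches the currently active reset method.
 */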
static void sienna_cichlid_async_reset(struct work_struct *work)
{
	struct amdgpu_reset_handler *handler;
	struct amdgpu_reset_control *reset_ctl =
		container_of(work, struct amdgpu_reset_control, reset_work);
	struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;

	list_for_each_entry(handler, &reset_ctl->reset_handlers,
			    handler_list) {
		if (handler->reset_method == reset_ctl->active_reset) {
			dev_dbg(adev->dev, "Resetting device\n");
			handler->do_reset(adev);
			break;
		}
	}
}

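/* Stop bus-mastering DMA, then hand the reset off to the DPM/SMU layer. */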
static int sienna_cichlid_mode2_reset(struct amdgpu_device *adev)
{
	/* disable BM */
	pci_clear_master(adev->pdev);
	return amdgpu_dpm_mode2_reset(adev);
}

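/* Issue the Mode-2 reset itself, logging any failure. */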
static int
sienna_cichlid_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl,
				   struct amdgpu_reset_context *reset_context)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
	int r;

	r = sienna_cichlid_mode2_reset(adev);
	if (r)
		dev_err(adev->dev, "ASIC reset failed with error %d\n", r);

	return r;
}

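/*
 * Bring the hardware back after a Mode-2 reset: restart RLC autoload
 * through PSP, reinitialize GFXHUB and re-enable GART, then resume IH
 * followed by GFX/SDMA, run their late_init, and restore gating.
 */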
static int sienna_cichlid_mode2_restore_ip(struct amdgpu_device *adev)
{
	int i, r;
	struct psp_context *psp = &adev->psp;

	r = psp_rlc_autoload_start(psp);
	if (r) {
		dev_err(adev->dev, "Failed to start rlc autoload\n");
		return r;
	}

	/* Reinit GFXHUB */
	if (adev->gfxhub.funcs->mode2_restore_regs)
		adev->gfxhub.funcs->mode2_restore_regs(adev);
	adev->gfxhub.funcs->init(adev);
	r = adev->gfxhub.funcs->gart_enable(adev);
	if (r) {
		dev_err(adev->dev, "GFXHUB gart reenable failed after reset\n");
		return r;
	}

	/* Resume IH first so interrupts work for the blocks that follow. */
	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
			r = adev->ip_blocks[i].version->funcs->resume(adev);
			if (r) {
				dev_err(adev->dev,
					"resume of IP block <%s> failed %d\n",
					adev->ip_blocks[i].version->funcs->name, r);
				return r;
			}

			adev->ip_blocks[i].status.hw = true;
		}
	}

	/* Then resume the blocks the Mode-2 reset actually touched. */
	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!(adev->ip_blocks[i].version->type ==
			      AMD_IP_BLOCK_TYPE_GFX ||
		      adev->ip_blocks[i].version->type ==
			      AMD_IP_BLOCK_TYPE_SDMA))
			continue;
		r = adev->ip_blocks[i].version->funcs->resume(adev);
		if (r) {
			dev_err(adev->dev,
				"resume of IP block <%s> failed %d\n",
				adev->ip_blocks[i].version->funcs->name, r);
			return r;
		}

		adev->ip_blocks[i].status.hw = true;
	}

	/* Finally run late_init for the same blocks. */
	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!(adev->ip_blocks[i].version->type ==
			      AMD_IP_BLOCK_TYPE_GFX ||
		      adev->ip_blocks[i].version->type ==
			      AMD_IP_BLOCK_TYPE_SDMA))
			continue;

		if (adev->ip_blocks[i].version->funcs->late_init) {
			r = adev->ip_blocks[i].version->funcs->late_init(
				(void *)adev);
			if (r) {
				dev_err(adev->dev,
					"late_init of IP block <%s> failed %d after reset\n",
					adev->ip_blocks[i].version->funcs->name,
					r);
				return r;
			}
		}
		adev->ip_blocks[i].status.late_initialized = true;
	}

	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);

	return r;
}

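/*
 * Post-reset recovery: resume the affected IP blocks, re-register the
 * GPU instance, resume RAS and interrupts, and sanity-check the rings
 * with IB tests.  Any failure is reported as -EAGAIN.
 */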
static int
sienna_cichlid_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
				       struct amdgpu_reset_context *reset_context)
{
	int r;
	struct amdgpu_device *tmp_adev = (struct amdgpu_device *)reset_ctl->handle;

	dev_info(tmp_adev->dev,
		 "GPU reset succeeded, trying to resume\n");
	r = sienna_cichlid_mode2_restore_ip(tmp_adev);
	if (r)
		goto end;

	/*
	 * Track this ASIC again, since the reset has
	 * completed successfully.
	 */
	amdgpu_register_gpu_instance(tmp_adev);

	/* Resume RAS */
	amdgpu_ras_resume(tmp_adev);

	amdgpu_irq_gpu_reset_resume_helper(tmp_adev);

	r = amdgpu_ib_ring_tests(tmp_adev);
	if (r)
		dev_err(tmp_adev->dev,
			"ib ring test failed (%d).\n", r);

end:
	/* Any failure on this path asks the caller to retry the recovery. */
	return r ? -EAGAIN : 0;
}

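/*
 * Only the hwcontext and do_reset hooks are implemented; Mode-2 needs
 * no environment save/restore here.
 */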
static struct amdgpu_reset_handler sienna_cichlid_mode2_handler = {
	.reset_method		= AMD_RESET_METHOD_MODE2,
	.prepare_env		= NULL,
	.prepare_hwcontext	= sienna_cichlid_mode2_prepare_hwcontext,
	.perform_reset		= sienna_cichlid_mode2_perform_reset,
	.restore_hwcontext	= sienna_cichlid_mode2_restore_hwcontext,
	.restore_env		= NULL,
	.do_reset		= sienna_cichlid_mode2_reset,
};

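/*
 * Create the reset controller for this device and register the Mode-2
 * handler with it; other reset methods still go through the legacy paths.
 */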
int sienna_cichlid_reset_init(struct amdgpu_device *adev)
{
	struct amdgpu_reset_control *reset_ctl;

	reset_ctl = kzalloc(sizeof(*reset_ctl), GFP_KERNEL);
	if (!reset_ctl)
		return -ENOMEM;

	reset_ctl->handle = adev;
	reset_ctl->async_reset = sienna_cichlid_async_reset;
	reset_ctl->active_reset = AMD_RESET_METHOD_NONE;
	reset_ctl->get_reset_handler = sienna_cichlid_get_reset_handler;

	INIT_LIST_HEAD(&reset_ctl->reset_handlers);
	INIT_WORK(&reset_ctl->reset_work, reset_ctl->async_reset);
	/* Only mode2 is handled through reset control now */
	amdgpu_reset_add_handler(reset_ctl, &sienna_cichlid_mode2_handler);

	adev->reset_cntl = reset_ctl;

	return 0;
}

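/* Tear down the reset controller allocated in sienna_cichlid_reset_init(). */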
int sienna_cichlid_reset_fini(struct amdgpu_device *adev)
{
	kfree(adev->reset_cntl);
	adev->reset_cntl = NULL;
	return 0;
}