xref: /openbsd/sys/dev/pci/drm/amd/amdgpu/sienna_cichlid.c (revision f005ef32)
1 /*
2  * Copyright 2021 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 
24 #include "sienna_cichlid.h"
25 #include "amdgpu_reset.h"
26 #include "amdgpu_amdkfd.h"
27 #include "amdgpu_dpm.h"
28 #include "amdgpu_job.h"
29 #include "amdgpu_ring.h"
30 #include "amdgpu_ras.h"
31 #include "amdgpu_psp.h"
32 #include "amdgpu_xgmi.h"
33 
sienna_cichlid_is_mode2_default(struct amdgpu_reset_control * reset_ctl)34 static bool sienna_cichlid_is_mode2_default(struct amdgpu_reset_control *reset_ctl)
35 {
36 #if 0
37 	struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
38 
39 	if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7) &&
40 	    adev->pm.fw_version >= 0x3a5500 && !amdgpu_sriov_vf(adev))
41 		return true;
42 #endif
43 	return amdgpu_reset_method == AMD_RESET_METHOD_MODE2;
44 }
45 
46 static struct amdgpu_reset_handler *
sienna_cichlid_get_reset_handler(struct amdgpu_reset_control * reset_ctl,struct amdgpu_reset_context * reset_context)47 sienna_cichlid_get_reset_handler(struct amdgpu_reset_control *reset_ctl,
48 			    struct amdgpu_reset_context *reset_context)
49 {
50 	struct amdgpu_reset_handler *handler;
51 
52 	if (reset_context->method != AMD_RESET_METHOD_NONE) {
53 		list_for_each_entry(handler, &reset_ctl->reset_handlers,
54 				     handler_list) {
55 			if (handler->reset_method == reset_context->method)
56 				return handler;
57 		}
58 	}
59 
60 	if (sienna_cichlid_is_mode2_default(reset_ctl)) {
61 		list_for_each_entry (handler, &reset_ctl->reset_handlers,
62 				     handler_list) {
63 			if (handler->reset_method == AMD_RESET_METHOD_MODE2)
64 				return handler;
65 		}
66 	}
67 
68 	return NULL;
69 }
70 
sienna_cichlid_mode2_suspend_ip(struct amdgpu_device * adev)71 static int sienna_cichlid_mode2_suspend_ip(struct amdgpu_device *adev)
72 {
73 	int r, i;
74 
75 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
76 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
77 
78 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
79 		if (!(adev->ip_blocks[i].version->type ==
80 			      AMD_IP_BLOCK_TYPE_GFX ||
81 		      adev->ip_blocks[i].version->type ==
82 			      AMD_IP_BLOCK_TYPE_SDMA))
83 			continue;
84 
85 		r = adev->ip_blocks[i].version->funcs->suspend(adev);
86 
87 		if (r) {
88 			dev_err(adev->dev,
89 				"suspend of IP block <%s> failed %d\n",
90 				adev->ip_blocks[i].version->funcs->name, r);
91 			return r;
92 		}
93 		adev->ip_blocks[i].status.hw = false;
94 	}
95 
96 	return r;
97 }
98 
99 static int
sienna_cichlid_mode2_prepare_hwcontext(struct amdgpu_reset_control * reset_ctl,struct amdgpu_reset_context * reset_context)100 sienna_cichlid_mode2_prepare_hwcontext(struct amdgpu_reset_control *reset_ctl,
101 				  struct amdgpu_reset_context *reset_context)
102 {
103 	int r = 0;
104 	struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
105 
106 	if (!amdgpu_sriov_vf(adev)) {
107 		if (adev->gfxhub.funcs->mode2_save_regs)
108 			adev->gfxhub.funcs->mode2_save_regs(adev);
109 		if (adev->gfxhub.funcs->halt)
110 			adev->gfxhub.funcs->halt(adev);
111 		r = sienna_cichlid_mode2_suspend_ip(adev);
112 	}
113 
114 	return r;
115 }
116 
sienna_cichlid_async_reset(struct work_struct * work)117 static void sienna_cichlid_async_reset(struct work_struct *work)
118 {
119 	struct amdgpu_reset_handler *handler;
120 	struct amdgpu_reset_control *reset_ctl =
121 		container_of(work, struct amdgpu_reset_control, reset_work);
122 	struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
123 
124 	list_for_each_entry(handler, &reset_ctl->reset_handlers,
125 			     handler_list) {
126 		if (handler->reset_method == reset_ctl->active_reset) {
127 			dev_dbg(adev->dev, "Resetting device\n");
128 			handler->do_reset(adev);
129 			break;
130 		}
131 	}
132 }
133 
sienna_cichlid_mode2_reset(struct amdgpu_device * adev)134 static int sienna_cichlid_mode2_reset(struct amdgpu_device *adev)
135 {
136 	/* disable BM */
137 	pci_clear_master(adev->pdev);
138 	return amdgpu_dpm_mode2_reset(adev);
139 }
140 
141 static int
sienna_cichlid_mode2_perform_reset(struct amdgpu_reset_control * reset_ctl,struct amdgpu_reset_context * reset_context)142 sienna_cichlid_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl,
143 			      struct amdgpu_reset_context *reset_context)
144 {
145 	struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
146 	int r;
147 
148 	r = sienna_cichlid_mode2_reset(adev);
149 	if (r) {
150 		dev_err(adev->dev,
151 			"ASIC reset failed with error, %d ", r);
152 	}
153 	return r;
154 }
155 
sienna_cichlid_mode2_restore_ip(struct amdgpu_device * adev)156 static int sienna_cichlid_mode2_restore_ip(struct amdgpu_device *adev)
157 {
158 	int i, r;
159 	struct psp_context *psp = &adev->psp;
160 
161 	r = psp_rlc_autoload_start(psp);
162 	if (r) {
163 		dev_err(adev->dev, "Failed to start rlc autoload\n");
164 		return r;
165 	}
166 
167 	/* Reinit GFXHUB */
168 	if (adev->gfxhub.funcs->mode2_restore_regs)
169 		adev->gfxhub.funcs->mode2_restore_regs(adev);
170 	adev->gfxhub.funcs->init(adev);
171 	r = adev->gfxhub.funcs->gart_enable(adev);
172 	if (r) {
173 		dev_err(adev->dev, "GFXHUB gart reenable failed after reset\n");
174 		return r;
175 	}
176 
177 	for (i = 0; i < adev->num_ip_blocks; i++) {
178 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
179 			r = adev->ip_blocks[i].version->funcs->resume(adev);
180 			if (r) {
181 				dev_err(adev->dev,
182 					"resume of IP block <%s> failed %d\n",
183 					adev->ip_blocks[i].version->funcs->name, r);
184 				return r;
185 			}
186 
187 			adev->ip_blocks[i].status.hw = true;
188 		}
189 	}
190 
191 	for (i = 0; i < adev->num_ip_blocks; i++) {
192 		if (!(adev->ip_blocks[i].version->type ==
193 			      AMD_IP_BLOCK_TYPE_GFX ||
194 		      adev->ip_blocks[i].version->type ==
195 			      AMD_IP_BLOCK_TYPE_SDMA))
196 			continue;
197 		r = adev->ip_blocks[i].version->funcs->resume(adev);
198 		if (r) {
199 			dev_err(adev->dev,
200 				"resume of IP block <%s> failed %d\n",
201 				adev->ip_blocks[i].version->funcs->name, r);
202 			return r;
203 		}
204 
205 		adev->ip_blocks[i].status.hw = true;
206 	}
207 
208 	for (i = 0; i < adev->num_ip_blocks; i++) {
209 		if (!(adev->ip_blocks[i].version->type ==
210 			      AMD_IP_BLOCK_TYPE_GFX ||
211 		      adev->ip_blocks[i].version->type ==
212 			      AMD_IP_BLOCK_TYPE_SDMA))
213 			continue;
214 
215 		if (adev->ip_blocks[i].version->funcs->late_init) {
216 			r = adev->ip_blocks[i].version->funcs->late_init(
217 				(void *)adev);
218 			if (r) {
219 				dev_err(adev->dev,
220 					"late_init of IP block <%s> failed %d after reset\n",
221 					adev->ip_blocks[i].version->funcs->name,
222 					r);
223 				return r;
224 			}
225 		}
226 		adev->ip_blocks[i].status.late_initialized = true;
227 	}
228 
229 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
230 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
231 
232 	return r;
233 }
234 
235 static int
sienna_cichlid_mode2_restore_hwcontext(struct amdgpu_reset_control * reset_ctl,struct amdgpu_reset_context * reset_context)236 sienna_cichlid_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
237 				  struct amdgpu_reset_context *reset_context)
238 {
239 	int r;
240 	struct amdgpu_device *tmp_adev = (struct amdgpu_device *)reset_ctl->handle;
241 
242 	dev_info(tmp_adev->dev,
243 			"GPU reset succeeded, trying to resume\n");
244 	r = sienna_cichlid_mode2_restore_ip(tmp_adev);
245 	if (r)
246 		goto end;
247 
248 	/*
249 	* Add this ASIC as tracked as reset was already
250 	* complete successfully.
251 	*/
252 	amdgpu_register_gpu_instance(tmp_adev);
253 
254 	/* Resume RAS */
255 	amdgpu_ras_resume(tmp_adev);
256 
257 	amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
258 
259 	r = amdgpu_ib_ring_tests(tmp_adev);
260 	if (r) {
261 		dev_err(tmp_adev->dev,
262 			"ib ring test failed (%d).\n", r);
263 		r = -EAGAIN;
264 		goto end;
265 	}
266 
267 end:
268 	if (r)
269 		return -EAGAIN;
270 	else
271 		return r;
272 }
273 
274 static struct amdgpu_reset_handler sienna_cichlid_mode2_handler = {
275 	.reset_method		= AMD_RESET_METHOD_MODE2,
276 	.prepare_env		= NULL,
277 	.prepare_hwcontext	= sienna_cichlid_mode2_prepare_hwcontext,
278 	.perform_reset		= sienna_cichlid_mode2_perform_reset,
279 	.restore_hwcontext	= sienna_cichlid_mode2_restore_hwcontext,
280 	.restore_env		= NULL,
281 	.do_reset		= sienna_cichlid_mode2_reset,
282 };
283 
sienna_cichlid_reset_init(struct amdgpu_device * adev)284 int sienna_cichlid_reset_init(struct amdgpu_device *adev)
285 {
286 	struct amdgpu_reset_control *reset_ctl;
287 
288 	reset_ctl = kzalloc(sizeof(*reset_ctl), GFP_KERNEL);
289 	if (!reset_ctl)
290 		return -ENOMEM;
291 
292 	reset_ctl->handle = adev;
293 	reset_ctl->async_reset = sienna_cichlid_async_reset;
294 	reset_ctl->active_reset = AMD_RESET_METHOD_NONE;
295 	reset_ctl->get_reset_handler = sienna_cichlid_get_reset_handler;
296 
297 	INIT_LIST_HEAD(&reset_ctl->reset_handlers);
298 	INIT_WORK(&reset_ctl->reset_work, reset_ctl->async_reset);
299 	/* Only mode2 is handled through reset control now */
300 	amdgpu_reset_add_handler(reset_ctl, &sienna_cichlid_mode2_handler);
301 
302 	adev->reset_cntl = reset_ctl;
303 
304 	return 0;
305 }
306 
sienna_cichlid_reset_fini(struct amdgpu_device * adev)307 int sienna_cichlid_reset_fini(struct amdgpu_device *adev)
308 {
309 	kfree(adev->reset_cntl);
310 	adev->reset_cntl = NULL;
311 	return 0;
312 }
313