1 /*	$NetBSD: amdgpu_device.c,v 1.19 2023/05/25 12:07:43 riastradh Exp $	*/
2 
3 /*
4  * Copyright 2008 Advanced Micro Devices, Inc.
5  * Copyright 2008 Red Hat Inc.
6  * Copyright 2009 Jerome Glisse.
7  *
8  * Permission is hereby granted, free of charge, to any person obtaining a
9  * copy of this software and associated documentation files (the "Software"),
10  * to deal in the Software without restriction, including without limitation
11  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12  * and/or sell copies of the Software, and to permit persons to whom the
13  * Software is furnished to do so, subject to the following conditions:
14  *
15  * The above copyright notice and this permission notice shall be included in
16  * all copies or substantial portions of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
21  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
22  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
23  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24  * OTHER DEALINGS IN THE SOFTWARE.
25  *
26  * Authors: Dave Airlie
27  *          Alex Deucher
28  *          Jerome Glisse
29  */
30 #include <sys/cdefs.h>
31 __KERNEL_RCSID(0, "$NetBSD: amdgpu_device.c,v 1.19 2023/05/25 12:07:43 riastradh Exp $");
32 
33 #include <linux/power_supply.h>
34 #include <linux/kthread.h>
35 #include <linux/module.h>
36 #include <linux/console.h>
37 #include <linux/slab.h>
38 #include <linux/reboot.h>
39 
40 #include <drm/drm_atomic_helper.h>
41 #include <drm/drm_probe_helper.h>
42 #include <drm/amdgpu_drm.h>
43 #include <linux/vgaarb.h>
44 #include <linux/vga_switcheroo.h>
45 #include <linux/efi.h>
46 #include "amdgpu.h"
47 #include "amdgpu_trace.h"
48 #include "amdgpu_i2c.h"
49 #include "atom.h"
50 #include "amdgpu_atombios.h"
51 #include "amdgpu_atomfirmware.h"
52 #include "amd_pcie.h"
53 #ifdef CONFIG_DRM_AMDGPU_SI
54 #include "si.h"
55 #endif
56 #ifdef CONFIG_DRM_AMDGPU_CIK
57 #include "cik.h"
58 #endif
59 #include "vi.h"
60 #include "soc15.h"
61 #include "nv.h"
62 #include "bif/bif_4_1_d.h"
63 #include <linux/pci.h>
64 #include <linux/firmware.h>
65 #include "amdgpu_vf_error.h"
66 
67 #include "amdgpu_amdkfd.h"
68 #include "amdgpu_pm.h"
69 
70 #include "amdgpu_xgmi.h"
71 #include "amdgpu_ras.h"
72 #include "amdgpu_pmu.h"
73 
74 #include <linux/suspend.h>
75 #include <drm/task_barrier.h>
76 #include <linux/nbsd-namespace.h>
77 
78 MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
79 MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
80 MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
81 MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
82 MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
83 MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
84 MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin");
85 MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");
86 MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin");
87 MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
88 
89 #define AMDGPU_RESUME_MS		2000
90 
91 const char *amdgpu_asic_name[] = {
92 	"TAHITI",
93 	"PITCAIRN",
94 	"VERDE",
95 	"OLAND",
96 	"HAINAN",
97 	"BONAIRE",
98 	"KAVERI",
99 	"KABINI",
100 	"HAWAII",
101 	"MULLINS",
102 	"TOPAZ",
103 	"TONGA",
104 	"FIJI",
105 	"CARRIZO",
106 	"STONEY",
107 	"POLARIS10",
108 	"POLARIS11",
109 	"POLARIS12",
110 	"VEGAM",
111 	"VEGA10",
112 	"VEGA12",
113 	"VEGA20",
114 	"RAVEN",
115 	"ARCTURUS",
116 	"RENOIR",
117 	"NAVI10",
118 	"NAVI14",
119 	"NAVI12",
120 	"LAST",
121 };
122 
123 #ifndef __NetBSD__		/* XXX amdgpu sysfs */
124 
125 /**
126  * DOC: pcie_replay_count
127  *
128  * The amdgpu driver provides a sysfs API for reporting the total number
129  * of PCIe replays (NAKs).
130  * The file pcie_replay_count is used for this and returns the total
131  * number of replays as a sum of the NAKs generated and the NAKs received.
132  */
133 
134 static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
135 		struct device_attribute *attr, char *buf)
136 {
137 	struct drm_device *ddev = dev_get_drvdata(dev);
138 	struct amdgpu_device *adev = ddev->dev_private;
139 	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
140 
141 	return snprintf(buf, PAGE_SIZE, "%llu\n", cnt);
142 }
143 
144 static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
145 		amdgpu_device_get_pcie_replay_count, NULL);
146 
147 #endif	/* __NetBSD__ */
148 
149 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
150 
151 /**
152  * amdgpu_device_supports_boco - Is the device a dGPU with HG/PX power control
153  *
154  * @dev: drm_device pointer
155  *
156  * Returns true if the device is a dGPU with HG/PX power control;
157  * otherwise returns false.
158  */
159 bool amdgpu_device_supports_boco(struct drm_device *dev)
160 {
161 	struct amdgpu_device *adev = dev->dev_private;
162 
163 	if (adev->flags & AMD_IS_PX)
164 		return true;
165 	return false;
166 }
167 
168 /**
169  * amdgpu_device_supports_baco - Does the device support BACO
170  *
171  * @dev: drm_device pointer
172  *
173  * Returns true if the device supports BACO;
174  * otherwise returns false.
175  */
176 bool amdgpu_device_supports_baco(struct drm_device *dev)
177 {
178 	struct amdgpu_device *adev = dev->dev_private;
179 
180 	return amdgpu_asic_supports_baco(adev);
181 }
182 
183 /**
184  * VRAM access helper functions.
185  *
186  * amdgpu_device_vram_access - read/write a buffer in vram
187  *
188  * @adev: amdgpu_device pointer
189  * @pos: offset of the buffer in vram
190  * @buf: virtual address of the buffer in system memory
191  * @size: read/write size in bytes; @buf must be at least @size bytes
192  * @write: true - write to vram, otherwise - read from vram
193  */
194 void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
195 			       uint32_t *buf, size_t size, bool write)
196 {
197 	uint64_t last;
198 	unsigned long flags;
199 
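	/*
	 * Walk the buffer one dword at a time: program the VRAM address into
	 * the MM_INDEX/MM_INDEX_HI window and transfer the data through
	 * MM_DATA, all under the mmio_idx lock.
	 */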
200 	last = size - 4;
201 	for (last += pos; pos <= last; pos += 4) {
202 		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
203 		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
204 		WREG32_NO_KIQ(mmMM_INDEX_HI, pos >> 31);
205 		if (write)
206 			WREG32_NO_KIQ(mmMM_DATA, *buf++);
207 		else
208 			*buf++ = RREG32_NO_KIQ(mmMM_DATA);
209 		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
210 	}
211 }
212 
213 /*
214  * MMIO register access helper functions.
215  */
216 /**
217  * amdgpu_mm_rreg - read a memory mapped IO register
218  *
219  * @adev: amdgpu_device pointer
220  * @reg: dword aligned register offset
221  * @acc_flags: access flags which require special behavior
222  *
223  * Returns the 32 bit value from the offset specified.
224  */
225 uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
226 			uint32_t acc_flags)
227 {
228 	uint32_t ret;
229 
230 	if ((acc_flags & AMDGPU_REGS_KIQ) || (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)))
231 		return amdgpu_kiq_rreg(adev, reg);
232 
233 	if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
234 #ifdef __NetBSD__
235 		return bus_space_read_4(adev->rmmiot, adev->rmmioh, 4*reg);
236 #else
237 		ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
238 #endif
239 	else {
240 		unsigned long flags;
241 
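		/*
		 * Registers beyond the directly mapped MMIO window are read
		 * indirectly through the MM_INDEX/MM_DATA pair, serialized by
		 * the mmio_idx lock.
		 */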
242 		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
243 #ifdef __NetBSD__
244 		bus_space_write_4(adev->rmmiot, adev->rmmioh, 4*mmMM_INDEX,
245 		    4*reg);
246 		ret = bus_space_read_4(adev->rmmiot, adev->rmmioh,
247 		    4*mmMM_DATA);
248 #else
249 		writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
250 		ret = readl(((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
251 #endif
252 		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
253 	}
254 	trace_amdgpu_mm_rreg(adev->pdev->device, reg, ret);
255 	return ret;
256 }
257 
258 /*
259  * Byte-wide MMIO register read helper.
260  * @offset: byte offset from MMIO start
261  *
262  */
263 
264 /**
265  * amdgpu_mm_rreg8 - read a memory mapped IO register
266  *
267  * @adev: amdgpu_device pointer
268  * @offset: byte aligned register offset
269  *
270  * Returns the 8 bit value from the offset specified.
271  */
272 uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset) {
273 	if (offset < adev->rmmio_size)
274 #ifdef __NetBSD__
275 		return bus_space_read_1(adev->rmmiot, adev->rmmioh, offset);
276 #else
277 		return (readb(adev->rmmio + offset));
278 #endif
279 	BUG();
280 }
281 
282 /*
283  * Byte-wide MMIO register write helper.
284  * @offset: byte offset from MMIO start
285  * @value: the value to be written to the register
286  *
287  */
288 /**
289  * amdgpu_mm_wreg8 - write a memory mapped IO register
290  *
291  * @adev: amdgpu_device pointer
292  * @offset: byte aligned register offset
293  * @value: 8 bit value to write
294  *
295  * Writes the value specified to the offset specified.
296  */
297 void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value) {
298 	if (offset < adev->rmmio_size)
299 #ifdef __NetBSD__
300 		bus_space_write_1(adev->rmmiot, adev->rmmioh, offset, value);
301 #else
302 		writeb(value, adev->rmmio + offset);
303 #endif
304 	else
305 		BUG();
306 }
307 
308 /**
309  * amdgpu_mm_wreg - write to a memory mapped IO register
310  *
311  * @adev: amdgpu_device pointer
312  * @reg: dword aligned register offset
313  * @v: 32 bit value to write to the register
314  * @acc_flags: access flags which require special behavior
315  *
316  * Writes the value specified to the offset specified.
317  */
318 void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
319 		    uint32_t acc_flags)
320 {
321 	trace_amdgpu_mm_wreg(adev->pdev->device, reg, v);
322 
323 	if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
324 		adev->last_mm_index = v;
325 	}
326 
327 	if ((acc_flags & AMDGPU_REGS_KIQ) || (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)))
328 		return amdgpu_kiq_wreg(adev, reg, v);
329 
330 	if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
331 #ifdef __NetBSD__
332 		bus_space_write_4(adev->rmmiot, adev->rmmioh, 4*reg, v);
333 #else
334 		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
335 #endif
336 	else {
337 		unsigned long flags;
338 
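		/* Same indirect MM_INDEX/MM_DATA path as amdgpu_mm_rreg, but for writes. */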
339 		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
340 #ifdef __NetBSD__
341 		bus_space_write_4(adev->rmmiot, adev->rmmioh, 4*mmMM_INDEX,
342 		    reg*4);
343 		bus_space_write_4(adev->rmmiot, adev->rmmioh, 4*mmMM_DATA, v);
344 #else
345 		writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
346 		writel(v, ((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
347 #endif
348 		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
349 	}
350 
351 	if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
352 		udelay(500);
353 	}
354 }
355 
356 /**
357  * amdgpu_io_rreg - read an IO register
358  *
359  * @adev: amdgpu_device pointer
360  * @reg: dword aligned register offset
361  *
362  * Returns the 32 bit value from the offset specified.
363  */
364 u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
365 {
366 	if ((reg * 4) < adev->rio_mem_size)
367 #ifdef __NetBSD__
368 		return bus_space_read_4(adev->rio_memt, adev->rio_memh, 4*reg);
369 #else
370 		return ioread32(adev->rio_mem + (reg * 4));
371 #endif
372 	else {
373 #ifdef __NetBSD__
374 		bus_space_write_4(adev->rio_memt, adev->rio_memh, 4*mmMM_INDEX,
375 		    4*reg);
376 		return bus_space_read_4(adev->rio_memt, adev->rio_memh,
377 		    4*mmMM_DATA);
378 #else
379 		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
380 		return ioread32(adev->rio_mem + (mmMM_DATA * 4));
381 #endif
382 	}
383 }
384 
385 /**
386  * amdgpu_io_wreg - write to an IO register
387  *
388  * @adev: amdgpu_device pointer
389  * @reg: dword aligned register offset
390  * @v: 32 bit value to write to the register
391  *
392  * Writes the value specified to the offset specified.
393  */
394 void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
395 {
396 	if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
397 		adev->last_mm_index = v;
398 	}
399 
400 	if ((reg * 4) < adev->rio_mem_size)
401 #ifdef __NetBSD__
402 		bus_space_write_4(adev->rio_memt, adev->rio_memh, 4*reg, v);
403 #else
404 		iowrite32(v, adev->rio_mem + (reg * 4));
405 #endif
406 	else {
407 #ifdef __NetBSD__
408 		bus_space_write_4(adev->rio_memt, adev->rio_memh, 4*mmMM_INDEX,
409 		    4*reg);
410 		bus_space_write_4(adev->rio_memt, adev->rio_memh, 4*mmMM_DATA,
411 		    v);
412 #else
413 		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
414 		iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
415 #endif
416 	}
417 
418 	if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
419 		udelay(500);
420 	}
421 }
422 
423 /**
424  * amdgpu_mm_rdoorbell - read a doorbell dword
425  *
426  * @adev: amdgpu_device pointer
427  * @index: doorbell index
428  *
429  * Returns the value in the doorbell aperture at the
430  * requested doorbell index (CIK).
431  */
432 u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
433 {
434 	if (index < adev->doorbell.num_doorbells) {
435 #ifdef __NetBSD__
436 		return bus_space_read_4(adev->doorbell.bst, adev->doorbell.bsh,
437 		    4*index);
438 #else
439 		return readl(adev->doorbell.ptr + index);
440 #endif
441 	} else {
442 		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
443 		return 0;
444 	}
445 }
446 
447 /**
448  * amdgpu_mm_wdoorbell - write a doorbell dword
449  *
450  * @adev: amdgpu_device pointer
451  * @index: doorbell index
452  * @v: value to write
453  *
454  * Writes @v to the doorbell aperture at the
455  * requested doorbell index (CIK).
456  */
457 void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
458 {
459 	if (index < adev->doorbell.num_doorbells) {
460 #ifdef __NetBSD__
461 		bus_space_write_4(adev->doorbell.bst, adev->doorbell.bsh,
462 		    4*index, v);
463 #else
464 		writel(v, adev->doorbell.ptr + index);
465 #endif
466 	} else {
467 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
468 	}
469 }
470 
471 /**
472  * amdgpu_mm_rdoorbell64 - read a doorbell Qword
473  *
474  * @adev: amdgpu_device pointer
475  * @index: doorbell index
476  *
477  * Returns the value in the doorbell aperture at the
478  * requested doorbell index (VEGA10+).
479  */
480 u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
481 {
482 	if (index < adev->doorbell.num_doorbells) {
483 #ifdef __NetBSD__
484 #ifdef _LP64
485 		return bus_space_read_8(adev->doorbell.bst, adev->doorbell.bsh,
486 		    4*index);
487 #else
488 		uint64_t lo, hi;
489 #if _BYTE_ORDER == _LITTLE_ENDIAN
490 		lo = bus_space_read_4(adev->doorbell.bst, adev->doorbell.bsh,
491 		    4*index);
492 		hi = bus_space_read_4(adev->doorbell.bst, adev->doorbell.bsh,
493 		    4*index + 4);
494 #else
495 		hi = bus_space_read_4(adev->doorbell.bst, adev->doorbell.bsh,
496 		    4*index);
497 		lo = bus_space_read_4(adev->doorbell.bst, adev->doorbell.bsh,
498 		    4*index + 4);
499 #endif
500 		return lo | (hi << 32);
501 #endif
502 #else
503 		return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
504 #endif
505 	} else {
506 		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
507 		return 0;
508 	}
509 }
510 
511 /**
512  * amdgpu_mm_wdoorbell64 - write a doorbell Qword
513  *
514  * @adev: amdgpu_device pointer
515  * @index: doorbell index
516  * @v: value to write
517  *
518  * Writes @v to the doorbell aperture at the
519  * requested doorbell index (VEGA10+).
520  */
521 void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
522 {
523 	if (index < adev->doorbell.num_doorbells) {
524 #ifdef __NetBSD__
525 #ifdef _LP64
526 		bus_space_write_8(adev->doorbell.bst, adev->doorbell.bsh,
527 		    4*index, v);
528 #else
529 		/*
530 		 * XXX This might not be as atomic as one might hope...
531 		 */
532 #if _BYTE_ORDER == _LITTLE_ENDIAN
533 		bus_space_write_4(adev->doorbell.bst, adev->doorbell.bsh,
534 		    4*index, v & 0xffffffffU);
535 		bus_space_write_4(adev->doorbell.bst, adev->doorbell.bsh,
536 		    4*index + 4, v >> 32);
537 #else
538 		bus_space_write_4(adev->doorbell.bst, adev->doorbell.bsh,
539 		    4*index, v >> 32);
540 		bus_space_write_4(adev->doorbell.bst, adev->doorbell.bsh,
541 		    4*index + 4, v & 0xffffffffU);
542 #endif
543 #endif
544 #else
545 		atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
546 #endif
547 	} else {
548 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
549 	}
550 }
551 
552 /**
553  * amdgpu_invalid_rreg - dummy reg read function
554  *
555  * @adev: amdgpu device pointer
556  * @reg: offset of register
557  *
558  * Dummy register read function.  Used for register blocks
559  * that certain asics don't have (all asics).
560  * Returns the value in the register.
561  */
562 static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
563 {
564 	DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
565 	BUG();
566 	return 0;
567 }
568 
569 /**
570  * amdgpu_invalid_wreg - dummy reg write function
571  *
572  * @adev: amdgpu device pointer
573  * @reg: offset of register
574  * @v: value to write to the register
575  *
576  * Dummy register write function.  Used for register blocks
577  * that certain asics don't have (all asics).
578  */
579 static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
580 {
581 	DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
582 		  reg, v);
583 	BUG();
584 }
585 
586 /**
587  * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
588  *
589  * @adev: amdgpu device pointer
590  * @reg: offset of register
591  *
592  * Dummy register read function.  Used for register blocks
593  * that certain asics don't have (all asics).
594  * Returns the value in the register.
595  */
596 static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
597 {
598 	DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
599 	BUG();
600 	return 0;
601 }
602 
603 /**
604  * amdgpu_invalid_wreg64 - dummy reg write function
605  *
606  * @adev: amdgpu device pointer
607  * @reg: offset of register
608  * @v: value to write to the register
609  *
610  * Dummy register write function.  Used for register blocks
611  * that certain asics don't have (all asics).
612  */
613 static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
614 {
615 	DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08"PRIX64"\n",
616 		  reg, v);
617 	BUG();
618 }
619 
620 /**
621  * amdgpu_block_invalid_rreg - dummy reg read function
622  *
623  * @adev: amdgpu device pointer
624  * @block: offset of instance
625  * @reg: offset of register
626  *
627  * Dummy register read function.  Used for register blocks
628  * that certain asics don't have (all asics).
629  * Returns the value in the register.
630  */
631 static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
632 					  uint32_t block, uint32_t reg)
633 {
634 	DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
635 		  reg, block);
636 	BUG();
637 	return 0;
638 }
639 
640 /**
641  * amdgpu_block_invalid_wreg - dummy reg write function
642  *
643  * @adev: amdgpu device pointer
644  * @block: offset of instance
645  * @reg: offset of register
646  * @v: value to write to the register
647  *
648  * Dummy register write function.  Used for register blocks
649  * that certain asics don't have (all asics).
650  */
651 static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
652 				      uint32_t block,
653 				      uint32_t reg, uint32_t v)
654 {
655 	DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
656 		  reg, block, v);
657 	BUG();
658 }
659 
660 /**
661  * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
662  *
663  * @adev: amdgpu device pointer
664  *
665  * Allocates a scratch page of VRAM for use by various things in the
666  * driver.
667  */
668 static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
669 {
670 	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
671 				       PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
672 				       &adev->vram_scratch.robj,
673 				       &adev->vram_scratch.gpu_addr,
674 				       (void **)__UNVOLATILE(&adev->vram_scratch.ptr));
675 }
676 
677 /**
678  * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
679  *
680  * @adev: amdgpu device pointer
681  *
682  * Frees the VRAM scratch page.
683  */
684 static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
685 {
686 	amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
687 }
688 
689 /**
690  * amdgpu_device_program_register_sequence - program an array of registers.
691  *
692  * @adev: amdgpu_device pointer
693  * @registers: pointer to the register array
694  * @array_size: size of the register array
695  *
696  * Programs an array of registers with AND and OR masks.
697  * This is a helper for setting golden registers.
698  */
699 void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
700 					     const u32 *registers,
701 					     const u32 array_size)
702 {
703 	u32 tmp, reg, and_mask, or_mask;
704 	int i;
705 
706 	if (array_size % 3)
707 		return;
708 
709 	for (i = 0; i < array_size; i +=3) {
710 		reg = registers[i + 0];
711 		and_mask = registers[i + 1];
712 		or_mask = registers[i + 2];
713 
714 		if (and_mask == 0xffffffff) {
715 			tmp = or_mask;
716 		} else {
717 			tmp = RREG32(reg);
718 			tmp &= ~and_mask;
719 			if (adev->family >= AMDGPU_FAMILY_AI)
720 				tmp |= (or_mask & and_mask);
721 			else
722 				tmp |= or_mask;
723 		}
724 		WREG32(reg, tmp);
725 	}
726 }
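
/*
 * Illustrative sketch only (register names and values are hypothetical):
 * the register array is consumed as {offset, and_mask, or_mask} triplets,
 * so a "golden settings" table and its application look roughly like this:
 *
 *	static const u32 golden_settings[] = {
 *		mmREG_A, 0x000000ff, 0x00000001,	// clear low byte, set bit 0
 *		mmREG_B, 0xffffffff, 0x12345678,	// full mask: direct write
 *	};
 *	amdgpu_device_program_register_sequence(adev, golden_settings,
 *						ARRAY_SIZE(golden_settings));
 */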
727 
728 /**
729  * amdgpu_device_pci_config_reset - reset the GPU
730  *
731  * @adev: amdgpu_device pointer
732  *
733  * Resets the GPU using the pci config reset sequence.
734  * Only applicable to asics prior to vega10.
735  */
736 void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
737 {
738 	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
739 }
740 
741 /*
742  * GPU doorbell aperture helpers function.
743  */
744 /**
745  * amdgpu_device_doorbell_init - Init doorbell driver information.
746  *
747  * @adev: amdgpu_device pointer
748  *
749  * Init doorbell driver information (CIK)
750  * Returns 0 on success, error on failure.
751  */
752 static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
753 {
754 
755 	/* No doorbell on SI hardware generation */
756 	if (adev->asic_type < CHIP_BONAIRE) {
757 		adev->doorbell.base = 0;
758 		adev->doorbell.size = 0;
759 		adev->doorbell.num_doorbells = 0;
760 #ifndef __NetBSD__
761 		adev->doorbell.ptr = NULL;
762 #endif
763 		return 0;
764 	}
765 
766 	if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
767 		return -EINVAL;
768 
769 	amdgpu_asic_init_doorbell_index(adev);
770 
771 	/* doorbell bar mapping */
772 	adev->doorbell.base = pci_resource_start(adev->pdev, 2);
773 	adev->doorbell.size = pci_resource_len(adev->pdev, 2);
774 
775 	adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
776 					     adev->doorbell_index.max_assignment+1);
777 	if (adev->doorbell.num_doorbells == 0)
778 		return -EINVAL;
779 
780 	/* For Vega, reserve and map two pages on the doorbell BAR since the
781 	 * SDMA paging queue doorbell uses the second page. The
782 	 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
783 	 * doorbells are in the first page. So with the paging queue enabled,
784 	 * the max num_doorbells must grow by one page (0x400 in dwords).
785 	 */
786 	if (adev->asic_type >= CHIP_VEGA10)
787 		adev->doorbell.num_doorbells += 0x400;
788 
789 #ifdef __NetBSD__
790 	int r;
791 	adev->doorbell.bst = adev->pdev->pd_pa.pa_memt;
792 	/* XXX errno NetBSD->Linux */
793 	r = -bus_space_map(adev->doorbell.bst, adev->doorbell.base,
794 	    adev->doorbell.num_doorbells * sizeof(u32), 0,
795 	    &adev->doorbell.bsh);
796 	if (r)
797 		return r;
798 #else
799 	adev->doorbell.ptr = ioremap(adev->doorbell.base,
800 				     adev->doorbell.num_doorbells *
801 				     sizeof(u32));
802 	if (adev->doorbell.ptr == NULL)
803 		return -ENOMEM;
804 #endif
805 
806 	return 0;
807 }
808 
809 /**
810  * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
811  *
812  * @adev: amdgpu_device pointer
813  *
814  * Tear down doorbell driver information (CIK)
815  */
816 static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
817 {
818 #ifdef __NetBSD__
819 	if (adev->doorbell.num_doorbells) {
820 		bus_space_unmap(adev->doorbell.bst, adev->doorbell.bsh,
821 		    adev->doorbell.num_doorbells * sizeof(u32));
822 		adev->doorbell.num_doorbells = 0;
823 	}
824 #else
825 	iounmap(adev->doorbell.ptr);
826 	adev->doorbell.ptr = NULL;
827 #endif
828 }
829 
830 
831 
832 /*
833  * amdgpu_device_wb_*()
834  * Writeback is the method by which the GPU updates special pages in memory
835  * with the status of certain GPU events (fences, ring pointers, etc.).
836  */
837 
838 /**
839  * amdgpu_device_wb_fini - Disable Writeback and free memory
840  *
841  * @adev: amdgpu_device pointer
842  *
843  * Disables Writeback and frees the Writeback memory (all asics).
844  * Used at driver shutdown.
845  */
846 static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
847 {
848 	if (adev->wb.wb_obj) {
849 		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
850 				      &adev->wb.gpu_addr,
851 				      (void **)__UNVOLATILE(&adev->wb.wb));
852 		adev->wb.wb_obj = NULL;
853 	}
854 }
855 
856 /**
857  * amdgpu_device_wb_init- Init Writeback driver info and allocate memory
858  *
859  * @adev: amdgpu_device pointer
860  *
861  * Initializes writeback and allocates writeback memory (all asics).
862  * Used at driver startup.
863  * Returns 0 on success or a negative error code on failure.
864  */
865 static int amdgpu_device_wb_init(struct amdgpu_device *adev)
866 {
867 	int r;
868 
869 	if (adev->wb.wb_obj == NULL) {
870 		/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
871 		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
872 					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
873 					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
874 					    (void **)__UNVOLATILE(&adev->wb.wb));
875 		if (r) {
876 			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
877 			return r;
878 		}
879 
880 		adev->wb.num_wb = AMDGPU_MAX_WB;
881 		memset(&adev->wb.used, 0, sizeof(adev->wb.used));
882 
883 		/* clear wb memory */
884 		memset(__UNVOLATILE(adev->wb.wb), 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
885 	}
886 
887 	return 0;
888 }
889 
890 /**
891  * amdgpu_device_wb_get - Allocate a wb entry
892  *
893  * @adev: amdgpu_device pointer
894  * @wb: wb index
895  *
896  * Allocate a wb slot for use by the driver (all asics).
897  * Returns 0 on success or -EINVAL on failure.
898  */
899 int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
900 {
901 	unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
902 
903 	if (offset < adev->wb.num_wb) {
904 		__set_bit(offset, adev->wb.used);
905 		*wb = offset << 3; /* convert to dw offset */
906 		return 0;
907 	} else {
908 		return -EINVAL;
909 	}
910 }
911 
912 /**
913  * amdgpu_device_wb_free - Free a wb entry
914  *
915  * @adev: amdgpu_device pointer
916  * @wb: wb index
917  *
918  * Free a wb slot allocated for use by the driver (all asics)
919  */
920 void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
921 {
922 	wb >>= 3;
923 	if (wb < adev->wb.num_wb)
924 		__clear_bit(wb, adev->wb.used);
925 }
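
/*
 * Hypothetical usage sketch: a caller allocates a writeback slot, derives
 * the CPU and GPU addresses from the dword offset, and releases the slot
 * when done (the names below are illustrative, not taken from this file):
 *
 *	u32 wb;
 *	if (amdgpu_device_wb_get(adev, &wb) == 0) {
 *		uint64_t gpu_addr = adev->wb.gpu_addr + (wb * 4);
 *		volatile uint32_t *cpu_ptr = &adev->wb.wb[wb];
 *		...
 *		amdgpu_device_wb_free(adev, wb);
 *	}
 */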
926 
927 /**
928  * amdgpu_device_resize_fb_bar - try to resize FB BAR
929  *
930  * @adev: amdgpu_device pointer
931  *
932  * Try to resize the FB BAR to make all VRAM CPU accessible. We try very hard
933  * not to fail, but if any of the BARs is not accessible after the resize we
934  * abort driver loading by returning -ENODEV.
935  */
936 int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
937 {
938 	u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size);
939 	u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1;
940 	struct pci_bus *root;
941 	struct resource *res;
942 	unsigned i;
943 	u16 cmd;
944 	int r;
945 
946 	/* Bypass for VF */
947 	if (amdgpu_sriov_vf(adev))
948 		return 0;
949 
950 #ifdef __NetBSD__		/* XXX amdgpu fb resize */
951 	__USE(space_needed);
952 	__USE(rbar_size);
953 	__USE(root);
954 	__USE(res);
955 	__USE(i);
956 	__USE(cmd);
957 	__USE(r);
958 #else
959 
960 	/* Check if the root BUS has 64bit memory resources */
961 	root = adev->pdev->bus;
962 	while (root->parent)
963 		root = root->parent;
964 
965 	pci_bus_for_each_resource(root, res, i) {
966 		if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
967 		    res->start > 0x100000000ull)
968 			break;
969 	}
970 
971 	/* Trying to resize is pointless without a root hub window above 4GB */
972 	if (!res)
973 		return 0;
974 
975 	/* Disable memory decoding while we change the BAR addresses and size */
976 	pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
977 	pci_write_config_word(adev->pdev, PCI_COMMAND,
978 			      cmd & ~PCI_COMMAND_MEMORY);
979 
980 	/* Free the VRAM and doorbell BAR, we most likely need to move both. */
981 	amdgpu_device_doorbell_fini(adev);
982 	if (adev->asic_type >= CHIP_BONAIRE)
983 		pci_release_resource(adev->pdev, 2);
984 
985 	pci_release_resource(adev->pdev, 0);
986 
987 	r = pci_resize_resource(adev->pdev, 0, rbar_size);
988 	if (r == -ENOSPC)
989 		DRM_INFO("Not enough PCI address space for a large BAR.");
990 	else if (r && r != -ENOTSUPP)
991 		DRM_ERROR("Problem resizing BAR0 (%d).", r);
992 
993 	pci_assign_unassigned_bus_resources(adev->pdev->bus);
994 
995 	/* When the doorbell or fb BAR isn't available we have no chance of
996 	 * using the device.
997 	 */
998 	r = amdgpu_device_doorbell_init(adev);
999 	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1000 		return -ENODEV;
1001 
1002 	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1003 
1004 #endif
1005 
1006 	return 0;
1007 }
1008 
1009 /*
1010  * GPU helpers function.
1011  */
1012 /**
1013  * amdgpu_device_need_post - check if the hw needs to be posted
1014  *
1015  * @adev: amdgpu_device pointer
1016  *
1017  * Check if the asic has been initialized (all asics) at driver startup,
1018  * or whether post is needed after a hw reset.
1019  * Returns true if post is needed, false if not.
1020  */
1021 bool amdgpu_device_need_post(struct amdgpu_device *adev)
1022 {
1023 	uint32_t reg;
1024 
1025 	if (amdgpu_sriov_vf(adev))
1026 		return false;
1027 
1028 	if (amdgpu_passthrough(adev)) {
1029 		/* for FIJI: In the whole-GPU pass-through virtualization case, after a VM
1030 		 * reboot some old SMC firmware still needs the driver to do a vPost or the
1031 		 * GPU hangs, while SMC firmware versions above 22.15 don't have this flaw,
1032 		 * so we force vPost for SMC versions below 22.15
1033 		 */
1034 		if (adev->asic_type == CHIP_FIJI) {
1035 			int err;
1036 			uint32_t fw_ver;
1037 			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1038 			/* force vPost if an error occurred */
1039 			if (err)
1040 				return true;
1041 
1042 			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
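			/* the SMC firmware version lives at dword offset 69 of the image */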
1043 			if (fw_ver < 0x00160e00)
1044 				return true;
1045 		}
1046 	}
1047 
1048 	if (adev->has_hw_reset) {
1049 		adev->has_hw_reset = false;
1050 		return true;
1051 	}
1052 
1053 	/* bios scratch used on CIK+ */
1054 	if (adev->asic_type >= CHIP_BONAIRE)
1055 		return amdgpu_atombios_scratch_need_asic_init(adev);
1056 
1057 	/* check MEM_SIZE for older asics */
1058 	reg = amdgpu_asic_get_config_memsize(adev);
1059 
1060 	if ((reg != 0) && (reg != 0xffffffff))
1061 		return false;
1062 
1063 	return true;
1064 }
1065 
1066 #ifndef __NetBSD__		/* XXX amdgpu vga */
1067 /* if we get transitioned to only one device, take VGA back */
1068 /**
1069  * amdgpu_device_vga_set_decode - enable/disable vga decode
1070  *
1071  * @cookie: amdgpu_device pointer
1072  * @state: enable/disable vga decode
1073  *
1074  * Enable/disable vga decode (all asics).
1075  * Returns VGA resource flags.
1076  */
1077 static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
1078 {
1079 	struct amdgpu_device *adev = cookie;
1080 	amdgpu_asic_set_vga_state(adev, state);
1081 	if (state)
1082 		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1083 		       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1084 	else
1085 		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1086 }
1087 #endif	/* __NetBSD__ */
1088 
1089 /**
1090  * amdgpu_device_check_block_size - validate the vm block size
1091  *
1092  * @adev: amdgpu_device pointer
1093  *
1094  * Validates the vm block size specified via module parameter.
1095  * The vm block size defines the number of bits in the page table versus the
1096  * page directory; a page is 4KB so we have 12 bits of offset, a minimum of 9
1097  * bits in the page table, and the remaining bits in the page directory.
1098  */
1099 static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
1100 {
1101 	/* defines number of bits in page table versus page directory,
1102 	 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1103 	 * page table and the remaining bits are in the page directory */
1104 	if (amdgpu_vm_block_size == -1)
1105 		return;
1106 
1107 	if (amdgpu_vm_block_size < 9) {
1108 		dev_warn(adev->dev, "VM page table size (%d) too small\n",
1109 			 amdgpu_vm_block_size);
1110 		amdgpu_vm_block_size = -1;
1111 	}
1112 }
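
/*
 * For example, with the 9-bit minimum and 4KB pages (12 bits of offset),
 * each last-level page table covers 2^(9 + 12) bytes = 2MB of GPU virtual
 * address space.
 */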
1113 
1114 /**
1115  * amdgpu_device_check_vm_size - validate the vm size
1116  *
1117  * @adev: amdgpu_device pointer
1118  *
1119  * Validates the vm size in GB specified via module parameter.
1120  * The VM size is the size of the GPU virtual memory space in GB.
1121  */
1122 static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
1123 {
1124 	/* no need to check the default value */
1125 	if (amdgpu_vm_size == -1)
1126 		return;
1127 
1128 	if (amdgpu_vm_size < 1) {
1129 		dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1130 			 amdgpu_vm_size);
1131 		amdgpu_vm_size = -1;
1132 	}
1133 }
1134 
1135 static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1136 {
1137 	struct sysinfo si;
1138 	bool is_os_64 = (sizeof(void *) == 8);
1139 	uint64_t total_memory;
1140 	uint64_t dram_size_seven_GB = 0x1B8000000;
1141 	uint64_t dram_size_three_GB = 0xB8000000;
1142 
1143 	if (amdgpu_smu_memory_pool_size == 0)
1144 		return;
1145 
1146 	if (!is_os_64) {
1147 		DRM_WARN("Not 64-bit OS, feature not supported\n");
1148 		goto def_value;
1149 	}
1150 	si_meminfo(&si);
1151 	total_memory = (uint64_t)si.totalram * si.mem_unit;
1152 
1153 	if ((amdgpu_smu_memory_pool_size == 1) ||
1154 		(amdgpu_smu_memory_pool_size == 2)) {
1155 		if (total_memory < dram_size_three_GB)
1156 			goto def_value1;
1157 	} else if ((amdgpu_smu_memory_pool_size == 4) ||
1158 		(amdgpu_smu_memory_pool_size == 8)) {
1159 		if (total_memory < dram_size_seven_GB)
1160 			goto def_value1;
1161 	} else {
1162 		DRM_WARN("Smu memory pool size not supported\n");
1163 		goto def_value;
1164 	}
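	/* amdgpu_smu_memory_pool_size is in units of 256MB, hence the shift by 28 */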
1165 	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1166 
1167 	return;
1168 
1169 def_value1:
1170 	DRM_WARN("Not enough system memory\n");
1171 def_value:
1172 	adev->pm.smu_prv_buffer_size = 0;
1173 }
1174 
1175 /**
1176  * amdgpu_device_check_arguments - validate module params
1177  *
1178  * @adev: amdgpu_device pointer
1179  *
1180  * Validates certain module parameters and updates
1181  * the associated values used by the driver (all asics).
1182  */
1183 static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
1184 {
1185 	if (amdgpu_sched_jobs < 4) {
1186 		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1187 			 amdgpu_sched_jobs);
1188 		amdgpu_sched_jobs = 4;
1189 	} else if (!is_power_of_2(amdgpu_sched_jobs)){
1190 		dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1191 			 amdgpu_sched_jobs);
1192 		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1193 	}
1194 
1195 	if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
1196 		/* gart size must be greater or equal to 32M */
1197 		dev_warn(adev->dev, "gart size (%d) too small\n",
1198 			 amdgpu_gart_size);
1199 		amdgpu_gart_size = -1;
1200 	}
1201 
1202 	if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
1203 		/* gtt size must be greater or equal to 32M */
1204 		dev_warn(adev->dev, "gtt size (%d) too small\n",
1205 				 amdgpu_gtt_size);
1206 		amdgpu_gtt_size = -1;
1207 	}
1208 
1209 	/* valid range is between 4 and 9 inclusive */
1210 	if (amdgpu_vm_fragment_size != -1 &&
1211 	    (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1212 		dev_warn(adev->dev, "valid range is between 4 and 9\n");
1213 		amdgpu_vm_fragment_size = -1;
1214 	}
1215 
1216 	amdgpu_device_check_smu_prv_buffer_size(adev);
1217 
1218 	amdgpu_device_check_vm_size(adev);
1219 
1220 	amdgpu_device_check_block_size(adev);
1221 
1222 	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
1223 
1224 	return 0;
1225 }
1226 
1227 #ifndef __NetBSD__		/* XXX amdgpu vga */
1228 /**
1229  * amdgpu_switcheroo_set_state - set switcheroo state
1230  *
1231  * @pdev: pci dev pointer
1232  * @state: vga_switcheroo state
1233  *
1234  * Callback for the switcheroo driver.  Suspends or resumes
1235  * the asic before or after it is powered up using ACPI methods.
1236  */
1237 static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
1238 {
1239 	struct drm_device *dev = pci_get_drvdata(pdev);
1240 	int r;
1241 
1242 	if (amdgpu_device_supports_boco(dev) && state == VGA_SWITCHEROO_OFF)
1243 		return;
1244 
1245 	if (state == VGA_SWITCHEROO_ON) {
1246 		pr_info("amdgpu: switched on\n");
1247 		/* don't suspend or resume card normally */
1248 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1249 
1250 #ifndef __NetBSD__		/* pmf handles this for us.  */
1251 		pci_set_power_state(dev->pdev, PCI_D0);
1252 		pci_restore_state(dev->pdev);
1253 		r = pci_enable_device(dev->pdev);
1254 		if (r)
1255 			DRM_WARN("pci_enable_device failed (%d)\n", r);
1256 #endif
1257 		amdgpu_device_resume(dev, true);
1258 
1259 		dev->switch_power_state = DRM_SWITCH_POWER_ON;
1260 		drm_kms_helper_poll_enable(dev);
1261 	} else {
1262 		pr_info("amdgpu: switched off\n");
1263 		drm_kms_helper_poll_disable(dev);
1264 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1265 		amdgpu_device_suspend(dev, true);
1266 #ifndef __NetBSD__		/* pmf handles this for us.  */
1267 		pci_save_state(dev->pdev);
1268 		/* Shut down the device */
1269 		pci_disable_device(dev->pdev);
1270 		pci_set_power_state(dev->pdev, PCI_D3cold);
1271 		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1272 #endif
1273 	}
1274 }
1275 
1276 /**
1277  * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1278  *
1279  * @pdev: pci dev pointer
1280  *
1281  * Callback for the switcheroo driver.  Check if the switcheroo
1282  * state can be changed.
1283  * Returns true if the state can be changed, false if not.
1284  */
1285 static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1286 {
1287 	struct drm_device *dev = pci_get_drvdata(pdev);
1288 
1289 	/*
1290 	* FIXME: open_count is protected by drm_global_mutex but that would lead to
1291 	* locking inversion with the driver load path. And the access here is
1292 	* completely racy anyway. So don't bother with locking for now.
1293 	*/
1294 	return dev->open_count == 0;
1295 }
1296 
1297 static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1298 	.set_gpu_state = amdgpu_switcheroo_set_state,
1299 	.reprobe = NULL,
1300 	.can_switch = amdgpu_switcheroo_can_switch,
1301 };
1302 #endif	/* __NetBSD__ */
1303 
1304 /**
1305  * amdgpu_device_ip_set_clockgating_state - set the CG state
1306  *
1307  * @dev: amdgpu_device pointer
1308  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1309  * @state: clockgating state (gate or ungate)
1310  *
1311  * Sets the requested clockgating state for all instances of
1312  * the hardware IP specified.
1313  * Returns the error code from the last instance.
1314  */
1315 int amdgpu_device_ip_set_clockgating_state(void *dev,
1316 					   enum amd_ip_block_type block_type,
1317 					   enum amd_clockgating_state state)
1318 {
1319 	struct amdgpu_device *adev = dev;
1320 	int i, r = 0;
1321 
1322 	for (i = 0; i < adev->num_ip_blocks; i++) {
1323 		if (!adev->ip_blocks[i].status.valid)
1324 			continue;
1325 		if (adev->ip_blocks[i].version->type != block_type)
1326 			continue;
1327 		if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1328 			continue;
1329 		r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1330 			(void *)adev, state);
1331 		if (r)
1332 			DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1333 				  adev->ip_blocks[i].version->funcs->name, r);
1334 	}
1335 	return r;
1336 }
1337 
1338 /**
1339  * amdgpu_device_ip_set_powergating_state - set the PG state
1340  *
1341  * @dev: amdgpu_device pointer
1342  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1343  * @state: powergating state (gate or ungate)
1344  *
1345  * Sets the requested powergating state for all instances of
1346  * the hardware IP specified.
1347  * Returns the error code from the last instance.
1348  */
1349 int amdgpu_device_ip_set_powergating_state(void *dev,
1350 					   enum amd_ip_block_type block_type,
1351 					   enum amd_powergating_state state)
1352 {
1353 	struct amdgpu_device *adev = dev;
1354 	int i, r = 0;
1355 
1356 	for (i = 0; i < adev->num_ip_blocks; i++) {
1357 		if (!adev->ip_blocks[i].status.valid)
1358 			continue;
1359 		if (adev->ip_blocks[i].version->type != block_type)
1360 			continue;
1361 		if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1362 			continue;
1363 		r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1364 			(void *)adev, state);
1365 		if (r)
1366 			DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1367 				  adev->ip_blocks[i].version->funcs->name, r);
1368 	}
1369 	return r;
1370 }
1371 
1372 /**
1373  * amdgpu_device_ip_get_clockgating_state - get the CG state
1374  *
1375  * @adev: amdgpu_device pointer
1376  * @flags: clockgating feature flags
1377  *
1378  * Walks the list of IPs on the device and updates the clockgating
1379  * flags for each IP.
1380  * Updates @flags with the feature flags for each hardware IP where
1381  * clockgating is enabled.
1382  */
1383 void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
1384 					    u32 *flags)
1385 {
1386 	int i;
1387 
1388 	for (i = 0; i < adev->num_ip_blocks; i++) {
1389 		if (!adev->ip_blocks[i].status.valid)
1390 			continue;
1391 		if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1392 			adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1393 	}
1394 }
1395 
1396 /**
1397  * amdgpu_device_ip_wait_for_idle - wait for idle
1398  *
1399  * @adev: amdgpu_device pointer
1400  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1401  *
1402  * Waits for the requested hardware IP to be idle.
1403  * Returns 0 for success or a negative error code on failure.
1404  */
1405 int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1406 				   enum amd_ip_block_type block_type)
1407 {
1408 	int i, r;
1409 
1410 	for (i = 0; i < adev->num_ip_blocks; i++) {
1411 		if (!adev->ip_blocks[i].status.valid)
1412 			continue;
1413 		if (adev->ip_blocks[i].version->type == block_type) {
1414 			r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
1415 			if (r)
1416 				return r;
1417 			break;
1418 		}
1419 	}
1420 	return 0;
1421 
1422 }
1423 
1424 /**
1425  * amdgpu_device_ip_is_idle - is the hardware IP idle
1426  *
1427  * @adev: amdgpu_device pointer
1428  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1429  *
1430  * Check if the hardware IP is idle or not.
1431  * Returns true if the IP is idle, false if not.
1432  */
1433 bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1434 			      enum amd_ip_block_type block_type)
1435 {
1436 	int i;
1437 
1438 	for (i = 0; i < adev->num_ip_blocks; i++) {
1439 		if (!adev->ip_blocks[i].status.valid)
1440 			continue;
1441 		if (adev->ip_blocks[i].version->type == block_type)
1442 			return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
1443 	}
1444 	return true;
1445 
1446 }
1447 
1448 /**
1449  * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1450  *
1451  * @adev: amdgpu_device pointer
1452  * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
1453  *
1454  * Returns a pointer to the hardware IP block structure
1455  * if it exists for the asic, otherwise NULL.
1456  */
1457 struct amdgpu_ip_block *
1458 amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1459 			      enum amd_ip_block_type type)
1460 {
1461 	int i;
1462 
1463 	for (i = 0; i < adev->num_ip_blocks; i++)
1464 		if (adev->ip_blocks[i].version->type == type)
1465 			return &adev->ip_blocks[i];
1466 
1467 	return NULL;
1468 }
1469 
1470 /**
1471  * amdgpu_device_ip_block_version_cmp
1472  *
1473  * @adev: amdgpu_device pointer
1474  * @type: enum amd_ip_block_type
1475  * @major: major version
1476  * @minor: minor version
1477  *
1478  * Return 0 if the IP block's version is equal to or greater than the one
1479  * requested; return 1 if it is smaller or the ip_block doesn't exist.
1480  */
1481 int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1482 				       enum amd_ip_block_type type,
1483 				       u32 major, u32 minor)
1484 {
1485 	struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
1486 
1487 	if (ip_block && ((ip_block->version->major > major) ||
1488 			((ip_block->version->major == major) &&
1489 			(ip_block->version->minor >= minor))))
1490 		return 0;
1491 
1492 	return 1;
1493 }
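
/*
 * Hypothetical usage sketch (the IP type and version below are illustrative):
 *
 *	if (amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_GFX, 9, 0) == 0) {
 *		// GFX IP is version 9.0 or newer
 *	}
 */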
1494 
1495 /**
1496  * amdgpu_device_ip_block_add
1497  *
1498  * @adev: amdgpu_device pointer
1499  * @ip_block_version: pointer to the IP to add
1500  *
1501  * Adds the IP block driver information to the collection of IPs
1502  * on the asic.
1503  */
1504 int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1505 			       const struct amdgpu_ip_block_version *ip_block_version)
1506 {
1507 	if (!ip_block_version)
1508 		return -EINVAL;
1509 
1510 	DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
1511 		  ip_block_version->funcs->name);
1512 
1513 	adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1514 
1515 	return 0;
1516 }
1517 
1518 /**
1519  * amdgpu_device_enable_virtual_display - enable virtual display feature
1520  *
1521  * @adev: amdgpu_device pointer
1522  *
1523  * Enables the virtual display feature if the user has enabled it via
1524  * the module parameter virtual_display.  This feature provides virtual
1525  * display hardware on headless boards or in virtualized environments.
1526  * This function parses and validates the configuration string specified by
1527  * the user and applies the virtual display configuration (number of
1528  * virtual connectors, crtcs, etc.) specified.
1529  */
1530 static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
1531 {
1532 	adev->enable_virtual_display = false;
1533 
1534 	if (amdgpu_virtual_display) {
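		/*
		 * The option string is a ';'-separated list of
		 * "<pci address>[,<num crtcs>]" entries; "all" matches every
		 * device (e.g. "0000:26:00.0,4;all,2" - values illustrative).
		 */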
1535 		struct drm_device *ddev = adev->ddev;
1536 		const char *pci_address_name = pci_name(ddev->pdev);
1537 		char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
1538 
1539 		pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1540 		pciaddstr_tmp = pciaddstr;
1541 		while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1542 			pciaddname = strsep(&pciaddname_tmp, ",");
1543 			if (!strcmp("all", pciaddname)
1544 			    || !strcmp(pci_address_name, pciaddname)) {
1545 				long num_crtc;
1546 				int res = -1;
1547 
1548 				adev->enable_virtual_display = true;
1549 
1550 				if (pciaddname_tmp)
1551 					res = kstrtol(pciaddname_tmp, 10,
1552 						      &num_crtc);
1553 
1554 				if (!res) {
1555 					if (num_crtc < 1)
1556 						num_crtc = 1;
1557 					if (num_crtc > 6)
1558 						num_crtc = 6;
1559 					adev->mode_info.num_crtc = num_crtc;
1560 				} else {
1561 					adev->mode_info.num_crtc = 1;
1562 				}
1563 				break;
1564 			}
1565 		}
1566 
1567 		DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1568 			 amdgpu_virtual_display, pci_address_name,
1569 			 adev->enable_virtual_display, adev->mode_info.num_crtc);
1570 
1571 		kfree(pciaddstr);
1572 	}
1573 }
1574 
1575 /**
1576  * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1577  *
1578  * @adev: amdgpu_device pointer
1579  *
1580  * Parses the asic configuration parameters specified in the gpu info
1581  * firmware and makes them available to the driver for use in configuring
1582  * the asic.
1583  * Returns 0 on success, -EINVAL on failure.
1584  */
1585 static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1586 {
1587 	const char *chip_name;
1588 	char fw_name[30];
1589 	int err;
1590 	const struct gpu_info_firmware_header_v1_0 *hdr;
1591 
1592 	adev->firmware.gpu_info_fw = NULL;
1593 
1594 	switch (adev->asic_type) {
1595 	case CHIP_TOPAZ:
1596 	case CHIP_TONGA:
1597 	case CHIP_FIJI:
1598 	case CHIP_POLARIS10:
1599 	case CHIP_POLARIS11:
1600 	case CHIP_POLARIS12:
1601 	case CHIP_VEGAM:
1602 	case CHIP_CARRIZO:
1603 	case CHIP_STONEY:
1604 #ifdef CONFIG_DRM_AMDGPU_SI
1605 	case CHIP_VERDE:
1606 	case CHIP_TAHITI:
1607 	case CHIP_PITCAIRN:
1608 	case CHIP_OLAND:
1609 	case CHIP_HAINAN:
1610 #endif
1611 #ifdef CONFIG_DRM_AMDGPU_CIK
1612 	case CHIP_BONAIRE:
1613 	case CHIP_HAWAII:
1614 	case CHIP_KAVERI:
1615 	case CHIP_KABINI:
1616 	case CHIP_MULLINS:
1617 #endif
1618 	case CHIP_VEGA20:
1619 	default:
1620 		return 0;
1621 	case CHIP_VEGA10:
1622 		chip_name = "vega10";
1623 		break;
1624 	case CHIP_VEGA12:
1625 		chip_name = "vega12";
1626 		break;
1627 	case CHIP_RAVEN:
1628 		if (adev->rev_id >= 8)
1629 			chip_name = "raven2";
1630 		else if (adev->pdev->device == 0x15d8)
1631 			chip_name = "picasso";
1632 		else
1633 			chip_name = "raven";
1634 		break;
1635 	case CHIP_ARCTURUS:
1636 		chip_name = "arcturus";
1637 		break;
1638 	case CHIP_RENOIR:
1639 		chip_name = "renoir";
1640 		break;
1641 	case CHIP_NAVI10:
1642 		chip_name = "navi10";
1643 		break;
1644 	case CHIP_NAVI14:
1645 		chip_name = "navi14";
1646 		break;
1647 	case CHIP_NAVI12:
1648 		chip_name = "navi12";
1649 		break;
1650 	}
1651 
1652 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
1653 	err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
1654 	if (err) {
1655 		dev_err(adev->dev,
1656 			"Failed to load gpu_info firmware \"%s\"\n",
1657 			fw_name);
1658 		goto out;
1659 	}
1660 	err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
1661 	if (err) {
1662 		dev_err(adev->dev,
1663 			"Failed to validate gpu_info firmware \"%s\"\n",
1664 			fw_name);
1665 		goto out;
1666 	}
1667 
1668 	hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
1669 	amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
1670 
1671 	switch (hdr->version_major) {
1672 	case 1:
1673 	{
1674 		const struct gpu_info_firmware_v1_0 *gpu_info_fw =
1675 			(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
1676 								le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1677 
1678 		if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
1679 			goto parse_soc_bounding_box;
1680 
1681 		adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
1682 		adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
1683 		adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
1684 		adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
1685 		adev->gfx.config.max_texture_channel_caches =
1686 			le32_to_cpu(gpu_info_fw->gc_num_tccs);
1687 		adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
1688 		adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
1689 		adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
1690 		adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
1691 		adev->gfx.config.double_offchip_lds_buf =
1692 			le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
1693 		adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
1694 		adev->gfx.cu_info.max_waves_per_simd =
1695 			le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
1696 		adev->gfx.cu_info.max_scratch_slots_per_cu =
1697 			le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
1698 		adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
1699 		if (hdr->version_minor >= 1) {
1700 			const struct gpu_info_firmware_v1_1 *gpu_info_fw =
1701 				(const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
1702 									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1703 			adev->gfx.config.num_sc_per_sh =
1704 				le32_to_cpu(gpu_info_fw->num_sc_per_sh);
1705 			adev->gfx.config.num_packer_per_sc =
1706 				le32_to_cpu(gpu_info_fw->num_packer_per_sc);
1707 		}
1708 
1709 parse_soc_bounding_box:
1710 		/*
1711 		 * SoC bounding box info is not integrated in the discovery table,
1712 		 * so we always need to parse it from the gpu_info firmware.
1713 		 */
1714 		if (hdr->version_minor == 2) {
1715 			const struct gpu_info_firmware_v1_2 *gpu_info_fw =
1716 				(const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
1717 									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1718 			adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
1719 		}
1720 		break;
1721 	}
1722 	default:
1723 		dev_err(adev->dev,
1724 			"Unsupported gpu_info table %d\n", hdr->header.ucode_version);
1725 		err = -EINVAL;
1726 		goto out;
1727 	}
1728 out:
1729 	return err;
1730 }
1731 
1732 /**
1733  * amdgpu_device_ip_early_init - run early init for hardware IPs
1734  *
1735  * @adev: amdgpu_device pointer
1736  *
1737  * Early initialization pass for hardware IPs.  The hardware IPs that make
1738  * up each asic are discovered and each IP's early_init callback is run.  This
1739  * is the first stage in initializing the asic.
1740  * Returns 0 on success, negative error code on failure.
1741  */
1742 static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
1743 {
1744 	int i, r;
1745 
1746 	amdgpu_device_enable_virtual_display(adev);
1747 
1748 	switch (adev->asic_type) {
1749 	case CHIP_TOPAZ:
1750 	case CHIP_TONGA:
1751 	case CHIP_FIJI:
1752 	case CHIP_POLARIS10:
1753 	case CHIP_POLARIS11:
1754 	case CHIP_POLARIS12:
1755 	case CHIP_VEGAM:
1756 	case CHIP_CARRIZO:
1757 	case CHIP_STONEY:
1758 		if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY)
1759 			adev->family = AMDGPU_FAMILY_CZ;
1760 		else
1761 			adev->family = AMDGPU_FAMILY_VI;
1762 
1763 		r = vi_set_ip_blocks(adev);
1764 		if (r)
1765 			return r;
1766 		break;
1767 #ifdef CONFIG_DRM_AMDGPU_SI
1768 	case CHIP_VERDE:
1769 	case CHIP_TAHITI:
1770 	case CHIP_PITCAIRN:
1771 	case CHIP_OLAND:
1772 	case CHIP_HAINAN:
1773 		adev->family = AMDGPU_FAMILY_SI;
1774 		r = si_set_ip_blocks(adev);
1775 		if (r)
1776 			return r;
1777 		break;
1778 #endif
1779 #ifdef CONFIG_DRM_AMDGPU_CIK
1780 	case CHIP_BONAIRE:
1781 	case CHIP_HAWAII:
1782 	case CHIP_KAVERI:
1783 	case CHIP_KABINI:
1784 	case CHIP_MULLINS:
1785 		if ((adev->asic_type == CHIP_BONAIRE) || (adev->asic_type == CHIP_HAWAII))
1786 			adev->family = AMDGPU_FAMILY_CI;
1787 		else
1788 			adev->family = AMDGPU_FAMILY_KV;
1789 
1790 		r = cik_set_ip_blocks(adev);
1791 		if (r)
1792 			return r;
1793 		break;
1794 #endif
1795 	case CHIP_VEGA10:
1796 	case CHIP_VEGA12:
1797 	case CHIP_VEGA20:
1798 	case CHIP_RAVEN:
1799 	case CHIP_ARCTURUS:
1800 	case CHIP_RENOIR:
1801 		if (adev->asic_type == CHIP_RAVEN ||
1802 		    adev->asic_type == CHIP_RENOIR)
1803 			adev->family = AMDGPU_FAMILY_RV;
1804 		else
1805 			adev->family = AMDGPU_FAMILY_AI;
1806 
1807 		r = soc15_set_ip_blocks(adev);
1808 		if (r)
1809 			return r;
1810 		break;
1811 	case  CHIP_NAVI10:
1812 	case  CHIP_NAVI14:
1813 	case  CHIP_NAVI12:
1814 		adev->family = AMDGPU_FAMILY_NV;
1815 
1816 		r = nv_set_ip_blocks(adev);
1817 		if (r)
1818 			return r;
1819 		break;
1820 	default:
1821 		/* FIXME: not supported yet */
1822 		return -EINVAL;
1823 	}
1824 
1825 	r = amdgpu_device_parse_gpu_info_fw(adev);
1826 	if (r)
1827 		return r;
1828 
1829 	if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
1830 		amdgpu_discovery_get_gfx_info(adev);
1831 
1832 	amdgpu_amdkfd_device_probe(adev);
1833 
1834 	if (amdgpu_sriov_vf(adev)) {
1835 		r = amdgpu_virt_request_full_gpu(adev, true);
1836 		if (r)
1837 			return -EAGAIN;
1838 	}
1839 
1840 	adev->pm.pp_feature = amdgpu_pp_feature_mask;
1841 	if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
1842 		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1843 
1844 	for (i = 0; i < adev->num_ip_blocks; i++) {
1845 		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
1846 			DRM_ERROR("disabled ip block: %d <%s>\n",
1847 				  i, adev->ip_blocks[i].version->funcs->name);
1848 			adev->ip_blocks[i].status.valid = false;
1849 		} else {
1850 			if (adev->ip_blocks[i].version->funcs->early_init) {
1851 				r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
1852 				if (r == -ENOENT) {
1853 					adev->ip_blocks[i].status.valid = false;
1854 				} else if (r) {
1855 					DRM_ERROR("early_init of IP block <%s> failed %d\n",
1856 						  adev->ip_blocks[i].version->funcs->name, r);
1857 					return r;
1858 				} else {
1859 					adev->ip_blocks[i].status.valid = true;
1860 				}
1861 			} else {
1862 				adev->ip_blocks[i].status.valid = true;
1863 			}
1864 		}
1865 		/* get the vbios after the asic_funcs are set up */
1866 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
1867 			/* Read BIOS */
1868 			if (!amdgpu_get_bios(adev))
1869 				return -EINVAL;
1870 
1871 			r = amdgpu_atombios_init(adev);
1872 			if (r) {
1873 				dev_err(adev->dev, "amdgpu_atombios_init failed\n");
1874 				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
1875 				return r;
1876 			}
1877 		}
1878 	}
1879 
1880 	adev->cg_flags &= amdgpu_cg_mask;
1881 	adev->pg_flags &= amdgpu_pg_mask;
1882 
1883 	return 0;
1884 }
1885 
1886 static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
1887 {
1888 	int i, r;
1889 
1890 	for (i = 0; i < adev->num_ip_blocks; i++) {
1891 		if (!adev->ip_blocks[i].status.sw)
1892 			continue;
1893 		if (adev->ip_blocks[i].status.hw)
1894 			continue;
1895 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
1896 		    (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
1897 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
1898 			r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1899 			if (r) {
1900 				DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1901 					  adev->ip_blocks[i].version->funcs->name, r);
1902 				return r;
1903 			}
1904 			adev->ip_blocks[i].status.hw = true;
1905 		}
1906 	}
1907 
1908 	return 0;
1909 }
1910 
1911 static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
1912 {
1913 	int i, r;
1914 
1915 	for (i = 0; i < adev->num_ip_blocks; i++) {
1916 		if (!adev->ip_blocks[i].status.sw)
1917 			continue;
1918 		if (adev->ip_blocks[i].status.hw)
1919 			continue;
1920 		r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1921 		if (r) {
1922 			DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1923 				  adev->ip_blocks[i].version->funcs->name, r);
1924 			return r;
1925 		}
1926 		adev->ip_blocks[i].status.hw = true;
1927 	}
1928 
1929 	return 0;
1930 }
1931 
1932 static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
1933 {
1934 	int r = 0;
1935 	int i;
1936 	uint32_t smu_version;
1937 
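	/*
	 * On Vega10 and newer, firmware loading goes through the PSP block,
	 * so bring PSP up first: hw_init on a cold start, resume when coming
	 * back from a GPU reset or suspend.
	 */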
1938 	if (adev->asic_type >= CHIP_VEGA10) {
1939 		for (i = 0; i < adev->num_ip_blocks; i++) {
1940 			if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
1941 				continue;
1942 
1943 			/* no need to do the fw loading again if already done*/
1944 			if (adev->ip_blocks[i].status.hw == true)
1945 				break;
1946 
1947 			if (adev->in_gpu_reset || adev->in_suspend) {
1948 				r = adev->ip_blocks[i].version->funcs->resume(adev);
1949 				if (r) {
1950 					DRM_ERROR("resume of IP block <%s> failed %d\n",
1951 							  adev->ip_blocks[i].version->funcs->name, r);
1952 					return r;
1953 				}
1954 			} else {
1955 				r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1956 				if (r) {
1957 					DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1958 							  adev->ip_blocks[i].version->funcs->name, r);
1959 					return r;
1960 				}
1961 			}
1962 
1963 			adev->ip_blocks[i].status.hw = true;
1964 			break;
1965 		}
1966 	}
1967 
1968 	if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
1969 		r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
1970 
1971 	return r;
1972 }
1973 
1974 /**
1975  * amdgpu_device_ip_init - run init for hardware IPs
1976  *
1977  * @adev: amdgpu_device pointer
1978  *
1979  * Main initialization pass for hardware IPs.  The list of all the hardware
1980  * IPs that make up the asic is walked and the sw_init and hw_init callbacks
1981  * are run.  sw_init initializes the software state associated with each IP
1982  * and hw_init initializes the hardware associated with each IP.
1983  * Returns 0 on success, negative error code on failure.
1984  */
1985 static int amdgpu_device_ip_init(struct amdgpu_device *adev)
1986 {
1987 	int i, r;
1988 
1989 	r = amdgpu_ras_init(adev);
1990 	if (r)
1991 		return r;
1992 
1993 	for (i = 0; i < adev->num_ip_blocks; i++) {
1994 		if (!adev->ip_blocks[i].status.valid)
1995 			continue;
1996 		r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
1997 		if (r) {
1998 			DRM_ERROR("sw_init of IP block <%s> failed %d\n",
1999 				  adev->ip_blocks[i].version->funcs->name, r);
2000 			goto init_failed;
2001 		}
2002 		adev->ip_blocks[i].status.sw = true;
2003 
2004 		/* need to do gmc hw init early so we can allocate gpu mem */
2005 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2006 			r = amdgpu_device_vram_scratch_init(adev);
2007 			if (r) {
2008 				DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
2009 				goto init_failed;
2010 			}
2011 			r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2012 			if (r) {
2013 				DRM_ERROR("hw_init %d failed %d\n", i, r);
2014 				goto init_failed;
2015 			}
2016 			r = amdgpu_device_wb_init(adev);
2017 			if (r) {
2018 				DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
2019 				goto init_failed;
2020 			}
2021 			adev->ip_blocks[i].status.hw = true;
2022 
2023 			/* right after GMC hw init, we create CSA */
2024 			if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
2025 				r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
2026 								AMDGPU_GEM_DOMAIN_VRAM,
2027 								AMDGPU_CSA_SIZE);
2028 				if (r) {
2029 					DRM_ERROR("allocate CSA failed %d\n", r);
2030 					goto init_failed;
2031 				}
2032 			}
2033 		}
2034 	}
2035 
2036 	if (amdgpu_sriov_vf(adev))
2037 		amdgpu_virt_init_data_exchange(adev);
2038 
2039 	r = amdgpu_ib_pool_init(adev);
2040 	if (r) {
2041 		dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2042 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2043 		goto init_failed;
2044 	}
2045 
2046 	r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2047 	if (r)
2048 		goto init_failed;
2049 
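	/*
	 * Hardware init is split in two phases: phase 1 brings up COMMON and
	 * IH (plus PSP under SR-IOV), firmware is then loaded via
	 * amdgpu_device_fw_loading, and phase 2 initializes the remaining blocks.
	 */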
2050 	r = amdgpu_device_ip_hw_init_phase1(adev);
2051 	if (r)
2052 		goto init_failed;
2053 
2054 	r = amdgpu_device_fw_loading(adev);
2055 	if (r)
2056 		goto init_failed;
2057 
2058 	r = amdgpu_device_ip_hw_init_phase2(adev);
2059 	if (r)
2060 		goto init_failed;
2061 
2062 	/*
2063 	 * Retired pages will be loaded from eeprom and reserved here;
2064 	 * this must be called after amdgpu_device_ip_hw_init_phase2 since
2065 	 * for some ASICs the RAS EEPROM code relies on the SMU being fully
2066 	 * functional for I2C communication, which is only true at this point.
2067 	 * recovery_init may fail, but it can free all resources allocated by
2068 	 * itself and its failure should not stop the amdgpu init process.
2069 	 *
2070 	 * Note: theoretically, this should be called before all vram allocations
2071 	 * to protect retired pages from being reused.
2072 	 */
2073 	amdgpu_ras_recovery_init(adev);
2074 
2075 	if (adev->gmc.xgmi.num_physical_nodes > 1)
2076 		amdgpu_xgmi_add_device(adev);
2077 	amdgpu_amdkfd_device_init(adev);
2078 
2079 init_failed:
2080 	if (amdgpu_sriov_vf(adev))
2081 		amdgpu_virt_release_full_gpu(adev, true);
2082 
2083 	return r;
2084 }
2085 
2086 /**
2087  * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2088  *
2089  * @adev: amdgpu_device pointer
2090  *
2091  * Writes a reset magic value to the gart pointer in VRAM.  The driver calls
2092  * this function before a GPU reset.  If the value is retained after a
2093  * GPU reset, VRAM has not been lost.  Some GPU resets may destroy VRAM contents.
2094  */
2095 static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
2096 {
2097 	memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2098 }
2099 
2100 /**
2101  * amdgpu_device_check_vram_lost - check if vram is valid
2102  *
2103  * @adev: amdgpu_device pointer
2104  *
2105  * Checks the reset magic value written to the gart pointer in VRAM.
2106  * The driver calls this after a GPU reset to see if the contents of
2107  * VRAM have been lost or not.
2108  * Returns true if vram is lost, false if not.
2109  */
2110 static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
2111 {
2112 	return !!memcmp(adev->gart.ptr, adev->reset_magic,
2113 			AMDGPU_RESET_MAGIC_NUM);
2114 }
2115 
2116 /**
2117  * amdgpu_device_set_cg_state - set clockgating for amdgpu device
2118  *
2119  * @adev: amdgpu_device pointer
2120  * @state: clockgating state (gate or ungate)
2121  *
2122  * The list of all the hardware IPs that make up the asic is walked and the
2123  * set_clockgating_state callbacks are run.
2124  * On late init this pass enables clockgating for the hardware IPs; on fini
2125  * or suspend it disables clockgating again.
2126  * Returns 0 on success, negative error code on failure.
2127  */
2128 
2129 static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2130 						enum amd_clockgating_state state)
2131 {
2132 	int i, j, r;
2133 
2134 	if (amdgpu_emu_mode == 1)
2135 		return 0;
2136 
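	/* When gating, walk the IP list front to back; when ungating, walk it in reverse. */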
2137 	for (j = 0; j < adev->num_ip_blocks; j++) {
2138 		i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2139 		if (!adev->ip_blocks[i].status.late_initialized)
2140 			continue;
2141 		/* skip CG for VCE/UVD, it's handled specially */
2142 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2143 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2144 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2145 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2146 		    adev->ip_blocks[i].version->funcs->set_clockgating_state) {
2147 			/* enable clockgating to save power */
2148 			r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
2149 										     state);
2150 			if (r) {
2151 				DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
2152 					  adev->ip_blocks[i].version->funcs->name, r);
2153 				return r;
2154 			}
2155 		}
2156 	}
2157 
2158 	return 0;
2159 }
2160 
2161 static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state)
2162 {
2163 	int i, j, r;
2164 
2165 	if (amdgpu_emu_mode == 1)
2166 		return 0;
2167 
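	/* Same ordering as clockgating: gate front to back, ungate in reverse. */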
2168 	for (j = 0; j < adev->num_ip_blocks; j++) {
2169 		i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2170 		if (!adev->ip_blocks[i].status.late_initialized)
2171 			continue;
2172 		/* skip PG for VCE/UVD, it's handled specially */
2173 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2174 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2175 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2176 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2177 		    adev->ip_blocks[i].version->funcs->set_powergating_state) {
2178 			/* enable powergating to save power */
2179 			r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
2180 											state);
2181 			if (r) {
2182 				DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2183 					  adev->ip_blocks[i].version->funcs->name, r);
2184 				return r;
2185 			}
2186 		}
2187 	}
2188 	return 0;
2189 }
2190 
2191 static int amdgpu_device_enable_mgpu_fan_boost(void)
2192 {
2193 	struct amdgpu_gpu_instance *gpu_ins;
2194 	struct amdgpu_device *adev;
2195 	int i, ret = 0;
2196 
2197 	mutex_lock(&mgpu_info.mutex);
2198 
2199 	/*
2200 	 * MGPU fan boost feature should be enabled
2201 	 * only when there are two or more dGPUs in
2202 	 * the system
2203 	 */
2204 	if (mgpu_info.num_dgpu < 2)
2205 		goto out;
2206 
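	/* Enable the boost once per discrete GPU; APUs are skipped. */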
2207 	for (i = 0; i < mgpu_info.num_dgpu; i++) {
2208 		gpu_ins = &(mgpu_info.gpu_ins[i]);
2209 		adev = gpu_ins->adev;
2210 		if (!(adev->flags & AMD_IS_APU) &&
2211 		    !gpu_ins->mgpu_fan_enabled &&
2212 		    adev->powerplay.pp_funcs &&
2213 		    adev->powerplay.pp_funcs->enable_mgpu_fan_boost) {
2214 			ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2215 			if (ret)
2216 				break;
2217 
2218 			gpu_ins->mgpu_fan_enabled = 1;
2219 		}
2220 	}
2221 
2222 out:
2223 	mutex_unlock(&mgpu_info.mutex);
2224 
2225 	return ret;
2226 }
2227 
2228 /**
2229  * amdgpu_device_ip_late_init - run late init for hardware IPs
2230  *
2231  * @adev: amdgpu_device pointer
2232  *
2233  * Late initialization pass for hardware IPs.  The list of all the hardware
2234  * IPs that make up the asic is walked and the late_init callbacks are run.
2235  * late_init covers any special initialization that an IP requires
2236  * after all of the IPs have been initialized or something that needs to happen
2237  * late in the init process.
2238  * Returns 0 on success, negative error code on failure.
2239  */
2240 static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2241 {
2242 	struct amdgpu_gpu_instance *gpu_instance;
2243 	int i = 0, r;
2244 
2245 	for (i = 0; i < adev->num_ip_blocks; i++) {
2246 		if (!adev->ip_blocks[i].status.hw)
2247 			continue;
2248 		if (adev->ip_blocks[i].version->funcs->late_init) {
2249 			r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2250 			if (r) {
2251 				DRM_ERROR("late_init of IP block <%s> failed %d\n",
2252 					  adev->ip_blocks[i].version->funcs->name, r);
2253 				return r;
2254 			}
2255 		}
2256 		adev->ip_blocks[i].status.late_initialized = true;
2257 	}
2258 
2259 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2260 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
2261 
2262 	amdgpu_device_fill_reset_magic(adev);
2263 
2264 	r = amdgpu_device_enable_mgpu_fan_boost();
2265 	if (r)
2266 		DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2267 
2268 
2269 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
2270 		mutex_lock(&mgpu_info.mutex);
2271 
2272 		/*
2273 		 * Reset device p-state to low as this was booted with high.
2274 		 *
2275 		 * This should be performed only after all devices from the same
2276 		 * hive get initialized.
2277 		 *
2278 		 * However, the number of devices in the hive is not known in
2279 		 * advance; it is counted up one by one as each device initializes.
2280 		 *
2281 		 * So we wait until all XGMI interlinked devices are initialized.
2282 		 * This may introduce some delay, as those devices may come from
2283 		 * different hives, but that should be OK.
2284 		 */
2285 		if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2286 			for (i = 0; i < mgpu_info.num_gpu; i++) {
2287 				gpu_instance = &(mgpu_info.gpu_ins[i]);
2288 				if (gpu_instance->adev->flags & AMD_IS_APU)
2289 					continue;
2290 
2291 				r = amdgpu_xgmi_set_pstate(gpu_instance->adev, 0);
2292 				if (r) {
2293 					DRM_ERROR("pstate setting failed (%d).\n", r);
2294 					break;
2295 				}
2296 			}
2297 		}
2298 
2299 		mutex_unlock(&mgpu_info.mutex);
2300 	}
2301 
2302 	return 0;
2303 }
2304 
2305 /**
2306  * amdgpu_device_ip_fini - run fini for hardware IPs
2307  *
2308  * @adev: amdgpu_device pointer
2309  *
2310  * Main teardown pass for hardware IPs.  The list of all the hardware
2311  * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2312  * are run.  hw_fini tears down the hardware associated with each IP
2313  * and sw_fini tears down any software state associated with each IP.
2314  * Returns 0 on success, negative error code on failure.
2315  */
2316 static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
2317 {
2318 	int i, r;
2319 
2320 	amdgpu_ras_pre_fini(adev);
2321 
2322 	if (adev->gmc.xgmi.num_physical_nodes > 1)
2323 		amdgpu_xgmi_remove_device(adev);
2324 
2325 	amdgpu_amdkfd_device_fini(adev);
2326 
2327 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2328 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2329 
2330 	/* need to disable SMC first */
2331 	for (i = 0; i < adev->num_ip_blocks; i++) {
2332 		if (!adev->ip_blocks[i].status.hw)
2333 			continue;
2334 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2335 			r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2336 			/* XXX handle errors */
2337 			if (r) {
2338 				DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2339 					  adev->ip_blocks[i].version->funcs->name, r);
2340 			}
2341 			adev->ip_blocks[i].status.hw = false;
2342 			break;
2343 		}
2344 	}
2345 
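	/* hw_fini runs over the remaining blocks in reverse init order. */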
2346 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2347 		if (!adev->ip_blocks[i].status.hw)
2348 			continue;
2349 
2350 		r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2351 		/* XXX handle errors */
2352 		if (r) {
2353 			DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2354 				  adev->ip_blocks[i].version->funcs->name, r);
2355 		}
2356 
2357 		adev->ip_blocks[i].status.hw = false;
2358 	}
2359 
2360 
2361 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2362 		if (!adev->ip_blocks[i].status.sw)
2363 			continue;
2364 
2365 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2366 			amdgpu_ucode_free_bo(adev);
2367 			amdgpu_free_static_csa(&adev->virt.csa_obj);
2368 			amdgpu_device_wb_fini(adev);
2369 			amdgpu_device_vram_scratch_fini(adev);
2370 			amdgpu_ib_pool_fini(adev);
2371 		}
2372 
2373 		r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
2374 		/* XXX handle errors */
2375 		if (r) {
2376 			DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2377 				  adev->ip_blocks[i].version->funcs->name, r);
2378 		}
2379 		adev->ip_blocks[i].status.sw = false;
2380 		adev->ip_blocks[i].status.valid = false;
2381 	}
2382 
2383 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2384 		if (!adev->ip_blocks[i].status.late_initialized)
2385 			continue;
2386 		if (adev->ip_blocks[i].version->funcs->late_fini)
2387 			adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2388 		adev->ip_blocks[i].status.late_initialized = false;
2389 	}
2390 
2391 	amdgpu_ras_fini(adev);
2392 
2393 	if (amdgpu_sriov_vf(adev))
2394 		if (amdgpu_virt_release_full_gpu(adev, false))
2395 			DRM_ERROR("failed to release exclusive mode on fini\n");
2396 
2397 	return 0;
2398 }
2399 
2400 /**
2401  * amdgpu_device_delayed_init_work_handler - work handler for IB tests
2402  *
2403  * @work: work_struct.
2404  */
2405 static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2406 {
2407 	struct amdgpu_device *adev =
2408 		container_of(work, struct amdgpu_device, delayed_init_work.work);
2409 	int r;
2410 
2411 	r = amdgpu_ib_ring_tests(adev);
2412 	if (r)
2413 		DRM_ERROR("ib ring test failed (%d).\n", r);
2414 }
2415 
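/*
 * Delayed work that actually enables GFXOFF: it only powergates GFX if no
 * request to keep GFX powered (gfx_off_req_count) arrived in the meantime.
 */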
2416 static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2417 {
2418 	struct amdgpu_device *adev =
2419 		container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2420 
2421 	mutex_lock(&adev->gfx.gfx_off_mutex);
2422 	if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
2423 		if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2424 			adev->gfx.gfx_off_state = true;
2425 	}
2426 	mutex_unlock(&adev->gfx.gfx_off_mutex);
2427 }
2428 
2429 /**
2430  * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
2431  *
2432  * @adev: amdgpu_device pointer
2433  *
2434  * First suspend pass for hardware IPs.  Clockgating and powergating are
2435  * disabled, then the suspend callbacks are run for the display (DCE)
2436  * blocks only; the remaining blocks are handled in phase 2.  suspend puts
2437  * the hardware and software state of each IP into a state suitable for suspend.
2438  * Returns 0 on success, negative error code on failure.
2439  */
2440 static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2441 {
2442 	int i, r;
2443 
2444 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2445 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2446 
2447 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2448 		if (!adev->ip_blocks[i].status.valid)
2449 			continue;
2450 		/* displays are handled separately */
2451 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
2452 			/* XXX handle errors */
2453 			r = adev->ip_blocks[i].version->funcs->suspend(adev);
2454 			/* XXX handle errors */
2455 			if (r) {
2456 				DRM_ERROR("suspend of IP block <%s> failed %d\n",
2457 					  adev->ip_blocks[i].version->funcs->name, r);
2458 				return r;
2459 			}
2460 			adev->ip_blocks[i].status.hw = false;
2461 		}
2462 	}
2463 
2464 	return 0;
2465 }
2466 
2467 /**
2468  * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2469  *
2470  * @adev: amdgpu_device pointer
2471  *
2472  * Second suspend pass for hardware IPs.  The list of all the hardware
2473  * IPs that make up the asic is walked and the suspend callbacks are run
2474  * for every block except the displays, which were handled in phase 1.
2475  * This puts each IP's hardware and software state in a state suitable for suspend.
2476  * Returns 0 on success, negative error code on failure.
2477  */
2478 static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
2479 {
2480 	int i, r __unused;
2481 
2482 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2483 		if (!adev->ip_blocks[i].status.valid)
2484 			continue;
2485 		/* displays are handled in phase1 */
2486 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2487 			continue;
2488 		/* PSP lost connection when err_event_athub occurs */
2489 		if (amdgpu_ras_intr_triggered() &&
2490 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
2491 			adev->ip_blocks[i].status.hw = false;
2492 			continue;
2493 		}
2494 		/* XXX handle errors */
2495 		r = adev->ip_blocks[i].version->funcs->suspend(adev);
2496 		/* XXX handle errors */
2497 		if (r) {
2498 			DRM_ERROR("suspend of IP block <%s> failed %d\n",
2499 				  adev->ip_blocks[i].version->funcs->name, r);
2500 		}
2501 		adev->ip_blocks[i].status.hw = false;
2502 		/* handle putting the SMC in the appropriate state */
2503 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2504 			r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
2505 			if (r) {
2506 				DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
2507 					  adev->mp1_state, r);
2508 				return r;
2509 			}
2510 		}
2511 
2512 		adev->ip_blocks[i].status.hw = false;
2513 	}
2514 
2515 	return 0;
2516 }
2517 
2518 /**
2519  * amdgpu_device_ip_suspend - run suspend for hardware IPs
2520  *
2521  * @adev: amdgpu_device pointer
2522  *
2523  * Main suspend function for hardware IPs.  The list of all the hardware
2524  * IPs that make up the asic is walked, clockgating is disabled and the
2525  * suspend callbacks are run.  suspend puts the hardware and software state
2526  * in each IP into a state suitable for suspend.
2527  * Returns 0 on success, negative error code on failure.
2528  */
2529 int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
2530 {
2531 	int r;
2532 
2533 	if (amdgpu_sriov_vf(adev))
2534 		amdgpu_virt_request_full_gpu(adev, false);
2535 
2536 	r = amdgpu_device_ip_suspend_phase1(adev);
2537 	if (r)
2538 		return r;
2539 	r = amdgpu_device_ip_suspend_phase2(adev);
2540 
2541 	if (amdgpu_sriov_vf(adev))
2542 		amdgpu_virt_release_full_gpu(adev, false);
2543 
2544 	return r;
2545 }
2546 
2547 static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
2548 {
2549 	int i, r;
2550 
2551 	static enum amd_ip_block_type ip_order[] = {
2552 		AMD_IP_BLOCK_TYPE_GMC,
2553 		AMD_IP_BLOCK_TYPE_COMMON,
2554 		AMD_IP_BLOCK_TYPE_PSP,
2555 		AMD_IP_BLOCK_TYPE_IH,
2556 	};
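	/* Re-initialize the blocks in this fixed order during SR-IOV reset recovery. */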
2557 
2558 	for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2559 		int j;
2560 		struct amdgpu_ip_block *block;
2561 
2562 		for (j = 0; j < adev->num_ip_blocks; j++) {
2563 			block = &adev->ip_blocks[j];
2564 
2565 			block->status.hw = false;
2566 			if (block->version->type != ip_order[i] ||
2567 				!block->status.valid)
2568 				continue;
2569 
2570 			r = block->version->funcs->hw_init(adev);
2571 			DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
2572 			if (r)
2573 				return r;
2574 			block->status.hw = true;
2575 		}
2576 	}
2577 
2578 	return 0;
2579 }
2580 
2581 static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
2582 {
2583 	int i, r;
2584 
2585 	static enum amd_ip_block_type ip_order[] = {
2586 		AMD_IP_BLOCK_TYPE_SMC,
2587 		AMD_IP_BLOCK_TYPE_DCE,
2588 		AMD_IP_BLOCK_TYPE_GFX,
2589 		AMD_IP_BLOCK_TYPE_SDMA,
2590 		AMD_IP_BLOCK_TYPE_UVD,
2591 		AMD_IP_BLOCK_TYPE_VCE,
2592 		AMD_IP_BLOCK_TYPE_VCN
2593 	};
2594 
2595 	for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2596 		int j;
2597 		struct amdgpu_ip_block *block;
2598 
2599 		for (j = 0; j < adev->num_ip_blocks; j++) {
2600 			block = &adev->ip_blocks[j];
2601 
2602 			if (block->version->type != ip_order[i] ||
2603 				!block->status.valid ||
2604 				block->status.hw)
2605 				continue;
2606 
2607 			if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
2608 				r = block->version->funcs->resume(adev);
2609 			else
2610 				r = block->version->funcs->hw_init(adev);
2611 
2612 			DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
2613 			if (r)
2614 				return r;
2615 			block->status.hw = true;
2616 		}
2617 	}
2618 
2619 	return 0;
2620 }
2621 
2622 /**
2623  * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
2624  *
2625  * @adev: amdgpu_device pointer
2626  *
2627  * First resume function for hardware IPs.  The list of all the hardware
2628  * IPs that make up the asic is walked and the resume callbacks are run for
2629  * COMMON, GMC, and IH.  resume puts the hardware into a functional state
2630  * after a suspend and updates the software state as necessary.  This
2631  * function is also used for restoring the GPU after a GPU reset.
2632  * Returns 0 on success, negative error code on failure.
2633  */
2634 static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
2635 {
2636 	int i, r;
2637 
2638 	for (i = 0; i < adev->num_ip_blocks; i++) {
2639 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
2640 			continue;
2641 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2642 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2643 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2644 
2645 			r = adev->ip_blocks[i].version->funcs->resume(adev);
2646 			if (r) {
2647 				DRM_ERROR("resume of IP block <%s> failed %d\n",
2648 					  adev->ip_blocks[i].version->funcs->name, r);
2649 				return r;
2650 			}
2651 			adev->ip_blocks[i].status.hw = true;
2652 		}
2653 	}
2654 
2655 	return 0;
2656 }
2657 
2658 /**
2659  * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
2660  *
2661  * @adev: amdgpu_device pointer
2662  *
2663  * Second resume function for hardware IPs.  The list of all the hardware
2664  * IPs that make up the asic is walked and the resume callbacks are run for
2665  * all blocks except COMMON, GMC, and IH.  resume puts the hardware into a
2666  * functional state after a suspend and updates the software state as
2667  * necessary.  This function is also used for restoring the GPU after a GPU
2668  * reset.
2669  * Returns 0 on success, negative error code on failure.
2670  */
2671 static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
2672 {
2673 	int i, r;
2674 
2675 	for (i = 0; i < adev->num_ip_blocks; i++) {
2676 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
2677 			continue;
2678 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2679 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2680 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
2681 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
2682 			continue;
2683 		r = adev->ip_blocks[i].version->funcs->resume(adev);
2684 		if (r) {
2685 			DRM_ERROR("resume of IP block <%s> failed %d\n",
2686 				  adev->ip_blocks[i].version->funcs->name, r);
2687 			return r;
2688 		}
2689 		adev->ip_blocks[i].status.hw = true;
2690 	}
2691 
2692 	return 0;
2693 }
2694 
2695 /**
2696  * amdgpu_device_ip_resume - run resume for hardware IPs
2697  *
2698  * @adev: amdgpu_device pointer
2699  *
2700  * Main resume function for hardware IPs.  The hardware IPs
2701  * Main resume function for hardware IPs.  The hardware IPs
2702  * are split into two resume functions because they are
2703  * also used in recovering from a GPU reset and some additional
2704  * steps need to be taken between them.  In this case (S3/S4) they are
2705  * Returns 0 on success, negative error code on failure.
2706  */
2707 static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
2708 {
2709 	int r;
2710 
2711 	r = amdgpu_device_ip_resume_phase1(adev);
2712 	if (r)
2713 		return r;
2714 
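	/*
	 * Phase 1 restores COMMON, GMC and IH; firmware is then reloaded
	 * (bringing PSP back up on Vega10 and newer) before the remaining
	 * blocks are resumed in phase 2.
	 */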
2715 	r = amdgpu_device_fw_loading(adev);
2716 	if (r)
2717 		return r;
2718 
2719 	r = amdgpu_device_ip_resume_phase2(adev);
2720 
2721 	return r;
2722 }
2723 
2724 /**
2725  * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
2726  *
2727  * @adev: amdgpu_device pointer
2728  *
2729  * Query the VBIOS data tables to determine if the board supports SR-IOV.
2730  */
2731 static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
2732 {
2733 	if (amdgpu_sriov_vf(adev)) {
2734 		if (adev->is_atom_fw) {
2735 			if (amdgpu_atomfirmware_gpu_supports_virtualization(adev))
2736 				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2737 		} else {
2738 			if (amdgpu_atombios_has_gpu_virtualization_table(adev))
2739 				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2740 		}
2741 
2742 		if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
2743 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
2744 	}
2745 }
2746 
2747 /**
2748  * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
2749  *
2750  * @asic_type: AMD asic type
2751  *
2752  * Check if there is DC (new modesetting infrastructure) support for an asic.
2753  * Returns true if DC has support, false if not.
2754  */
2755 bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
2756 {
2757 	switch (asic_type) {
2758 #if defined(CONFIG_DRM_AMD_DC)
2759 	case CHIP_BONAIRE:
2760 	case CHIP_KAVERI:
2761 	case CHIP_KABINI:
2762 	case CHIP_MULLINS:
2763 		/*
2764 		 * We have systems in the wild with these ASICs that require
2765 		 * LVDS and VGA support which is not supported with DC.
2766 		 *
2767 		 * Fallback to the non-DC driver here by default so as not to
2768 		 * cause regressions.
2769 		 */
2770 		return amdgpu_dc > 0;
2771 	case CHIP_HAWAII:
2772 	case CHIP_CARRIZO:
2773 	case CHIP_STONEY:
2774 	case CHIP_POLARIS10:
2775 	case CHIP_POLARIS11:
2776 	case CHIP_POLARIS12:
2777 	case CHIP_VEGAM:
2778 	case CHIP_TONGA:
2779 	case CHIP_FIJI:
2780 	case CHIP_VEGA10:
2781 	case CHIP_VEGA12:
2782 	case CHIP_VEGA20:
2783 #if defined(CONFIG_DRM_AMD_DC_DCN)
2784 	case CHIP_RAVEN:
2785 	case CHIP_NAVI10:
2786 	case CHIP_NAVI14:
2787 	case CHIP_NAVI12:
2788 	case CHIP_RENOIR:
2789 #endif
2790 		return amdgpu_dc != 0;
2791 #endif
2792 	default:
2793 		if (amdgpu_dc > 0)
2794 			DRM_INFO("Display Core has been requested via kernel parameter "
2795 					 "but isn't supported by ASIC, ignoring\n");
2796 		return false;
2797 	}
2798 }
2799 
2800 /**
2801  * amdgpu_device_has_dc_support - check if dc is supported
2802  *
2803  * @adev: amdgpu_device pointer
2804  *
2805  * Returns true for supported, false for not supported
2806  */
2807 bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
2808 {
2809 	if (amdgpu_sriov_vf(adev))
2810 		return false;
2811 
2812 	return amdgpu_device_asic_has_dc_support(adev->asic_type);
2813 }
2814 
2815 
2816 static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
2817 {
2818 	struct amdgpu_device *adev =
2819 		container_of(__work, struct amdgpu_device, xgmi_reset_work);
2820 	struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0);
2821 
2822 	/* It's a bug to not have a hive within this function */
2823 	if (WARN_ON(!hive))
2824 		return;
2825 
2826 	/*
2827 	 * Use task barrier to synchronize all xgmi reset works across the
2828 	 * hive. task_barrier_enter and task_barrier_exit will block
2829 	 * until all the threads running the xgmi reset works reach
2830 	 * those points. task_barrier_full will do both blocks.
2831 	 */
2832 	if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
2833 
2834 		task_barrier_enter(&hive->tb);
2835 		adev->asic_reset_res = amdgpu_device_baco_enter(adev->ddev);
2836 
2837 		if (adev->asic_reset_res)
2838 			goto fail;
2839 
2840 		task_barrier_exit(&hive->tb);
2841 		adev->asic_reset_res = amdgpu_device_baco_exit(adev->ddev);
2842 
2843 		if (adev->asic_reset_res)
2844 			goto fail;
2845 	} else {
2846 
2847 		task_barrier_full(&hive->tb);
2848 		adev->asic_reset_res =  amdgpu_asic_reset(adev);
2849 	}
2850 
2851 fail:
2852 	if (adev->asic_reset_res)
2853 		DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
2854 			 adev->asic_reset_res, adev->ddev->unique);
2855 }
2856 
2857 static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
2858 {
2859 	char *input = amdgpu_lockup_timeout;
2860 	char *timeout_setting = NULL;
2861 	int index = 0;
2862 	long timeout;
2863 	int ret = 0;
2864 
2865 	/*
2866 	 * By default the timeout for non-compute jobs is 10000 ms,
2867 	 * and there is no timeout enforced on compute jobs.
2868 	 * In SR-IOV or passthrough mode, the timeout for compute
2869 	 * jobs is also 10000 ms by default.
2870 	 */
2871 	adev->gfx_timeout = msecs_to_jiffies(10000);
2872 	adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
2873 	if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
2874 		adev->compute_timeout = adev->gfx_timeout;
2875 	else
2876 		adev->compute_timeout = MAX_SCHEDULE_TIMEOUT;
2877 
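	/*
	 * amdgpu_lockup_timeout is a comma separated list of up to four values
	 * applied in order to gfx, compute, sdma and video, e.g.
	 * "10000,60000,10000,10000".  A value of 0 keeps the default above and
	 * a negative value disables the timeout for that queue type.
	 */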
2878 	if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
2879 		while ((timeout_setting = strsep(&input, ",")) &&
2880 				strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
2881 			ret = kstrtol(timeout_setting, 0, &timeout);
2882 			if (ret)
2883 				return ret;
2884 
2885 			if (timeout == 0) {
2886 				index++;
2887 				continue;
2888 			} else if (timeout < 0) {
2889 				timeout = MAX_SCHEDULE_TIMEOUT;
2890 			} else {
2891 				timeout = msecs_to_jiffies(timeout);
2892 			}
2893 
2894 			switch (index++) {
2895 			case 0:
2896 				adev->gfx_timeout = timeout;
2897 				break;
2898 			case 1:
2899 				adev->compute_timeout = timeout;
2900 				break;
2901 			case 2:
2902 				adev->sdma_timeout = timeout;
2903 				break;
2904 			case 3:
2905 				adev->video_timeout = timeout;
2906 				break;
2907 			default:
2908 				break;
2909 			}
2910 		}
2911 		/*
2912 		 * There is only one value specified and
2913 		 * it should apply to all non-compute jobs.
2914 		 */
2915 		if (index == 1) {
2916 			adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
2917 			if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
2918 				adev->compute_timeout = adev->gfx_timeout;
2919 		}
2920 	}
2921 
2922 	return ret;
2923 }
2924 
2925 /**
2926  * amdgpu_device_init - initialize the driver
2927  *
2928  * @adev: amdgpu_device pointer
2929  * @ddev: drm dev pointer
2930  * @pdev: pci dev pointer
2931  * @flags: driver flags
2932  *
2933  * Initializes the driver info and hw (all asics).
2934  * Returns 0 for success or an error on failure.
2935  * Called at driver startup.
2936  */
2937 int amdgpu_device_init(struct amdgpu_device *adev,
2938 		       struct drm_device *ddev,
2939 		       struct pci_dev *pdev,
2940 		       uint32_t flags)
2941 {
2942 	int r, i;
2943 	bool boco = false;
2944 	u32 max_MBps;
2945 
2946 	adev->shutdown = false;
2947 	adev->dev = pci_dev_dev(pdev);
2948 	adev->ddev = ddev;
2949 	adev->pdev = pdev;
2950 	adev->flags = flags;
2951 
2952 	if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
2953 		adev->asic_type = amdgpu_force_asic_type;
2954 	else
2955 		adev->asic_type = flags & AMD_ASIC_MASK;
2956 
2957 	adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
2958 	if (amdgpu_emu_mode == 1)
2959 		adev->usec_timeout *= 2;
2960 	adev->gmc.gart_size = 512 * 1024 * 1024;
2961 	adev->accel_working = false;
2962 	adev->num_rings = 0;
2963 	adev->mman.buffer_funcs = NULL;
2964 	adev->mman.buffer_funcs_ring = NULL;
2965 	adev->vm_manager.vm_pte_funcs = NULL;
2966 	adev->vm_manager.vm_pte_num_scheds = 0;
2967 	adev->gmc.gmc_funcs = NULL;
2968 	adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
2969 	bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
2970 
2971 	adev->smc_rreg = &amdgpu_invalid_rreg;
2972 	adev->smc_wreg = &amdgpu_invalid_wreg;
2973 	adev->pcie_rreg = &amdgpu_invalid_rreg;
2974 	adev->pcie_wreg = &amdgpu_invalid_wreg;
2975 	adev->pciep_rreg = &amdgpu_invalid_rreg;
2976 	adev->pciep_wreg = &amdgpu_invalid_wreg;
2977 	adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
2978 	adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
2979 	adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
2980 	adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
2981 	adev->didt_rreg = &amdgpu_invalid_rreg;
2982 	adev->didt_wreg = &amdgpu_invalid_wreg;
2983 	adev->gc_cac_rreg = &amdgpu_invalid_rreg;
2984 	adev->gc_cac_wreg = &amdgpu_invalid_wreg;
2985 	adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
2986 	adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
2987 
2988 	DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
2989 		 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
2990 		 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
2991 
2992 	/* mutex initializations are all done here so we
2993 	 * can call these functions again without locking issues */
2994 	atomic_set(&adev->irq.ih.lock, 0);
2995 	mutex_init(&adev->firmware.mutex);
2996 	mutex_init(&adev->pm.mutex);
2997 	mutex_init(&adev->gfx.gpu_clock_mutex);
2998 	mutex_init(&adev->srbm_mutex);
2999 	mutex_init(&adev->gfx.pipe_reserve_mutex);
3000 	mutex_init(&adev->gfx.gfx_off_mutex);
3001 	mutex_init(&adev->grbm_idx_mutex);
3002 	mutex_init(&adev->mn_lock);
3003 	mutex_init(&adev->virt.vf_errors.lock);
3004 	hash_init(adev->mn_hash);
3005 	mutex_init(&adev->lock_reset);
3006 	mutex_init(&adev->psp.mutex);
3007 	mutex_init(&adev->notifier_lock);
3008 
3009 	spin_lock_init(&adev->mmio_idx_lock);
3010 	spin_lock_init(&adev->smc_idx_lock);
3011 	spin_lock_init(&adev->pcie_idx_lock);
3012 	spin_lock_init(&adev->uvd_ctx_idx_lock);
3013 	spin_lock_init(&adev->didt_idx_lock);
3014 	spin_lock_init(&adev->gc_cac_idx_lock);
3015 	spin_lock_init(&adev->se_cac_idx_lock);
3016 	spin_lock_init(&adev->audio_endpt_idx_lock);
3017 	spin_lock_init(&adev->mm_stats.lock);
3018 
3019 	INIT_LIST_HEAD(&adev->shadow_list);
3020 	mutex_init(&adev->shadow_list_lock);
3021 
3022 	INIT_LIST_HEAD(&adev->ring_lru_list);
3023 	spin_lock_init(&adev->ring_lru_list_lock);
3024 
3025 	INIT_DELAYED_WORK(&adev->delayed_init_work,
3026 			  amdgpu_device_delayed_init_work_handler);
3027 	INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3028 			  amdgpu_device_delay_enable_gfx_off);
3029 
3030 	INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3031 
3032 	r = amdgpu_device_check_arguments(adev);
3033 	if (r)
3034 		return r;
3035 
3036 	adev->gfx.gfx_off_req_count = 1;
3037 	adev->pm.ac_power = power_supply_is_system_supplied() > 0 ? true : false;
3038 
3039 	/* Registers mapping */
3040 	/* TODO: block userspace mapping of io register */
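	/* SI parts expose the register aperture in BAR 2; CIK (BONAIRE) and newer use BAR 5. */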
3041 	if (adev->asic_type >= CHIP_BONAIRE) {
3042 		adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3043 		adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3044 	} else {
3045 		adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3046 		adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3047 	}
3048 
3049 #ifdef __NetBSD__
3050 	if (pci_mapreg_map(&adev->pdev->pd_pa, PCI_BAR(5),
3051 		pci_mapreg_type(adev->pdev->pd_pa.pa_pc,
3052 		    adev->pdev->pd_pa.pa_tag, PCI_BAR(5)),
3053 		0,
3054 		&adev->rmmiot, &adev->rmmioh,
3055 		&adev->rmmio_base, &adev->rmmio_size))
3056 		return -EIO;
3057 	DRM_INFO("register mmio base: 0x%8"PRIXMAX"\n",
3058 	    (uintmax_t)adev->rmmio_base);
3059 	DRM_INFO("register mmio size: %"PRIuMAX"\n",
3060 	    (uintmax_t)adev->rmmio_size);
3061 #else
3062 	adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
3063 	if (adev->rmmio == NULL) {
3064 		return -ENOMEM;
3065 	}
3066 	DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
3067 	DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
3068 #endif
3069 
3070 	/* io port mapping */
3071 	for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
3072 #ifdef __NetBSD__
3073 		if (pci_mapreg_map(&adev->pdev->pd_pa, PCI_BAR(i),
3074 			PCI_MAPREG_TYPE_IO, 0,
3075 			&adev->rio_memt, &adev->rio_memh,
3076 			NULL, &adev->rio_mem_size) == 0)
3077 			break;
3078 #else
3079 		if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) {
3080 			adev->rio_mem_size = pci_resource_len(adev->pdev, i);
3081 			adev->rio_mem = pci_iomap(adev->pdev, i, adev->rio_mem_size);
3082 			break;
3083 		}
3084 #endif
3085 	}
3086 #ifdef __NetBSD__
3087 	if (i == DEVICE_COUNT_RESOURCE)
3088 #else
3089 	if (adev->rio_mem == NULL)
3090 #endif
3091 		DRM_INFO("PCI I/O BAR is not found.\n");
3092 
3093 	/* enable PCIE atomic ops */
3094 #ifndef __NetBSD__		/* XXX amdgpu pcie atomics */
3095 	r = pci_enable_atomic_ops_to_root(adev->pdev,
3096 					  PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3097 					  PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3098 	if (r) {
3099 		adev->have_atomics_support = false;
3100 		DRM_INFO("PCIE atomic ops is not supported\n");
3101 	} else {
3102 		adev->have_atomics_support = true;
3103 	}
3104 #endif
3105 
3106 	amdgpu_device_get_pcie_info(adev);
3107 
3108 	if (amdgpu_mcbp)
3109 		DRM_INFO("MCBP is enabled\n");
3110 
3111 	if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10)
3112 		adev->enable_mes = true;
3113 
3114 	if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10) {
3115 		r = amdgpu_discovery_init(adev);
3116 		if (r) {
3117 			dev_err(adev->dev, "amdgpu_discovery_init failed\n");
3118 			return r;
3119 		}
3120 	}
3121 
3122 	/* early init functions */
3123 	r = amdgpu_device_ip_early_init(adev);
3124 	if (r)
3125 		return r;
3126 
3127 	r = amdgpu_device_get_job_timeout_settings(adev);
3128 	if (r) {
3129 		dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
3130 		return r;
3131 	}
3132 
3133 	/* doorbell bar mapping and doorbell index init*/
3134 	amdgpu_device_doorbell_init(adev);
3135 
3136 #ifndef __NetBSD__		/* XXX amdgpu vga */
3137 	/* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
3138 	/* this will fail for cards that aren't VGA class devices, just
3139 	 * ignore it */
3140 	vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
3141 
3142 	if (amdgpu_device_supports_boco(ddev))
3143 		boco = true;
3144 	if (amdgpu_has_atpx() &&
3145 	    (amdgpu_is_atpx_hybrid() ||
3146 	     amdgpu_has_atpx_dgpu_power_cntl()) &&
3147 	    !pci_is_thunderbolt_attached(adev->pdev))
3148 		vga_switcheroo_register_client(adev->pdev,
3149 					       &amdgpu_switcheroo_ops, boco);
3150 	if (boco)
3151 		vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
3152 #endif
3153 
3154 	if (amdgpu_emu_mode == 1) {
3155 		/* post the asic on emulation mode */
3156 		emu_soc_asic_init(adev);
3157 		goto fence_driver_init;
3158 	}
3159 
3160 	/* detect if we are with an SRIOV vbios */
3161 	amdgpu_device_detect_sriov_bios(adev);
3162 
3163 	/* check if we need to reset the asic
3164 	 *  E.g., driver was not cleanly unloaded previously, etc.
3165 	 */
3166 	if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
3167 		r = amdgpu_asic_reset(adev);
3168 		if (r) {
3169 			dev_err(adev->dev, "asic reset on init failed\n");
3170 			goto failed;
3171 		}
3172 	}
3173 
3174 	/* Post card if necessary */
3175 	if (amdgpu_device_need_post(adev)) {
3176 		if (!adev->bios) {
3177 			dev_err(adev->dev, "no vBIOS found\n");
3178 			r = -EINVAL;
3179 			goto failed;
3180 		}
3181 		DRM_INFO("GPU posting now...\n");
3182 		r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
3183 		if (r) {
3184 			dev_err(adev->dev, "gpu post error!\n");
3185 			goto failed;
3186 		}
3187 	}
3188 
3189 	if (adev->is_atom_fw) {
3190 		/* Initialize clocks */
3191 		r = amdgpu_atomfirmware_get_clock_info(adev);
3192 		if (r) {
3193 			dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
3194 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3195 			goto failed;
3196 		}
3197 	} else {
3198 		/* Initialize clocks */
3199 		r = amdgpu_atombios_get_clock_info(adev);
3200 		if (r) {
3201 			dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
3202 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3203 			goto failed;
3204 		}
3205 		/* init i2c buses */
3206 		if (!amdgpu_device_has_dc_support(adev))
3207 			amdgpu_atombios_i2c_init(adev);
3208 	}
3209 
3210 fence_driver_init:
3211 	/* Fence driver */
3212 	r = amdgpu_fence_driver_init(adev);
3213 	if (r) {
3214 		dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
3215 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
3216 		goto failed;
3217 	}
3218 
3219 	/* init the mode config */
3220 	drm_mode_config_init(adev->ddev);
3221 
3222 	r = amdgpu_device_ip_init(adev);
3223 	if (r) {
3224 		/* failed in exclusive mode due to timeout */
3225 		if (amdgpu_sriov_vf(adev) &&
3226 		    !amdgpu_sriov_runtime(adev) &&
3227 		    amdgpu_virt_mmio_blocked(adev) &&
3228 		    !amdgpu_virt_wait_reset(adev)) {
3229 			dev_err(adev->dev, "VF exclusive mode timeout\n");
3230 			/* Don't send request since VF is inactive. */
3231 			adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
3232 			adev->virt.ops = NULL;
3233 			r = -EAGAIN;
3234 			goto failed;
3235 		}
3236 		dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
3237 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
3238 		goto failed;
3239 	}
3240 
3241 	DRM_DEBUG("SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
3242 			adev->gfx.config.max_shader_engines,
3243 			adev->gfx.config.max_sh_per_se,
3244 			adev->gfx.config.max_cu_per_sh,
3245 			adev->gfx.cu_info.number);
3246 
3247 	amdgpu_ctx_init_sched(adev);
3248 
3249 	adev->accel_working = true;
3250 
3251 	amdgpu_vm_check_compute_bug(adev);
3252 
3253 	/* Initialize the buffer migration limit. */
3254 	if (amdgpu_moverate >= 0)
3255 		max_MBps = amdgpu_moverate;
3256 	else
3257 		max_MBps = 8; /* Allow 8 MB/s. */
3258 	/* Get a log2 for easy divisions. */
3259 	adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
3260 
3261 	amdgpu_fbdev_init(adev);
3262 
3263 	r = amdgpu_pm_sysfs_init(adev);
3264 	if (r) {
3265 		adev->pm_sysfs_en = false;
3266 		DRM_ERROR("registering pm debugfs failed (%d).\n", r);
3267 	} else
3268 		adev->pm_sysfs_en = true;
3269 
3270 	r = amdgpu_ucode_sysfs_init(adev);
3271 	if (r) {
3272 		adev->ucode_sysfs_en = false;
3273 		DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
3274 	} else
3275 		adev->ucode_sysfs_en = true;
3276 
3277 	r = amdgpu_debugfs_gem_init(adev);
3278 	if (r)
3279 		DRM_ERROR("registering gem debugfs failed (%d).\n", r);
3280 
3281 	r = amdgpu_debugfs_regs_init(adev);
3282 	if (r)
3283 		DRM_ERROR("registering register debugfs failed (%d).\n", r);
3284 
3285 	r = amdgpu_debugfs_firmware_init(adev);
3286 	if (r)
3287 		DRM_ERROR("registering firmware debugfs failed (%d).\n", r);
3288 
3289 	r = amdgpu_debugfs_init(adev);
3290 	if (r)
3291 		DRM_ERROR("Creating debugfs files failed (%d).\n", r);
3292 
3293 	if ((amdgpu_testing & 1)) {
3294 		if (adev->accel_working)
3295 			amdgpu_test_moves(adev);
3296 		else
3297 			DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n");
3298 	}
3299 	if (amdgpu_benchmarking) {
3300 		if (adev->accel_working)
3301 			amdgpu_benchmark(adev, amdgpu_benchmarking);
3302 		else
3303 			DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
3304 	}
3305 
3306 	/*
3307 	 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
3308 	 * Otherwise the mgpu fan boost feature will be skipped because the
3309 	 * gpu instance count would be too low.
3310 	 */
3311 	amdgpu_register_gpu_instance(adev);
3312 
3313 	/* enable clockgating, etc. after ib tests, etc. since some blocks require
3314 	 * explicit gating rather than handling it automatically.
3315 	 */
3316 	r = amdgpu_device_ip_late_init(adev);
3317 	if (r) {
3318 		dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
3319 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
3320 		goto failed;
3321 	}
3322 
3323 	/* must succeed. */
3324 	amdgpu_ras_resume(adev);
3325 
3326 	queue_delayed_work(system_wq, &adev->delayed_init_work,
3327 			   msecs_to_jiffies(AMDGPU_RESUME_MS));
3328 
3329 #ifndef __NetBSD__		/* XXX amdgpu sysfs */
3330 	r = device_create_file(adev->dev, &dev_attr_pcie_replay_count);
3331 	if (r) {
3332 		dev_err(adev->dev, "Could not create pcie_replay_count");
3333 		return r;
3334 	}
3335 #endif
3336 
3337 	if (IS_ENABLED(CONFIG_PERF_EVENTS))
3338 		r = amdgpu_pmu_init(adev);
3339 	if (r)
3340 		dev_err(adev->dev, "amdgpu_pmu_init failed\n");
3341 
3342 	return 0;
3343 
3344 failed:
3345 	amdgpu_vf_error_trans_all(adev);
3346 	if (boco)
3347 		vga_switcheroo_fini_domain_pm_ops(adev->dev);
3348 
3349 	return r;
3350 }
3351 
3352 /**
3353  * amdgpu_device_fini - tear down the driver
3354  *
3355  * @adev: amdgpu_device pointer
3356  *
3357  * Tear down the driver info (all asics).
3358  * Called at driver shutdown.
3359  */
3360 void amdgpu_device_fini(struct amdgpu_device *adev)
3361 {
3362 	int r __unused;
3363 
3364 	DRM_INFO("amdgpu: finishing device.\n");
3365 	flush_delayed_work(&adev->delayed_init_work);
3366 	adev->shutdown = true;
3367 
3368 	/* disable all interrupts */
3369 	amdgpu_irq_disable_all(adev);
3370 	if (adev->mode_info.mode_config_initialized){
3371 		if (!amdgpu_device_has_dc_support(adev))
3372 			drm_helper_force_disable_all(adev->ddev);
3373 		else
3374 			drm_atomic_helper_shutdown(adev->ddev);
3375 	}
3376 	amdgpu_fence_driver_fini(adev);
3377 	if (adev->pm_sysfs_en)
3378 		amdgpu_pm_sysfs_fini(adev);
3379 	amdgpu_fbdev_fini(adev);
3380 	r = amdgpu_device_ip_fini(adev);
3381 	if (adev->firmware.gpu_info_fw) {
3382 		release_firmware(adev->firmware.gpu_info_fw);
3383 		adev->firmware.gpu_info_fw = NULL;
3384 	}
3385 	adev->accel_working = false;
3386 	/* free i2c buses */
3387 	if (!amdgpu_device_has_dc_support(adev))
3388 		amdgpu_i2c_fini(adev);
3389 
3390 	if (amdgpu_emu_mode != 1)
3391 		amdgpu_atombios_fini(adev);
3392 
3393 	kfree(adev->bios);
3394 	adev->bios = NULL;
3395 #ifndef __NetBSD__		/* XXX amdgpu vga */
3396 	if (amdgpu_has_atpx() &&
3397 	    (amdgpu_is_atpx_hybrid() ||
3398 	     amdgpu_has_atpx_dgpu_power_cntl()) &&
3399 	    !pci_is_thunderbolt_attached(adev->pdev))
3400 		vga_switcheroo_unregister_client(adev->pdev);
3401 	if (amdgpu_device_supports_boco(adev->ddev))
3402 		vga_switcheroo_fini_domain_pm_ops(adev->dev);
3403 	vga_client_register(adev->pdev, NULL, NULL, NULL);
3404 #endif
3405 #ifdef __NetBSD__
3406 	if (adev->rio_mem_size)
3407 		bus_space_unmap(adev->rio_memt, adev->rio_memh,
3408 		    adev->rio_mem_size);
3409 	adev->rio_mem_size = 0;
3410 	bus_space_unmap(adev->rmmiot, adev->rmmioh, adev->rmmio_size);
3411 #else
3412 	if (adev->rio_mem)
3413 		pci_iounmap(adev->pdev, adev->rio_mem);
3414 	adev->rio_mem = NULL;
3415 	iounmap(adev->rmmio);
3416 	adev->rmmio = NULL;
3417 #endif
3418 	amdgpu_device_doorbell_fini(adev);
3419 
3420 	amdgpu_debugfs_regs_cleanup(adev);
3421 #ifndef __NetBSD__		/* XXX amdgpu sysfs */
3422 	device_remove_file(adev->dev, &dev_attr_pcie_replay_count);
3423 #endif
3424 	if (adev->ucode_sysfs_en)
3425 		amdgpu_ucode_sysfs_fini(adev);
3426 	if (IS_ENABLED(CONFIG_PERF_EVENTS))
3427 		amdgpu_pmu_fini(adev);
3428 	amdgpu_debugfs_preempt_cleanup(adev);
3429 	if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
3430 		amdgpu_discovery_fini(adev);
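	/*
	 * NetBSD: the Linux-compat mutexes and spin locks are backed by real
	 * kernel locks here, so they are torn down explicitly.
	 */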
3431 	spin_lock_destroy(&adev->ring_lru_list_lock);
3432 	mutex_destroy(&adev->shadow_list_lock);
3433 	spin_lock_destroy(&adev->mm_stats.lock);
3434 	spin_lock_destroy(&adev->audio_endpt_idx_lock);
3435 	spin_lock_destroy(&adev->se_cac_idx_lock);
3436 	spin_lock_destroy(&adev->gc_cac_idx_lock);
3437 	spin_lock_destroy(&adev->didt_idx_lock);
3438 	spin_lock_destroy(&adev->uvd_ctx_idx_lock);
3439 	spin_lock_destroy(&adev->pcie_idx_lock);
3440 	spin_lock_destroy(&adev->smc_idx_lock);
3441 	spin_lock_destroy(&adev->mmio_idx_lock);
3442 	mutex_destroy(&adev->notifier_lock);
3443 	mutex_destroy(&adev->psp.mutex);
3444 	mutex_destroy(&adev->lock_reset);
3445 	/* hash_destroy(adev->mn_hash)? */
3446 	mutex_destroy(&adev->virt.vf_errors.lock);
3447 	mutex_destroy(&adev->mn_lock);
3448 	mutex_destroy(&adev->grbm_idx_mutex);
3449 	mutex_destroy(&adev->gfx.gfx_off_mutex);
3450 	mutex_destroy(&adev->gfx.pipe_reserve_mutex);
3451 	mutex_destroy(&adev->srbm_mutex);
3452 	mutex_destroy(&adev->gfx.gpu_clock_mutex);
3453 	mutex_destroy(&adev->pm.mutex);
3454 	mutex_destroy(&adev->firmware.mutex);
3455 }
3456 
3457 
3458 /*
3459  * Suspend & resume.
3460  */
3461 /**
3462  * amdgpu_device_suspend - initiate device suspend
3463  *
3464  * @dev: drm dev pointer
3466  * @fbcon: notify the fbdev of suspend
3467  *
3468  * Puts the hw in the suspend state (all asics).
3469  * Returns 0 for success or an error on failure.
3470  * Called at driver suspend.
3471  */
3472 int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
3473 {
3474 	struct amdgpu_device *adev;
3475 	struct drm_crtc *crtc;
3476 	struct drm_connector *connector;
3477 	struct drm_connector_list_iter iter;
3478 	int r;
3479 
3480 	if (dev == NULL || dev->dev_private == NULL) {
3481 		return -ENODEV;
3482 	}
3483 
3484 	adev = dev->dev_private;
3485 
3486 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3487 		return 0;
3488 
3489 	adev->in_suspend = true;
3490 	drm_kms_helper_poll_disable(dev);
3491 
3492 	if (fbcon)
3493 		amdgpu_fbdev_set_suspend(adev, 1);
3494 
3495 	cancel_delayed_work_sync(&adev->delayed_init_work);
3496 
3497 	if (!amdgpu_device_has_dc_support(adev)) {
3498 		/* turn off display hw */
3499 		drm_modeset_lock_all(dev);
3500 		drm_connector_list_iter_begin(dev, &iter);
3501 		drm_for_each_connector_iter(connector, &iter)
3502 			drm_helper_connector_dpms(connector,
3503 						  DRM_MODE_DPMS_OFF);
3504 		drm_connector_list_iter_end(&iter);
3505 		drm_modeset_unlock_all(dev);
3506 		/* unpin the front buffers and cursors */
3507 		list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3508 			struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3509 			struct drm_framebuffer *fb = crtc->primary->fb;
3510 			struct amdgpu_bo *robj;
3511 
3512 			if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
3513 				struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3514 				r = amdgpu_bo_reserve(aobj, true);
3515 				if (r == 0) {
3516 					amdgpu_bo_unpin(aobj);
3517 					amdgpu_bo_unreserve(aobj);
3518 				}
3519 			}
3520 
3521 			if (fb == NULL || fb->obj[0] == NULL) {
3522 				continue;
3523 			}
3524 			robj = gem_to_amdgpu_bo(fb->obj[0]);
3525 			/* don't unpin kernel fb objects */
3526 			if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
3527 				r = amdgpu_bo_reserve(robj, true);
3528 				if (r == 0) {
3529 					amdgpu_bo_unpin(robj);
3530 					amdgpu_bo_unreserve(robj);
3531 				}
3532 			}
3533 		}
3534 	}
3535 
3536 	amdgpu_amdkfd_suspend(adev);
3537 
3538 	amdgpu_ras_suspend(adev);
3539 
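	/*
	 * IP suspend is split in two phases: phase 1 quiesces the display
	 * blocks before VRAM is evicted, phase 2 (below) suspends the
	 * remaining IPs afterwards.
	 */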
3540 	r = amdgpu_device_ip_suspend_phase1(adev);
3541 
3542 	/* evict vram memory */
3543 	amdgpu_bo_evict_vram(adev);
3544 
3545 	amdgpu_fence_driver_suspend(adev);
3546 
3547 	r = amdgpu_device_ip_suspend_phase2(adev);
3548 
3549 	/* evict remaining vram memory
3550 	 * This second call to evict vram is to evict the gart page table
3551 	 * using the CPU.
3552 	 */
3553 	amdgpu_bo_evict_vram(adev);
3554 
3555 	return 0;
3556 }
3557 
3558 /**
3559  * amdgpu_device_resume - initiate device resume
3560  *
3561  * @dev: drm dev pointer
3563  * @fbcon: notify the fbdev of resume
3564  *
3565  * Bring the hw back to operating state (all asics).
3566  * Returns 0 for success or an error on failure.
3567  * Called at driver resume.
3568  */
3569 int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
3570 {
3571 	struct drm_connector *connector;
3572 	struct drm_connector_list_iter iter;
3573 	struct amdgpu_device *adev = dev->dev_private;
3574 	struct drm_crtc *crtc;
3575 	int r = 0;
3576 
3577 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3578 		return 0;
3579 
3580 	/* post card */
3581 	if (amdgpu_device_need_post(adev)) {
3582 		r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
3583 		if (r)
3584 			DRM_ERROR("amdgpu asic init failed\n");
3585 	}
3586 
3587 	r = amdgpu_device_ip_resume(adev);
3588 	if (r) {
3589 		DRM_ERROR("amdgpu_device_ip_resume failed (%d).\n", r);
3590 		return r;
3591 	}
3592 	amdgpu_fence_driver_resume(adev);
3593 
3594 
3595 	r = amdgpu_device_ip_late_init(adev);
3596 	if (r)
3597 		return r;
3598 
3599 	queue_delayed_work(system_wq, &adev->delayed_init_work,
3600 			   msecs_to_jiffies(AMDGPU_RESUME_MS));
3601 
3602 	if (!amdgpu_device_has_dc_support(adev)) {
3603 		/* pin cursors */
3604 		list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3605 			struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3606 
3607 			if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
3608 				struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3609 				r = amdgpu_bo_reserve(aobj, true);
3610 				if (r == 0) {
3611 					r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
3612 					if (r != 0)
3613 						DRM_ERROR("Failed to pin cursor BO (%d)\n", r);
3614 					amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
3615 					amdgpu_bo_unreserve(aobj);
3616 				}
3617 			}
3618 		}
3619 	}
3620 	r = amdgpu_amdkfd_resume(adev);
3621 	if (r)
3622 		return r;
3623 
3624 	/* Make sure IB tests flushed */
3625 	flush_delayed_work(&adev->delayed_init_work);
3626 
3627 	/* blat the mode back in */
3628 	if (fbcon) {
3629 		if (!amdgpu_device_has_dc_support(adev)) {
3630 			/* pre DCE11 */
3631 			drm_helper_resume_force_mode(dev);
3632 
3633 			/* turn on display hw */
3634 			drm_modeset_lock_all(dev);
3635 
3636 			drm_connector_list_iter_begin(dev, &iter);
3637 			drm_for_each_connector_iter(connector, &iter)
3638 				drm_helper_connector_dpms(connector,
3639 							  DRM_MODE_DPMS_ON);
3640 			drm_connector_list_iter_end(&iter);
3641 
3642 			drm_modeset_unlock_all(dev);
3643 		}
3644 		amdgpu_fbdev_set_suspend(adev, 0);
3645 	}
3646 
3647 	drm_kms_helper_poll_enable(dev);
3648 
3649 	amdgpu_ras_resume(adev);
3650 
3651 	/*
3652 	 * Most of the connector probing functions try to acquire runtime pm
3653 	 * refs to ensure that the GPU is powered on when connector polling is
3654 	 * performed. Since we're calling this from a runtime PM callback,
3655 	 * trying to acquire rpm refs will cause us to deadlock.
3656 	 *
3657 	 * Since we're guaranteed to be holding the rpm lock, it's safe to
3658 	 * temporarily disable the rpm helpers so this doesn't deadlock us.
3659 	 */
3660 #ifdef CONFIG_PM
3661 	dev->dev->power.disable_depth++;
3662 #endif
3663 	if (!amdgpu_device_has_dc_support(adev))
3664 		drm_helper_hpd_irq_event(dev);
3665 	else
3666 		drm_kms_helper_hotplug_event(dev);
3667 #ifdef CONFIG_PM
3668 	dev->dev->power.disable_depth--;
3669 #endif
3670 	adev->in_suspend = false;
3671 
3672 	return 0;
3673 }
3674 
3675 /**
3676  * amdgpu_device_ip_check_soft_reset - did soft reset succeed
3677  *
3678  * @adev: amdgpu_device pointer
3679  *
3680  * The list of all the hardware IPs that make up the asic is walked and
3681  * the check_soft_reset callbacks are run.  check_soft_reset determines
3682  * if the asic is still hung or not.
3683  * Returns true if any of the IPs are still in a hung state, false if not.
3684  */
3685 static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
3686 {
3687 	int i;
3688 	bool asic_hang = false;
3689 
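	/*
	 * Under SR-IOV the guest cannot soft-reset individual IPs, so always
	 * report a hang here and let recovery fall through to a full,
	 * host-assisted reset.
	 */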
3690 	if (amdgpu_sriov_vf(adev))
3691 		return true;
3692 
3693 	if (amdgpu_asic_need_full_reset(adev))
3694 		return true;
3695 
3696 	for (i = 0; i < adev->num_ip_blocks; i++) {
3697 		if (!adev->ip_blocks[i].status.valid)
3698 			continue;
3699 		if (adev->ip_blocks[i].version->funcs->check_soft_reset)
3700 			adev->ip_blocks[i].status.hang =
3701 				adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
3702 		if (adev->ip_blocks[i].status.hang) {
3703 			DRM_INFO("IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
3704 			asic_hang = true;
3705 		}
3706 	}
3707 	return asic_hang;
3708 }
3709 
3710 /**
3711  * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
3712  *
3713  * @adev: amdgpu_device pointer
3714  *
3715  * The list of all the hardware IPs that make up the asic is walked and the
3716  * pre_soft_reset callbacks are run if the block is hung.  pre_soft_reset
3717  * handles any IP specific hardware or software state changes that are
3718  * necessary for a soft reset to succeed.
3719  * Returns 0 on success, negative error code on failure.
3720  */
3721 static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
3722 {
3723 	int i, r = 0;
3724 
3725 	for (i = 0; i < adev->num_ip_blocks; i++) {
3726 		if (!adev->ip_blocks[i].status.valid)
3727 			continue;
3728 		if (adev->ip_blocks[i].status.hang &&
3729 		    adev->ip_blocks[i].version->funcs->pre_soft_reset) {
3730 			r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
3731 			if (r)
3732 				return r;
3733 		}
3734 	}
3735 
3736 	return 0;
3737 }
3738 
3739 /**
3740  * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
3741  *
3742  * @adev: amdgpu_device pointer
3743  *
3744  * Some hardware IPs cannot be soft reset.  If they are hung, a full gpu
3745  * reset is necessary to recover.
3746  * Returns true if a full asic reset is required, false if not.
3747  */
3748 static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
3749 {
3750 	int i;
3751 
3752 	if (amdgpu_asic_need_full_reset(adev))
3753 		return true;
3754 
3755 	for (i = 0; i < adev->num_ip_blocks; i++) {
3756 		if (!adev->ip_blocks[i].status.valid)
3757 			continue;
3758 		if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
3759 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
3760 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
3761 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
3762 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3763 			if (adev->ip_blocks[i].status.hang) {
3764 				DRM_INFO("Some block need full reset!\n");
3765 				return true;
3766 			}
3767 		}
3768 	}
3769 	return false;
3770 }
3771 
3772 /**
3773  * amdgpu_device_ip_soft_reset - do a soft reset
3774  *
3775  * @adev: amdgpu_device pointer
3776  *
3777  * The list of all the hardware IPs that make up the asic is walked and the
3778  * soft_reset callbacks are run if the block is hung.  soft_reset handles any
3779  * IP specific hardware or software state changes that are necessary to soft
3780  * reset the IP.
3781  * Returns 0 on success, negative error code on failure.
3782  */
3783 static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
3784 {
3785 	int i, r = 0;
3786 
3787 	for (i = 0; i < adev->num_ip_blocks; i++) {
3788 		if (!adev->ip_blocks[i].status.valid)
3789 			continue;
3790 		if (adev->ip_blocks[i].status.hang &&
3791 		    adev->ip_blocks[i].version->funcs->soft_reset) {
3792 			r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
3793 			if (r)
3794 				return r;
3795 		}
3796 	}
3797 
3798 	return 0;
3799 }
3800 
3801 /**
3802  * amdgpu_device_ip_post_soft_reset - clean up from soft reset
3803  *
3804  * @adev: amdgpu_device pointer
3805  *
3806  * The list of all the hardware IPs that make up the asic is walked and the
3807  * post_soft_reset callbacks are run if the asic was hung.  post_soft_reset
3808  * handles any IP specific hardware or software state changes that are
3809  * necessary after the IP has been soft reset.
3810  * Returns 0 on success, negative error code on failure.
3811  */
3812 static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
3813 {
3814 	int i, r = 0;
3815 
3816 	for (i = 0; i < adev->num_ip_blocks; i++) {
3817 		if (!adev->ip_blocks[i].status.valid)
3818 			continue;
3819 		if (adev->ip_blocks[i].status.hang &&
3820 		    adev->ip_blocks[i].version->funcs->post_soft_reset)
3821 			r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
3822 		if (r)
3823 			return r;
3824 	}
3825 
3826 	return 0;
3827 }
3828 
3829 /**
3830  * amdgpu_device_recover_vram - Recover some VRAM contents
3831  *
3832  * @adev: amdgpu_device pointer
3833  *
3834  * Restores the contents of VRAM buffers from the shadows in GTT.  Used to
3835  * restore things like GPUVM page tables after a GPU reset where
3836  * the contents of VRAM might be lost.
3837  *
3838  * Returns:
3839  * 0 on success, negative error code on failure.
3840  */
3841 static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
3842 {
3843 	struct dma_fence *fence = NULL, *next = NULL;
3844 	struct amdgpu_bo *shadow;
3845 	long r = 1, tmo;
3846 
3847 	if (amdgpu_sriov_runtime(adev))
3848 		tmo = msecs_to_jiffies(8000);
3849 	else
3850 		tmo = msecs_to_jiffies(100);
3851 
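	/*
	 * Walk the shadow list and issue a copy from each GTT shadow back
	 * into VRAM.  The copies are pipelined: while waiting on the fence
	 * of the previous restore, the next one is already queued.
	 */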
3852 	DRM_INFO("recover vram bo from shadow start\n");
3853 	mutex_lock(&adev->shadow_list_lock);
3854 	list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {
3855 
3856 		/* No need to recover an evicted BO */
3857 		if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
3858 		    shadow->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET ||
3859 		    shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
3860 			continue;
3861 
3862 		r = amdgpu_bo_restore_shadow(shadow, &next);
3863 		if (r)
3864 			break;
3865 
3866 		if (fence) {
3867 			tmo = dma_fence_wait_timeout(fence, false, tmo);
3868 			dma_fence_put(fence);
3869 			fence = next;
3870 			if (tmo == 0) {
3871 				r = -ETIMEDOUT;
3872 				break;
3873 			} else if (tmo < 0) {
3874 				r = tmo;
3875 				break;
3876 			}
3877 		} else {
3878 			fence = next;
3879 		}
3880 	}
3881 	mutex_unlock(&adev->shadow_list_lock);
3882 
3883 	if (fence)
3884 		tmo = dma_fence_wait_timeout(fence, false, tmo);
3885 	dma_fence_put(fence);
3886 
3887 	if (r < 0 || tmo <= 0) {
3888 		DRM_ERROR("recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
3889 		return -EIO;
3890 	}
3891 
3892 	DRM_INFO("recover vram bo from shadow done\n");
3893 	return 0;
3894 }
3895 
3896 
3897 /**
3898  * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
3899  *
3900  * @adev: amdgpu device pointer
3901  * @from_hypervisor: request from hypervisor
3902  *
3903  * Do a VF FLR and reinitialize the ASIC.
3904  * Returns 0 on success, negative error code on failure.
3905  */
3906 static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
3907 				     bool from_hypervisor)
3908 {
3909 	int r;
3910 
3911 	if (from_hypervisor)
3912 		r = amdgpu_virt_request_full_gpu(adev, true);
3913 	else
3914 		r = amdgpu_virt_reset_gpu(adev);
3915 	if (r)
3916 		return r;
3917 
3918 	/* Resume IP prior to SMC */
3919 	r = amdgpu_device_ip_reinit_early_sriov(adev);
3920 	if (r)
3921 		goto error;
3922 
3923 	amdgpu_virt_init_data_exchange(adev);
3924 	/* we need to recover the gart before resuming SMC/CP/SDMA */
3925 	amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]);
3926 
3927 	r = amdgpu_device_fw_loading(adev);
3928 	if (r)
3929 		return r;
3930 
3931 	/* now we are okay to resume SMC/CP/SDMA */
3932 	r = amdgpu_device_ip_reinit_late_sriov(adev);
3933 	if (r)
3934 		goto error;
3935 
3936 	amdgpu_irq_gpu_reset_resume_helper(adev);
3937 	r = amdgpu_ib_ring_tests(adev);
3938 	amdgpu_amdkfd_post_reset(adev);
3939 
3940 error:
3941 	amdgpu_virt_release_full_gpu(adev, true);
3942 	if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
3943 		amdgpu_inc_vram_lost(adev);
3944 		r = amdgpu_device_recover_vram(adev);
3945 	}
3946 
3947 	return r;
3948 }
3949 
3950 /**
3951  * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
3952  *
3953  * @adev: amdgpu device pointer
3954  *
3955  * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
3956  * a hung GPU.
3957  */
3958 bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
3959 {
3960 	if (!amdgpu_device_ip_check_soft_reset(adev)) {
3961 		DRM_INFO("Timeout, but no hardware hang detected.\n");
3962 		return false;
3963 	}
3964 
3965 	if (amdgpu_gpu_recovery == 0)
3966 		goto disabled;
3967 
3968 	if (amdgpu_sriov_vf(adev))
3969 		return true;
3970 
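	/*
	 * amdgpu_gpu_recovery == -1 means "auto": recovery is only enabled on
	 * the ASICs listed below, where it is considered reliable enough to
	 * turn on by default.  Booting with amdgpu.gpu_recovery=1 forces it
	 * on for every ASIC, amdgpu.gpu_recovery=0 disables it entirely.
	 */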
3971 	if (amdgpu_gpu_recovery == -1) {
3972 		switch (adev->asic_type) {
3973 		case CHIP_BONAIRE:
3974 		case CHIP_HAWAII:
3975 		case CHIP_TOPAZ:
3976 		case CHIP_TONGA:
3977 		case CHIP_FIJI:
3978 		case CHIP_POLARIS10:
3979 		case CHIP_POLARIS11:
3980 		case CHIP_POLARIS12:
3981 		case CHIP_VEGAM:
3982 		case CHIP_VEGA20:
3983 		case CHIP_VEGA10:
3984 		case CHIP_VEGA12:
3985 		case CHIP_RAVEN:
3986 		case CHIP_ARCTURUS:
3987 		case CHIP_RENOIR:
3988 		case CHIP_NAVI10:
3989 		case CHIP_NAVI14:
3990 		case CHIP_NAVI12:
3991 			break;
3992 		default:
3993 			goto disabled;
3994 		}
3995 	}
3996 
3997 	return true;
3998 
3999 disabled:
4000 	DRM_INFO("GPU recovery disabled.\n");
4001 	return false;
4002 }
4003 
4004 
4005 static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
4006 					struct amdgpu_job *job,
4007 					bool *need_full_reset_arg)
4008 {
4009 	int i, r = 0;
4010 	bool need_full_reset  = *need_full_reset_arg;
4011 
4012 	/* block all schedulers and reset given job's ring */
4013 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4014 		struct amdgpu_ring *ring = adev->rings[i];
4015 
4016 		if (!ring || !ring->sched.thread)
4017 			continue;
4018 
4019 		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
4020 		amdgpu_fence_driver_force_completion(ring);
4021 	}
4022 
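	/*
	 * Bump the guilty job's karma; once it crosses the scheduler's hang
	 * limit its entity is marked guilty and its jobs are no longer
	 * resubmitted after the reset.
	 */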
4023 	if (job)
4024 		drm_sched_increase_karma(&job->base);
4025 
4026 	/* Don't suspend on bare metal if we are not going to HW reset the ASIC */
4027 	if (!amdgpu_sriov_vf(adev)) {
4028 
4029 		if (!need_full_reset)
4030 			need_full_reset = amdgpu_device_ip_need_full_reset(adev);
4031 
4032 		if (!need_full_reset) {
4033 			amdgpu_device_ip_pre_soft_reset(adev);
4034 			r = amdgpu_device_ip_soft_reset(adev);
4035 			amdgpu_device_ip_post_soft_reset(adev);
4036 			if (r || amdgpu_device_ip_check_soft_reset(adev)) {
4037 				DRM_INFO("soft reset failed, will fallback to full reset!\n");
4038 				need_full_reset = true;
4039 			}
4040 		}
4041 
4042 		if (need_full_reset)
4043 			r = amdgpu_device_ip_suspend(adev);
4044 
4045 		*need_full_reset_arg = need_full_reset;
4046 	}
4047 
4048 	return r;
4049 }
4050 
4051 static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
4052 			       struct list_head *device_list_handle,
4053 			       bool *need_full_reset_arg)
4054 {
4055 	struct amdgpu_device *tmp_adev = NULL;
4056 	bool need_full_reset = *need_full_reset_arg, vram_lost = false;
4057 	int r = 0;
4058 
4059 	/*
4060 	 * ASIC reset has to be done on all XGMI hive nodes ASAP
4061 	 * to allow proper link negotiation in FW (within 1 sec)
4062 	 */
4063 	if (need_full_reset) {
4064 		list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4065 			/* For XGMI run all resets in parallel to speed up the process */
4066 			if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
4067 				if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
4068 					r = -EALREADY;
4069 			} else
4070 				r = amdgpu_asic_reset(tmp_adev);
4071 
4072 			if (r) {
4073 				DRM_ERROR("ASIC reset failed with error, %d for drm dev, %s",
4074 					 r, tmp_adev->ddev->unique);
4075 				break;
4076 			}
4077 		}
4078 
4079 		/* For XGMI wait for all resets to complete before proceed */
4080 		if (!r) {
4081 			list_for_each_entry(tmp_adev, device_list_handle,
4082 					    gmc.xgmi.head) {
4083 				if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
4084 					flush_work(&tmp_adev->xgmi_reset_work);
4085 					r = tmp_adev->asic_reset_res;
4086 					if (r)
4087 						break;
4088 				}
4089 			}
4090 		}
4091 	}
4092 
4093 	if (!r && amdgpu_ras_intr_triggered())
4094 		amdgpu_ras_intr_cleared();
4095 
4096 	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4097 		if (need_full_reset) {
4098 			/* post card */
4099 			if (amdgpu_atom_asic_init(tmp_adev->mode_info.atom_context))
4100 				DRM_WARN("asic atom init failed!");
4101 
4102 			if (!r) {
4103 				dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
4104 				r = amdgpu_device_ip_resume_phase1(tmp_adev);
4105 				if (r)
4106 					goto out;
4107 
4108 				vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
4109 				if (vram_lost) {
4110 					DRM_INFO("VRAM is lost due to GPU reset!\n");
4111 					amdgpu_inc_vram_lost(tmp_adev);
4112 				}
4113 
4114 				r = amdgpu_gtt_mgr_recover(
4115 					&tmp_adev->mman.bdev.man[TTM_PL_TT]);
4116 				if (r)
4117 					goto out;
4118 
4119 				r = amdgpu_device_fw_loading(tmp_adev);
4120 				if (r)
4121 					return r;
4122 
4123 				r = amdgpu_device_ip_resume_phase2(tmp_adev);
4124 				if (r)
4125 					goto out;
4126 
4127 				if (vram_lost)
4128 					amdgpu_device_fill_reset_magic(tmp_adev);
4129 
4130 				/*
4131 				 * Add this ASIC back as tracked now that the
4132 				 * reset has completed successfully.
4133 				 */
4134 				amdgpu_register_gpu_instance(tmp_adev);
4135 
4136 				r = amdgpu_device_ip_late_init(tmp_adev);
4137 				if (r)
4138 					goto out;
4139 
4140 				/* must succeed. */
4141 				amdgpu_ras_resume(tmp_adev);
4142 
4143 				/* Update PSP FW topology after reset */
4144 				if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
4145 					r = amdgpu_xgmi_update_topology(hive, tmp_adev);
4146 			}
4147 		}
4148 
4149 
4150 out:
4151 		if (!r) {
4152 			amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
4153 			r = amdgpu_ib_ring_tests(tmp_adev);
4154 			if (r) {
4155 				dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
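				/*
				 * IB tests failed after the reset: suspend
				 * the IPs again and return -EAGAIN so the
				 * caller retries with a full reset.
				 */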
4156 				r = amdgpu_device_ip_suspend(tmp_adev);
4157 				need_full_reset = true;
4158 				r = -EAGAIN;
4159 				goto end;
4160 			}
4161 		}
4162 
4163 		if (!r)
4164 			r = amdgpu_device_recover_vram(tmp_adev);
4165 		else
4166 			tmp_adev->asic_reset_res = r;
4167 	}
4168 
4169 end:
4170 	*need_full_reset_arg = need_full_reset;
4171 	return r;
4172 }
4173 
4174 static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock)
4175 {
4176 	if (trylock) {
4177 		if (!mutex_trylock(&adev->lock_reset))
4178 			return false;
4179 	} else
4180 		mutex_lock(&adev->lock_reset);
4181 
4182 	atomic_inc(&adev->gpu_reset_counter);
4183 	adev->in_gpu_reset = true;
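	/*
	 * Tell the powerplay code which MP1 (SMU firmware) state to expect
	 * across the reset: a mode 1 reset takes down the whole SOC
	 * including the SMU, while a mode 2 reset is a lighter reset that
	 * leaves more of the SOC intact.
	 */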
4184 	switch (amdgpu_asic_reset_method(adev)) {
4185 	case AMD_RESET_METHOD_MODE1:
4186 		adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
4187 		break;
4188 	case AMD_RESET_METHOD_MODE2:
4189 		adev->mp1_state = PP_MP1_STATE_RESET;
4190 		break;
4191 	default:
4192 		adev->mp1_state = PP_MP1_STATE_NONE;
4193 		break;
4194 	}
4195 
4196 	return true;
4197 }
4198 
4199 static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
4200 {
4201 	amdgpu_vf_error_trans_all(adev);
4202 	adev->mp1_state = PP_MP1_STATE_NONE;
4203 	adev->in_gpu_reset = false;
4204 	mutex_unlock(&adev->lock_reset);
4205 }
4206 
4207 /**
4208  * amdgpu_device_gpu_recover - reset the asic and recover scheduler
4209  *
4210  * @adev: amdgpu device pointer
4211  * @job: which job triggered the hang
4212  *
4213  * Attempt to reset the GPU if it has hung (all asics).
4214  * Attempt to do soft-reset or full-reset and reinitialize the ASIC.
4215  * Returns 0 for success or an error on failure.
4216  */
4217 
4218 int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
4219 			      struct amdgpu_job *job)
4220 {
4221 	struct list_head device_list, *device_list_handle =  NULL;
4222 	bool need_full_reset, job_signaled;
4223 	struct amdgpu_hive_info *hive = NULL;
4224 	struct amdgpu_device *tmp_adev = NULL;
4225 	int i, r = 0;
4226 	bool in_ras_intr = amdgpu_ras_intr_triggered();
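	/*
	 * BACO: "Bus Active, Chip Off".  If a RAS fatal interrupt fired and
	 * BACO is not the reset method, the handler only stops the
	 * schedulers (and may trigger the emergency reboot below) instead of
	 * attempting a normal recovery.
	 */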
4227 	bool use_baco =
4228 		(amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO);
4230 
4231 	/*
4232 	 * Flush RAM to disk so that after reboot
4233 	 * the user can read log and see why the system rebooted.
4234 	 */
4235 	if (in_ras_intr && !use_baco && amdgpu_ras_get_context(adev)->reboot) {
4236 
4237 		DRM_WARN("Emergency reboot.");
4238 
4239 		ksys_sync_helper();
4240 		emergency_restart();
4241 	}
4242 
4243 	need_full_reset = job_signaled = false;
4244 	INIT_LIST_HEAD(&device_list);
4245 
4246 	dev_info(adev->dev, "GPU %s begin!\n",
4247 		(in_ras_intr && !use_baco) ? "jobs stop":"reset");
4248 
4249 	cancel_delayed_work_sync(&adev->delayed_init_work);
4250 
4251 	hive = amdgpu_get_xgmi_hive(adev, false);
4252 
4253 	/*
4254 	 * Here we trylock to avoid chains of resets, whether triggered by
4255 	 * jobs on different adevs in an XGMI hive or by jobs on different
4256 	 * schedulers for the same device, while this TO handler is running.
4257 	 * We always reset all schedulers for a device and all devices in an
4258 	 * XGMI hive, so that should take care of them too.
4259 	 */
4260 
4261 	if (hive && !mutex_trylock(&hive->reset_lock)) {
4262 		DRM_INFO("Bailing on TDR for s_job:%"PRIx64", hive: %"PRIx64" as another already in progress",
4263 			  job ? job->base.id : -1, hive->hive_id);
4264 		return 0;
4265 	}
4266 
4267 	/* Start with adev pre asic reset first for soft reset check.*/
4268 	if (!amdgpu_device_lock_adev(adev, !hive)) {
4269 		DRM_INFO("Bailing on TDR for s_job:%"PRIx64", as another already in progress",
4270 			  job ? job->base.id : -1);
4271 		return 0;
4272 	}
4273 
4274 	/* Block kfd: SRIOV would do it separately */
4275 	if (!amdgpu_sriov_vf(adev))
4276 		amdgpu_amdkfd_pre_reset(adev);
4277 
4278 	/* Build list of devices to reset */
4279 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
4280 		if (!hive) {
4281 			/* unlock kfd: SRIOV would do it separately */
4282 			if (!amdgpu_sriov_vf(adev))
4283 				amdgpu_amdkfd_post_reset(adev);
4284 			amdgpu_device_unlock_adev(adev);
4285 			return -ENODEV;
4286 		}
4287 
4288 		/*
4289 		 * If we are in XGMI hive mode, the device reset is done for all
4290 		 * nodes in the hive to retrain all XGMI links; hence the reset
4291 		 * sequence is executed in a loop on all nodes.
4292 		 */
4293 		device_list_handle = &hive->device_list;
4294 	} else {
4295 		list_add_tail(&adev->gmc.xgmi.head, &device_list);
4296 		device_list_handle = &device_list;
4297 	}
4298 
4299 	/* block all schedulers and reset given job's ring */
4300 	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4301 		if (tmp_adev != adev) {
4302 			amdgpu_device_lock_adev(tmp_adev, false);
4303 			if (!amdgpu_sriov_vf(tmp_adev))
4304 				amdgpu_amdkfd_pre_reset(tmp_adev);
4305 		}
4306 
4307 		/*
4308 		 * Mark these ASICs as untracked first (they are about to be
4309 		 * reset) and add them back after the reset completes.
4310 		 */
4311 		amdgpu_unregister_gpu_instance(tmp_adev);
4312 
4313 		/* disable ras on ALL IPs */
4314 		if (!(in_ras_intr && !use_baco) &&
4315 		      amdgpu_device_ip_need_full_reset(tmp_adev))
4316 			amdgpu_ras_suspend(tmp_adev);
4317 
4318 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4319 			struct amdgpu_ring *ring = tmp_adev->rings[i];
4320 
4321 			if (!ring || !ring->sched.thread)
4322 				continue;
4323 
4324 			drm_sched_stop(&ring->sched, job ? &job->base : NULL);
4325 
4326 			if (in_ras_intr && !use_baco)
4327 				amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
4328 		}
4329 	}
4330 
4331 
4332 	if (in_ras_intr && !use_baco)
4333 		goto skip_sched_resume;
4334 
4335 	/*
4336 	 * Must check guilty signal here since after this point all old
4337 	 * HW fences are force signaled.
4338 	 *
4339 	 * job->base holds a reference to parent fence
4340 	 */
4341 	if (job && job->base.s_fence->parent &&
4342 	    dma_fence_is_signaled(job->base.s_fence->parent))
4343 		job_signaled = true;
4344 
4345 	if (job_signaled) {
4346 		dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
4347 		goto skip_hw_reset;
4348 	}
4349 
4350 
4351 	/* Guilty job will be freed after this */
4352 	r = amdgpu_device_pre_asic_reset(adev, job, &need_full_reset);
4353 	if (r) {
4354 		/*TODO Should we stop ?*/
4355 		DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
4356 			  r, adev->ddev->unique);
4357 		adev->asic_reset_res = r;
4358 	}
4359 
4360 retry:	/* Rest of adevs pre asic reset from XGMI hive. */
4361 	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4362 
4363 		if (tmp_adev == adev)
4364 			continue;
4365 
4366 		r = amdgpu_device_pre_asic_reset(tmp_adev,
4367 						 NULL,
4368 						 &need_full_reset);
4369 		/*TODO Should we stop ?*/
4370 		if (r) {
4371 			DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
4372 				  r, tmp_adev->ddev->unique);
4373 			tmp_adev->asic_reset_res = r;
4374 		}
4375 	}
4376 
4377 	/* Actual ASIC resets if needed.*/
4378 	/* TODO Implement XGMI hive reset logic for SRIOV */
4379 	if (amdgpu_sriov_vf(adev)) {
4380 		r = amdgpu_device_reset_sriov(adev, job ? false : true);
4381 		if (r)
4382 			adev->asic_reset_res = r;
4383 	} else {
4384 		r  = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset);
4385 		if (r == -EAGAIN)
4386 			goto retry;
4387 	}
4388 
4389 skip_hw_reset:
4390 
4391 	/* Post ASIC reset for all devs .*/
4392 	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4393 
4394 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4395 			struct amdgpu_ring *ring = tmp_adev->rings[i];
4396 
4397 			if (!ring || !ring->sched.thread)
4398 				continue;
4399 
4400 			/* No point in resubmitting jobs if we didn't do a HW reset */
4401 			if (!tmp_adev->asic_reset_res && !job_signaled)
4402 				drm_sched_resubmit_jobs(&ring->sched);
4403 
4404 			drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
4405 		}
4406 
4407 		if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled) {
4408 			drm_helper_resume_force_mode(tmp_adev->ddev);
4409 		}
4410 
4411 		tmp_adev->asic_reset_res = 0;
4412 
4413 		if (r) {
4414 			/* bad news, how to tell it to userspace ? */
4415 			dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
4416 			amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
4417 		} else {
4418 			dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
4419 		}
4420 	}
4421 
4422 skip_sched_resume:
4423 	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4424 		/* unlock kfd: SRIOV would do it separately */
4425 		if (!(in_ras_intr && !use_baco) && !amdgpu_sriov_vf(tmp_adev))
4426 			amdgpu_amdkfd_post_reset(tmp_adev);
4427 		amdgpu_device_unlock_adev(tmp_adev);
4428 	}
4429 
4430 	if (hive)
4431 		mutex_unlock(&hive->reset_lock);
4432 
4433 	if (r)
4434 		dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
4435 	return r;
4436 }
4437 
4438 /**
4439  * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
4440  *
4441  * @adev: amdgpu_device pointer
4442  *
4443  * Fetches and stores in the driver the PCIE capabilities (gen speed
4444  * and lanes) of the slot the device is in. Handles APUs and
4445  * virtualized environments where PCIE config space may not be available.
4446  */
4447 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
4448 {
4449 	struct pci_dev *pdev;
4450 	enum pci_bus_speed speed_cap, platform_speed_cap;
4451 	enum pcie_link_width platform_link_width;
4452 
4453 	if (amdgpu_pcie_gen_cap)
4454 		adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
4455 
4456 	if (amdgpu_pcie_lane_cap)
4457 		adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
4458 
4459 	/* covers APUs as well */
4460 	if (pci_is_root_bus(adev->pdev->bus)) {
4461 		if (adev->pm.pcie_gen_mask == 0)
4462 			adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
4463 		if (adev->pm.pcie_mlw_mask == 0)
4464 			adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
4465 		return;
4466 	}
4467 
4468 	if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
4469 		return;
4470 
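	/*
	 * The platform caps are the limiting (slowest) speed and width along
	 * the path to the root port, as opposed to the asic's own caps
	 * queried from the device itself below.
	 */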
4471 	pcie_bandwidth_available(adev->pdev, NULL,
4472 				 &platform_speed_cap, &platform_link_width);
4473 
4474 	if (adev->pm.pcie_gen_mask == 0) {
4475 		/* asic caps */
4476 		pdev = adev->pdev;
4477 		speed_cap = pcie_get_speed_cap(pdev);
4478 		if (speed_cap == PCI_SPEED_UNKNOWN) {
4479 			adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4480 						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4481 						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
4482 		} else {
4483 			if (speed_cap == PCIE_SPEED_16_0GT)
4484 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4485 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4486 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4487 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
4488 			else if (speed_cap == PCIE_SPEED_8_0GT)
4489 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4490 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4491 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
4492 			else if (speed_cap == PCIE_SPEED_5_0GT)
4493 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4494 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
4495 			else
4496 				adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
4497 		}
4498 		/* platform caps */
4499 		if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
4500 			adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4501 						   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4502 		} else {
4503 			if (platform_speed_cap == PCIE_SPEED_16_0GT)
4504 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4505 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4506 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4507 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
4508 			else if (platform_speed_cap == PCIE_SPEED_8_0GT)
4509 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4510 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4511 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
4512 			else if (platform_speed_cap == PCIE_SPEED_5_0GT)
4513 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4514 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4515 			else
4516 				adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
4517 
4518 		}
4519 	}
4520 	if (adev->pm.pcie_mlw_mask == 0) {
4521 		if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
4522 			adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
4523 		} else {
4524 			switch (platform_link_width) {
4525 			case PCIE_LNK_X32:
4526 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
4527 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4528 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4529 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4530 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4531 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4532 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4533 				break;
4534 			case PCIE_LNK_X16:
4535 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4536 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4537 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4538 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4539 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4540 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4541 				break;
4542 			case PCIE_LNK_X12:
4543 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4544 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4545 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4546 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4547 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4548 				break;
4549 			case PCIE_LNK_X8:
4550 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4551 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4552 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4553 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4554 				break;
4555 			case PCIE_LNK_X4:
4556 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4557 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4558 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4559 				break;
4560 			case PCIE_LNK_X2:
4561 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4562 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4563 				break;
4564 			case PCIE_LNK_X1:
4565 				adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
4566 				break;
4567 			default:
4568 				break;
4569 			}
4570 		}
4571 	}
4572 }
4573 
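/*
 * BACO ("Bus Active, Chip Off") keeps the PCIe link alive while the GPU core
 * is powered down; it is used both as a runtime power-saving state and, on
 * some ASICs, as a reset method.
 */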
4574 int amdgpu_device_baco_enter(struct drm_device *dev)
4575 {
4576 	struct amdgpu_device *adev = dev->dev_private;
4577 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
4578 
4579 	if (!amdgpu_device_supports_baco(adev->ddev))
4580 		return -ENOTSUPP;
4581 
4582 	if (ras && ras->supported)
4583 		adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
4584 
4585 	return amdgpu_dpm_baco_enter(adev);
4586 }
4587 
4588 int amdgpu_device_baco_exit(struct drm_device *dev)
4589 {
4590 	struct amdgpu_device *adev = dev->dev_private;
4591 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
4592 	int ret = 0;
4593 
4594 	if (!amdgpu_device_supports_baco(adev->ddev))
4595 		return -ENOTSUPP;
4596 
4597 	ret = amdgpu_dpm_baco_exit(adev);
4598 	if (ret)
4599 		return ret;
4600 
4601 	if (ras && ras->supported)
4602 		adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
4603 
4604 	return 0;
4605 }
4606