xref: /openbsd/sys/dev/pci/drm/amd/amdgpu/amdgpu_device.c (revision 4a4ef11e)
1fb4d8502Sjsg /*
2fb4d8502Sjsg  * Copyright 2008 Advanced Micro Devices, Inc.
3fb4d8502Sjsg  * Copyright 2008 Red Hat Inc.
4fb4d8502Sjsg  * Copyright 2009 Jerome Glisse.
5fb4d8502Sjsg  *
6fb4d8502Sjsg  * Permission is hereby granted, free of charge, to any person obtaining a
7fb4d8502Sjsg  * copy of this software and associated documentation files (the "Software"),
8fb4d8502Sjsg  * to deal in the Software without restriction, including without limitation
9fb4d8502Sjsg  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10fb4d8502Sjsg  * and/or sell copies of the Software, and to permit persons to whom the
11fb4d8502Sjsg  * Software is furnished to do so, subject to the following conditions:
12fb4d8502Sjsg  *
13fb4d8502Sjsg  * The above copyright notice and this permission notice shall be included in
14fb4d8502Sjsg  * all copies or substantial portions of the Software.
15fb4d8502Sjsg  *
16fb4d8502Sjsg  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17fb4d8502Sjsg  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18fb4d8502Sjsg  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19fb4d8502Sjsg  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20fb4d8502Sjsg  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21fb4d8502Sjsg  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22fb4d8502Sjsg  * OTHER DEALINGS IN THE SOFTWARE.
23fb4d8502Sjsg  *
24fb4d8502Sjsg  * Authors: Dave Airlie
25fb4d8502Sjsg  *          Alex Deucher
26fb4d8502Sjsg  *          Jerome Glisse
27fb4d8502Sjsg  */
28fb4d8502Sjsg #include <linux/power_supply.h>
29fb4d8502Sjsg #include <linux/kthread.h>
30c349dbc7Sjsg #include <linux/module.h>
31fb4d8502Sjsg #include <linux/console.h>
32fb4d8502Sjsg #include <linux/slab.h>
331bb76ff1Sjsg #include <linux/iommu.h>
34f9c49ec7Sjsg #include <linux/pci.h>
351bb76ff1Sjsg #include <linux/devcoredump.h>
361bb76ff1Sjsg #include <generated/utsrelease.h>
371bb76ff1Sjsg #include <linux/pci-p2pdma.h>
3878c2b773Sjsg #include <linux/apple-gmux.h>
39c349dbc7Sjsg 
40269b8745Sjsg #include <drm/drm_aperture.h>
41fb4d8502Sjsg #include <drm/drm_atomic_helper.h>
42f005ef32Sjsg #include <drm/drm_crtc_helper.h>
43f005ef32Sjsg #include <drm/drm_fb_helper.h>
44c349dbc7Sjsg #include <drm/drm_probe_helper.h>
45fb4d8502Sjsg #include <drm/amdgpu_drm.h>
46997286d4Sjsg #include <linux/device.h>
47fb4d8502Sjsg #include <linux/vgaarb.h>
48fb4d8502Sjsg #include <linux/vga_switcheroo.h>
49fb4d8502Sjsg #include <linux/efi.h>
50fb4d8502Sjsg #include "amdgpu.h"
51fb4d8502Sjsg #include "amdgpu_trace.h"
52fb4d8502Sjsg #include "amdgpu_i2c.h"
53fb4d8502Sjsg #include "atom.h"
54fb4d8502Sjsg #include "amdgpu_atombios.h"
55fb4d8502Sjsg #include "amdgpu_atomfirmware.h"
56fb4d8502Sjsg #include "amd_pcie.h"
57fb4d8502Sjsg #ifdef CONFIG_DRM_AMDGPU_SI
58fb4d8502Sjsg #include "si.h"
59fb4d8502Sjsg #endif
60fb4d8502Sjsg #ifdef CONFIG_DRM_AMDGPU_CIK
61fb4d8502Sjsg #include "cik.h"
62fb4d8502Sjsg #endif
63fb4d8502Sjsg #include "vi.h"
64fb4d8502Sjsg #include "soc15.h"
65c349dbc7Sjsg #include "nv.h"
66fb4d8502Sjsg #include "bif/bif_4_1_d.h"
67fb4d8502Sjsg #include <linux/firmware.h>
68fb4d8502Sjsg #include "amdgpu_vf_error.h"
69fb4d8502Sjsg 
70fb4d8502Sjsg #include "amdgpu_amdkfd.h"
71fb4d8502Sjsg #include "amdgpu_pm.h"
72fb4d8502Sjsg 
73c349dbc7Sjsg #include "amdgpu_xgmi.h"
74c349dbc7Sjsg #include "amdgpu_ras.h"
75c349dbc7Sjsg #include "amdgpu_pmu.h"
76ad8b1aafSjsg #include "amdgpu_fru_eeprom.h"
775ca02815Sjsg #include "amdgpu_reset.h"
78c349dbc7Sjsg 
79c349dbc7Sjsg #include <linux/suspend.h>
80c349dbc7Sjsg #include <drm/task_barrier.h>
81ad8b1aafSjsg #include <linux/pm_runtime.h>
82c349dbc7Sjsg 
835ca02815Sjsg #include <drm/drm_drv.h>
845ca02815Sjsg 
85e73b7337Sjsg #if IS_ENABLED(CONFIG_X86) && defined(__linux__)
86e73b7337Sjsg #include <asm/intel-family.h>
87e73b7337Sjsg #endif
88e73b7337Sjsg 
89fb4d8502Sjsg MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
90fb4d8502Sjsg MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
91fb4d8502Sjsg MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
924fe6e3f4Sjsg MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
93c349dbc7Sjsg MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
94c349dbc7Sjsg MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
95c349dbc7Sjsg MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
96fb4d8502Sjsg 
97fb4d8502Sjsg #define AMDGPU_RESUME_MS		2000
981bb76ff1Sjsg #define AMDGPU_MAX_RETRY_LIMIT		2
991bb76ff1Sjsg #define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
100fb4d8502Sjsg 
101269b8745Sjsg static const struct drm_driver amdgpu_kms_driver;
102269b8745Sjsg 
103c349dbc7Sjsg const char *amdgpu_asic_name[] = {
104fb4d8502Sjsg 	"TAHITI",
105fb4d8502Sjsg 	"PITCAIRN",
106fb4d8502Sjsg 	"VERDE",
107fb4d8502Sjsg 	"OLAND",
108fb4d8502Sjsg 	"HAINAN",
109fb4d8502Sjsg 	"BONAIRE",
110fb4d8502Sjsg 	"KAVERI",
111fb4d8502Sjsg 	"KABINI",
112fb4d8502Sjsg 	"HAWAII",
113fb4d8502Sjsg 	"MULLINS",
114fb4d8502Sjsg 	"TOPAZ",
115fb4d8502Sjsg 	"TONGA",
116fb4d8502Sjsg 	"FIJI",
117fb4d8502Sjsg 	"CARRIZO",
118fb4d8502Sjsg 	"STONEY",
119fb4d8502Sjsg 	"POLARIS10",
120fb4d8502Sjsg 	"POLARIS11",
121fb4d8502Sjsg 	"POLARIS12",
122fb4d8502Sjsg 	"VEGAM",
123fb4d8502Sjsg 	"VEGA10",
124fb4d8502Sjsg 	"VEGA12",
125fb4d8502Sjsg 	"VEGA20",
126fb4d8502Sjsg 	"RAVEN",
127c349dbc7Sjsg 	"ARCTURUS",
128c349dbc7Sjsg 	"RENOIR",
1295ca02815Sjsg 	"ALDEBARAN",
130c349dbc7Sjsg 	"NAVI10",
1315ca02815Sjsg 	"CYAN_SKILLFISH",
132c349dbc7Sjsg 	"NAVI14",
133c349dbc7Sjsg 	"NAVI12",
134ad8b1aafSjsg 	"SIENNA_CICHLID",
135ad8b1aafSjsg 	"NAVY_FLOUNDER",
1365ca02815Sjsg 	"VANGOGH",
1375ca02815Sjsg 	"DIMGREY_CAVEFISH",
1385ca02815Sjsg 	"BEIGE_GOBY",
1395ca02815Sjsg 	"YELLOW_CARP",
1401bb76ff1Sjsg 	"IP DISCOVERY",
141fb4d8502Sjsg 	"LAST",
142fb4d8502Sjsg };
143fb4d8502Sjsg 
144c349dbc7Sjsg /**
145c349dbc7Sjsg  * DOC: pcie_replay_count
146c349dbc7Sjsg  *
147c349dbc7Sjsg  * The amdgpu driver provides a sysfs API for reporting the total number
148c349dbc7Sjsg  * of PCIe replays (NAKs).
149c349dbc7Sjsg  * The file pcie_replay_count is used for this and returns the total
150c349dbc7Sjsg  * number of replays as a sum of the NAKs generated and the NAKs received.
151c349dbc7Sjsg  */
152c349dbc7Sjsg 
153c349dbc7Sjsg static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
154c349dbc7Sjsg 		struct device_attribute *attr, char *buf)
155c349dbc7Sjsg {
156c349dbc7Sjsg 	struct drm_device *ddev = dev_get_drvdata(dev);
157ad8b1aafSjsg 	struct amdgpu_device *adev = drm_to_adev(ddev);
158c349dbc7Sjsg 	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
159c349dbc7Sjsg 
1605ca02815Sjsg 	return sysfs_emit(buf, "%llu\n", cnt);
161c349dbc7Sjsg }
162c349dbc7Sjsg 
163f005ef32Sjsg static DEVICE_ATTR(pcie_replay_count, 0444,
164c349dbc7Sjsg 		amdgpu_device_get_pcie_replay_count, NULL);
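/*
 * Illustrative usage note (not part of the driver): since the attribute above
 * is registered on the PCI device, userspace would typically read the counter
 * through sysfs, e.g. "cat /sys/class/drm/card0/device/pcie_replay_count";
 * the exact path depends on how the card is enumerated.
 */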
165c349dbc7Sjsg 
166fb4d8502Sjsg static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
167fb4d8502Sjsg 
168ad8b1aafSjsg 
169ad8b1aafSjsg /**
1705ca02815Sjsg  * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
171fb4d8502Sjsg  *
172fb4d8502Sjsg  * @dev: drm_device pointer
173fb4d8502Sjsg  *
1745ca02815Sjsg  * Returns true if the device is a dGPU with ATPX power control,
1755ca02815Sjsg  * otherwise returns false.
1765ca02815Sjsg  */
1775ca02815Sjsg bool amdgpu_device_supports_px(struct drm_device *dev)
1785ca02815Sjsg {
1795ca02815Sjsg 	struct amdgpu_device *adev = drm_to_adev(dev);
1805ca02815Sjsg 
1815ca02815Sjsg 	if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
1825ca02815Sjsg 		return true;
1835ca02815Sjsg 	return false;
1845ca02815Sjsg }
1855ca02815Sjsg 
1865ca02815Sjsg /**
1875ca02815Sjsg  * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
1885ca02815Sjsg  *
1895ca02815Sjsg  * @dev: drm_device pointer
1905ca02815Sjsg  *
1915ca02815Sjsg  * Returns true if the device is a dGPU with ACPI power control,
192fb4d8502Sjsg  * otherwise return false.
193fb4d8502Sjsg  * otherwise returns false.
194c349dbc7Sjsg bool amdgpu_device_supports_boco(struct drm_device *dev)
195fb4d8502Sjsg {
196ad8b1aafSjsg 	struct amdgpu_device *adev = drm_to_adev(dev);
197fb4d8502Sjsg 
1985ca02815Sjsg 	if (adev->has_pr3 ||
1995ca02815Sjsg 	    ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
200fb4d8502Sjsg 		return true;
201fb4d8502Sjsg 	return false;
202fb4d8502Sjsg }
203fb4d8502Sjsg 
204c349dbc7Sjsg /**
205c349dbc7Sjsg  * amdgpu_device_supports_baco - Does the device support BACO
206c349dbc7Sjsg  *
207c349dbc7Sjsg  * @dev: drm_device pointer
208c349dbc7Sjsg  *
209c349dbc7Sjsg  * Returns true if the device supports BACO,
210c349dbc7Sjsg  * otherwise returns false.
211c349dbc7Sjsg  */
212c349dbc7Sjsg bool amdgpu_device_supports_baco(struct drm_device *dev)
213c349dbc7Sjsg {
214ad8b1aafSjsg 	struct amdgpu_device *adev = drm_to_adev(dev);
215c349dbc7Sjsg 
216c349dbc7Sjsg 	return amdgpu_asic_supports_baco(adev);
217c349dbc7Sjsg }
218c349dbc7Sjsg 
2195ca02815Sjsg /**
2205ca02815Sjsg  * amdgpu_device_supports_smart_shift - Is the device dGPU with
2215ca02815Sjsg  * smart shift support
2225ca02815Sjsg  *
2235ca02815Sjsg  * @dev: drm_device pointer
2245ca02815Sjsg  *
2255ca02815Sjsg  * Returns true if the device is a dGPU with Smart Shift support,
2265ca02815Sjsg  * otherwise returns false.
2275ca02815Sjsg  */
2285ca02815Sjsg bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
2295ca02815Sjsg {
2305ca02815Sjsg 	return (amdgpu_device_supports_boco(dev) &&
2315ca02815Sjsg 		amdgpu_acpi_is_power_shift_control_supported());
2325ca02815Sjsg }
2335ca02815Sjsg 
234ad8b1aafSjsg /*
235ad8b1aafSjsg  * VRAM access helper functions
236ad8b1aafSjsg  */
237ad8b1aafSjsg 
238c349dbc7Sjsg /**
2395ca02815Sjsg  * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
2405ca02815Sjsg  *
2415ca02815Sjsg  * @adev: amdgpu_device pointer
2425ca02815Sjsg  * @pos: offset of the buffer in vram
2435ca02815Sjsg  * @buf: virtual address of the buffer in system memory
2445ca02815Sjsg  * @size: read/write size, the buffer pointed to by @buf must be at least @size bytes
2455ca02815Sjsg  * @write: true - write to vram, otherwise - read from vram
2465ca02815Sjsg  */
2475ca02815Sjsg void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
2485ca02815Sjsg 			     void *buf, size_t size, bool write)
2495ca02815Sjsg {
2505ca02815Sjsg 	unsigned long flags;
2515ca02815Sjsg 	uint32_t hi = ~0, tmp = 0;
2525ca02815Sjsg 	uint32_t *data = buf;
2535ca02815Sjsg 	uint64_t last;
2545ca02815Sjsg 	int idx;
2555ca02815Sjsg 
2561bb76ff1Sjsg 	if (!drm_dev_enter(adev_to_drm(adev), &idx))
2575ca02815Sjsg 		return;
2585ca02815Sjsg 
2595ca02815Sjsg 	BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));
2605ca02815Sjsg 
2615ca02815Sjsg 	spin_lock_irqsave(&adev->mmio_idx_lock, flags);
2625ca02815Sjsg 	for (last = pos + size; pos < last; pos += 4) {
2635ca02815Sjsg 		tmp = pos >> 31;
2645ca02815Sjsg 
2655ca02815Sjsg 		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
2665ca02815Sjsg 		if (tmp != hi) {
2675ca02815Sjsg 			WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
2685ca02815Sjsg 			hi = tmp;
2695ca02815Sjsg 		}
2705ca02815Sjsg 		if (write)
2715ca02815Sjsg 			WREG32_NO_KIQ(mmMM_DATA, *data++);
2725ca02815Sjsg 		else
2735ca02815Sjsg 			*data++ = RREG32_NO_KIQ(mmMM_DATA);
2745ca02815Sjsg 	}
2755ca02815Sjsg 
2765ca02815Sjsg 	spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
2775ca02815Sjsg 	drm_dev_exit(idx);
2785ca02815Sjsg }
2795ca02815Sjsg 
2805ca02815Sjsg /**
2811bb76ff1Sjsg  * amdgpu_device_aper_access - access vram through the vram aperture
2825ca02815Sjsg  *
2835ca02815Sjsg  * @adev: amdgpu_device pointer
2845ca02815Sjsg  * @pos: offset of the buffer in vram
2855ca02815Sjsg  * @buf: virtual address of the buffer in system memory
2865ca02815Sjsg  * @size: read/write size, the buffer pointed to by @buf must be at least @size bytes
2875ca02815Sjsg  * @write: true - write to vram, otherwise - read from vram
2885ca02815Sjsg  *
2895ca02815Sjsg  * Returns the number of bytes that have been transferred.
2905ca02815Sjsg  */
2915ca02815Sjsg size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
2925ca02815Sjsg 				 void *buf, size_t size, bool write)
2935ca02815Sjsg {
2945ca02815Sjsg #ifdef CONFIG_64BIT
2955ca02815Sjsg 	void __iomem *addr;
2965ca02815Sjsg 	size_t count = 0;
2975ca02815Sjsg 	uint64_t last;
2985ca02815Sjsg 
2995ca02815Sjsg 	if (!adev->mman.aper_base_kaddr)
3005ca02815Sjsg 		return 0;
3015ca02815Sjsg 
3025ca02815Sjsg 	last = min(pos + size, adev->gmc.visible_vram_size);
3035ca02815Sjsg 	if (last > pos) {
3045ca02815Sjsg 		addr = adev->mman.aper_base_kaddr + pos;
3055ca02815Sjsg 		count = last - pos;
3065ca02815Sjsg 
3075ca02815Sjsg 		if (write) {
3085ca02815Sjsg 			memcpy_toio(addr, buf, count);
309f005ef32Sjsg 			/* Make sure HDP write cache flush happens without any reordering
310f005ef32Sjsg 			 * after the system memory contents are sent over PCIe to the device
311f005ef32Sjsg 			 */
3125ca02815Sjsg 			mb();
3135ca02815Sjsg 			amdgpu_device_flush_hdp(adev, NULL);
3145ca02815Sjsg 		} else {
3155ca02815Sjsg 			amdgpu_device_invalidate_hdp(adev, NULL);
316f005ef32Sjsg 			/* Make sure HDP read cache is invalidated before issuing a read
317f005ef32Sjsg 			 * to the PCIe device
318f005ef32Sjsg 			 */
3195ca02815Sjsg 			mb();
3205ca02815Sjsg 			memcpy_fromio(buf, addr, count);
3215ca02815Sjsg 		}
3225ca02815Sjsg 
3235ca02815Sjsg 	}
3245ca02815Sjsg 
3255ca02815Sjsg 	return count;
3265ca02815Sjsg #else
3275ca02815Sjsg 	return 0;
3285ca02815Sjsg #endif
3295ca02815Sjsg }
3305ca02815Sjsg 
3315ca02815Sjsg /**
332c349dbc7Sjsg  * amdgpu_device_vram_access - read/write a buffer in vram
333c349dbc7Sjsg  *
334c349dbc7Sjsg  * @adev: amdgpu_device pointer
335c349dbc7Sjsg  * @pos: offset of the buffer in vram
336c349dbc7Sjsg  * @buf: virtual address of the buffer in system memory
337c349dbc7Sjsg  * @size: read/write size, the buffer pointed to by @buf must be at least @size bytes
338c349dbc7Sjsg  * @write: true - write to vram, otherwise - read from vram
339c349dbc7Sjsg  */
340c349dbc7Sjsg void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
3415ca02815Sjsg 			       void *buf, size_t size, bool write)
342c349dbc7Sjsg {
3435ca02815Sjsg 	size_t count;
344c349dbc7Sjsg 
3455ca02815Sjsg 	/* try using the vram aperture to access vram first */
3465ca02815Sjsg 	count = amdgpu_device_aper_access(adev, pos, buf, size, write);
347c349dbc7Sjsg 	size -= count;
3485ca02815Sjsg 	if (size) {
3495ca02815Sjsg 		/* use MM to access the rest of vram */
3505ca02815Sjsg 		pos += count;
3515ca02815Sjsg 		buf += count;
3525ca02815Sjsg 		amdgpu_device_mm_access(adev, pos, buf, size, write);
353c349dbc7Sjsg 	}
354c349dbc7Sjsg }
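/*
 * Illustrative sketch (an assumed caller, not code from this file): peeking a
 * single dword of VRAM with the helper above could look like the following.
 * The offset and size must be dword aligned, as enforced by the BUG_ON() in
 * amdgpu_device_mm_access():
 *
 *	uint32_t val;
 *
 *	amdgpu_device_vram_access(adev, 0x1000, &val, sizeof(val), false);
 */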
355c349dbc7Sjsg 
356fb4d8502Sjsg /*
357ad8b1aafSjsg  * register access helper functions.
358fb4d8502Sjsg  */
3595ca02815Sjsg 
3605ca02815Sjsg /* Check if hw access should be skipped because of hotplug or device error */
3615ca02815Sjsg bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
3625ca02815Sjsg {
3635ca02815Sjsg 	if (adev->no_hw_access)
3645ca02815Sjsg 		return true;
3655ca02815Sjsg 
3665ca02815Sjsg #ifdef CONFIG_LOCKDEP
3675ca02815Sjsg 	/*
3685ca02815Sjsg 	 * This is a bit complicated to understand, so worth a comment. What we assert
3695ca02815Sjsg 	 * here is that the GPU reset is not running on another thread in parallel.
3705ca02815Sjsg 	 *
3715ca02815Sjsg 	 * For this we trylock the read side of the reset semaphore, if that succeeds
3725ca02815Sjsg 	 * we know that the reset is not running in parallel.
3735ca02815Sjsg 	 *
3745ca02815Sjsg 	 * If the trylock fails we assert that we are either already holding the read
3755ca02815Sjsg 	 * side of the lock or are the reset thread itself and hold the write side of
3765ca02815Sjsg 	 * the lock.
3775ca02815Sjsg 	 */
3785ca02815Sjsg 	if (in_task()) {
3791bb76ff1Sjsg 		if (down_read_trylock(&adev->reset_domain->sem))
3801bb76ff1Sjsg 			up_read(&adev->reset_domain->sem);
3815ca02815Sjsg 		else
3821bb76ff1Sjsg 			lockdep_assert_held(&adev->reset_domain->sem);
3835ca02815Sjsg 	}
3845ca02815Sjsg #endif
3855ca02815Sjsg 	return false;
3865ca02815Sjsg }
3875ca02815Sjsg 
388fb4d8502Sjsg /**
389ad8b1aafSjsg  * amdgpu_device_rreg - read a memory mapped IO or indirect register
390fb4d8502Sjsg  *
391fb4d8502Sjsg  * @adev: amdgpu_device pointer
392fb4d8502Sjsg  * @reg: dword aligned register offset
393fb4d8502Sjsg  * @acc_flags: access flags which require special behavior
394fb4d8502Sjsg  *
395fb4d8502Sjsg  * Returns the 32 bit value from the offset specified.
396fb4d8502Sjsg  */
397ad8b1aafSjsg uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
398ad8b1aafSjsg 			    uint32_t reg, uint32_t acc_flags)
399fb4d8502Sjsg {
400fb4d8502Sjsg 	uint32_t ret;
401fb4d8502Sjsg 
4025ca02815Sjsg 	if (amdgpu_device_skip_hw_access(adev))
403ad8b1aafSjsg 		return 0;
404fb4d8502Sjsg 
405ad8b1aafSjsg 	if ((reg * 4) < adev->rmmio_size) {
406ad8b1aafSjsg 		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
407ad8b1aafSjsg 		    amdgpu_sriov_runtime(adev) &&
4081bb76ff1Sjsg 		    down_read_trylock(&adev->reset_domain->sem)) {
409ad8b1aafSjsg 			ret = amdgpu_kiq_rreg(adev, reg);
4101bb76ff1Sjsg 			up_read(&adev->reset_domain->sem);
411ad8b1aafSjsg 		} else {
412e54dbfe7Sjsg 			ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
413fb4d8502Sjsg 		}
414ad8b1aafSjsg 	} else {
415ad8b1aafSjsg 		ret = adev->pcie_rreg(adev, reg * 4);
416ad8b1aafSjsg 	}
417ad8b1aafSjsg 
418ad8b1aafSjsg 	trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
419ad8b1aafSjsg 
420fb4d8502Sjsg 	return ret;
421fb4d8502Sjsg }
422fb4d8502Sjsg 
423fb4d8502Sjsg /*
424fb4d8502Sjsg  * MMIO register read with bytes helper functions
425fb4d8502Sjsg  * @offset: byte offset from MMIO start
426fb4d8502Sjsg  */
427fb4d8502Sjsg 
428fb4d8502Sjsg /**
429fb4d8502Sjsg  * amdgpu_mm_rreg8 - read a memory mapped IO register
430fb4d8502Sjsg  *
431fb4d8502Sjsg  * @adev: amdgpu_device pointer
432fb4d8502Sjsg  * @offset: byte aligned register offset
433fb4d8502Sjsg  *
434fb4d8502Sjsg  * Returns the 8 bit value from the offset specified.
435fb4d8502Sjsg  */
436ad8b1aafSjsg uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
437ad8b1aafSjsg {
4385ca02815Sjsg 	if (amdgpu_device_skip_hw_access(adev))
439ad8b1aafSjsg 		return 0;
440ad8b1aafSjsg 
441fb4d8502Sjsg 	if (offset < adev->rmmio_size)
442e54dbfe7Sjsg 		return (readb(adev->rmmio + offset));
443fb4d8502Sjsg 	BUG();
444fb4d8502Sjsg }
445fb4d8502Sjsg 
446fb4d8502Sjsg /*
447fb4d8502Sjsg  * MMIO register write with bytes helper functions
448fb4d8502Sjsg  * @offset: byte offset from MMIO start
449fb4d8502Sjsg  * @value: the value to be written to the register
450fb4d8502Sjsg  */
451f005ef32Sjsg 
452fb4d8502Sjsg /**
453fb4d8502Sjsg  * amdgpu_mm_wreg8 - write a memory mapped IO register
454fb4d8502Sjsg  *
455fb4d8502Sjsg  * @adev: amdgpu_device pointer
456fb4d8502Sjsg  * @offset: byte aligned register offset
457fb4d8502Sjsg  * @value: 8 bit value to write
458fb4d8502Sjsg  *
459fb4d8502Sjsg  * Writes the value specified to the offset specified.
460fb4d8502Sjsg  */
461ad8b1aafSjsg void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
462ad8b1aafSjsg {
4635ca02815Sjsg 	if (amdgpu_device_skip_hw_access(adev))
464ad8b1aafSjsg 		return;
465ad8b1aafSjsg 
466fb4d8502Sjsg 	if (offset < adev->rmmio_size)
467e54dbfe7Sjsg 		writeb(value, adev->rmmio + offset);
468fb4d8502Sjsg 	else
469fb4d8502Sjsg 		BUG();
470fb4d8502Sjsg }
471fb4d8502Sjsg 
472fb4d8502Sjsg /**
473ad8b1aafSjsg  * amdgpu_device_wreg - write to a memory mapped IO or indirect register
474c349dbc7Sjsg  *
475c349dbc7Sjsg  * @adev: amdgpu_device pointer
476c349dbc7Sjsg  * @reg: dword aligned register offset
477c349dbc7Sjsg  * @v: 32 bit value to write to the register
478c349dbc7Sjsg  * @acc_flags: access flags which require special behavior
479c349dbc7Sjsg  *
480c349dbc7Sjsg  * Writes the value specified to the offset specified.
481c349dbc7Sjsg  */
482ad8b1aafSjsg void amdgpu_device_wreg(struct amdgpu_device *adev,
483ad8b1aafSjsg 			uint32_t reg, uint32_t v,
484c349dbc7Sjsg 			uint32_t acc_flags)
485c349dbc7Sjsg {
4865ca02815Sjsg 	if (amdgpu_device_skip_hw_access(adev))
487ad8b1aafSjsg 		return;
488ad8b1aafSjsg 
489ad8b1aafSjsg 	if ((reg * 4) < adev->rmmio_size) {
490ad8b1aafSjsg 		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
491ad8b1aafSjsg 		    amdgpu_sriov_runtime(adev) &&
4921bb76ff1Sjsg 		    down_read_trylock(&adev->reset_domain->sem)) {
493ad8b1aafSjsg 			amdgpu_kiq_wreg(adev, reg, v);
4941bb76ff1Sjsg 			up_read(&adev->reset_domain->sem);
495ad8b1aafSjsg 		} else {
496ad8b1aafSjsg 			writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
497ad8b1aafSjsg 		}
498ad8b1aafSjsg 	} else {
499ad8b1aafSjsg 		adev->pcie_wreg(adev, reg * 4, v);
500c349dbc7Sjsg 	}
501c349dbc7Sjsg 
502ad8b1aafSjsg 	trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
503c349dbc7Sjsg }
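/*
 * Usage note (an assumption based on the usual amdgpu register macros, which
 * are defined elsewhere): most code does not call amdgpu_device_rreg()/
 * amdgpu_device_wreg() directly but goes through wrappers such as RREG32()/
 * WREG32().  EXAMPLE_REG and EXAMPLE_ENABLE_BIT below are placeholders:
 *
 *	tmp = RREG32(EXAMPLE_REG);
 *	tmp |= EXAMPLE_ENABLE_BIT;
 *	WREG32(EXAMPLE_REG, tmp);
 */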
504c349dbc7Sjsg 
5051bb76ff1Sjsg /**
5061bb76ff1Sjsg  * amdgpu_mm_wreg_mmio_rlc -  write register either with direct/indirect mmio or with RLC path if in range
507c349dbc7Sjsg  *
5081bb76ff1Sjsg  * @adev: amdgpu_device pointer
5091bb76ff1Sjsg  * @reg: mmio/rlc register
5101bb76ff1Sjsg  * @v: value to write
5111bb76ff1Sjsg  *
5121bb76ff1Sjsg  * This function is invoked only for debugfs register access.
5131bb76ff1Sjsg  */
514ad8b1aafSjsg void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
515f005ef32Sjsg 			     uint32_t reg, uint32_t v,
516f005ef32Sjsg 			     uint32_t xcc_id)
517c349dbc7Sjsg {
5185ca02815Sjsg 	if (amdgpu_device_skip_hw_access(adev))
519ad8b1aafSjsg 		return;
520ad8b1aafSjsg 
521c349dbc7Sjsg 	if (amdgpu_sriov_fullaccess(adev) &&
522c349dbc7Sjsg 	    adev->gfx.rlc.funcs &&
523c349dbc7Sjsg 	    adev->gfx.rlc.funcs->is_rlcg_access_range) {
524c349dbc7Sjsg 		if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
525f005ef32Sjsg 			return amdgpu_sriov_wreg(adev, reg, v, 0, 0, xcc_id);
5261bb76ff1Sjsg 	} else if ((reg * 4) >= adev->rmmio_size) {
5271bb76ff1Sjsg 		adev->pcie_wreg(adev, reg * 4, v);
528ad8b1aafSjsg 	} else {
529ad8b1aafSjsg 		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
530c349dbc7Sjsg 	}
531c349dbc7Sjsg }
532c349dbc7Sjsg 
533c349dbc7Sjsg /**
534ad8b1aafSjsg  * amdgpu_device_indirect_rreg - read an indirect register
535ad8b1aafSjsg  *
536ad8b1aafSjsg  * @adev: amdgpu_device pointer
5375ca02815Sjsg  * @reg_addr: indirect register address to read from
538ad8b1aafSjsg  *
539ad8b1aafSjsg  * Returns the value of indirect register @reg_addr
540ad8b1aafSjsg  */
541ad8b1aafSjsg u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
542ad8b1aafSjsg 				u32 reg_addr)
543ad8b1aafSjsg {
544f005ef32Sjsg 	unsigned long flags, pcie_index, pcie_data;
545ad8b1aafSjsg 	void __iomem *pcie_index_offset;
546ad8b1aafSjsg 	void __iomem *pcie_data_offset;
547f005ef32Sjsg 	u32 r;
548f005ef32Sjsg 
549f005ef32Sjsg 	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
550f005ef32Sjsg 	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
551ad8b1aafSjsg 
552ad8b1aafSjsg 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
553ad8b1aafSjsg 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
554ad8b1aafSjsg 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
555ad8b1aafSjsg 
556ad8b1aafSjsg 	writel(reg_addr, pcie_index_offset);
557ad8b1aafSjsg 	readl(pcie_index_offset);
558ad8b1aafSjsg 	r = readl(pcie_data_offset);
559ad8b1aafSjsg 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
560ad8b1aafSjsg 
561ad8b1aafSjsg 	return r;
562ad8b1aafSjsg }
563ad8b1aafSjsg 
564f005ef32Sjsg u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
565f005ef32Sjsg 				    u64 reg_addr)
566f005ef32Sjsg {
567f005ef32Sjsg 	unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
568f005ef32Sjsg 	u32 r;
569f005ef32Sjsg 	void __iomem *pcie_index_offset;
570f005ef32Sjsg 	void __iomem *pcie_index_hi_offset;
571f005ef32Sjsg 	void __iomem *pcie_data_offset;
572f005ef32Sjsg 
573f005ef32Sjsg 	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
574f005ef32Sjsg 	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
575f005ef32Sjsg 	if (adev->nbio.funcs->get_pcie_index_hi_offset)
576f005ef32Sjsg 		pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
577f005ef32Sjsg 	else
578f005ef32Sjsg 		pcie_index_hi = 0;
579f005ef32Sjsg 
580f005ef32Sjsg 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
581f005ef32Sjsg 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
582f005ef32Sjsg 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
583f005ef32Sjsg 	if (pcie_index_hi != 0)
584f005ef32Sjsg 		pcie_index_hi_offset = (void __iomem *)adev->rmmio +
585f005ef32Sjsg 				pcie_index_hi * 4;
586f005ef32Sjsg 
587f005ef32Sjsg 	writel(reg_addr, pcie_index_offset);
588f005ef32Sjsg 	readl(pcie_index_offset);
589f005ef32Sjsg 	if (pcie_index_hi != 0) {
590f005ef32Sjsg 		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
591f005ef32Sjsg 		readl(pcie_index_hi_offset);
592f005ef32Sjsg 	}
593f005ef32Sjsg 	r = readl(pcie_data_offset);
594f005ef32Sjsg 
595f005ef32Sjsg 	/* clear the high bits */
596f005ef32Sjsg 	if (pcie_index_hi != 0) {
597f005ef32Sjsg 		writel(0, pcie_index_hi_offset);
598f005ef32Sjsg 		readl(pcie_index_hi_offset);
599f005ef32Sjsg 	}
600f005ef32Sjsg 
601f005ef32Sjsg 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
602f005ef32Sjsg 
603f005ef32Sjsg 	return r;
604f005ef32Sjsg }
605f005ef32Sjsg 
606ad8b1aafSjsg /**
607ad8b1aafSjsg  * amdgpu_device_indirect_rreg64 - read a 64-bit indirect register
608ad8b1aafSjsg  *
609ad8b1aafSjsg  * @adev: amdgpu_device pointer
6105ca02815Sjsg  * @reg_addr: indirect register address to read from
611ad8b1aafSjsg  *
612ad8b1aafSjsg  * Returns the value of indirect register @reg_addr
613ad8b1aafSjsg  */
614ad8b1aafSjsg u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
615ad8b1aafSjsg 				  u32 reg_addr)
616ad8b1aafSjsg {
617f005ef32Sjsg 	unsigned long flags, pcie_index, pcie_data;
618ad8b1aafSjsg 	void __iomem *pcie_index_offset;
619ad8b1aafSjsg 	void __iomem *pcie_data_offset;
620f005ef32Sjsg 	u64 r;
621f005ef32Sjsg 
622f005ef32Sjsg 	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
623f005ef32Sjsg 	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
624ad8b1aafSjsg 
625ad8b1aafSjsg 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
626ad8b1aafSjsg 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
627ad8b1aafSjsg 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
628ad8b1aafSjsg 
629ad8b1aafSjsg 	/* read low 32 bits */
630ad8b1aafSjsg 	writel(reg_addr, pcie_index_offset);
631ad8b1aafSjsg 	readl(pcie_index_offset);
632ad8b1aafSjsg 	r = readl(pcie_data_offset);
633ad8b1aafSjsg 	/* read high 32 bits */
634ad8b1aafSjsg 	writel(reg_addr + 4, pcie_index_offset);
635ad8b1aafSjsg 	readl(pcie_index_offset);
636ad8b1aafSjsg 	r |= ((u64)readl(pcie_data_offset) << 32);
637ad8b1aafSjsg 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
638ad8b1aafSjsg 
639ad8b1aafSjsg 	return r;
640ad8b1aafSjsg }
641ad8b1aafSjsg 
642ad8b1aafSjsg /**
643ad8b1aafSjsg  * amdgpu_device_indirect_wreg - write an indirect register address
644ad8b1aafSjsg  *
645ad8b1aafSjsg  * @adev: amdgpu_device pointer
646ad8b1aafSjsg  * @reg_addr: indirect register offset
647ad8b1aafSjsg  * @reg_data: indirect register data
648ad8b1aafSjsg  *
649ad8b1aafSjsg  */
650ad8b1aafSjsg void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
651ad8b1aafSjsg 				 u32 reg_addr, u32 reg_data)
652ad8b1aafSjsg {
653f005ef32Sjsg 	unsigned long flags, pcie_index, pcie_data;
654ad8b1aafSjsg 	void __iomem *pcie_index_offset;
655ad8b1aafSjsg 	void __iomem *pcie_data_offset;
656ad8b1aafSjsg 
657f005ef32Sjsg 	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
658f005ef32Sjsg 	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
659f005ef32Sjsg 
660ad8b1aafSjsg 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
661ad8b1aafSjsg 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
662ad8b1aafSjsg 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
663ad8b1aafSjsg 
664ad8b1aafSjsg 	writel(reg_addr, pcie_index_offset);
665ad8b1aafSjsg 	readl(pcie_index_offset);
666ad8b1aafSjsg 	writel(reg_data, pcie_data_offset);
667ad8b1aafSjsg 	readl(pcie_data_offset);
668ad8b1aafSjsg 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
669ad8b1aafSjsg }
670ad8b1aafSjsg 
671f005ef32Sjsg void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
672f005ef32Sjsg 				     u64 reg_addr, u32 reg_data)
673f005ef32Sjsg {
674f005ef32Sjsg 	unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
675f005ef32Sjsg 	void __iomem *pcie_index_offset;
676f005ef32Sjsg 	void __iomem *pcie_index_hi_offset;
677f005ef32Sjsg 	void __iomem *pcie_data_offset;
678f005ef32Sjsg 
679f005ef32Sjsg 	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
680f005ef32Sjsg 	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
681f005ef32Sjsg 	if (adev->nbio.funcs->get_pcie_index_hi_offset)
682f005ef32Sjsg 		pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
683f005ef32Sjsg 	else
684f005ef32Sjsg 		pcie_index_hi = 0;
685f005ef32Sjsg 
686f005ef32Sjsg 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
687f005ef32Sjsg 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
688f005ef32Sjsg 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
689f005ef32Sjsg 	if (pcie_index_hi != 0)
690f005ef32Sjsg 		pcie_index_hi_offset = (void __iomem *)adev->rmmio +
691f005ef32Sjsg 				pcie_index_hi * 4;
692f005ef32Sjsg 
693f005ef32Sjsg 	writel(reg_addr, pcie_index_offset);
694f005ef32Sjsg 	readl(pcie_index_offset);
695f005ef32Sjsg 	if (pcie_index_hi != 0) {
696f005ef32Sjsg 		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
697f005ef32Sjsg 		readl(pcie_index_hi_offset);
698f005ef32Sjsg 	}
699f005ef32Sjsg 	writel(reg_data, pcie_data_offset);
700f005ef32Sjsg 	readl(pcie_data_offset);
701f005ef32Sjsg 
702f005ef32Sjsg 	/* clear the high bits */
703f005ef32Sjsg 	if (pcie_index_hi != 0) {
704f005ef32Sjsg 		writel(0, pcie_index_hi_offset);
705f005ef32Sjsg 		readl(pcie_index_hi_offset);
706f005ef32Sjsg 	}
707f005ef32Sjsg 
708f005ef32Sjsg 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
709f005ef32Sjsg }
710f005ef32Sjsg 
711ad8b1aafSjsg /**
712ad8b1aafSjsg  * amdgpu_device_indirect_wreg64 - write a 64-bit indirect register address
713ad8b1aafSjsg  *
714ad8b1aafSjsg  * @adev: amdgpu_device pointer
715ad8b1aafSjsg  * @reg_addr: indirect register offset
716ad8b1aafSjsg  * @reg_data: indirect register data
717ad8b1aafSjsg  *
718ad8b1aafSjsg  */
719ad8b1aafSjsg void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
720ad8b1aafSjsg 				   u32 reg_addr, u64 reg_data)
721ad8b1aafSjsg {
722f005ef32Sjsg 	unsigned long flags, pcie_index, pcie_data;
723ad8b1aafSjsg 	void __iomem *pcie_index_offset;
724ad8b1aafSjsg 	void __iomem *pcie_data_offset;
725ad8b1aafSjsg 
726f005ef32Sjsg 	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
727f005ef32Sjsg 	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
728f005ef32Sjsg 
729ad8b1aafSjsg 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
730ad8b1aafSjsg 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
731ad8b1aafSjsg 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
732ad8b1aafSjsg 
733ad8b1aafSjsg 	/* write low 32 bits */
734ad8b1aafSjsg 	writel(reg_addr, pcie_index_offset);
735ad8b1aafSjsg 	readl(pcie_index_offset);
736ad8b1aafSjsg 	writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
737ad8b1aafSjsg 	readl(pcie_data_offset);
738ad8b1aafSjsg 	/* write high 32 bits */
739ad8b1aafSjsg 	writel(reg_addr + 4, pcie_index_offset);
740ad8b1aafSjsg 	readl(pcie_index_offset);
741ad8b1aafSjsg 	writel((u32)(reg_data >> 32), pcie_data_offset);
742ad8b1aafSjsg 	readl(pcie_data_offset);
743ad8b1aafSjsg 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
744ad8b1aafSjsg }
745ad8b1aafSjsg 
746ad8b1aafSjsg /**
747f005ef32Sjsg  * amdgpu_device_get_rev_id - query device rev_id
748f005ef32Sjsg  *
749f005ef32Sjsg  * @adev: amdgpu_device pointer
750f005ef32Sjsg  *
751f005ef32Sjsg  * Return device rev_id
752f005ef32Sjsg  */
753f005ef32Sjsg u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
754f005ef32Sjsg {
755f005ef32Sjsg 	return adev->nbio.funcs->get_rev_id(adev);
756f005ef32Sjsg }
757f005ef32Sjsg 
758f005ef32Sjsg /**
759fb4d8502Sjsg  * amdgpu_invalid_rreg - dummy reg read function
760fb4d8502Sjsg  *
761ad8b1aafSjsg  * @adev: amdgpu_device pointer
762fb4d8502Sjsg  * @reg: offset of register
763fb4d8502Sjsg  *
764fb4d8502Sjsg  * Dummy register read function.  Used for register blocks
765fb4d8502Sjsg  * that certain asics don't have (all asics).
766fb4d8502Sjsg  * Returns the value in the register.
767fb4d8502Sjsg  */
768fb4d8502Sjsg static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
769fb4d8502Sjsg {
770fb4d8502Sjsg 	DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
771fb4d8502Sjsg 	BUG();
772fb4d8502Sjsg 	return 0;
773fb4d8502Sjsg }
774fb4d8502Sjsg 
775f005ef32Sjsg static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg)
776f005ef32Sjsg {
777f005ef32Sjsg 	DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
778f005ef32Sjsg 	BUG();
779f005ef32Sjsg 	return 0;
780f005ef32Sjsg }
781f005ef32Sjsg 
782fb4d8502Sjsg /**
783fb4d8502Sjsg  * amdgpu_invalid_wreg - dummy reg write function
784fb4d8502Sjsg  *
785ad8b1aafSjsg  * @adev: amdgpu_device pointer
786fb4d8502Sjsg  * @reg: offset of register
787fb4d8502Sjsg  * @v: value to write to the register
788fb4d8502Sjsg  *
789fb4d8502Sjsg  * Dummy register write function.  Used for register blocks
790fb4d8502Sjsg  * that certain asics don't have (all asics).
791fb4d8502Sjsg  */
792fb4d8502Sjsg static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
793fb4d8502Sjsg {
794fb4d8502Sjsg 	DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
795fb4d8502Sjsg 		  reg, v);
796fb4d8502Sjsg 	BUG();
797fb4d8502Sjsg }
798fb4d8502Sjsg 
799f005ef32Sjsg static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v)
800f005ef32Sjsg {
801f005ef32Sjsg 	DRM_ERROR("Invalid callback to write register 0x%llX with 0x%08X\n",
802f005ef32Sjsg 		  reg, v);
803f005ef32Sjsg 	BUG();
804f005ef32Sjsg }
805f005ef32Sjsg 
806fb4d8502Sjsg /**
807c349dbc7Sjsg  * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
808c349dbc7Sjsg  *
809ad8b1aafSjsg  * @adev: amdgpu_device pointer
810c349dbc7Sjsg  * @reg: offset of register
811c349dbc7Sjsg  *
812c349dbc7Sjsg  * Dummy register read function.  Used for register blocks
813c349dbc7Sjsg  * that certain asics don't have (all asics).
814c349dbc7Sjsg  * Returns the value in the register.
815c349dbc7Sjsg  */
816c349dbc7Sjsg static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
817c349dbc7Sjsg {
818c349dbc7Sjsg 	DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
819c349dbc7Sjsg 	BUG();
820c349dbc7Sjsg 	return 0;
821c349dbc7Sjsg }
822c349dbc7Sjsg 
823c349dbc7Sjsg /**
824c349dbc7Sjsg  * amdgpu_invalid_wreg64 - dummy reg write function
825c349dbc7Sjsg  *
826ad8b1aafSjsg  * @adev: amdgpu_device pointer
827c349dbc7Sjsg  * @reg: offset of register
828c349dbc7Sjsg  * @v: value to write to the register
829c349dbc7Sjsg  *
830c349dbc7Sjsg  * Dummy register write function.  Used for register blocks
831c349dbc7Sjsg  * that certain asics don't have (all asics).
832c349dbc7Sjsg  */
833c349dbc7Sjsg static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
834c349dbc7Sjsg {
835c349dbc7Sjsg 	DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
836c349dbc7Sjsg 		  reg, v);
837c349dbc7Sjsg 	BUG();
838c349dbc7Sjsg }
839c349dbc7Sjsg 
840c349dbc7Sjsg /**
841fb4d8502Sjsg  * amdgpu_block_invalid_rreg - dummy reg read function
842fb4d8502Sjsg  *
843ad8b1aafSjsg  * @adev: amdgpu_device pointer
844fb4d8502Sjsg  * @block: offset of instance
845fb4d8502Sjsg  * @reg: offset of register
846fb4d8502Sjsg  *
847fb4d8502Sjsg  * Dummy register read function.  Used for register blocks
848fb4d8502Sjsg  * that certain asics don't have (all asics).
849fb4d8502Sjsg  * Returns the value in the register.
850fb4d8502Sjsg  */
851fb4d8502Sjsg static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
852fb4d8502Sjsg 					  uint32_t block, uint32_t reg)
853fb4d8502Sjsg {
854fb4d8502Sjsg 	DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
855fb4d8502Sjsg 		  reg, block);
856fb4d8502Sjsg 	BUG();
857fb4d8502Sjsg 	return 0;
858fb4d8502Sjsg }
859fb4d8502Sjsg 
860fb4d8502Sjsg /**
861fb4d8502Sjsg  * amdgpu_block_invalid_wreg - dummy reg write function
862fb4d8502Sjsg  *
863ad8b1aafSjsg  * @adev: amdgpu_device pointer
864fb4d8502Sjsg  * @block: offset of instance
865fb4d8502Sjsg  * @reg: offset of register
866fb4d8502Sjsg  * @v: value to write to the register
867fb4d8502Sjsg  *
868fb4d8502Sjsg  * Dummy register write function.  Used for register blocks
869fb4d8502Sjsg  * that certain asics don't have (all asics).
870fb4d8502Sjsg  */
871fb4d8502Sjsg static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
872fb4d8502Sjsg 				      uint32_t block,
873fb4d8502Sjsg 				      uint32_t reg, uint32_t v)
874fb4d8502Sjsg {
875fb4d8502Sjsg 	DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
876fb4d8502Sjsg 		  reg, block, v);
877fb4d8502Sjsg 	BUG();
878fb4d8502Sjsg }
879fb4d8502Sjsg 
880fb4d8502Sjsg /**
881ad8b1aafSjsg  * amdgpu_device_asic_init - Wrapper for atom asic_init
882ad8b1aafSjsg  *
883ad8b1aafSjsg  * @adev: amdgpu_device pointer
884ad8b1aafSjsg  *
885ad8b1aafSjsg  * Does any asic specific work and then calls atom asic init.
886ad8b1aafSjsg  */
887ad8b1aafSjsg static int amdgpu_device_asic_init(struct amdgpu_device *adev)
888ad8b1aafSjsg {
889f005ef32Sjsg 	int ret;
890f005ef32Sjsg 
891ad8b1aafSjsg 	amdgpu_asic_pre_asic_init(adev);
892ad8b1aafSjsg 
893f005ef32Sjsg 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3) ||
894f005ef32Sjsg 	    adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0)) {
895f005ef32Sjsg 		amdgpu_psp_wait_for_bootloader(adev);
896f005ef32Sjsg 		ret = amdgpu_atomfirmware_asic_init(adev, true);
897f005ef32Sjsg 		return ret;
898f005ef32Sjsg 	} else {
899ad8b1aafSjsg 		return amdgpu_atom_asic_init(adev->mode_info.atom_context);
900ad8b1aafSjsg 	}
901ad8b1aafSjsg 
902f005ef32Sjsg 	return 0;
903f005ef32Sjsg }
904f005ef32Sjsg 
905ad8b1aafSjsg /**
906f005ef32Sjsg  * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
907fb4d8502Sjsg  *
908ad8b1aafSjsg  * @adev: amdgpu_device pointer
909fb4d8502Sjsg  *
910fb4d8502Sjsg  * Allocates a scratch page of VRAM for use by various things in the
911fb4d8502Sjsg  * driver.
912fb4d8502Sjsg  */
913f005ef32Sjsg static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
914fb4d8502Sjsg {
915f005ef32Sjsg 	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
916f005ef32Sjsg 				       AMDGPU_GEM_DOMAIN_VRAM |
917f005ef32Sjsg 				       AMDGPU_GEM_DOMAIN_GTT,
918f005ef32Sjsg 				       &adev->mem_scratch.robj,
919f005ef32Sjsg 				       &adev->mem_scratch.gpu_addr,
920f005ef32Sjsg 				       (void **)&adev->mem_scratch.ptr);
921fb4d8502Sjsg }
922fb4d8502Sjsg 
923fb4d8502Sjsg /**
924f005ef32Sjsg  * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
925fb4d8502Sjsg  *
926ad8b1aafSjsg  * @adev: amdgpu_device pointer
927fb4d8502Sjsg  *
928fb4d8502Sjsg  * Frees the VRAM scratch page.
929fb4d8502Sjsg  */
930f005ef32Sjsg static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
931fb4d8502Sjsg {
932f005ef32Sjsg 	amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
933fb4d8502Sjsg }
934fb4d8502Sjsg 
935fb4d8502Sjsg /**
936fb4d8502Sjsg  * amdgpu_device_program_register_sequence - program an array of registers.
937fb4d8502Sjsg  *
938fb4d8502Sjsg  * @adev: amdgpu_device pointer
939fb4d8502Sjsg  * @registers: pointer to the register array
940fb4d8502Sjsg  * @array_size: size of the register array
941fb4d8502Sjsg  *
942f005ef32Sjsg  * Programs an array of registers with and/or masks.
943fb4d8502Sjsg  * This is a helper for setting golden registers.
944fb4d8502Sjsg  */
945fb4d8502Sjsg void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
946fb4d8502Sjsg 					     const u32 *registers,
947fb4d8502Sjsg 					     const u32 array_size)
948fb4d8502Sjsg {
949fb4d8502Sjsg 	u32 tmp, reg, and_mask, or_mask;
950fb4d8502Sjsg 	int i;
951fb4d8502Sjsg 
952fb4d8502Sjsg 	if (array_size % 3)
953fb4d8502Sjsg 		return;
954fb4d8502Sjsg 
955fb4d8502Sjsg 	for (i = 0; i < array_size; i += 3) {
956fb4d8502Sjsg 		reg = registers[i + 0];
957fb4d8502Sjsg 		and_mask = registers[i + 1];
958fb4d8502Sjsg 		or_mask = registers[i + 2];
959fb4d8502Sjsg 
960fb4d8502Sjsg 		if (and_mask == 0xffffffff) {
961fb4d8502Sjsg 			tmp = or_mask;
962fb4d8502Sjsg 		} else {
963fb4d8502Sjsg 			tmp = RREG32(reg);
964fb4d8502Sjsg 			tmp &= ~and_mask;
965c349dbc7Sjsg 			if (adev->family >= AMDGPU_FAMILY_AI)
966c349dbc7Sjsg 				tmp |= (or_mask & and_mask);
967c349dbc7Sjsg 			else
968fb4d8502Sjsg 				tmp |= or_mask;
969fb4d8502Sjsg 		}
970fb4d8502Sjsg 		WREG32(reg, tmp);
971fb4d8502Sjsg 	}
972fb4d8502Sjsg }
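/*
 * Illustrative sketch (placeholder offsets and masks, not a real golden
 * register table): callers pass {reg, and_mask, or_mask} triplets, so a
 * minimal table and call might look like:
 *
 *	static const u32 example_golden_settings[] = {
 *		0x1234, 0xffffffff, 0x00000001,
 *		0x5678, 0x0000ff00, 0x00002100,
 *	};
 *
 *	amdgpu_device_program_register_sequence(adev, example_golden_settings,
 *						ARRAY_SIZE(example_golden_settings));
 */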
973fb4d8502Sjsg 
974fb4d8502Sjsg /**
975fb4d8502Sjsg  * amdgpu_device_pci_config_reset - reset the GPU
976fb4d8502Sjsg  *
977fb4d8502Sjsg  * @adev: amdgpu_device pointer
978fb4d8502Sjsg  *
979fb4d8502Sjsg  * Resets the GPU using the pci config reset sequence.
980fb4d8502Sjsg  * Only applicable to asics prior to vega10.
981fb4d8502Sjsg  */
982fb4d8502Sjsg void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
983fb4d8502Sjsg {
984fb4d8502Sjsg 	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
985fb4d8502Sjsg }
986fb4d8502Sjsg 
9875ca02815Sjsg /**
9885ca02815Sjsg  * amdgpu_device_pci_reset - reset the GPU using generic PCI means
9895ca02815Sjsg  *
9905ca02815Sjsg  * @adev: amdgpu_device pointer
9915ca02815Sjsg  *
9925ca02815Sjsg  * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
9935ca02815Sjsg  */
9945ca02815Sjsg int amdgpu_device_pci_reset(struct amdgpu_device *adev)
9955ca02815Sjsg {
9965ca02815Sjsg 	STUB();
9975ca02815Sjsg 	return -ENOSYS;
9985ca02815Sjsg #ifdef notyet
9995ca02815Sjsg 	return pci_reset_function(adev->pdev);
10005ca02815Sjsg #endif
10015ca02815Sjsg }
10025ca02815Sjsg 
1003fb4d8502Sjsg /*
1004fb4d8502Sjsg  * amdgpu_device_wb_*()
1005fb4d8502Sjsg  * Writeback is the method by which the GPU updates special pages in memory
1006fb4d8502Sjsg  * with the status of certain GPU events (fences, ring pointers,etc.).
1007fb4d8502Sjsg  */
1008fb4d8502Sjsg 
1009fb4d8502Sjsg /**
1010fb4d8502Sjsg  * amdgpu_device_wb_fini - Disable Writeback and free memory
1011fb4d8502Sjsg  *
1012fb4d8502Sjsg  * @adev: amdgpu_device pointer
1013fb4d8502Sjsg  *
1014fb4d8502Sjsg  * Disables Writeback and frees the Writeback memory (all asics).
1015fb4d8502Sjsg  * Used at driver shutdown.
1016fb4d8502Sjsg  */
1017fb4d8502Sjsg static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
1018fb4d8502Sjsg {
1019fb4d8502Sjsg 	if (adev->wb.wb_obj) {
1020fb4d8502Sjsg 		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1021fb4d8502Sjsg 				      &adev->wb.gpu_addr,
1022fb4d8502Sjsg 				      (void **)&adev->wb.wb);
1023fb4d8502Sjsg 		adev->wb.wb_obj = NULL;
1024fb4d8502Sjsg 	}
1025fb4d8502Sjsg }
1026fb4d8502Sjsg 
1027fb4d8502Sjsg /**
1028fb4d8502Sjsg  * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
1029fb4d8502Sjsg  *
1030fb4d8502Sjsg  * @adev: amdgpu_device pointer
1031fb4d8502Sjsg  *
1032fb4d8502Sjsg  * Initializes writeback and allocates writeback memory (all asics).
1033fb4d8502Sjsg  * Used at driver startup.
1034fb4d8502Sjsg  * Returns 0 on success or an -error on failure.
1035fb4d8502Sjsg  */
1036fb4d8502Sjsg static int amdgpu_device_wb_init(struct amdgpu_device *adev)
1037fb4d8502Sjsg {
1038fb4d8502Sjsg 	int r;
1039fb4d8502Sjsg 
1040fb4d8502Sjsg 	if (adev->wb.wb_obj == NULL) {
1041fb4d8502Sjsg 		/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
1042fb4d8502Sjsg 		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
1043fb4d8502Sjsg 					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1044fb4d8502Sjsg 					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
1045fb4d8502Sjsg 					    (void **)&adev->wb.wb);
1046fb4d8502Sjsg 		if (r) {
1047fb4d8502Sjsg 			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1048fb4d8502Sjsg 			return r;
1049fb4d8502Sjsg 		}
1050fb4d8502Sjsg 
1051fb4d8502Sjsg 		adev->wb.num_wb = AMDGPU_MAX_WB;
1052fb4d8502Sjsg 		memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1053fb4d8502Sjsg 
1054fb4d8502Sjsg 		/* clear wb memory */
1055fb4d8502Sjsg 		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
1056fb4d8502Sjsg 	}
1057fb4d8502Sjsg 
1058fb4d8502Sjsg 	return 0;
1059fb4d8502Sjsg }
1060fb4d8502Sjsg 
1061fb4d8502Sjsg /**
1062fb4d8502Sjsg  * amdgpu_device_wb_get - Allocate a wb entry
1063fb4d8502Sjsg  *
1064fb4d8502Sjsg  * @adev: amdgpu_device pointer
1065fb4d8502Sjsg  * @wb: wb index
1066fb4d8502Sjsg  *
1067fb4d8502Sjsg  * Allocate a wb slot for use by the driver (all asics).
1068fb4d8502Sjsg  * Returns 0 on success or -EINVAL on failure.
1069fb4d8502Sjsg  */
1070fb4d8502Sjsg int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
1071fb4d8502Sjsg {
1072fb4d8502Sjsg 	unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
1073fb4d8502Sjsg 
1074fb4d8502Sjsg 	if (offset < adev->wb.num_wb) {
1075fb4d8502Sjsg 		__set_bit(offset, adev->wb.used);
1076fb4d8502Sjsg 		*wb = offset << 3; /* convert to dw offset */
1077fb4d8502Sjsg 		return 0;
1078fb4d8502Sjsg 	} else {
1079fb4d8502Sjsg 		return -EINVAL;
1080fb4d8502Sjsg 	}
1081fb4d8502Sjsg }
1082fb4d8502Sjsg 
1083fb4d8502Sjsg /**
1084fb4d8502Sjsg  * amdgpu_device_wb_free - Free a wb entry
1085fb4d8502Sjsg  *
1086fb4d8502Sjsg  * @adev: amdgpu_device pointer
1087fb4d8502Sjsg  * @wb: wb index
1088fb4d8502Sjsg  *
1089fb4d8502Sjsg  * Free a wb slot allocated for use by the driver (all asics)
1090fb4d8502Sjsg  */
1091fb4d8502Sjsg void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
1092fb4d8502Sjsg {
1093fb4d8502Sjsg 	wb >>= 3;
1094fb4d8502Sjsg 	if (wb < adev->wb.num_wb)
1095fb4d8502Sjsg 		__clear_bit(wb, adev->wb.used);
1096fb4d8502Sjsg }
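/*
 * Illustrative sketch (an assumed caller, not code from this file): an IP
 * block that needs a writeback slot pairs the two helpers above.  The index
 * returned by amdgpu_device_wb_get() is a dword offset into the writeback
 * page:
 *
 *	u32 wb;
 *
 *	if (amdgpu_device_wb_get(adev, &wb))
 *		return -EINVAL;
 *	// CPU view of the slot:  adev->wb.wb[wb]
 *	// GPU address of slot:   adev->wb.gpu_addr + wb * 4
 *	amdgpu_device_wb_free(adev, wb);
 */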
1097fb4d8502Sjsg 
1098fb4d8502Sjsg /**
1099fb4d8502Sjsg  * amdgpu_device_resize_fb_bar - try to resize FB BAR
1100fb4d8502Sjsg  *
1101fb4d8502Sjsg  * @adev: amdgpu_device pointer
1102fb4d8502Sjsg  *
1103fb4d8502Sjsg  * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
1104fb4d8502Sjsg  * to fail, but if any of the BARs is not accessible after the resize we abort
1105fb4d8502Sjsg  * driver loading by returning -ENODEV.
1106fb4d8502Sjsg  */
1107fb4d8502Sjsg int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1108fb4d8502Sjsg {
110950f19d19Skettenis #ifdef __linux__
11105ca02815Sjsg 	int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
1111fb4d8502Sjsg 	struct pci_bus *root;
1112fb4d8502Sjsg 	struct resource *res;
1113f005ef32Sjsg 	unsigned int i;
1114fb4d8502Sjsg 	u16 cmd;
1115fb4d8502Sjsg 	int r;
1116fb4d8502Sjsg 
11177e1de2c2Sjsg 	if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
11187e1de2c2Sjsg 		return 0;
11197e1de2c2Sjsg 
1120fb4d8502Sjsg 	/* Bypass for VF */
1121fb4d8502Sjsg 	if (amdgpu_sriov_vf(adev))
1122fb4d8502Sjsg 		return 0;
1123fb4d8502Sjsg 
1124ad8b1aafSjsg 	/* skip if the bios has already enabled large BAR */
1125ad8b1aafSjsg 	if (adev->gmc.real_vram_size &&
1126ad8b1aafSjsg 	    (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1127ad8b1aafSjsg 		return 0;
1128ad8b1aafSjsg 
1129fb4d8502Sjsg 	/* Check if the root BUS has 64bit memory resources */
1130fb4d8502Sjsg 	root = adev->pdev->bus;
1131fb4d8502Sjsg 	while (root->parent)
1132fb4d8502Sjsg 		root = root->parent;
1133fb4d8502Sjsg 
1134fb4d8502Sjsg 	pci_bus_for_each_resource(root, res, i) {
1135fb4d8502Sjsg 		if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
1136fb4d8502Sjsg 		    res->start > 0x100000000ull)
1137fb4d8502Sjsg 			break;
1138fb4d8502Sjsg 	}
1139fb4d8502Sjsg 
1140fb4d8502Sjsg 	/* Trying to resize is pointless without a root hub window above 4GB */
1141fb4d8502Sjsg 	if (!res)
1142fb4d8502Sjsg 		return 0;
1143fb4d8502Sjsg 
11445ca02815Sjsg 	/* Limit the BAR size to what is available */
11455ca02815Sjsg 	rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
11465ca02815Sjsg 			rbar_size);
11475ca02815Sjsg 
1148fb4d8502Sjsg 	/* Disable memory decoding while we change the BAR addresses and size */
1149fb4d8502Sjsg 	pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1150fb4d8502Sjsg 	pci_write_config_word(adev->pdev, PCI_COMMAND,
1151fb4d8502Sjsg 			      cmd & ~PCI_COMMAND_MEMORY);
1152fb4d8502Sjsg 
1153fb4d8502Sjsg 	/* Free the VRAM and doorbell BAR, we most likely need to move both. */
1154f005ef32Sjsg 	amdgpu_doorbell_fini(adev);
1155fb4d8502Sjsg 	if (adev->asic_type >= CHIP_BONAIRE)
1156fb4d8502Sjsg 		pci_release_resource(adev->pdev, 2);
1157fb4d8502Sjsg 
1158fb4d8502Sjsg 	pci_release_resource(adev->pdev, 0);
1159fb4d8502Sjsg 
1160fb4d8502Sjsg 	r = pci_resize_resource(adev->pdev, 0, rbar_size);
1161fb4d8502Sjsg 	if (r == -ENOSPC)
1162fb4d8502Sjsg 		DRM_INFO("Not enough PCI address space for a large BAR.");
1163fb4d8502Sjsg 	else if (r && r != -ENOTSUPP)
1164fb4d8502Sjsg 		DRM_ERROR("Problem resizing BAR0 (%d).", r);
1165fb4d8502Sjsg 
1166fb4d8502Sjsg 	pci_assign_unassigned_bus_resources(adev->pdev->bus);
1167fb4d8502Sjsg 
1168fb4d8502Sjsg 	/* When the doorbell or fb BAR isn't available we have no chance of
1169fb4d8502Sjsg 	 * using the device.
1170fb4d8502Sjsg 	 */
1171f005ef32Sjsg 	r = amdgpu_doorbell_init(adev);
1172fb4d8502Sjsg 	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1173fb4d8502Sjsg 		return -ENODEV;
1174fb4d8502Sjsg 
1175fb4d8502Sjsg 	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
117650f19d19Skettenis #endif /* __linux__ */
1177fb4d8502Sjsg 
1178fb4d8502Sjsg 	return 0;
1179fb4d8502Sjsg }
1180fb4d8502Sjsg 
1181f005ef32Sjsg static bool amdgpu_device_read_bios(struct amdgpu_device *adev)
1182f005ef32Sjsg {
1183f005ef32Sjsg 	if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU))
1184f005ef32Sjsg 		return false;
1185f005ef32Sjsg 
1186f005ef32Sjsg 	return true;
1187f005ef32Sjsg }
1188f005ef32Sjsg 
1189fb4d8502Sjsg /*
1190fb4d8502Sjsg  * GPU helpers function.
1191fb4d8502Sjsg  */
1192fb4d8502Sjsg /**
1193fb4d8502Sjsg  * amdgpu_device_need_post - check if the hw need post or not
1194fb4d8502Sjsg  *
1195fb4d8502Sjsg  * @adev: amdgpu_device pointer
1196fb4d8502Sjsg  *
1197fb4d8502Sjsg  * Check if the asic has been initialized (all asics) at driver startup
1198fb4d8502Sjsg  * or whether a post is needed because a hw reset was performed.
1199fb4d8502Sjsg  * Returns true if need or false if not.
1200fb4d8502Sjsg  */
1201fb4d8502Sjsg bool amdgpu_device_need_post(struct amdgpu_device *adev)
1202fb4d8502Sjsg {
1203fb4d8502Sjsg 	uint32_t reg;
1204fb4d8502Sjsg 
1205fb4d8502Sjsg 	if (amdgpu_sriov_vf(adev))
1206fb4d8502Sjsg 		return false;
1207fb4d8502Sjsg 
1208f005ef32Sjsg 	if (!amdgpu_device_read_bios(adev))
1209f005ef32Sjsg 		return false;
1210f005ef32Sjsg 
1211fb4d8502Sjsg 	if (amdgpu_passthrough(adev)) {
1212fb4d8502Sjsg 		/* for FIJI: in the whole GPU pass-through virtualization case, after a VM
1213fb4d8502Sjsg 		 * reboot some old smc fw still needs the driver to do a vPost, otherwise
1214fb4d8502Sjsg 		 * the gpu hangs. smc fw versions above 22.15 don't have this flaw, so we
1215fb4d8502Sjsg 		 * force vPost for smc versions below 22.15
1216fb4d8502Sjsg 		 */
1217fb4d8502Sjsg 		if (adev->asic_type == CHIP_FIJI) {
1218fb4d8502Sjsg 			int err;
1219fb4d8502Sjsg 			uint32_t fw_ver;
1220f005ef32Sjsg 
1221fb4d8502Sjsg 			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1222fb4d8502Sjsg 			/* force vPost if an error occurred */
1223fb4d8502Sjsg 			if (err)
1224fb4d8502Sjsg 				return true;
1225fb4d8502Sjsg 
1226fb4d8502Sjsg 			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
12275a307a65Sjsg 			release_firmware(adev->pm.fw);
1228fb4d8502Sjsg 			if (fw_ver < 0x00160e00)
1229fb4d8502Sjsg 				return true;
1230fb4d8502Sjsg 		}
1231fb4d8502Sjsg 	}
1232fb4d8502Sjsg 
12335ca02815Sjsg 	/* Don't post if we need to reset whole hive on init */
12345ca02815Sjsg 	if (adev->gmc.xgmi.pending_reset)
12355ca02815Sjsg 		return false;
12365ca02815Sjsg 
1237fb4d8502Sjsg 	if (adev->has_hw_reset) {
1238fb4d8502Sjsg 		adev->has_hw_reset = false;
1239fb4d8502Sjsg 		return true;
1240fb4d8502Sjsg 	}
1241fb4d8502Sjsg 
1242fb4d8502Sjsg 	/* bios scratch used on CIK+ */
1243fb4d8502Sjsg 	if (adev->asic_type >= CHIP_BONAIRE)
1244fb4d8502Sjsg 		return amdgpu_atombios_scratch_need_asic_init(adev);
1245fb4d8502Sjsg 
1246fb4d8502Sjsg 	/* check MEM_SIZE for older asics */
1247fb4d8502Sjsg 	reg = amdgpu_asic_get_config_memsize(adev);
1248fb4d8502Sjsg 
1249fb4d8502Sjsg 	if ((reg != 0) && (reg != 0xffffffff))
1250fb4d8502Sjsg 		return false;
1251fb4d8502Sjsg 
1252fb4d8502Sjsg 	return true;
1253fb4d8502Sjsg }
1254fb4d8502Sjsg 
12559da60799Sjsg /*
12569da60799Sjsg  * Intel hosts such as Raptor Lake and Sapphire Rapids don't support dynamic
12579da60799Sjsg  * speed switching. Until we have confirmation from Intel that a specific host
12589da60799Sjsg  * supports it, it's safer that we keep it disabled for all.
12599da60799Sjsg  *
12609da60799Sjsg  * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
12619da60799Sjsg  * https://gitlab.freedesktop.org/drm/amd/-/issues/2663
12629da60799Sjsg  */
12639da60799Sjsg bool amdgpu_device_pcie_dynamic_switching_supported(void)
12649da60799Sjsg {
12659da60799Sjsg #if IS_ENABLED(CONFIG_X86)
12669da60799Sjsg #ifdef __linux__
12679da60799Sjsg 	struct cpuinfo_x86 *c = &cpu_data(0);
12689da60799Sjsg 
12699da60799Sjsg 	if (c->x86_vendor == X86_VENDOR_INTEL)
12709da60799Sjsg #else
12719da60799Sjsg 	if (strcmp(cpu_vendor, "GenuineIntel") == 0)
12729da60799Sjsg #endif
12739da60799Sjsg 		return false;
12749da60799Sjsg #endif
12759da60799Sjsg 	return true;
12769da60799Sjsg }
12779da60799Sjsg 
1278a9d9cd9cSjsg /**
1279a9d9cd9cSjsg  * amdgpu_device_should_use_aspm - check if the device should program ASPM
1280a9d9cd9cSjsg  *
1281a9d9cd9cSjsg  * @adev: amdgpu_device pointer
1282a9d9cd9cSjsg  *
1283a9d9cd9cSjsg  * Confirm whether the module parameter and pcie bridge agree that ASPM should
1284a9d9cd9cSjsg  * be set for this device.
1285a9d9cd9cSjsg  *
1286a9d9cd9cSjsg  * Returns true if it should be used or false if not.
1287a9d9cd9cSjsg  */
amdgpu_device_should_use_aspm(struct amdgpu_device * adev)1288a9d9cd9cSjsg bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
1289a9d9cd9cSjsg {
1290a9d9cd9cSjsg 	switch (amdgpu_aspm) {
1291a9d9cd9cSjsg 	case -1:
1292a9d9cd9cSjsg 		break;
1293a9d9cd9cSjsg 	case 0:
1294a9d9cd9cSjsg 		return false;
1295a9d9cd9cSjsg 	case 1:
1296a9d9cd9cSjsg 		return true;
1297a9d9cd9cSjsg 	default:
1298a9d9cd9cSjsg 		return false;
1299a9d9cd9cSjsg 	}
1300a9d9cd9cSjsg 	return pcie_aspm_enabled(adev->pdev);
1301a9d9cd9cSjsg }
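
/*
 * Illustrative mapping of the amdgpu_aspm module parameter handled above
 * (sketch only): -1 defers to whatever the PCIe bridge/device already has
 * enabled, 0 forces ASPM off and 1 forces it on.  A hypothetical caller in
 * an IP's init path could gate its register programming with
 *
 *	if (amdgpu_device_should_use_aspm(adev))
 *		program_aspm(adev);	// hypothetical helper
 */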
1302a9d9cd9cSjsg 
amdgpu_device_aspm_support_quirk(void)1303e73b7337Sjsg bool amdgpu_device_aspm_support_quirk(void)
1304e73b7337Sjsg {
1305e73b7337Sjsg #if IS_ENABLED(CONFIG_X86)
1306e73b7337Sjsg 	struct cpu_info *ci = curcpu();
1307e73b7337Sjsg 
1308e73b7337Sjsg 	return !(ci->ci_family == 6 && ci->ci_model == 0x97);
1309e73b7337Sjsg #else
1310e73b7337Sjsg 	return true;
1311e73b7337Sjsg #endif
1312e73b7337Sjsg }
1313e73b7337Sjsg 
1314fb4d8502Sjsg /* if we get transitioned to only one device, take VGA back */
1315fb4d8502Sjsg /**
1316fb4d8502Sjsg  * amdgpu_device_vga_set_decode - enable/disable vga decode
1317fb4d8502Sjsg  *
13185ca02815Sjsg  * @pdev: PCI device pointer
1319fb4d8502Sjsg  * @state: enable/disable vga decode
1320fb4d8502Sjsg  *
1321fb4d8502Sjsg  * Enable/disable vga decode (all asics).
1322fb4d8502Sjsg  * Returns VGA resource flags.
1323fb4d8502Sjsg  */
1324fb4d8502Sjsg #ifdef notyet
amdgpu_device_vga_set_decode(struct pci_dev * pdev,bool state)13255ca02815Sjsg static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
13265ca02815Sjsg 		bool state)
1327fb4d8502Sjsg {
13285ca02815Sjsg 	struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
1329f005ef32Sjsg 
1330fb4d8502Sjsg 	amdgpu_asic_set_vga_state(adev, state);
1331fb4d8502Sjsg 	if (state)
1332fb4d8502Sjsg 		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1333fb4d8502Sjsg 		       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1334fb4d8502Sjsg 	else
1335fb4d8502Sjsg 		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1336fb4d8502Sjsg }
1337fb4d8502Sjsg #endif
1338fb4d8502Sjsg 
1339fb4d8502Sjsg /**
1340fb4d8502Sjsg  * amdgpu_device_check_block_size - validate the vm block size
1341fb4d8502Sjsg  *
1342fb4d8502Sjsg  * @adev: amdgpu_device pointer
1343fb4d8502Sjsg  *
1344fb4d8502Sjsg  * Validates the vm block size specified via module parameter.
1345fb4d8502Sjsg  * The vm block size defines number of bits in page table versus page directory,
1346fb4d8502Sjsg  * The vm block size defines the number of bits in the page table versus the
1347fb4d8502Sjsg  * page directory.  A page is 4KB, so we have a 12-bit offset, a minimum of
1348fb4d8502Sjsg  * 9 bits in the page table, and the remaining bits in the page directory.
amdgpu_device_check_block_size(struct amdgpu_device * adev)1349fb4d8502Sjsg static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
1350fb4d8502Sjsg {
1351fb4d8502Sjsg 	/* defines the number of bits in the page table versus the page directory;
1352fb4d8502Sjsg 	 * a page is 4KB so we have a 12-bit offset, a minimum of 9 bits in the
1353f005ef32Sjsg 	 * page table and the remaining bits in the page directory
1354f005ef32Sjsg 	 */
1355fb4d8502Sjsg 	if (amdgpu_vm_block_size == -1)
1356fb4d8502Sjsg 		return;
1357fb4d8502Sjsg 
1358fb4d8502Sjsg 	if (amdgpu_vm_block_size < 9) {
1359fb4d8502Sjsg 		dev_warn(adev->dev, "VM page table size (%d) too small\n",
1360fb4d8502Sjsg 			 amdgpu_vm_block_size);
1361fb4d8502Sjsg 		amdgpu_vm_block_size = -1;
1362fb4d8502Sjsg 	}
1363fb4d8502Sjsg }
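
/*
 * Worked example (illustration only): with 4KB pages the low 12 bits are the
 * in-page offset.  amdgpu_vm_block_size = 9 then gives 2^9 = 512 PTEs per
 * page table, so each page table covers 512 * 4KB = 2MB of address space and
 * the remaining virtual address bits index the page directory.
 */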
1364fb4d8502Sjsg 
1365fb4d8502Sjsg /**
1366fb4d8502Sjsg  * amdgpu_device_check_vm_size - validate the vm size
1367fb4d8502Sjsg  *
1368fb4d8502Sjsg  * @adev: amdgpu_device pointer
1369fb4d8502Sjsg  *
1370fb4d8502Sjsg  * Validates the vm size in GB specified via module parameter.
1371fb4d8502Sjsg  * The VM size is the size of the GPU virtual memory space in GB.
1372fb4d8502Sjsg  */
amdgpu_device_check_vm_size(struct amdgpu_device * adev)1373fb4d8502Sjsg static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
1374fb4d8502Sjsg {
1375fb4d8502Sjsg 	/* no need to check the default value */
1376fb4d8502Sjsg 	if (amdgpu_vm_size == -1)
1377fb4d8502Sjsg 		return;
1378fb4d8502Sjsg 
1379fb4d8502Sjsg 	if (amdgpu_vm_size < 1) {
1380fb4d8502Sjsg 		dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1381fb4d8502Sjsg 			 amdgpu_vm_size);
1382fb4d8502Sjsg 		amdgpu_vm_size = -1;
1383fb4d8502Sjsg 	}
1384fb4d8502Sjsg }
1385fb4d8502Sjsg 
amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device * adev)1386fb4d8502Sjsg static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1387fb4d8502Sjsg {
1388fb4d8502Sjsg #ifdef __linux__
1389fb4d8502Sjsg 	struct sysinfo si;
1390fb4d8502Sjsg #endif
1391c349dbc7Sjsg 	bool is_os_64 = (sizeof(void *) == 8);
1392fb4d8502Sjsg 	uint64_t total_memory;
1393fb4d8502Sjsg 	uint64_t dram_size_seven_GB = 0x1B8000000;
1394fb4d8502Sjsg 	uint64_t dram_size_three_GB = 0xB8000000;
1395fb4d8502Sjsg 
1396fb4d8502Sjsg 	if (amdgpu_smu_memory_pool_size == 0)
1397fb4d8502Sjsg 		return;
1398fb4d8502Sjsg 
1399fb4d8502Sjsg 	if (!is_os_64) {
1400fb4d8502Sjsg 		DRM_WARN("Not 64-bit OS, feature not supported\n");
1401fb4d8502Sjsg 		goto def_value;
1402fb4d8502Sjsg 	}
1403fb4d8502Sjsg #ifdef __linux__
1404fb4d8502Sjsg 	si_meminfo(&si);
1405fb4d8502Sjsg 	total_memory = (uint64_t)si.totalram * si.mem_unit;
1406fb4d8502Sjsg #else
1407fb4d8502Sjsg 	total_memory = ptoa(physmem);
1408fb4d8502Sjsg #endif
1409fb4d8502Sjsg 
1410fb4d8502Sjsg 	if ((amdgpu_smu_memory_pool_size == 1) ||
1411fb4d8502Sjsg 		(amdgpu_smu_memory_pool_size == 2)) {
1412fb4d8502Sjsg 		if (total_memory < dram_size_three_GB)
1413fb4d8502Sjsg 			goto def_value1;
1414fb4d8502Sjsg 	} else if ((amdgpu_smu_memory_pool_size == 4) ||
1415fb4d8502Sjsg 		(amdgpu_smu_memory_pool_size == 8)) {
1416fb4d8502Sjsg 		if (total_memory < dram_size_seven_GB)
1417fb4d8502Sjsg 			goto def_value1;
1418fb4d8502Sjsg 	} else {
1419fb4d8502Sjsg 		DRM_WARN("Smu memory pool size not supported\n");
1420fb4d8502Sjsg 		goto def_value;
1421fb4d8502Sjsg 	}
1422fb4d8502Sjsg 	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1423fb4d8502Sjsg 
1424fb4d8502Sjsg 	return;
1425fb4d8502Sjsg 
1426fb4d8502Sjsg def_value1:
1427fb4d8502Sjsg 	DRM_WARN("Not enough system memory\n");
1428fb4d8502Sjsg def_value:
1429fb4d8502Sjsg 	adev->pm.smu_prv_buffer_size = 0;
1430fb4d8502Sjsg }
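
/*
 * Worked example (illustration only): the accepted pool sizes are in units
 * of 256MB (1 << 28 bytes).  amdgpu_smu_memory_pool_size = 2 therefore
 * reserves 512MB and requires at least ~3GB of system memory, while a value
 * of 8 reserves 2GB and requires at least ~7GB.
 */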
1431fb4d8502Sjsg 
amdgpu_device_init_apu_flags(struct amdgpu_device * adev)14325ca02815Sjsg static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
14335ca02815Sjsg {
14345ca02815Sjsg 	if (!(adev->flags & AMD_IS_APU) ||
14355ca02815Sjsg 	    adev->asic_type < CHIP_RAVEN)
14365ca02815Sjsg 		return 0;
14375ca02815Sjsg 
14385ca02815Sjsg 	switch (adev->asic_type) {
14395ca02815Sjsg 	case CHIP_RAVEN:
14405ca02815Sjsg 		if (adev->pdev->device == 0x15dd)
14415ca02815Sjsg 			adev->apu_flags |= AMD_APU_IS_RAVEN;
14425ca02815Sjsg 		if (adev->pdev->device == 0x15d8)
14435ca02815Sjsg 			adev->apu_flags |= AMD_APU_IS_PICASSO;
14445ca02815Sjsg 		break;
14455ca02815Sjsg 	case CHIP_RENOIR:
14465ca02815Sjsg 		if ((adev->pdev->device == 0x1636) ||
14475ca02815Sjsg 		    (adev->pdev->device == 0x164c))
14485ca02815Sjsg 			adev->apu_flags |= AMD_APU_IS_RENOIR;
14495ca02815Sjsg 		else
14505ca02815Sjsg 			adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
14515ca02815Sjsg 		break;
14525ca02815Sjsg 	case CHIP_VANGOGH:
14535ca02815Sjsg 		adev->apu_flags |= AMD_APU_IS_VANGOGH;
14545ca02815Sjsg 		break;
14555ca02815Sjsg 	case CHIP_YELLOW_CARP:
14565ca02815Sjsg 		break;
14575ca02815Sjsg 	case CHIP_CYAN_SKILLFISH:
14581bb76ff1Sjsg 		if ((adev->pdev->device == 0x13FE) ||
14591bb76ff1Sjsg 		    (adev->pdev->device == 0x143F))
14605ca02815Sjsg 			adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
14615ca02815Sjsg 		break;
14625ca02815Sjsg 	default:
14631bb76ff1Sjsg 		break;
14645ca02815Sjsg 	}
14655ca02815Sjsg 
14665ca02815Sjsg 	return 0;
14675ca02815Sjsg }
14685ca02815Sjsg 
1469fb4d8502Sjsg /**
1470fb4d8502Sjsg  * amdgpu_device_check_arguments - validate module params
1471fb4d8502Sjsg  *
1472fb4d8502Sjsg  * @adev: amdgpu_device pointer
1473fb4d8502Sjsg  *
1474fb4d8502Sjsg  * Validates certain module parameters and updates
1475fb4d8502Sjsg  * the associated values used by the driver (all asics).
1476fb4d8502Sjsg  */
amdgpu_device_check_arguments(struct amdgpu_device * adev)1477c349dbc7Sjsg static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
1478fb4d8502Sjsg {
1479fb4d8502Sjsg 	if (amdgpu_sched_jobs < 4) {
1480fb4d8502Sjsg 		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1481fb4d8502Sjsg 			 amdgpu_sched_jobs);
1482fb4d8502Sjsg 		amdgpu_sched_jobs = 4;
1483fb4d8502Sjsg 	} else if (!is_power_of_2(amdgpu_sched_jobs)) {
1484fb4d8502Sjsg 		dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1485fb4d8502Sjsg 			 amdgpu_sched_jobs);
1486fb4d8502Sjsg 		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1487fb4d8502Sjsg 	}
1488fb4d8502Sjsg 
1489fb4d8502Sjsg 	if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
1490fb4d8502Sjsg 		/* gart size must be greater or equal to 32M */
1491fb4d8502Sjsg 		dev_warn(adev->dev, "gart size (%d) too small\n",
1492fb4d8502Sjsg 			 amdgpu_gart_size);
1493fb4d8502Sjsg 		amdgpu_gart_size = -1;
1494fb4d8502Sjsg 	}
1495fb4d8502Sjsg 
1496fb4d8502Sjsg 	if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
1497fb4d8502Sjsg 		/* gtt size must be greater or equal to 32M */
1498fb4d8502Sjsg 		dev_warn(adev->dev, "gtt size (%d) too small\n",
1499fb4d8502Sjsg 				 amdgpu_gtt_size);
1500fb4d8502Sjsg 		amdgpu_gtt_size = -1;
1501fb4d8502Sjsg 	}
1502fb4d8502Sjsg 
1503fb4d8502Sjsg 	/* valid range is between 4 and 9 inclusive */
1504fb4d8502Sjsg 	if (amdgpu_vm_fragment_size != -1 &&
1505fb4d8502Sjsg 	    (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1506fb4d8502Sjsg 		dev_warn(adev->dev, "valid range is between 4 and 9\n");
1507fb4d8502Sjsg 		amdgpu_vm_fragment_size = -1;
1508fb4d8502Sjsg 	}
1509fb4d8502Sjsg 
1510ad8b1aafSjsg 	if (amdgpu_sched_hw_submission < 2) {
1511ad8b1aafSjsg 		dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1512ad8b1aafSjsg 			 amdgpu_sched_hw_submission);
1513ad8b1aafSjsg 		amdgpu_sched_hw_submission = 2;
1514ad8b1aafSjsg 	} else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1515ad8b1aafSjsg 		dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1516ad8b1aafSjsg 			 amdgpu_sched_hw_submission);
1517ad8b1aafSjsg 		amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1518ad8b1aafSjsg 	}
1519ad8b1aafSjsg 
15201bb76ff1Sjsg 	if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
15211bb76ff1Sjsg 		dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
15221bb76ff1Sjsg 		amdgpu_reset_method = -1;
15231bb76ff1Sjsg 	}
15241bb76ff1Sjsg 
1525fb4d8502Sjsg 	amdgpu_device_check_smu_prv_buffer_size(adev);
1526fb4d8502Sjsg 
1527fb4d8502Sjsg 	amdgpu_device_check_vm_size(adev);
1528fb4d8502Sjsg 
1529fb4d8502Sjsg 	amdgpu_device_check_block_size(adev);
1530fb4d8502Sjsg 
1531fb4d8502Sjsg 	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
1532c349dbc7Sjsg 
1533c349dbc7Sjsg 	return 0;
1534fb4d8502Sjsg }
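
/*
 * Example of the rounding behaviour above (illustration only): loading the
 * driver with amdgpu.sched_jobs=6 warns and rounds the value up to 8, while
 * amdgpu.sched_jobs=3 is raised to the minimum of 4.
 */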
1535fb4d8502Sjsg 
1536fb4d8502Sjsg #ifdef __linux__
1537fb4d8502Sjsg /**
1538fb4d8502Sjsg  * amdgpu_switcheroo_set_state - set switcheroo state
1539fb4d8502Sjsg  *
1540fb4d8502Sjsg  * @pdev: pci dev pointer
1541fb4d8502Sjsg  * @state: vga_switcheroo state
1542fb4d8502Sjsg  *
1543f005ef32Sjsg  * Callback for the switcheroo driver.  Suspends or resumes
1544fb4d8502Sjsg  * the asic before or after it is powered up using ACPI methods.
1545fb4d8502Sjsg  */
amdgpu_switcheroo_set_state(struct pci_dev * pdev,enum vga_switcheroo_state state)1546ad8b1aafSjsg static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
1547ad8b1aafSjsg 					enum vga_switcheroo_state state)
1548fb4d8502Sjsg {
1549fb4d8502Sjsg 	struct drm_device *dev = pci_get_drvdata(pdev);
1550c349dbc7Sjsg 	int r;
1551fb4d8502Sjsg 
15525ca02815Sjsg 	if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
1553fb4d8502Sjsg 		return;
1554fb4d8502Sjsg 
1555fb4d8502Sjsg 	if (state == VGA_SWITCHEROO_ON) {
1556ad8b1aafSjsg 		pr_info("switched on\n");
1557fb4d8502Sjsg 		/* don't suspend or resume card normally */
1558fb4d8502Sjsg 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1559fb4d8502Sjsg 
15605ca02815Sjsg 		pci_set_power_state(pdev, PCI_D0);
15615ca02815Sjsg 		amdgpu_device_load_pci_state(pdev);
15625ca02815Sjsg 		r = pci_enable_device(pdev);
1563c349dbc7Sjsg 		if (r)
1564c349dbc7Sjsg 			DRM_WARN("pci_enable_device failed (%d)\n", r);
1565c349dbc7Sjsg 		amdgpu_device_resume(dev, true);
1566fb4d8502Sjsg 
1567fb4d8502Sjsg 		dev->switch_power_state = DRM_SWITCH_POWER_ON;
1568fb4d8502Sjsg 	} else {
1569ad8b1aafSjsg 		pr_info("switched off\n");
1570fb4d8502Sjsg 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
157136668b15Sjsg 		amdgpu_device_prepare(dev);
1572c349dbc7Sjsg 		amdgpu_device_suspend(dev, true);
15735ca02815Sjsg 		amdgpu_device_cache_pci_state(pdev);
1574c349dbc7Sjsg 		/* Shut down the device */
15755ca02815Sjsg 		pci_disable_device(pdev);
15765ca02815Sjsg 		pci_set_power_state(pdev, PCI_D3cold);
1577fb4d8502Sjsg 		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1578fb4d8502Sjsg 	}
1579fb4d8502Sjsg }
1580fb4d8502Sjsg 
1581fb4d8502Sjsg /**
1582fb4d8502Sjsg  * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1583fb4d8502Sjsg  *
1584fb4d8502Sjsg  * @pdev: pci dev pointer
1585fb4d8502Sjsg  *
1586fb4d8502Sjsg  * Callback for the switcheroo driver.  Check of the switcheroo
1587fb4d8502Sjsg  * Callback for the switcheroo driver.  Check if the switcheroo
1588fb4d8502Sjsg  * Returns true if the state can be changed, false if not.
1589fb4d8502Sjsg  */
amdgpu_switcheroo_can_switch(struct pci_dev * pdev)1590fb4d8502Sjsg static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1591fb4d8502Sjsg {
1592fb4d8502Sjsg 	struct drm_device *dev = pci_get_drvdata(pdev);
1593fb4d8502Sjsg 
1594fb4d8502Sjsg        /*
1595fb4d8502Sjsg 	* FIXME: open_count is protected by drm_global_mutex but that would lead to
1596fb4d8502Sjsg 	* locking inversion with the driver load path. And the access here is
1597fb4d8502Sjsg 	* completely racy anyway. So don't bother with locking for now.
1598fb4d8502Sjsg 	*/
1599c349dbc7Sjsg 	return atomic_read(&dev->open_count) == 0;
1600fb4d8502Sjsg }
160149261a46Sjsg #endif /* __linux__ */
1602fb4d8502Sjsg 
1603fb4d8502Sjsg static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
160449261a46Sjsg #ifdef notyet
1605fb4d8502Sjsg 	.set_gpu_state = amdgpu_switcheroo_set_state,
1606fb4d8502Sjsg 	.reprobe = NULL,
1607fb4d8502Sjsg 	.can_switch = amdgpu_switcheroo_can_switch,
160849261a46Sjsg #endif
1609fb4d8502Sjsg };
1610fb4d8502Sjsg 
1611fb4d8502Sjsg /**
1612fb4d8502Sjsg  * amdgpu_device_ip_set_clockgating_state - set the CG state
1613fb4d8502Sjsg  *
1614fb4d8502Sjsg  * @dev: amdgpu_device pointer
1615fb4d8502Sjsg  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1616fb4d8502Sjsg  * @state: clockgating state (gate or ungate)
1617fb4d8502Sjsg  *
1618fb4d8502Sjsg  * Sets the requested clockgating state for all instances of
1619fb4d8502Sjsg  * the hardware IP specified.
1620fb4d8502Sjsg  * Returns the error code from the last instance.
1621fb4d8502Sjsg  */
amdgpu_device_ip_set_clockgating_state(void * dev,enum amd_ip_block_type block_type,enum amd_clockgating_state state)1622fb4d8502Sjsg int amdgpu_device_ip_set_clockgating_state(void *dev,
1623fb4d8502Sjsg 					   enum amd_ip_block_type block_type,
1624fb4d8502Sjsg 					   enum amd_clockgating_state state)
1625fb4d8502Sjsg {
1626fb4d8502Sjsg 	struct amdgpu_device *adev = dev;
1627fb4d8502Sjsg 	int i, r = 0;
1628fb4d8502Sjsg 
1629fb4d8502Sjsg 	for (i = 0; i < adev->num_ip_blocks; i++) {
1630fb4d8502Sjsg 		if (!adev->ip_blocks[i].status.valid)
1631fb4d8502Sjsg 			continue;
1632fb4d8502Sjsg 		if (adev->ip_blocks[i].version->type != block_type)
1633fb4d8502Sjsg 			continue;
1634fb4d8502Sjsg 		if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1635fb4d8502Sjsg 			continue;
1636fb4d8502Sjsg 		r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1637fb4d8502Sjsg 			(void *)adev, state);
1638fb4d8502Sjsg 		if (r)
1639fb4d8502Sjsg 			DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1640fb4d8502Sjsg 				  adev->ip_blocks[i].version->funcs->name, r);
1641fb4d8502Sjsg 	}
1642fb4d8502Sjsg 	return r;
1643fb4d8502Sjsg }
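
/*
 * Hypothetical usage sketch: an asic init path that wants to gate clocks on
 * the GFX block could call
 *
 *	amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
 *					       AMD_CG_STATE_GATE);
 *
 * (enum values from amd_shared.h; the ungate case uses AMD_CG_STATE_UNGATE).
 */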
1644fb4d8502Sjsg 
1645fb4d8502Sjsg /**
1646fb4d8502Sjsg  * amdgpu_device_ip_set_powergating_state - set the PG state
1647fb4d8502Sjsg  *
1648fb4d8502Sjsg  * @dev: amdgpu_device pointer
1649fb4d8502Sjsg  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1650fb4d8502Sjsg  * @state: powergating state (gate or ungate)
1651fb4d8502Sjsg  *
1652fb4d8502Sjsg  * Sets the requested powergating state for all instances of
1653fb4d8502Sjsg  * the hardware IP specified.
1654fb4d8502Sjsg  * Returns the error code from the last instance.
1655fb4d8502Sjsg  */
amdgpu_device_ip_set_powergating_state(void * dev,enum amd_ip_block_type block_type,enum amd_powergating_state state)1656fb4d8502Sjsg int amdgpu_device_ip_set_powergating_state(void *dev,
1657fb4d8502Sjsg 					   enum amd_ip_block_type block_type,
1658fb4d8502Sjsg 					   enum amd_powergating_state state)
1659fb4d8502Sjsg {
1660fb4d8502Sjsg 	struct amdgpu_device *adev = dev;
1661fb4d8502Sjsg 	int i, r = 0;
1662fb4d8502Sjsg 
1663fb4d8502Sjsg 	for (i = 0; i < adev->num_ip_blocks; i++) {
1664fb4d8502Sjsg 		if (!adev->ip_blocks[i].status.valid)
1665fb4d8502Sjsg 			continue;
1666fb4d8502Sjsg 		if (adev->ip_blocks[i].version->type != block_type)
1667fb4d8502Sjsg 			continue;
1668fb4d8502Sjsg 		if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1669fb4d8502Sjsg 			continue;
1670fb4d8502Sjsg 		r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1671fb4d8502Sjsg 			(void *)adev, state);
1672fb4d8502Sjsg 		if (r)
1673fb4d8502Sjsg 			DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1674fb4d8502Sjsg 				  adev->ip_blocks[i].version->funcs->name, r);
1675fb4d8502Sjsg 	}
1676fb4d8502Sjsg 	return r;
1677fb4d8502Sjsg }
1678fb4d8502Sjsg 
1679fb4d8502Sjsg /**
1680fb4d8502Sjsg  * amdgpu_device_ip_get_clockgating_state - get the CG state
1681fb4d8502Sjsg  *
1682fb4d8502Sjsg  * @adev: amdgpu_device pointer
1683fb4d8502Sjsg  * @flags: clockgating feature flags
1684fb4d8502Sjsg  *
1685fb4d8502Sjsg  * Walks the list of IPs on the device and updates the clockgating
1686fb4d8502Sjsg  * flags for each IP.
1687fb4d8502Sjsg  * Updates @flags with the feature flags for each hardware IP where
1688fb4d8502Sjsg  * clockgating is enabled.
1689fb4d8502Sjsg  */
amdgpu_device_ip_get_clockgating_state(struct amdgpu_device * adev,u64 * flags)1690fb4d8502Sjsg void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
16911bb76ff1Sjsg 					    u64 *flags)
1692fb4d8502Sjsg {
1693fb4d8502Sjsg 	int i;
1694fb4d8502Sjsg 
1695fb4d8502Sjsg 	for (i = 0; i < adev->num_ip_blocks; i++) {
1696fb4d8502Sjsg 		if (!adev->ip_blocks[i].status.valid)
1697fb4d8502Sjsg 			continue;
1698fb4d8502Sjsg 		if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1699fb4d8502Sjsg 			adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1700fb4d8502Sjsg 	}
1701fb4d8502Sjsg }
1702fb4d8502Sjsg 
1703fb4d8502Sjsg /**
1704fb4d8502Sjsg  * amdgpu_device_ip_wait_for_idle - wait for idle
1705fb4d8502Sjsg  *
1706fb4d8502Sjsg  * @adev: amdgpu_device pointer
1707fb4d8502Sjsg  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1708fb4d8502Sjsg  *
1709fb4d8502Sjsg  * Waits for the requested hardware IP to be idle.
1710fb4d8502Sjsg  * Returns 0 for success or a negative error code on failure.
1711fb4d8502Sjsg  */
amdgpu_device_ip_wait_for_idle(struct amdgpu_device * adev,enum amd_ip_block_type block_type)1712fb4d8502Sjsg int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1713fb4d8502Sjsg 				   enum amd_ip_block_type block_type)
1714fb4d8502Sjsg {
1715fb4d8502Sjsg 	int i, r;
1716fb4d8502Sjsg 
1717fb4d8502Sjsg 	for (i = 0; i < adev->num_ip_blocks; i++) {
1718fb4d8502Sjsg 		if (!adev->ip_blocks[i].status.valid)
1719fb4d8502Sjsg 			continue;
1720fb4d8502Sjsg 		if (adev->ip_blocks[i].version->type == block_type) {
1721fb4d8502Sjsg 			r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
1722fb4d8502Sjsg 			if (r)
1723fb4d8502Sjsg 				return r;
1724fb4d8502Sjsg 			break;
1725fb4d8502Sjsg 		}
1726fb4d8502Sjsg 	}
1727fb4d8502Sjsg 	return 0;
1728fb4d8502Sjsg 
1729fb4d8502Sjsg }
1730fb4d8502Sjsg 
1731fb4d8502Sjsg /**
1732fb4d8502Sjsg  * amdgpu_device_ip_is_idle - is the hardware IP idle
1733fb4d8502Sjsg  *
1734fb4d8502Sjsg  * @adev: amdgpu_device pointer
1735fb4d8502Sjsg  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1736fb4d8502Sjsg  *
1737fb4d8502Sjsg  * Check if the hardware IP is idle or not.
1738fb4d8502Sjsg  * Returns true if the IP is idle, false if not.
1739fb4d8502Sjsg  */
amdgpu_device_ip_is_idle(struct amdgpu_device * adev,enum amd_ip_block_type block_type)1740fb4d8502Sjsg bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1741fb4d8502Sjsg 			      enum amd_ip_block_type block_type)
1742fb4d8502Sjsg {
1743fb4d8502Sjsg 	int i;
1744fb4d8502Sjsg 
1745fb4d8502Sjsg 	for (i = 0; i < adev->num_ip_blocks; i++) {
1746fb4d8502Sjsg 		if (!adev->ip_blocks[i].status.valid)
1747fb4d8502Sjsg 			continue;
1748fb4d8502Sjsg 		if (adev->ip_blocks[i].version->type == block_type)
1749fb4d8502Sjsg 			return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
1750fb4d8502Sjsg 	}
1751fb4d8502Sjsg 	return true;
1752fb4d8502Sjsg 
1753fb4d8502Sjsg }
1754fb4d8502Sjsg 
1755fb4d8502Sjsg /**
1756fb4d8502Sjsg  * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1757fb4d8502Sjsg  *
1758fb4d8502Sjsg  * @adev: amdgpu_device pointer
1759fb4d8502Sjsg  * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
1760fb4d8502Sjsg  *
1761fb4d8502Sjsg  * Returns a pointer to the hardware IP block structure
1762fb4d8502Sjsg  * if it exists for the asic, otherwise NULL.
1763fb4d8502Sjsg  */
1764fb4d8502Sjsg struct amdgpu_ip_block *
amdgpu_device_ip_get_ip_block(struct amdgpu_device * adev,enum amd_ip_block_type type)1765fb4d8502Sjsg amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1766fb4d8502Sjsg 			      enum amd_ip_block_type type)
1767fb4d8502Sjsg {
1768fb4d8502Sjsg 	int i;
1769fb4d8502Sjsg 
1770fb4d8502Sjsg 	for (i = 0; i < adev->num_ip_blocks; i++)
1771fb4d8502Sjsg 		if (adev->ip_blocks[i].version->type == type)
1772fb4d8502Sjsg 			return &adev->ip_blocks[i];
1773fb4d8502Sjsg 
1774fb4d8502Sjsg 	return NULL;
1775fb4d8502Sjsg }
1776fb4d8502Sjsg 
1777fb4d8502Sjsg /**
1778fb4d8502Sjsg  * amdgpu_device_ip_block_version_cmp
1779fb4d8502Sjsg  *
1780fb4d8502Sjsg  * @adev: amdgpu_device pointer
1781fb4d8502Sjsg  * @type: enum amd_ip_block_type
1782fb4d8502Sjsg  * @major: major version
1783fb4d8502Sjsg  * @minor: minor version
1784fb4d8502Sjsg  *
1785fb4d8502Sjsg  * Return 0 if the IP block version is equal to or greater than the one requested,
1786fb4d8502Sjsg  * 1 if it is smaller or the ip_block doesn't exist.
1787fb4d8502Sjsg  */
amdgpu_device_ip_block_version_cmp(struct amdgpu_device * adev,enum amd_ip_block_type type,u32 major,u32 minor)1788fb4d8502Sjsg int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1789fb4d8502Sjsg 				       enum amd_ip_block_type type,
1790fb4d8502Sjsg 				       u32 major, u32 minor)
1791fb4d8502Sjsg {
1792fb4d8502Sjsg 	struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
1793fb4d8502Sjsg 
1794fb4d8502Sjsg 	if (ip_block && ((ip_block->version->major > major) ||
1795fb4d8502Sjsg 			((ip_block->version->major == major) &&
1796fb4d8502Sjsg 			(ip_block->version->minor >= minor))))
1797fb4d8502Sjsg 		return 0;
1798fb4d8502Sjsg 
1799fb4d8502Sjsg 	return 1;
1800fb4d8502Sjsg }
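
/*
 * Usage sketch (illustration only): to check for GMC v8.1 or newer a caller
 * could write
 *
 *	if (amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_GMC,
 *					       8, 1) == 0) {
 *		// GMC 8.1+ is present
 *	}
 */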
1801fb4d8502Sjsg 
1802fb4d8502Sjsg /**
1803fb4d8502Sjsg  * amdgpu_device_ip_block_add
1804fb4d8502Sjsg  *
1805fb4d8502Sjsg  * @adev: amdgpu_device pointer
1806fb4d8502Sjsg  * @ip_block_version: pointer to the IP to add
1807fb4d8502Sjsg  *
1808fb4d8502Sjsg  * Adds the IP block driver information to the collection of IPs
1809fb4d8502Sjsg  * on the asic.
1810fb4d8502Sjsg  */
amdgpu_device_ip_block_add(struct amdgpu_device * adev,const struct amdgpu_ip_block_version * ip_block_version)1811fb4d8502Sjsg int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1812fb4d8502Sjsg 			       const struct amdgpu_ip_block_version *ip_block_version)
1813fb4d8502Sjsg {
1814fb4d8502Sjsg 	if (!ip_block_version)
1815fb4d8502Sjsg 		return -EINVAL;
1816fb4d8502Sjsg 
18175ca02815Sjsg 	switch (ip_block_version->type) {
18185ca02815Sjsg 	case AMD_IP_BLOCK_TYPE_VCN:
18195ca02815Sjsg 		if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
18205ca02815Sjsg 			return 0;
18215ca02815Sjsg 		break;
18225ca02815Sjsg 	case AMD_IP_BLOCK_TYPE_JPEG:
18235ca02815Sjsg 		if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
18245ca02815Sjsg 			return 0;
18255ca02815Sjsg 		break;
18265ca02815Sjsg 	default:
18275ca02815Sjsg 		break;
18285ca02815Sjsg 	}
18295ca02815Sjsg 
1830fb4d8502Sjsg 	DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
1831fb4d8502Sjsg 		  ip_block_version->funcs->name);
1832fb4d8502Sjsg 
1833fb4d8502Sjsg 	adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1834fb4d8502Sjsg 
1835fb4d8502Sjsg 	return 0;
1836fb4d8502Sjsg }
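
/*
 * Usage sketch (illustration only): asic setup code registers its IPs in
 * initialization order, e.g. something along the lines of
 *
 *	amdgpu_device_ip_block_add(adev, &vi_common_ip_block);
 *	amdgpu_device_ip_block_add(adev, &gmc_v8_0_ip_block);
 *
 * where the ip_block structures are provided by the per-IP drivers.
 */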
1837fb4d8502Sjsg 
1838fb4d8502Sjsg /**
1839fb4d8502Sjsg  * amdgpu_device_enable_virtual_display - enable virtual display feature
1840fb4d8502Sjsg  *
1841fb4d8502Sjsg  * @adev: amdgpu_device pointer
1842fb4d8502Sjsg  *
1843fb4d8502Sjsg  * Enables the virtual display feature if the user has enabled it via
1844fb4d8502Sjsg  * the module parameter virtual_display.  This feature provides virtual
1845fb4d8502Sjsg  * display hardware on headless boards or in virtualized environments.
1846fb4d8502Sjsg  * This function parses and validates the configuration string specified by
1847fb4d8502Sjsg  * the user and configures the virtual display configuration (number of
1848fb4d8502Sjsg  * virtual connectors, crtcs, etc.) specified.
1849fb4d8502Sjsg  */
amdgpu_device_enable_virtual_display(struct amdgpu_device * adev)1850fb4d8502Sjsg static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
1851fb4d8502Sjsg {
1852fb4d8502Sjsg 	adev->enable_virtual_display = false;
1853fb4d8502Sjsg 
1854fb4d8502Sjsg #ifdef notyet
1855fb4d8502Sjsg 	if (amdgpu_virtual_display) {
18565ca02815Sjsg 		const char *pci_address_name = pci_name(adev->pdev);
1857fb4d8502Sjsg 		char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
1858fb4d8502Sjsg 
1859fb4d8502Sjsg 		pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1860fb4d8502Sjsg 		pciaddstr_tmp = pciaddstr;
1861fb4d8502Sjsg 		while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1862fb4d8502Sjsg 			pciaddname = strsep(&pciaddname_tmp, ",");
1863fb4d8502Sjsg 			if (!strcmp("all", pciaddname)
1864fb4d8502Sjsg 			    || !strcmp(pci_address_name, pciaddname)) {
1865fb4d8502Sjsg 				long num_crtc;
1866fb4d8502Sjsg 				int res = -1;
1867fb4d8502Sjsg 
1868fb4d8502Sjsg 				adev->enable_virtual_display = true;
1869fb4d8502Sjsg 
1870fb4d8502Sjsg 				if (pciaddname_tmp)
1871fb4d8502Sjsg 					res = kstrtol(pciaddname_tmp, 10,
1872fb4d8502Sjsg 						      &num_crtc);
1873fb4d8502Sjsg 
1874fb4d8502Sjsg 				if (!res) {
1875fb4d8502Sjsg 					if (num_crtc < 1)
1876fb4d8502Sjsg 						num_crtc = 1;
1877fb4d8502Sjsg 					if (num_crtc > 6)
1878fb4d8502Sjsg 						num_crtc = 6;
1879fb4d8502Sjsg 					adev->mode_info.num_crtc = num_crtc;
1880fb4d8502Sjsg 				} else {
1881fb4d8502Sjsg 					adev->mode_info.num_crtc = 1;
1882fb4d8502Sjsg 				}
1883fb4d8502Sjsg 				break;
1884fb4d8502Sjsg 			}
1885fb4d8502Sjsg 		}
1886fb4d8502Sjsg 
1887fb4d8502Sjsg 		DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1888fb4d8502Sjsg 			 amdgpu_virtual_display, pci_address_name,
1889fb4d8502Sjsg 			 adev->enable_virtual_display, adev->mode_info.num_crtc);
1890fb4d8502Sjsg 
1891fb4d8502Sjsg 		kfree(pciaddstr);
1892fb4d8502Sjsg 	}
1893fb4d8502Sjsg #endif
1894fb4d8502Sjsg }
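
/*
 * Illustrative examples of the module-parameter format parsed above: a
 * semicolon-separated list of "pci-address,crtc-count" pairs, or "all" to
 * match every device, e.g. (hypothetical addresses)
 *
 *	amdgpu.virtual_display=0000:01:00.0,2
 *	amdgpu.virtual_display=all,1
 */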
1895fb4d8502Sjsg 
amdgpu_device_set_sriov_virtual_display(struct amdgpu_device * adev)1896f005ef32Sjsg void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
1897f005ef32Sjsg {
1898f005ef32Sjsg 	if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
1899f005ef32Sjsg 		adev->mode_info.num_crtc = 1;
1900f005ef32Sjsg 		adev->enable_virtual_display = true;
1901f005ef32Sjsg 		DRM_INFO("virtual_display:%d, num_crtc:%d\n",
1902f005ef32Sjsg 			 adev->enable_virtual_display, adev->mode_info.num_crtc);
1903f005ef32Sjsg 	}
1904f005ef32Sjsg }
1905f005ef32Sjsg 
1906fb4d8502Sjsg /**
1907fb4d8502Sjsg  * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1908fb4d8502Sjsg  *
1909fb4d8502Sjsg  * @adev: amdgpu_device pointer
1910fb4d8502Sjsg  *
1911fb4d8502Sjsg  * Parses the asic configuration parameters specified in the gpu info
1912fb4d8502Sjsg  * firmware and makes them available to the driver for use in configuring
1913fb4d8502Sjsg  * the asic.
1914fb4d8502Sjsg  * Returns 0 on success, -EINVAL on failure.
1915fb4d8502Sjsg  */
amdgpu_device_parse_gpu_info_fw(struct amdgpu_device * adev)1916fb4d8502Sjsg static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1917fb4d8502Sjsg {
1918fb4d8502Sjsg 	const char *chip_name;
1919ad8b1aafSjsg 	char fw_name[40];
1920fb4d8502Sjsg 	int err;
1921fb4d8502Sjsg 	const struct gpu_info_firmware_header_v1_0 *hdr;
1922fb4d8502Sjsg 
1923fb4d8502Sjsg 	adev->firmware.gpu_info_fw = NULL;
1924fb4d8502Sjsg 
1925f005ef32Sjsg 	if (adev->mman.discovery_bin)
1926ad8b1aafSjsg 		return 0;
1927ad8b1aafSjsg 
1928fb4d8502Sjsg 	switch (adev->asic_type) {
1929fb4d8502Sjsg 	default:
1930fb4d8502Sjsg 		return 0;
1931fb4d8502Sjsg 	case CHIP_VEGA10:
1932fb4d8502Sjsg 		chip_name = "vega10";
1933fb4d8502Sjsg 		break;
1934fb4d8502Sjsg 	case CHIP_VEGA12:
1935fb4d8502Sjsg 		chip_name = "vega12";
1936fb4d8502Sjsg 		break;
1937fb4d8502Sjsg 	case CHIP_RAVEN:
1938ad8b1aafSjsg 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1939c349dbc7Sjsg 			chip_name = "raven2";
1940ad8b1aafSjsg 		else if (adev->apu_flags & AMD_APU_IS_PICASSO)
19414fe6e3f4Sjsg 			chip_name = "picasso";
19423ee1c80bSjsg 		else
19433ee1c80bSjsg 			chip_name = "raven";
19444fe6e3f4Sjsg 		break;
1945c349dbc7Sjsg 	case CHIP_ARCTURUS:
1946c349dbc7Sjsg 		chip_name = "arcturus";
1947c349dbc7Sjsg 		break;
1948c349dbc7Sjsg 	case CHIP_NAVI12:
1949c349dbc7Sjsg 		chip_name = "navi12";
1950c349dbc7Sjsg 		break;
1951fb4d8502Sjsg 	}
1952fb4d8502Sjsg 
1953fb4d8502Sjsg 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
1954f005ef32Sjsg 	err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, fw_name);
1955fb4d8502Sjsg 	if (err) {
1956fb4d8502Sjsg 		dev_err(adev->dev,
1957f005ef32Sjsg 			"Failed to get gpu_info firmware \"%s\"\n",
1958fb4d8502Sjsg 			fw_name);
1959fb4d8502Sjsg 		goto out;
1960fb4d8502Sjsg 	}
1961fb4d8502Sjsg 
1962fb4d8502Sjsg 	hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
1963fb4d8502Sjsg 	amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
1964fb4d8502Sjsg 
1965fb4d8502Sjsg 	switch (hdr->version_major) {
1966fb4d8502Sjsg 	case 1:
1967fb4d8502Sjsg 	{
1968fb4d8502Sjsg 		const struct gpu_info_firmware_v1_0 *gpu_info_fw =
1969fb4d8502Sjsg 			(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
1970fb4d8502Sjsg 								le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1971fb4d8502Sjsg 
1972ad8b1aafSjsg 		/*
1973ad8b1aafSjsg 		 * Should be dropped when DAL no longer needs it.
1974ad8b1aafSjsg 		 */
1975ad8b1aafSjsg 		if (adev->asic_type == CHIP_NAVI12)
1976c349dbc7Sjsg 			goto parse_soc_bounding_box;
1977c349dbc7Sjsg 
1978fb4d8502Sjsg 		adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
1979fb4d8502Sjsg 		adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
1980fb4d8502Sjsg 		adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
1981fb4d8502Sjsg 		adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
1982fb4d8502Sjsg 		adev->gfx.config.max_texture_channel_caches =
1983fb4d8502Sjsg 			le32_to_cpu(gpu_info_fw->gc_num_tccs);
1984fb4d8502Sjsg 		adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
1985fb4d8502Sjsg 		adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
1986fb4d8502Sjsg 		adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
1987fb4d8502Sjsg 		adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
1988fb4d8502Sjsg 		adev->gfx.config.double_offchip_lds_buf =
1989fb4d8502Sjsg 			le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
1990fb4d8502Sjsg 		adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
1991fb4d8502Sjsg 		adev->gfx.cu_info.max_waves_per_simd =
1992fb4d8502Sjsg 			le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
1993fb4d8502Sjsg 		adev->gfx.cu_info.max_scratch_slots_per_cu =
1994fb4d8502Sjsg 			le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
1995fb4d8502Sjsg 		adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
1996c349dbc7Sjsg 		if (hdr->version_minor >= 1) {
1997c349dbc7Sjsg 			const struct gpu_info_firmware_v1_1 *gpu_info_fw =
1998c349dbc7Sjsg 				(const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
1999c349dbc7Sjsg 									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2000c349dbc7Sjsg 			adev->gfx.config.num_sc_per_sh =
2001c349dbc7Sjsg 				le32_to_cpu(gpu_info_fw->num_sc_per_sh);
2002c349dbc7Sjsg 			adev->gfx.config.num_packer_per_sc =
2003c349dbc7Sjsg 				le32_to_cpu(gpu_info_fw->num_packer_per_sc);
2004c349dbc7Sjsg 		}
2005c349dbc7Sjsg 
2006c349dbc7Sjsg parse_soc_bounding_box:
2007c349dbc7Sjsg 		/*
2008c349dbc7Sjsg 		 * soc bounding box info is not integrated in the discovery table,
2009ad8b1aafSjsg 		 * we always need to parse it from gpu info firmware if needed.
2010c349dbc7Sjsg 		 */
2011c349dbc7Sjsg 		if (hdr->version_minor == 2) {
2012c349dbc7Sjsg 			const struct gpu_info_firmware_v1_2 *gpu_info_fw =
2013c349dbc7Sjsg 				(const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
2014c349dbc7Sjsg 									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2015c349dbc7Sjsg 			adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
2016c349dbc7Sjsg 		}
2017fb4d8502Sjsg 		break;
2018fb4d8502Sjsg 	}
2019fb4d8502Sjsg 	default:
2020fb4d8502Sjsg 		dev_err(adev->dev,
2021fb4d8502Sjsg 			"Unsupported gpu_info table %d\n", hdr->header.ucode_version);
2022fb4d8502Sjsg 		err = -EINVAL;
2023fb4d8502Sjsg 		goto out;
2024fb4d8502Sjsg 	}
2025fb4d8502Sjsg out:
2026fb4d8502Sjsg 	return err;
2027fb4d8502Sjsg }
2028fb4d8502Sjsg 
2029fb4d8502Sjsg /**
2030fb4d8502Sjsg  * amdgpu_device_ip_early_init - run early init for hardware IPs
2031fb4d8502Sjsg  *
2032fb4d8502Sjsg  * @adev: amdgpu_device pointer
2033fb4d8502Sjsg  *
2034fb4d8502Sjsg  * Early initialization pass for hardware IPs.  The hardware IPs that make
2035fb4d8502Sjsg  * up each asic are discovered and each IP's early_init callback is run.  This
2036fb4d8502Sjsg  * is the first stage in initializing the asic.
2037fb4d8502Sjsg  * Returns 0 on success, negative error code on failure.
2038fb4d8502Sjsg  */
amdgpu_device_ip_early_init(struct amdgpu_device * adev)2039fb4d8502Sjsg static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
2040fb4d8502Sjsg {
2041f9c49ec7Sjsg 	struct pci_dev *parent;
2042fb4d8502Sjsg 	int i, r;
2043f005ef32Sjsg 	bool total;
2044fb4d8502Sjsg 
2045fb4d8502Sjsg 	amdgpu_device_enable_virtual_display(adev);
2046fb4d8502Sjsg 
2047ad8b1aafSjsg 	if (amdgpu_sriov_vf(adev)) {
2048ad8b1aafSjsg 		r = amdgpu_virt_request_full_gpu(adev, true);
2049fb4d8502Sjsg 		if (r)
2050fb4d8502Sjsg 			return r;
2051ad8b1aafSjsg 	}
2052ad8b1aafSjsg 
2053ad8b1aafSjsg 	switch (adev->asic_type) {
2054fb4d8502Sjsg #ifdef CONFIG_DRM_AMDGPU_SI
2055fb4d8502Sjsg 	case CHIP_VERDE:
2056fb4d8502Sjsg 	case CHIP_TAHITI:
2057fb4d8502Sjsg 	case CHIP_PITCAIRN:
2058fb4d8502Sjsg 	case CHIP_OLAND:
2059fb4d8502Sjsg 	case CHIP_HAINAN:
2060fb4d8502Sjsg 		adev->family = AMDGPU_FAMILY_SI;
2061fb4d8502Sjsg 		r = si_set_ip_blocks(adev);
2062fb4d8502Sjsg 		if (r)
2063fb4d8502Sjsg 			return r;
2064fb4d8502Sjsg 		break;
2065fb4d8502Sjsg #endif
2066fb4d8502Sjsg #ifdef CONFIG_DRM_AMDGPU_CIK
2067fb4d8502Sjsg 	case CHIP_BONAIRE:
2068fb4d8502Sjsg 	case CHIP_HAWAII:
2069fb4d8502Sjsg 	case CHIP_KAVERI:
2070fb4d8502Sjsg 	case CHIP_KABINI:
2071fb4d8502Sjsg 	case CHIP_MULLINS:
2072ad8b1aafSjsg 		if (adev->flags & AMD_IS_APU)
2073fb4d8502Sjsg 			adev->family = AMDGPU_FAMILY_KV;
2074ad8b1aafSjsg 		else
2075ad8b1aafSjsg 			adev->family = AMDGPU_FAMILY_CI;
2076fb4d8502Sjsg 
2077fb4d8502Sjsg 		r = cik_set_ip_blocks(adev);
2078fb4d8502Sjsg 		if (r)
2079fb4d8502Sjsg 			return r;
2080fb4d8502Sjsg 		break;
2081fb4d8502Sjsg #endif
2082ad8b1aafSjsg 	case CHIP_TOPAZ:
2083ad8b1aafSjsg 	case CHIP_TONGA:
2084ad8b1aafSjsg 	case CHIP_FIJI:
2085ad8b1aafSjsg 	case CHIP_POLARIS10:
2086ad8b1aafSjsg 	case CHIP_POLARIS11:
2087ad8b1aafSjsg 	case CHIP_POLARIS12:
2088ad8b1aafSjsg 	case CHIP_VEGAM:
2089ad8b1aafSjsg 	case CHIP_CARRIZO:
2090ad8b1aafSjsg 	case CHIP_STONEY:
2091ad8b1aafSjsg 		if (adev->flags & AMD_IS_APU)
2092ad8b1aafSjsg 			adev->family = AMDGPU_FAMILY_CZ;
2093ad8b1aafSjsg 		else
2094ad8b1aafSjsg 			adev->family = AMDGPU_FAMILY_VI;
2095ad8b1aafSjsg 
2096ad8b1aafSjsg 		r = vi_set_ip_blocks(adev);
2097ad8b1aafSjsg 		if (r)
2098ad8b1aafSjsg 			return r;
2099ad8b1aafSjsg 		break;
2100fb4d8502Sjsg 	default:
21011bb76ff1Sjsg 		r = amdgpu_discovery_set_ip_blocks(adev);
21021bb76ff1Sjsg 		if (r)
21031bb76ff1Sjsg 			return r;
21041bb76ff1Sjsg 		break;
2105fb4d8502Sjsg 	}
2106fb4d8502Sjsg 
2107f9c49ec7Sjsg 	if (amdgpu_has_atpx() &&
2108f9c49ec7Sjsg 	    (amdgpu_is_atpx_hybrid() ||
2109f9c49ec7Sjsg 	     amdgpu_has_atpx_dgpu_power_cntl()) &&
2110f9c49ec7Sjsg 	    ((adev->flags & AMD_IS_APU) == 0) &&
2111997286d4Sjsg 	    !dev_is_removable(&adev->pdev->dev))
2112f9c49ec7Sjsg 		adev->flags |= AMD_IS_PX;
2113f9c49ec7Sjsg 
2114b9d500ebSjsg 	if (!(adev->flags & AMD_IS_APU)) {
2115c9d1c6fcSjsg #ifdef notyet
2116c9d1c6fcSjsg 		parent = pcie_find_root_port(adev->pdev);
2117f9c49ec7Sjsg 		adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2118c9d1c6fcSjsg #else
2119c9d1c6fcSjsg 		adev->has_pr3 = false;
2120c9d1c6fcSjsg #endif
2121b9d500ebSjsg 	}
2122f9c49ec7Sjsg 
2123fb4d8502Sjsg 
2124c349dbc7Sjsg 	adev->pm.pp_feature = amdgpu_pp_feature_mask;
2125c349dbc7Sjsg 	if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
2126c349dbc7Sjsg 		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
21275ca02815Sjsg 	if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
21285ca02815Sjsg 		adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
2129f005ef32Sjsg 	if (!amdgpu_device_pcie_dynamic_switching_supported())
2130f005ef32Sjsg 		adev->pm.pp_feature &= ~PP_PCIE_DPM_MASK;
2131fb4d8502Sjsg 
2132f005ef32Sjsg 	total = true;
2133fb4d8502Sjsg 	for (i = 0; i < adev->num_ip_blocks; i++) {
2134fb4d8502Sjsg 		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
2135f005ef32Sjsg 			DRM_WARN("disabled ip block: %d <%s>\n",
2136fb4d8502Sjsg 				  i, adev->ip_blocks[i].version->funcs->name);
2137fb4d8502Sjsg 			adev->ip_blocks[i].status.valid = false;
2138fb4d8502Sjsg 		} else {
2139fb4d8502Sjsg 			if (adev->ip_blocks[i].version->funcs->early_init) {
2140fb4d8502Sjsg 				r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2141fb4d8502Sjsg 				if (r == -ENOENT) {
2142fb4d8502Sjsg 					adev->ip_blocks[i].status.valid = false;
2143fb4d8502Sjsg 				} else if (r) {
2144fb4d8502Sjsg 					DRM_ERROR("early_init of IP block <%s> failed %d\n",
2145fb4d8502Sjsg 						  adev->ip_blocks[i].version->funcs->name, r);
2146f005ef32Sjsg 					total = false;
2147fb4d8502Sjsg 				} else {
2148fb4d8502Sjsg 					adev->ip_blocks[i].status.valid = true;
2149fb4d8502Sjsg 				}
2150fb4d8502Sjsg 			} else {
2151fb4d8502Sjsg 				adev->ip_blocks[i].status.valid = true;
2152fb4d8502Sjsg 			}
2153fb4d8502Sjsg 		}
2154c349dbc7Sjsg 		/* get the vbios after the asic_funcs are set up */
2155c349dbc7Sjsg 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2156a89df194Sjsg 			r = amdgpu_device_parse_gpu_info_fw(adev);
2157a89df194Sjsg 			if (r)
2158a89df194Sjsg 				return r;
2159a89df194Sjsg 
2160c349dbc7Sjsg 			/* Read BIOS */
2161f005ef32Sjsg 			if (amdgpu_device_read_bios(adev)) {
2162c349dbc7Sjsg 				if (!amdgpu_get_bios(adev))
2163c349dbc7Sjsg 					return -EINVAL;
2164c349dbc7Sjsg 
2165c349dbc7Sjsg 				r = amdgpu_atombios_init(adev);
2166c349dbc7Sjsg 				if (r) {
2167c349dbc7Sjsg 					dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2168c349dbc7Sjsg 					amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2169c349dbc7Sjsg 					return r;
2170c349dbc7Sjsg 				}
2171f005ef32Sjsg 			}
21725ca02815Sjsg 
21735ca02815Sjsg 			/* get pf2vf msg info at its earliest time */
21745ca02815Sjsg 			if (amdgpu_sriov_vf(adev))
21755ca02815Sjsg 				amdgpu_virt_init_data_exchange(adev);
21765ca02815Sjsg 
2177c349dbc7Sjsg 		}
2178fb4d8502Sjsg 	}
2179f005ef32Sjsg 	if (!total)
2180f005ef32Sjsg 		return -ENODEV;
2181fb4d8502Sjsg 
2182f005ef32Sjsg 	amdgpu_amdkfd_device_probe(adev);
2183fb4d8502Sjsg 	adev->cg_flags &= amdgpu_cg_mask;
2184fb4d8502Sjsg 	adev->pg_flags &= amdgpu_pg_mask;
2185fb4d8502Sjsg 
2186fb4d8502Sjsg 	return 0;
2187fb4d8502Sjsg }
2188fb4d8502Sjsg 
amdgpu_device_ip_hw_init_phase1(struct amdgpu_device * adev)2189c349dbc7Sjsg static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2190c349dbc7Sjsg {
2191c349dbc7Sjsg 	int i, r;
2192c349dbc7Sjsg 
2193c349dbc7Sjsg 	for (i = 0; i < adev->num_ip_blocks; i++) {
2194c349dbc7Sjsg 		if (!adev->ip_blocks[i].status.sw)
2195c349dbc7Sjsg 			continue;
2196c349dbc7Sjsg 		if (adev->ip_blocks[i].status.hw)
2197c349dbc7Sjsg 			continue;
2198c349dbc7Sjsg 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2199c349dbc7Sjsg 		    (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
2200c349dbc7Sjsg 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2201c349dbc7Sjsg 			r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2202c349dbc7Sjsg 			if (r) {
2203c349dbc7Sjsg 				DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2204c349dbc7Sjsg 					  adev->ip_blocks[i].version->funcs->name, r);
2205c349dbc7Sjsg 				return r;
2206c349dbc7Sjsg 			}
2207c349dbc7Sjsg 			adev->ip_blocks[i].status.hw = true;
2208c349dbc7Sjsg 		}
2209c349dbc7Sjsg 	}
2210c349dbc7Sjsg 
2211c349dbc7Sjsg 	return 0;
2212c349dbc7Sjsg }
2213c349dbc7Sjsg 
amdgpu_device_ip_hw_init_phase2(struct amdgpu_device * adev)2214c349dbc7Sjsg static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2215c349dbc7Sjsg {
2216c349dbc7Sjsg 	int i, r;
2217c349dbc7Sjsg 
2218c349dbc7Sjsg 	for (i = 0; i < adev->num_ip_blocks; i++) {
2219c349dbc7Sjsg 		if (!adev->ip_blocks[i].status.sw)
2220c349dbc7Sjsg 			continue;
2221c349dbc7Sjsg 		if (adev->ip_blocks[i].status.hw)
2222c349dbc7Sjsg 			continue;
2223c349dbc7Sjsg 		r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2224c349dbc7Sjsg 		if (r) {
2225c349dbc7Sjsg 			DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2226c349dbc7Sjsg 				  adev->ip_blocks[i].version->funcs->name, r);
2227c349dbc7Sjsg 			return r;
2228c349dbc7Sjsg 		}
2229c349dbc7Sjsg 		adev->ip_blocks[i].status.hw = true;
2230c349dbc7Sjsg 	}
2231c349dbc7Sjsg 
2232c349dbc7Sjsg 	return 0;
2233c349dbc7Sjsg }
2234c349dbc7Sjsg 
amdgpu_device_fw_loading(struct amdgpu_device * adev)2235c349dbc7Sjsg static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2236c349dbc7Sjsg {
2237c349dbc7Sjsg 	int r = 0;
2238c349dbc7Sjsg 	int i;
2239c349dbc7Sjsg 	uint32_t smu_version;
2240c349dbc7Sjsg 
2241c349dbc7Sjsg 	if (adev->asic_type >= CHIP_VEGA10) {
2242c349dbc7Sjsg 		for (i = 0; i < adev->num_ip_blocks; i++) {
2243c349dbc7Sjsg 			if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2244c349dbc7Sjsg 				continue;
2245c349dbc7Sjsg 
22465ca02815Sjsg 			if (!adev->ip_blocks[i].status.sw)
22475ca02815Sjsg 				continue;
22485ca02815Sjsg 
2249c349dbc7Sjsg 			/* no need to do the fw loading again if already done */
2250c349dbc7Sjsg 			if (adev->ip_blocks[i].status.hw == true)
2251c349dbc7Sjsg 				break;
2252c349dbc7Sjsg 
2253ad8b1aafSjsg 			if (amdgpu_in_reset(adev) || adev->in_suspend) {
2254c349dbc7Sjsg 				r = adev->ip_blocks[i].version->funcs->resume(adev);
2255c349dbc7Sjsg 				if (r) {
2256c349dbc7Sjsg 					DRM_ERROR("resume of IP block <%s> failed %d\n",
2257c349dbc7Sjsg 							  adev->ip_blocks[i].version->funcs->name, r);
2258c349dbc7Sjsg 					return r;
2259c349dbc7Sjsg 				}
2260c349dbc7Sjsg 			} else {
2261c349dbc7Sjsg 				r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2262c349dbc7Sjsg 				if (r) {
2263c349dbc7Sjsg 					DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2264c349dbc7Sjsg 							  adev->ip_blocks[i].version->funcs->name, r);
2265c349dbc7Sjsg 					return r;
2266c349dbc7Sjsg 				}
2267c349dbc7Sjsg 			}
2268c349dbc7Sjsg 
2269c349dbc7Sjsg 			adev->ip_blocks[i].status.hw = true;
2270c349dbc7Sjsg 			break;
2271c349dbc7Sjsg 		}
2272c349dbc7Sjsg 	}
2273c349dbc7Sjsg 
2274c349dbc7Sjsg 	if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2275c349dbc7Sjsg 		r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
2276c349dbc7Sjsg 
2277c349dbc7Sjsg 	return r;
2278c349dbc7Sjsg }
2279c349dbc7Sjsg 
amdgpu_device_init_schedulers(struct amdgpu_device * adev)22801bb76ff1Sjsg static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
22811bb76ff1Sjsg {
22821bb76ff1Sjsg 	long timeout;
22831bb76ff1Sjsg 	int r, i;
22841bb76ff1Sjsg 
22851bb76ff1Sjsg 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
22861bb76ff1Sjsg 		struct amdgpu_ring *ring = adev->rings[i];
22871bb76ff1Sjsg 
22881bb76ff1Sjsg 		/* No need to setup the GPU scheduler for rings that don't need it */
22891bb76ff1Sjsg 		if (!ring || ring->no_scheduler)
22901bb76ff1Sjsg 			continue;
22911bb76ff1Sjsg 
22921bb76ff1Sjsg 		switch (ring->funcs->type) {
22931bb76ff1Sjsg 		case AMDGPU_RING_TYPE_GFX:
22941bb76ff1Sjsg 			timeout = adev->gfx_timeout;
22951bb76ff1Sjsg 			break;
22961bb76ff1Sjsg 		case AMDGPU_RING_TYPE_COMPUTE:
22971bb76ff1Sjsg 			timeout = adev->compute_timeout;
22981bb76ff1Sjsg 			break;
22991bb76ff1Sjsg 		case AMDGPU_RING_TYPE_SDMA:
23001bb76ff1Sjsg 			timeout = adev->sdma_timeout;
23011bb76ff1Sjsg 			break;
23021bb76ff1Sjsg 		default:
23031bb76ff1Sjsg 			timeout = adev->video_timeout;
23041bb76ff1Sjsg 			break;
23051bb76ff1Sjsg 		}
23061bb76ff1Sjsg 
23071bb76ff1Sjsg 		r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
2308f005ef32Sjsg 				   ring->num_hw_submission, 0,
23091bb76ff1Sjsg 				   timeout, adev->reset_domain->wq,
23101bb76ff1Sjsg 				   ring->sched_score, ring->name,
23111bb76ff1Sjsg 				   adev->dev);
23121bb76ff1Sjsg 		if (r) {
23131bb76ff1Sjsg 			DRM_ERROR("Failed to create scheduler on ring %s.\n",
23141bb76ff1Sjsg 				  ring->name);
23151bb76ff1Sjsg 			return r;
23161bb76ff1Sjsg 		}
23171bb76ff1Sjsg 	}
23181bb76ff1Sjsg 
2319f005ef32Sjsg 	amdgpu_xcp_update_partition_sched_list(adev);
2320f005ef32Sjsg 
23211bb76ff1Sjsg 	return 0;
23221bb76ff1Sjsg }
23231bb76ff1Sjsg 
23241bb76ff1Sjsg 
2325fb4d8502Sjsg /**
2326fb4d8502Sjsg  * amdgpu_device_ip_init - run init for hardware IPs
2327fb4d8502Sjsg  *
2328fb4d8502Sjsg  * @adev: amdgpu_device pointer
2329fb4d8502Sjsg  *
2330fb4d8502Sjsg  * Main initialization pass for hardware IPs.  The list of all the hardware
2331fb4d8502Sjsg  * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2332fb4d8502Sjsg  * are run.  sw_init initializes the software state associated with each IP
2333fb4d8502Sjsg  * and hw_init initializes the hardware associated with each IP.
2334fb4d8502Sjsg  * Returns 0 on success, negative error code on failure.
2335fb4d8502Sjsg  */
amdgpu_device_ip_init(struct amdgpu_device * adev)2336fb4d8502Sjsg static int amdgpu_device_ip_init(struct amdgpu_device *adev)
2337fb4d8502Sjsg {
2338fb4d8502Sjsg 	int i, r;
2339fb4d8502Sjsg 
2340c349dbc7Sjsg 	r = amdgpu_ras_init(adev);
2341c349dbc7Sjsg 	if (r)
2342c349dbc7Sjsg 		return r;
2343c349dbc7Sjsg 
2344fb4d8502Sjsg 	for (i = 0; i < adev->num_ip_blocks; i++) {
2345fb4d8502Sjsg 		if (!adev->ip_blocks[i].status.valid)
2346fb4d8502Sjsg 			continue;
2347fb4d8502Sjsg 		r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2348fb4d8502Sjsg 		if (r) {
2349fb4d8502Sjsg 			DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2350fb4d8502Sjsg 				  adev->ip_blocks[i].version->funcs->name, r);
2351c349dbc7Sjsg 			goto init_failed;
2352fb4d8502Sjsg 		}
2353fb4d8502Sjsg 		adev->ip_blocks[i].status.sw = true;
2354fb4d8502Sjsg 
2355bdc47e44Sjsg 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2356bdc47e44Sjsg 			/* need to do common hw init early so everything is set up for gmc */
2357bdc47e44Sjsg 			r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2358bdc47e44Sjsg 			if (r) {
2359bdc47e44Sjsg 				DRM_ERROR("hw_init %d failed %d\n", i, r);
2360bdc47e44Sjsg 				goto init_failed;
2361bdc47e44Sjsg 			}
2362bdc47e44Sjsg 			adev->ip_blocks[i].status.hw = true;
2363bdc47e44Sjsg 		} else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2364fb4d8502Sjsg 			/* need to do gmc hw init early so we can allocate gpu mem */
236515f9b5f9Sjsg 			/* Try to reserve bad pages early */
236615f9b5f9Sjsg 			if (amdgpu_sriov_vf(adev))
236715f9b5f9Sjsg 				amdgpu_virt_exchange_data(adev);
236815f9b5f9Sjsg 
2369f005ef32Sjsg 			r = amdgpu_device_mem_scratch_init(adev);
2370fb4d8502Sjsg 			if (r) {
2371f005ef32Sjsg 				DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r);
2372c349dbc7Sjsg 				goto init_failed;
2373fb4d8502Sjsg 			}
2374fb4d8502Sjsg 			r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2375fb4d8502Sjsg 			if (r) {
2376fb4d8502Sjsg 				DRM_ERROR("hw_init %d failed %d\n", i, r);
2377c349dbc7Sjsg 				goto init_failed;
2378fb4d8502Sjsg 			}
2379fb4d8502Sjsg 			r = amdgpu_device_wb_init(adev);
2380fb4d8502Sjsg 			if (r) {
2381fb4d8502Sjsg 				DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
2382c349dbc7Sjsg 				goto init_failed;
2383fb4d8502Sjsg 			}
2384fb4d8502Sjsg 			adev->ip_blocks[i].status.hw = true;
2385fb4d8502Sjsg 
2386fb4d8502Sjsg 			/* right after GMC hw init, we create CSA */
2387f005ef32Sjsg 			if (adev->gfx.mcbp) {
2388c349dbc7Sjsg 				r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
2389f005ef32Sjsg 							       AMDGPU_GEM_DOMAIN_VRAM |
2390f005ef32Sjsg 							       AMDGPU_GEM_DOMAIN_GTT,
2391c349dbc7Sjsg 							       AMDGPU_CSA_SIZE);
2392fb4d8502Sjsg 				if (r) {
2393fb4d8502Sjsg 					DRM_ERROR("allocate CSA failed %d\n", r);
2394c349dbc7Sjsg 					goto init_failed;
2395fb4d8502Sjsg 				}
2396fb4d8502Sjsg 			}
2397fb4d8502Sjsg 		}
2398fb4d8502Sjsg 	}
2399fb4d8502Sjsg 
2400c349dbc7Sjsg 	if (amdgpu_sriov_vf(adev))
240133331580Sjsg 		amdgpu_virt_init_data_exchange(adev);
2402c349dbc7Sjsg 
2403c349dbc7Sjsg 	r = amdgpu_ib_pool_init(adev);
2404fb4d8502Sjsg 	if (r) {
2405c349dbc7Sjsg 		dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2406c349dbc7Sjsg 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2407c349dbc7Sjsg 		goto init_failed;
2408fb4d8502Sjsg 	}
2409fb4d8502Sjsg 
2410c349dbc7Sjsg 	r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2411c349dbc7Sjsg 	if (r)
2412c349dbc7Sjsg 		goto init_failed;
2413c349dbc7Sjsg 
2414c349dbc7Sjsg 	r = amdgpu_device_ip_hw_init_phase1(adev);
2415c349dbc7Sjsg 	if (r)
2416c349dbc7Sjsg 		goto init_failed;
2417c349dbc7Sjsg 
2418c349dbc7Sjsg 	r = amdgpu_device_fw_loading(adev);
2419c349dbc7Sjsg 	if (r)
2420c349dbc7Sjsg 		goto init_failed;
2421c349dbc7Sjsg 
2422c349dbc7Sjsg 	r = amdgpu_device_ip_hw_init_phase2(adev);
2423c349dbc7Sjsg 	if (r)
2424c349dbc7Sjsg 		goto init_failed;
2425c349dbc7Sjsg 
2426c349dbc7Sjsg 	/*
2427c349dbc7Sjsg 	 * it should be called after amdgpu_device_ip_hw_init_phase2 since
2428c349dbc7Sjsg 	 * for some ASICs the RAS EEPROM code relies on the SMU being fully
2429c349dbc7Sjsg 	 * functional for I2C communication, which is only true at this point.
2430c349dbc7Sjsg 	 * for I2C communication which only true at this point.
2431ad8b1aafSjsg 	 *
2432ad8b1aafSjsg 	 * amdgpu_ras_recovery_init may fail, but the upper only cares the
2433ad8b1aafSjsg 	 * failure from bad gpu situation and stop amdgpu init process
2434ad8b1aafSjsg 	 * accordingly. For other failed cases, it will still release all
2435ad8b1aafSjsg 	 * the resource and print error message, rather than returning one
2436ad8b1aafSjsg 	 * negative value to upper level.
2437c349dbc7Sjsg 	 *
2438c349dbc7Sjsg 	 * Note: theoretically, this should be called before all vram allocations
2439c349dbc7Sjsg 	 * to protect retired pages from being abused
2440c349dbc7Sjsg 	 */
2441ad8b1aafSjsg 	r = amdgpu_ras_recovery_init(adev);
2442ad8b1aafSjsg 	if (r)
2443ad8b1aafSjsg 		goto init_failed;
2444c349dbc7Sjsg 
24451bb76ff1Sjsg 	/**
24461bb76ff1Sjsg 	 * In case of XGMI, grab an extra reference on the reset domain for this device
24471bb76ff1Sjsg 	 */
24481bb76ff1Sjsg 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
24491bb76ff1Sjsg 		if (amdgpu_xgmi_add_device(adev) == 0) {
24501bb76ff1Sjsg 			if (!amdgpu_sriov_vf(adev)) {
24511bb76ff1Sjsg 				struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
24521bb76ff1Sjsg 
24531bb76ff1Sjsg 				if (WARN_ON(!hive)) {
24541bb76ff1Sjsg 					r = -ENOENT;
24551bb76ff1Sjsg 					goto init_failed;
24561bb76ff1Sjsg 				}
24571bb76ff1Sjsg 
24581bb76ff1Sjsg 				if (!hive->reset_domain ||
24591bb76ff1Sjsg 				    !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
24601bb76ff1Sjsg 					r = -ENOENT;
24611bb76ff1Sjsg 					amdgpu_put_xgmi_hive(hive);
24621bb76ff1Sjsg 					goto init_failed;
24631bb76ff1Sjsg 				}
24641bb76ff1Sjsg 
24651bb76ff1Sjsg 				/* Drop the early temporary reset domain we created for device */
24661bb76ff1Sjsg 				amdgpu_reset_put_reset_domain(adev->reset_domain);
24671bb76ff1Sjsg 				adev->reset_domain = hive->reset_domain;
24681bb76ff1Sjsg 				amdgpu_put_xgmi_hive(hive);
24691bb76ff1Sjsg 			}
24701bb76ff1Sjsg 		}
24711bb76ff1Sjsg 	}
24721bb76ff1Sjsg 
24731bb76ff1Sjsg 	r = amdgpu_device_init_schedulers(adev);
24741bb76ff1Sjsg 	if (r)
24751bb76ff1Sjsg 		goto init_failed;
24765ca02815Sjsg 
24775ca02815Sjsg 	/* Don't init kfd if the whole hive needs to be reset during init */
2478f005ef32Sjsg 	if (!adev->gmc.xgmi.pending_reset) {
2479f005ef32Sjsg 		kgd2kfd_init_zone_device(adev);
2480fb4d8502Sjsg 		amdgpu_amdkfd_device_init(adev);
2481f005ef32Sjsg 	}
2482fb4d8502Sjsg 
2483ad8b1aafSjsg 	amdgpu_fru_get_product_info(adev);
2484ad8b1aafSjsg 
2485c349dbc7Sjsg init_failed:
2486fb4d8502Sjsg 
2487c349dbc7Sjsg 	return r;
2488fb4d8502Sjsg }
2489fb4d8502Sjsg 
2490fb4d8502Sjsg /**
2491fb4d8502Sjsg  * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2492fb4d8502Sjsg  *
2493fb4d8502Sjsg  * @adev: amdgpu_device pointer
2494fb4d8502Sjsg  *
2495fb4d8502Sjsg  * Writes a reset magic value to the gart pointer in VRAM.  The driver calls
2496fb4d8502Sjsg  * this function before a GPU reset.  If the value is retained after a
2497fb4d8502Sjsg  * GPU reset, VRAM has not been lost.  Some GPU resets may destroy VRAM contents.
2498fb4d8502Sjsg  */
amdgpu_device_fill_reset_magic(struct amdgpu_device * adev)2499fb4d8502Sjsg static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
2500fb4d8502Sjsg {
2501fb4d8502Sjsg 	memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2502fb4d8502Sjsg }
2503fb4d8502Sjsg 
2504fb4d8502Sjsg /**
2505fb4d8502Sjsg  * amdgpu_device_check_vram_lost - check if vram is valid
2506fb4d8502Sjsg  *
2507fb4d8502Sjsg  * @adev: amdgpu_device pointer
2508fb4d8502Sjsg  *
2509fb4d8502Sjsg  * Checks the reset magic value written to the gart pointer in VRAM.
2510fb4d8502Sjsg  * The driver calls this after a GPU reset to see if the contents of
2511fb4d8502Sjsg  * VRAM is lost or not.
2512fb4d8502Sjsg  * Returns true if vram is lost, false if not.
2513fb4d8502Sjsg  */
amdgpu_device_check_vram_lost(struct amdgpu_device * adev)2514fb4d8502Sjsg static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
2515fb4d8502Sjsg {
2516c349dbc7Sjsg 	if (memcmp(adev->gart.ptr, adev->reset_magic,
2517c349dbc7Sjsg 			AMDGPU_RESET_MAGIC_NUM))
2518c349dbc7Sjsg 		return true;
2519c349dbc7Sjsg 
2520ad8b1aafSjsg 	if (!amdgpu_in_reset(adev))
2521c349dbc7Sjsg 		return false;
2522c349dbc7Sjsg 
2523c349dbc7Sjsg 	/*
2524c349dbc7Sjsg 	 * For all ASICs with baco/mode1 reset, the VRAM is
2525c349dbc7Sjsg 	 * always assumed to be lost.
2526c349dbc7Sjsg 	 */
2527c349dbc7Sjsg 	switch (amdgpu_asic_reset_method(adev)) {
2528c349dbc7Sjsg 	case AMD_RESET_METHOD_BACO:
2529c349dbc7Sjsg 	case AMD_RESET_METHOD_MODE1:
2530c349dbc7Sjsg 		return true;
2531c349dbc7Sjsg 	default:
2532c349dbc7Sjsg 		return false;
2533c349dbc7Sjsg 	}
2534fb4d8502Sjsg }
2535fb4d8502Sjsg 
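/*
 * Usage sketch (illustrative, not driver code): reset paths are expected to
 * pair the two helpers above, recording the magic before an ASIC reset and
 * comparing it afterwards to decide whether VRAM contents must be restored:
 *
 *	amdgpu_device_fill_reset_magic(adev);
 *	r = amdgpu_asic_reset(adev);
 *	vram_lost = amdgpu_device_check_vram_lost(adev);
 *	if (vram_lost)
 *		... restore VRAM contents (GART table, firmware, etc.) ...
 */
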
2536fb4d8502Sjsg /**
2537c349dbc7Sjsg  * amdgpu_device_set_cg_state - set clockgating for amdgpu device
2538fb4d8502Sjsg  *
2539fb4d8502Sjsg  * @adev: amdgpu_device pointer
2540c349dbc7Sjsg  * @state: clockgating state (gate or ungate)
2541fb4d8502Sjsg  *
2542fb4d8502Sjsg  * The list of all the hardware IPs that make up the asic is walked and the
2543c349dbc7Sjsg  * set_clockgating_state callbacks are run.
2544c349dbc7Sjsg  * The late initialization pass enables clockgating for hardware IPs;
2545c349dbc7Sjsg  * the fini or suspend pass disables clockgating for hardware IPs.
2546fb4d8502Sjsg  * Returns 0 on success, negative error code on failure.
2547fb4d8502Sjsg  */
2548c349dbc7Sjsg 
amdgpu_device_set_cg_state(struct amdgpu_device * adev,enum amd_clockgating_state state)25495ca02815Sjsg int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2550c349dbc7Sjsg 			       enum amd_clockgating_state state)
2551fb4d8502Sjsg {
2552c349dbc7Sjsg 	int i, j, r;
2553fb4d8502Sjsg 
2554fb4d8502Sjsg 	if (amdgpu_emu_mode == 1)
2555fb4d8502Sjsg 		return 0;
2556fb4d8502Sjsg 
2557c349dbc7Sjsg 	for (j = 0; j < adev->num_ip_blocks; j++) {
2558c349dbc7Sjsg 		i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2559c349dbc7Sjsg 		if (!adev->ip_blocks[i].status.late_initialized)
2560fb4d8502Sjsg 			continue;
2561f005ef32Sjsg 		/* skip CG for GFX, SDMA on S0ix */
25625ca02815Sjsg 		if (adev->in_s0ix &&
2563f005ef32Sjsg 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2564f005ef32Sjsg 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
25655ca02815Sjsg 			continue;
2566fb4d8502Sjsg 		/* skip CG for VCE/UVD, it's handled specially */
2567fb4d8502Sjsg 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2568fb4d8502Sjsg 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2569fb4d8502Sjsg 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2570c349dbc7Sjsg 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2571fb4d8502Sjsg 		    adev->ip_blocks[i].version->funcs->set_clockgating_state) {
2572fb4d8502Sjsg 			/* enable clockgating to save power */
2573fb4d8502Sjsg 			r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
2574c349dbc7Sjsg 										     state);
2575fb4d8502Sjsg 			if (r) {
2576fb4d8502Sjsg 				DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
2577fb4d8502Sjsg 					  adev->ip_blocks[i].version->funcs->name, r);
2578fb4d8502Sjsg 				return r;
2579fb4d8502Sjsg 			}
2580fb4d8502Sjsg 		}
2581fb4d8502Sjsg 	}
2582fb4d8502Sjsg 
2583fb4d8502Sjsg 	return 0;
2584fb4d8502Sjsg }
2585fb4d8502Sjsg 
amdgpu_device_set_pg_state(struct amdgpu_device * adev,enum amd_powergating_state state)25865ca02815Sjsg int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
25875ca02815Sjsg 			       enum amd_powergating_state state)
2588fb4d8502Sjsg {
2589c349dbc7Sjsg 	int i, j, r;
2590fb4d8502Sjsg 
2591fb4d8502Sjsg 	if (amdgpu_emu_mode == 1)
2592fb4d8502Sjsg 		return 0;
2593fb4d8502Sjsg 
2594c349dbc7Sjsg 	for (j = 0; j < adev->num_ip_blocks; j++) {
2595c349dbc7Sjsg 		i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2596c349dbc7Sjsg 		if (!adev->ip_blocks[i].status.late_initialized)
2597fb4d8502Sjsg 			continue;
2598f005ef32Sjsg 		/* skip PG for GFX, SDMA on S0ix */
25995ca02815Sjsg 		if (adev->in_s0ix &&
2600f005ef32Sjsg 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2601f005ef32Sjsg 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
26025ca02815Sjsg 			continue;
2603fb4d8502Sjsg 		/* skip PG for VCE/UVD, it's handled specially */
2604fb4d8502Sjsg 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2605fb4d8502Sjsg 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2606fb4d8502Sjsg 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2607c349dbc7Sjsg 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2608fb4d8502Sjsg 		    adev->ip_blocks[i].version->funcs->set_powergating_state) {
2609fb4d8502Sjsg 			/* enable powergating to save power */
2610fb4d8502Sjsg 			r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
2611c349dbc7Sjsg 											state);
2612fb4d8502Sjsg 			if (r) {
2613fb4d8502Sjsg 				DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2614fb4d8502Sjsg 					  adev->ip_blocks[i].version->funcs->name, r);
2615fb4d8502Sjsg 				return r;
2616fb4d8502Sjsg 			}
2617fb4d8502Sjsg 		}
2618fb4d8502Sjsg 	}
2619fb4d8502Sjsg 	return 0;
2620fb4d8502Sjsg }
2621fb4d8502Sjsg 
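/*
 * Usage sketch (illustrative, not driver code): elsewhere in this file the
 * CG/PG helpers above are used as a gate/ungate pair, gated during late init
 * and ungated again before teardown or suspend:
 *
 *	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
 *	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
 *	...
 *	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
 *	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
 */
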
amdgpu_device_enable_mgpu_fan_boost(void)2622c349dbc7Sjsg static int amdgpu_device_enable_mgpu_fan_boost(void)
2623c349dbc7Sjsg {
2624c349dbc7Sjsg 	struct amdgpu_gpu_instance *gpu_ins;
2625c349dbc7Sjsg 	struct amdgpu_device *adev;
2626c349dbc7Sjsg 	int i, ret = 0;
2627c349dbc7Sjsg 
2628c349dbc7Sjsg 	mutex_lock(&mgpu_info.mutex);
2629c349dbc7Sjsg 
2630c349dbc7Sjsg 	/*
2631c349dbc7Sjsg 	 * MGPU fan boost feature should be enabled
2632c349dbc7Sjsg 	 * only when there are two or more dGPUs in
2633c349dbc7Sjsg 	 * the system
2634c349dbc7Sjsg 	 */
2635c349dbc7Sjsg 	if (mgpu_info.num_dgpu < 2)
2636c349dbc7Sjsg 		goto out;
2637c349dbc7Sjsg 
2638c349dbc7Sjsg 	for (i = 0; i < mgpu_info.num_dgpu; i++) {
2639c349dbc7Sjsg 		gpu_ins = &(mgpu_info.gpu_ins[i]);
2640c349dbc7Sjsg 		adev = gpu_ins->adev;
2641c349dbc7Sjsg 		if (!(adev->flags & AMD_IS_APU) &&
2642ad8b1aafSjsg 		    !gpu_ins->mgpu_fan_enabled) {
2643c349dbc7Sjsg 			ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2644c349dbc7Sjsg 			if (ret)
2645c349dbc7Sjsg 				break;
2646c349dbc7Sjsg 
2647c349dbc7Sjsg 			gpu_ins->mgpu_fan_enabled = 1;
2648c349dbc7Sjsg 		}
2649c349dbc7Sjsg 	}
2650c349dbc7Sjsg 
2651c349dbc7Sjsg out:
2652c349dbc7Sjsg 	mutex_unlock(&mgpu_info.mutex);
2653c349dbc7Sjsg 
2654c349dbc7Sjsg 	return ret;
2655c349dbc7Sjsg }
2656c349dbc7Sjsg 
2657fb4d8502Sjsg /**
2658fb4d8502Sjsg  * amdgpu_device_ip_late_init - run late init for hardware IPs
2659fb4d8502Sjsg  *
2660fb4d8502Sjsg  * @adev: amdgpu_device pointer
2661fb4d8502Sjsg  *
2662fb4d8502Sjsg  * Late initialization pass for hardware IPs.  The list of all the hardware
2663fb4d8502Sjsg  * IPs that make up the asic is walked and the late_init callbacks are run.
2664fb4d8502Sjsg  * late_init covers any special initialization that an IP requires
2665fb4d8502Sjsg  * after all of them have been initialized or something that needs to happen
2666fb4d8502Sjsg  * late in the init process.
2667fb4d8502Sjsg  * Returns 0 on success, negative error code on failure.
2668fb4d8502Sjsg  */
amdgpu_device_ip_late_init(struct amdgpu_device * adev)2669fb4d8502Sjsg static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2670fb4d8502Sjsg {
2671c349dbc7Sjsg 	struct amdgpu_gpu_instance *gpu_instance;
2672fb4d8502Sjsg 	int i = 0, r;
2673fb4d8502Sjsg 
2674fb4d8502Sjsg 	for (i = 0; i < adev->num_ip_blocks; i++) {
2675c349dbc7Sjsg 		if (!adev->ip_blocks[i].status.hw)
2676fb4d8502Sjsg 			continue;
2677fb4d8502Sjsg 		if (adev->ip_blocks[i].version->funcs->late_init) {
2678fb4d8502Sjsg 			r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2679fb4d8502Sjsg 			if (r) {
2680fb4d8502Sjsg 				DRM_ERROR("late_init of IP block <%s> failed %d\n",
2681fb4d8502Sjsg 					  adev->ip_blocks[i].version->funcs->name, r);
2682fb4d8502Sjsg 				return r;
2683fb4d8502Sjsg 			}
2684c349dbc7Sjsg 		}
2685fb4d8502Sjsg 		adev->ip_blocks[i].status.late_initialized = true;
2686fb4d8502Sjsg 	}
2687fb4d8502Sjsg 
26881bb76ff1Sjsg 	r = amdgpu_ras_late_init(adev);
26891bb76ff1Sjsg 	if (r) {
26901bb76ff1Sjsg 		DRM_ERROR("amdgpu_ras_late_init failed %d", r);
26911bb76ff1Sjsg 		return r;
26921bb76ff1Sjsg 	}
26931bb76ff1Sjsg 
2694ad8b1aafSjsg 	amdgpu_ras_set_error_query_ready(adev, true);
2695ad8b1aafSjsg 
2696c349dbc7Sjsg 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2697c349dbc7Sjsg 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
2698fb4d8502Sjsg 
2699fb4d8502Sjsg 	amdgpu_device_fill_reset_magic(adev);
2700fb4d8502Sjsg 
2701c349dbc7Sjsg 	r = amdgpu_device_enable_mgpu_fan_boost();
2702c349dbc7Sjsg 	if (r)
2703c349dbc7Sjsg 		DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2704c349dbc7Sjsg 
27051bb76ff1Sjsg 	/* For passthrough configuration on arcturus and aldebaran, enable special handling for SBR */
2706f005ef32Sjsg 	if (amdgpu_passthrough(adev) &&
2707f005ef32Sjsg 	    ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
27081bb76ff1Sjsg 	     adev->asic_type == CHIP_ALDEBARAN))
27091bb76ff1Sjsg 		amdgpu_dpm_handle_passthrough_sbr(adev, true);
2710c349dbc7Sjsg 
2711c349dbc7Sjsg 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
2712c349dbc7Sjsg 		mutex_lock(&mgpu_info.mutex);
2713c349dbc7Sjsg 
2714c349dbc7Sjsg 		/*
2715c349dbc7Sjsg 		 * Reset device p-state to low as this was booted with high.
2716c349dbc7Sjsg 		 *
2717c349dbc7Sjsg 		 * This should be performed only after all devices from the same
2718c349dbc7Sjsg 		 * hive get initialized.
2719c349dbc7Sjsg 		 *
2720c349dbc7Sjsg 		 * However, it's unknown in advance how many devices are in the hive,
2721c349dbc7Sjsg 		 * as this is counted one by one during device initialization.
2722c349dbc7Sjsg 		 *
2723c349dbc7Sjsg 		 * So, we wait for all XGMI interlinked devices initialized.
2724c349dbc7Sjsg 		 * This may bring some delays as those devices may come from
2725c349dbc7Sjsg 		 * different hives. But that should be OK.
2726c349dbc7Sjsg 		 */
2727c349dbc7Sjsg 		if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2728c349dbc7Sjsg 			for (i = 0; i < mgpu_info.num_gpu; i++) {
2729c349dbc7Sjsg 				gpu_instance = &(mgpu_info.gpu_ins[i]);
2730c349dbc7Sjsg 				if (gpu_instance->adev->flags & AMD_IS_APU)
2731c349dbc7Sjsg 					continue;
2732c349dbc7Sjsg 
2733ad8b1aafSjsg 				r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2734ad8b1aafSjsg 						AMDGPU_XGMI_PSTATE_MIN);
2735c349dbc7Sjsg 				if (r) {
2736c349dbc7Sjsg 					DRM_ERROR("pstate setting failed (%d).\n", r);
2737c349dbc7Sjsg 					break;
2738c349dbc7Sjsg 				}
2739c349dbc7Sjsg 			}
2740c349dbc7Sjsg 		}
2741c349dbc7Sjsg 
2742c349dbc7Sjsg 		mutex_unlock(&mgpu_info.mutex);
2743c349dbc7Sjsg 	}
2744c349dbc7Sjsg 
2745fb4d8502Sjsg 	return 0;
2746fb4d8502Sjsg }
2747fb4d8502Sjsg 
27481bb76ff1Sjsg /**
27491bb76ff1Sjsg  * amdgpu_device_smu_fini_early - smu hw_fini wrapper
27501bb76ff1Sjsg  *
27511bb76ff1Sjsg  * @adev: amdgpu_device pointer
27521bb76ff1Sjsg  *
27531bb76ff1Sjsg  * For ASICs that need to disable the SMC first
27541bb76ff1Sjsg  */
amdgpu_device_smu_fini_early(struct amdgpu_device * adev)27551bb76ff1Sjsg static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
27561bb76ff1Sjsg {
27571bb76ff1Sjsg 	int i, r;
27581bb76ff1Sjsg 
27591bb76ff1Sjsg 	if (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))
27601bb76ff1Sjsg 		return;
27611bb76ff1Sjsg 
27621bb76ff1Sjsg 	for (i = 0; i < adev->num_ip_blocks; i++) {
27631bb76ff1Sjsg 		if (!adev->ip_blocks[i].status.hw)
27641bb76ff1Sjsg 			continue;
27651bb76ff1Sjsg 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
27661bb76ff1Sjsg 			r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
27671bb76ff1Sjsg 			/* XXX handle errors */
27681bb76ff1Sjsg 			if (r) {
27691bb76ff1Sjsg 				DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
27701bb76ff1Sjsg 					  adev->ip_blocks[i].version->funcs->name, r);
27711bb76ff1Sjsg 			}
27721bb76ff1Sjsg 			adev->ip_blocks[i].status.hw = false;
27731bb76ff1Sjsg 			break;
27741bb76ff1Sjsg 		}
27751bb76ff1Sjsg 	}
27761bb76ff1Sjsg }
27771bb76ff1Sjsg 
amdgpu_device_ip_fini_early(struct amdgpu_device * adev)27785ca02815Sjsg static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
2779fb4d8502Sjsg {
2780fb4d8502Sjsg 	int i, r;
2781fb4d8502Sjsg 
27825ca02815Sjsg 	for (i = 0; i < adev->num_ip_blocks; i++) {
27835ca02815Sjsg 		if (!adev->ip_blocks[i].version->funcs->early_fini)
27845ca02815Sjsg 			continue;
2785ad8b1aafSjsg 
27865ca02815Sjsg 		r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
27875ca02815Sjsg 		if (r) {
27885ca02815Sjsg 			DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
27895ca02815Sjsg 				  adev->ip_blocks[i].version->funcs->name, r);
27905ca02815Sjsg 		}
27915ca02815Sjsg 	}
2792c349dbc7Sjsg 
2793c349dbc7Sjsg 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2794c349dbc7Sjsg 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2795c349dbc7Sjsg 
27961bb76ff1Sjsg 	amdgpu_amdkfd_suspend(adev, false);
27971bb76ff1Sjsg 
27981bb76ff1Sjsg 	/* Workaround for ASICs that need to disable the SMC first */
27991bb76ff1Sjsg 	amdgpu_device_smu_fini_early(adev);
2800fb4d8502Sjsg 
2801fb4d8502Sjsg 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2802fb4d8502Sjsg 		if (!adev->ip_blocks[i].status.hw)
2803fb4d8502Sjsg 			continue;
2804fb4d8502Sjsg 
2805fb4d8502Sjsg 		r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2806fb4d8502Sjsg 		/* XXX handle errors */
2807fb4d8502Sjsg 		if (r) {
2808fb4d8502Sjsg 			DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2809fb4d8502Sjsg 				  adev->ip_blocks[i].version->funcs->name, r);
2810fb4d8502Sjsg 		}
2811fb4d8502Sjsg 
2812fb4d8502Sjsg 		adev->ip_blocks[i].status.hw = false;
2813fb4d8502Sjsg 	}
2814fb4d8502Sjsg 
28155ca02815Sjsg 	if (amdgpu_sriov_vf(adev)) {
28165ca02815Sjsg 		if (amdgpu_virt_release_full_gpu(adev, false))
28175ca02815Sjsg 			DRM_ERROR("failed to release exclusive mode on fini\n");
28185ca02815Sjsg 	}
28195ca02815Sjsg 
28205ca02815Sjsg 	return 0;
28215ca02815Sjsg }
28225ca02815Sjsg 
28235ca02815Sjsg /**
28245ca02815Sjsg  * amdgpu_device_ip_fini - run fini for hardware IPs
28255ca02815Sjsg  *
28265ca02815Sjsg  * @adev: amdgpu_device pointer
28275ca02815Sjsg  *
28285ca02815Sjsg  * Main teardown pass for hardware IPs.  The list of all the hardware
28295ca02815Sjsg  * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
28305ca02815Sjsg  * are run.  hw_fini tears down the hardware associated with each IP
28315ca02815Sjsg  * and sw_fini tears down any software state associated with each IP.
28325ca02815Sjsg  * Returns 0 on success, negative error code on failure.
28335ca02815Sjsg  */
amdgpu_device_ip_fini(struct amdgpu_device * adev)28345ca02815Sjsg static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
28355ca02815Sjsg {
28365ca02815Sjsg 	int i, r;
28375ca02815Sjsg 
28385ca02815Sjsg 	if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
28395ca02815Sjsg 		amdgpu_virt_release_ras_err_handler_data(adev);
28405ca02815Sjsg 
28415ca02815Sjsg 	if (adev->gmc.xgmi.num_physical_nodes > 1)
28425ca02815Sjsg 		amdgpu_xgmi_remove_device(adev);
28435ca02815Sjsg 
28445ca02815Sjsg 	amdgpu_amdkfd_device_fini_sw(adev);
2845fb4d8502Sjsg 
2846fb4d8502Sjsg 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2847fb4d8502Sjsg 		if (!adev->ip_blocks[i].status.sw)
2848fb4d8502Sjsg 			continue;
2849fb4d8502Sjsg 
2850fb4d8502Sjsg 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2851c349dbc7Sjsg 			amdgpu_ucode_free_bo(adev);
2852c349dbc7Sjsg 			amdgpu_free_static_csa(&adev->virt.csa_obj);
2853fb4d8502Sjsg 			amdgpu_device_wb_fini(adev);
2854f005ef32Sjsg 			amdgpu_device_mem_scratch_fini(adev);
2855c349dbc7Sjsg 			amdgpu_ib_pool_fini(adev);
2856fb4d8502Sjsg 		}
2857fb4d8502Sjsg 
2858fb4d8502Sjsg 		r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
2859fb4d8502Sjsg 		/* XXX handle errors */
2860fb4d8502Sjsg 		if (r) {
2861fb4d8502Sjsg 			DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2862fb4d8502Sjsg 				  adev->ip_blocks[i].version->funcs->name, r);
2863fb4d8502Sjsg 		}
2864fb4d8502Sjsg 		adev->ip_blocks[i].status.sw = false;
2865fb4d8502Sjsg 		adev->ip_blocks[i].status.valid = false;
2866fb4d8502Sjsg 	}
2867fb4d8502Sjsg 
2868fb4d8502Sjsg 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2869fb4d8502Sjsg 		if (!adev->ip_blocks[i].status.late_initialized)
2870fb4d8502Sjsg 			continue;
2871fb4d8502Sjsg 		if (adev->ip_blocks[i].version->funcs->late_fini)
2872fb4d8502Sjsg 			adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2873fb4d8502Sjsg 		adev->ip_blocks[i].status.late_initialized = false;
2874fb4d8502Sjsg 	}
2875fb4d8502Sjsg 
2876c349dbc7Sjsg 	amdgpu_ras_fini(adev);
2877c349dbc7Sjsg 
2878fb4d8502Sjsg 	return 0;
2879fb4d8502Sjsg }
2880fb4d8502Sjsg 
2881fb4d8502Sjsg /**
2882c349dbc7Sjsg  * amdgpu_device_delayed_init_work_handler - work handler for IB tests
2883fb4d8502Sjsg  *
2884c349dbc7Sjsg  * @work: work_struct.
2885fb4d8502Sjsg  */
amdgpu_device_delayed_init_work_handler(struct work_struct * work)2886c349dbc7Sjsg static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2887fb4d8502Sjsg {
2888fb4d8502Sjsg 	struct amdgpu_device *adev =
2889c349dbc7Sjsg 		container_of(work, struct amdgpu_device, delayed_init_work.work);
2890fb4d8502Sjsg 	int r;
2891fb4d8502Sjsg 
2892fb4d8502Sjsg 	r = amdgpu_ib_ring_tests(adev);
2893fb4d8502Sjsg 	if (r)
2894fb4d8502Sjsg 		DRM_ERROR("ib ring test failed (%d).\n", r);
2895fb4d8502Sjsg }
2896fb4d8502Sjsg 
amdgpu_device_delay_enable_gfx_off(struct work_struct * work)2897c349dbc7Sjsg static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2898c349dbc7Sjsg {
2899c349dbc7Sjsg 	struct amdgpu_device *adev =
2900c349dbc7Sjsg 		container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2901c349dbc7Sjsg 
29028b172e32Sjsg 	WARN_ON_ONCE(adev->gfx.gfx_off_state);
29038b172e32Sjsg 	WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
29048b172e32Sjsg 
2905c349dbc7Sjsg 	if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2906c349dbc7Sjsg 		adev->gfx.gfx_off_state = true;
2907c349dbc7Sjsg }
2908c349dbc7Sjsg 
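/*
 * This delayed work is expected to be scheduled only once the last GFXOFF
 * disable request has been dropped (gfx_off_req_count reaching zero, which is
 * handled outside this file, e.g. by amdgpu_gfx_off_ctrl()); the WARN_ON_ONCE
 * checks above assert that state when the work actually runs.
 */
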
2909fb4d8502Sjsg /**
2910fb4d8502Sjsg  * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
2911fb4d8502Sjsg  *
2912fb4d8502Sjsg  * @adev: amdgpu_device pointer
2913fb4d8502Sjsg  *
2914fb4d8502Sjsg  * Main suspend function for hardware IPs.  The list of all the hardware
2915fb4d8502Sjsg  * IPs that make up the asic is walked, clockgating is disabled and the
2916fb4d8502Sjsg  * suspend callbacks are run.  suspend puts the hardware and software state
2917fb4d8502Sjsg  * in each IP into a state suitable for suspend.
2918fb4d8502Sjsg  * Returns 0 on success, negative error code on failure.
2919fb4d8502Sjsg  */
amdgpu_device_ip_suspend_phase1(struct amdgpu_device * adev)2920fb4d8502Sjsg static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2921fb4d8502Sjsg {
2922fb4d8502Sjsg 	int i, r;
2923fb4d8502Sjsg 
2924c349dbc7Sjsg 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2925c349dbc7Sjsg 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2926fb4d8502Sjsg 
29271bb76ff1Sjsg 	/*
29281bb76ff1Sjsg 	 * Per PMFW team's suggestion, the driver needs to handle gfxoff
29291bb76ff1Sjsg 	 * and df cstate features disablement for gpu reset (e.g. Mode1Reset)
29301bb76ff1Sjsg 	 * scenarios. Add the missing df cstate disablement here.
29311bb76ff1Sjsg 	 */
29321bb76ff1Sjsg 	if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
29331bb76ff1Sjsg 		dev_warn(adev->dev, "Failed to disallow df cstate");
29341bb76ff1Sjsg 
2935fb4d8502Sjsg 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2936fb4d8502Sjsg 		if (!adev->ip_blocks[i].status.valid)
2937fb4d8502Sjsg 			continue;
2938ad8b1aafSjsg 
2939fb4d8502Sjsg 		/* displays are handled separately */
2940ad8b1aafSjsg 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
2941ad8b1aafSjsg 			continue;
2942ad8b1aafSjsg 
2943fb4d8502Sjsg 		/* XXX handle errors */
2944fb4d8502Sjsg 		r = adev->ip_blocks[i].version->funcs->suspend(adev);
2945fb4d8502Sjsg 		/* XXX handle errors */
2946fb4d8502Sjsg 		if (r) {
2947fb4d8502Sjsg 			DRM_ERROR("suspend of IP block <%s> failed %d\n",
2948fb4d8502Sjsg 				  adev->ip_blocks[i].version->funcs->name, r);
2949c349dbc7Sjsg 			return r;
2950c349dbc7Sjsg 		}
2951ad8b1aafSjsg 
2952c349dbc7Sjsg 		adev->ip_blocks[i].status.hw = false;
2953fb4d8502Sjsg 	}
2954fb4d8502Sjsg 
2955fb4d8502Sjsg 	return 0;
2956fb4d8502Sjsg }
2957fb4d8502Sjsg 
2958fb4d8502Sjsg /**
2959fb4d8502Sjsg  * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2960fb4d8502Sjsg  *
2961fb4d8502Sjsg  * @adev: amdgpu_device pointer
2962fb4d8502Sjsg  *
2963fb4d8502Sjsg  * Main suspend function for hardware IPs.  The list of all the hardware
2964fb4d8502Sjsg  * IPs that make up the asic is walked, clockgating is disabled and the
2965fb4d8502Sjsg  * suspend callbacks are run.  suspend puts the hardware and software state
2966fb4d8502Sjsg  * in each IP into a state suitable for suspend.
2967fb4d8502Sjsg  * Returns 0 on success, negative error code on failure.
2968fb4d8502Sjsg  */
amdgpu_device_ip_suspend_phase2(struct amdgpu_device * adev)2969fb4d8502Sjsg static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
2970fb4d8502Sjsg {
2971fb4d8502Sjsg 	int i, r;
2972fb4d8502Sjsg 
29735ca02815Sjsg 	if (adev->in_s0ix)
29741bb76ff1Sjsg 		amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
29755ca02815Sjsg 
2976fb4d8502Sjsg 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2977fb4d8502Sjsg 		if (!adev->ip_blocks[i].status.valid)
2978fb4d8502Sjsg 			continue;
2979fb4d8502Sjsg 		/* displays are handled in phase1 */
2980fb4d8502Sjsg 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2981fb4d8502Sjsg 			continue;
2982c349dbc7Sjsg 		/* PSP lost connection when err_event_athub occurs */
2983c349dbc7Sjsg 		if (amdgpu_ras_intr_triggered() &&
2984c349dbc7Sjsg 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
2985c349dbc7Sjsg 			adev->ip_blocks[i].status.hw = false;
2986c349dbc7Sjsg 			continue;
2987fb4d8502Sjsg 		}
29885ca02815Sjsg 
29895ca02815Sjsg 		/* skip unnecessary suspend if we have not initialized them yet */
29905ca02815Sjsg 		if (adev->gmc.xgmi.pending_reset &&
29915ca02815Sjsg 		    !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
29925ca02815Sjsg 		      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
29935ca02815Sjsg 		      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
29945ca02815Sjsg 		      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
29955ca02815Sjsg 			adev->ip_blocks[i].status.hw = false;
29965ca02815Sjsg 			continue;
29975ca02815Sjsg 		}
29985ca02815Sjsg 
29997a1b9fa9Sjsg 		/* skip suspend of gfx/mes and psp for S0ix
30005ca02815Sjsg 		 * gfx is in gfxoff state, so on resume it will exit gfxoff just
30015ca02815Sjsg 		 * like at runtime. PSP is also part of the always on hardware
30025ca02815Sjsg 		 * so no need to suspend it.
30035ca02815Sjsg 		 */
30045ca02815Sjsg 		if (adev->in_s0ix &&
30055ca02815Sjsg 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
30067a1b9fa9Sjsg 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
30077a1b9fa9Sjsg 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
30085ca02815Sjsg 			continue;
30095ca02815Sjsg 
3010955b8040Sjsg 		/* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
3011955b8040Sjsg 		if (adev->in_s0ix &&
3012955b8040Sjsg 		    (adev->ip_versions[SDMA0_HWIP][0] >= IP_VERSION(5, 0, 0)) &&
3013955b8040Sjsg 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
3014955b8040Sjsg 			continue;
3015955b8040Sjsg 
3016582979dcSjsg 		/* During cold boot, swPSP provides the IMU and RLC FW binaries to TOS.
3017582979dcSjsg 		 * These are in TMR, hence are expected to be reused by PSP-TOS to reload
3018582979dcSjsg 		 * from this location, and RLC autoload automatically also gets loaded
3019582979dcSjsg 		 * from here based on the PMFW -> PSP message during the re-init sequence.
3020582979dcSjsg 		 * Therefore, the psp suspend & resume should be skipped to avoid destroying
3021582979dcSjsg 		 * the TMR and reloading FWs again for IMU enabled APU ASICs.
3022582979dcSjsg 		 */
3023582979dcSjsg 		if (amdgpu_in_reset(adev) &&
3024582979dcSjsg 		    (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
3025582979dcSjsg 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3026582979dcSjsg 			continue;
3027582979dcSjsg 
3028fb4d8502Sjsg 		/* XXX handle errors */
3029fb4d8502Sjsg 		r = adev->ip_blocks[i].version->funcs->suspend(adev);
3030fb4d8502Sjsg 		/* XXX handle errors */
3031fb4d8502Sjsg 		if (r) {
3032fb4d8502Sjsg 			DRM_ERROR("suspend of IP block <%s> failed %d\n",
3033fb4d8502Sjsg 				  adev->ip_blocks[i].version->funcs->name, r);
3034fb4d8502Sjsg 		}
3035c349dbc7Sjsg 		adev->ip_blocks[i].status.hw = false;
3036c349dbc7Sjsg 		/* handle putting the SMC in the appropriate state */
3037c349dbc7Sjsg 		if (!amdgpu_sriov_vf(adev)) {
3038c349dbc7Sjsg 			if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3039c349dbc7Sjsg 				r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
3040c349dbc7Sjsg 				if (r) {
3041c349dbc7Sjsg 					DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
3042c349dbc7Sjsg 							adev->mp1_state, r);
3043c349dbc7Sjsg 					return r;
3044fb4d8502Sjsg 				}
3045c349dbc7Sjsg 			}
3046c349dbc7Sjsg 		}
3047c349dbc7Sjsg 	}
3048fb4d8502Sjsg 
3049fb4d8502Sjsg 	return 0;
3050fb4d8502Sjsg }
3051fb4d8502Sjsg 
3052fb4d8502Sjsg /**
3053fb4d8502Sjsg  * amdgpu_device_ip_suspend - run suspend for hardware IPs
3054fb4d8502Sjsg  *
3055fb4d8502Sjsg  * @adev: amdgpu_device pointer
3056fb4d8502Sjsg  *
3057fb4d8502Sjsg  * Main suspend function for hardware IPs.  The list of all the hardware
3058fb4d8502Sjsg  * IPs that make up the asic is walked, clockgating is disabled and the
3059fb4d8502Sjsg  * suspend callbacks are run.  suspend puts the hardware and software state
3060fb4d8502Sjsg  * in each IP into a state suitable for suspend.
3061fb4d8502Sjsg  * Returns 0 on success, negative error code on failure.
3062fb4d8502Sjsg  */
amdgpu_device_ip_suspend(struct amdgpu_device * adev)3063fb4d8502Sjsg int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3064fb4d8502Sjsg {
3065fb4d8502Sjsg 	int r;
3066fb4d8502Sjsg 
30675ca02815Sjsg 	if (amdgpu_sriov_vf(adev)) {
30685ca02815Sjsg 		amdgpu_virt_fini_data_exchange(adev);
3069c349dbc7Sjsg 		amdgpu_virt_request_full_gpu(adev, false);
30705ca02815Sjsg 	}
3071c349dbc7Sjsg 
3072fb4d8502Sjsg 	r = amdgpu_device_ip_suspend_phase1(adev);
3073fb4d8502Sjsg 	if (r)
3074fb4d8502Sjsg 		return r;
3075fb4d8502Sjsg 	r = amdgpu_device_ip_suspend_phase2(adev);
3076fb4d8502Sjsg 
3077c349dbc7Sjsg 	if (amdgpu_sriov_vf(adev))
3078c349dbc7Sjsg 		amdgpu_virt_release_full_gpu(adev, false);
3079c349dbc7Sjsg 
3080fb4d8502Sjsg 	return r;
3081fb4d8502Sjsg }
3082fb4d8502Sjsg 
amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device * adev)3083fb4d8502Sjsg static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
3084fb4d8502Sjsg {
3085fb4d8502Sjsg 	int i, r;
3086fb4d8502Sjsg 
3087fb4d8502Sjsg 	static enum amd_ip_block_type ip_order[] = {
3088fb4d8502Sjsg 		AMD_IP_BLOCK_TYPE_COMMON,
3089bdc47e44Sjsg 		AMD_IP_BLOCK_TYPE_GMC,
3090fb4d8502Sjsg 		AMD_IP_BLOCK_TYPE_PSP,
3091fb4d8502Sjsg 		AMD_IP_BLOCK_TYPE_IH,
3092fb4d8502Sjsg 	};
3093fb4d8502Sjsg 
3094a30ccb29Sjsg 	for (i = 0; i < adev->num_ip_blocks; i++) {
3095fb4d8502Sjsg 		int j;
3096fb4d8502Sjsg 		struct amdgpu_ip_block *block;
3097fb4d8502Sjsg 
3098ad8b1aafSjsg 		block = &adev->ip_blocks[i];
3099c349dbc7Sjsg 		block->status.hw = false;
3100ad8b1aafSjsg 
3101ad8b1aafSjsg 		for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
3102ad8b1aafSjsg 
3103ad8b1aafSjsg 			if (block->version->type != ip_order[j] ||
3104fb4d8502Sjsg 				!block->status.valid)
3105fb4d8502Sjsg 				continue;
3106fb4d8502Sjsg 
3107fb4d8502Sjsg 			r = block->version->funcs->hw_init(adev);
3108c349dbc7Sjsg 			DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
3109fb4d8502Sjsg 			if (r)
3110fb4d8502Sjsg 				return r;
3111c349dbc7Sjsg 			block->status.hw = true;
3112fb4d8502Sjsg 		}
3113fb4d8502Sjsg 	}
3114fb4d8502Sjsg 
3115fb4d8502Sjsg 	return 0;
3116fb4d8502Sjsg }
3117fb4d8502Sjsg 
amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device * adev)3118fb4d8502Sjsg static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
3119fb4d8502Sjsg {
3120fb4d8502Sjsg 	int i, r;
3121fb4d8502Sjsg 
3122fb4d8502Sjsg 	static enum amd_ip_block_type ip_order[] = {
3123fb4d8502Sjsg 		AMD_IP_BLOCK_TYPE_SMC,
3124fb4d8502Sjsg 		AMD_IP_BLOCK_TYPE_DCE,
3125fb4d8502Sjsg 		AMD_IP_BLOCK_TYPE_GFX,
3126fb4d8502Sjsg 		AMD_IP_BLOCK_TYPE_SDMA,
3127f005ef32Sjsg 		AMD_IP_BLOCK_TYPE_MES,
3128fb4d8502Sjsg 		AMD_IP_BLOCK_TYPE_UVD,
3129c349dbc7Sjsg 		AMD_IP_BLOCK_TYPE_VCE,
3130f005ef32Sjsg 		AMD_IP_BLOCK_TYPE_VCN,
3131f005ef32Sjsg 		AMD_IP_BLOCK_TYPE_JPEG
3132fb4d8502Sjsg 	};
3133fb4d8502Sjsg 
3134fb4d8502Sjsg 	for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3135fb4d8502Sjsg 		int j;
3136fb4d8502Sjsg 		struct amdgpu_ip_block *block;
3137fb4d8502Sjsg 
3138fb4d8502Sjsg 		for (j = 0; j < adev->num_ip_blocks; j++) {
3139fb4d8502Sjsg 			block = &adev->ip_blocks[j];
3140fb4d8502Sjsg 
3141fb4d8502Sjsg 			if (block->version->type != ip_order[i] ||
3142c349dbc7Sjsg 				!block->status.valid ||
3143c349dbc7Sjsg 				block->status.hw)
3144fb4d8502Sjsg 				continue;
3145fb4d8502Sjsg 
3146c349dbc7Sjsg 			if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
3147c349dbc7Sjsg 				r = block->version->funcs->resume(adev);
3148c349dbc7Sjsg 			else
3149fb4d8502Sjsg 				r = block->version->funcs->hw_init(adev);
3150c349dbc7Sjsg 
3151c349dbc7Sjsg 			DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
3152fb4d8502Sjsg 			if (r)
3153fb4d8502Sjsg 				return r;
3154c349dbc7Sjsg 			block->status.hw = true;
3155fb4d8502Sjsg 		}
3156fb4d8502Sjsg 	}
3157fb4d8502Sjsg 
3158fb4d8502Sjsg 	return 0;
3159fb4d8502Sjsg }
3160fb4d8502Sjsg 
3161fb4d8502Sjsg /**
3162fb4d8502Sjsg  * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3163fb4d8502Sjsg  *
3164fb4d8502Sjsg  * @adev: amdgpu_device pointer
3165fb4d8502Sjsg  *
3166fb4d8502Sjsg  * First resume function for hardware IPs.  The list of all the hardware
3167fb4d8502Sjsg  * IPs that make up the asic is walked and the resume callbacks are run for
3168fb4d8502Sjsg  * COMMON, GMC, and IH.  resume puts the hardware into a functional state
3169fb4d8502Sjsg  * after a suspend and updates the software state as necessary.  This
3170fb4d8502Sjsg  * function is also used for restoring the GPU after a GPU reset.
3171fb4d8502Sjsg  * Returns 0 on success, negative error code on failure.
3172fb4d8502Sjsg  */
amdgpu_device_ip_resume_phase1(struct amdgpu_device * adev)3173fb4d8502Sjsg static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
3174fb4d8502Sjsg {
3175fb4d8502Sjsg 	int i, r;
3176fb4d8502Sjsg 
3177fb4d8502Sjsg 	for (i = 0; i < adev->num_ip_blocks; i++) {
3178c349dbc7Sjsg 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3179fb4d8502Sjsg 			continue;
3180fb4d8502Sjsg 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3181fb4d8502Sjsg 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
31821bb76ff1Sjsg 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
31831bb76ff1Sjsg 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
3184c349dbc7Sjsg 
3185fb4d8502Sjsg 			r = adev->ip_blocks[i].version->funcs->resume(adev);
3186fb4d8502Sjsg 			if (r) {
3187fb4d8502Sjsg 				DRM_ERROR("resume of IP block <%s> failed %d\n",
3188fb4d8502Sjsg 					  adev->ip_blocks[i].version->funcs->name, r);
3189fb4d8502Sjsg 				return r;
3190fb4d8502Sjsg 			}
3191c349dbc7Sjsg 			adev->ip_blocks[i].status.hw = true;
3192fb4d8502Sjsg 		}
3193fb4d8502Sjsg 	}
3194fb4d8502Sjsg 
3195fb4d8502Sjsg 	return 0;
3196fb4d8502Sjsg }
3197fb4d8502Sjsg 
3198fb4d8502Sjsg /**
3199fb4d8502Sjsg  * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3200fb4d8502Sjsg  *
3201fb4d8502Sjsg  * @adev: amdgpu_device pointer
3202fb4d8502Sjsg  *
3203fb4d8502Sjsg  * Second resume function for hardware IPs.  The list of all the hardware
3204fb4d8502Sjsg  * IPs that make up the asic is walked and the resume callbacks are run for
3205fb4d8502Sjsg  * all blocks except COMMON, GMC, and IH.  resume puts the hardware into a
3206fb4d8502Sjsg  * functional state after a suspend and updates the software state as
3207fb4d8502Sjsg  * necessary.  This function is also used for restoring the GPU after a GPU
3208fb4d8502Sjsg  * reset.
3209fb4d8502Sjsg  * Returns 0 on success, negative error code on failure.
3210fb4d8502Sjsg  */
amdgpu_device_ip_resume_phase2(struct amdgpu_device * adev)3211fb4d8502Sjsg static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
3212fb4d8502Sjsg {
3213fb4d8502Sjsg 	int i, r;
3214fb4d8502Sjsg 
3215fb4d8502Sjsg 	for (i = 0; i < adev->num_ip_blocks; i++) {
3216c349dbc7Sjsg 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3217fb4d8502Sjsg 			continue;
3218fb4d8502Sjsg 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3219fb4d8502Sjsg 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3220c349dbc7Sjsg 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3221c349dbc7Sjsg 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3222fb4d8502Sjsg 			continue;
3223fb4d8502Sjsg 		r = adev->ip_blocks[i].version->funcs->resume(adev);
3224fb4d8502Sjsg 		if (r) {
3225fb4d8502Sjsg 			DRM_ERROR("resume of IP block <%s> failed %d\n",
3226fb4d8502Sjsg 				  adev->ip_blocks[i].version->funcs->name, r);
3227fb4d8502Sjsg 			return r;
3228fb4d8502Sjsg 		}
3229c349dbc7Sjsg 		adev->ip_blocks[i].status.hw = true;
3230fb4d8502Sjsg 	}
3231fb4d8502Sjsg 
3232fb4d8502Sjsg 	return 0;
3233fb4d8502Sjsg }
3234fb4d8502Sjsg 
3235fb4d8502Sjsg /**
3236fb4d8502Sjsg  * amdgpu_device_ip_resume - run resume for hardware IPs
3237fb4d8502Sjsg  *
3238fb4d8502Sjsg  * @adev: amdgpu_device pointer
3239fb4d8502Sjsg  *
3240fb4d8502Sjsg  * Main resume function for hardware IPs.  The hardware IPs
3241fb4d8502Sjsg  * are split into two resume functions because they are
3242f005ef32Sjsg  * also used in recovering from a GPU reset and some additional
3243fb4d8502Sjsg  * steps need to be taken between them.  In this case (S3/S4) they are
3244fb4d8502Sjsg  * run sequentially.
3245fb4d8502Sjsg  * Returns 0 on success, negative error code on failure.
3246fb4d8502Sjsg  */
amdgpu_device_ip_resume(struct amdgpu_device * adev)3247fb4d8502Sjsg static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
3248fb4d8502Sjsg {
3249fb4d8502Sjsg 	int r;
3250fb4d8502Sjsg 
3251fb4d8502Sjsg 	r = amdgpu_device_ip_resume_phase1(adev);
3252fb4d8502Sjsg 	if (r)
3253fb4d8502Sjsg 		return r;
3254c349dbc7Sjsg 
3255c349dbc7Sjsg 	r = amdgpu_device_fw_loading(adev);
3256c349dbc7Sjsg 	if (r)
3257c349dbc7Sjsg 		return r;
3258c349dbc7Sjsg 
3259fb4d8502Sjsg 	r = amdgpu_device_ip_resume_phase2(adev);
3260fb4d8502Sjsg 
3261fb4d8502Sjsg 	return r;
3262fb4d8502Sjsg }
3263fb4d8502Sjsg 
3264fb4d8502Sjsg /**
3265fb4d8502Sjsg  * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3266fb4d8502Sjsg  *
3267fb4d8502Sjsg  * @adev: amdgpu_device pointer
3268fb4d8502Sjsg  *
3269fb4d8502Sjsg  * Query the VBIOS data tables to determine if the board supports SR-IOV.
3270fb4d8502Sjsg  */
amdgpu_device_detect_sriov_bios(struct amdgpu_device * adev)3271fb4d8502Sjsg static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
3272fb4d8502Sjsg {
3273fb4d8502Sjsg 	if (amdgpu_sriov_vf(adev)) {
3274fb4d8502Sjsg 		if (adev->is_atom_fw) {
32755ca02815Sjsg 			if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
3276fb4d8502Sjsg 				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3277fb4d8502Sjsg 		} else {
3278fb4d8502Sjsg 			if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3279fb4d8502Sjsg 				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3280fb4d8502Sjsg 		}
3281fb4d8502Sjsg 
3282fb4d8502Sjsg 		if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3283fb4d8502Sjsg 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
3284fb4d8502Sjsg 	}
3285fb4d8502Sjsg }
3286fb4d8502Sjsg 
3287fb4d8502Sjsg /**
3288fb4d8502Sjsg  * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3289fb4d8502Sjsg  *
3290fb4d8502Sjsg  * @asic_type: AMD asic type
3291fb4d8502Sjsg  *
3292fb4d8502Sjsg  * Check if there is DC (new modesetting infrastructure) support for an asic.
3293fb4d8502Sjsg  * returns true if DC has support, false if not.
3294fb4d8502Sjsg  */
amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)3295fb4d8502Sjsg bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
3296fb4d8502Sjsg {
3297fb4d8502Sjsg 	switch (asic_type) {
32981bb76ff1Sjsg #ifdef CONFIG_DRM_AMDGPU_SI
32991bb76ff1Sjsg 	case CHIP_HAINAN:
33001bb76ff1Sjsg #endif
33011bb76ff1Sjsg 	case CHIP_TOPAZ:
33021bb76ff1Sjsg 		/* chips with no display hardware */
33031bb76ff1Sjsg 		return false;
3304fb4d8502Sjsg #if defined(CONFIG_DRM_AMD_DC)
3305ad8b1aafSjsg 	case CHIP_TAHITI:
3306ad8b1aafSjsg 	case CHIP_PITCAIRN:
3307ad8b1aafSjsg 	case CHIP_VERDE:
3308ad8b1aafSjsg 	case CHIP_OLAND:
3309fb4d8502Sjsg 		/*
3310fb4d8502Sjsg 		 * We have systems in the wild with these ASICs that require
3311fb4d8502Sjsg 		 * LVDS and VGA support which is not supported with DC.
3312fb4d8502Sjsg 		 *
3313fb4d8502Sjsg 		 * Fallback to the non-DC driver here by default so as not to
3314fb4d8502Sjsg 		 * cause regressions.
3315fb4d8502Sjsg 		 */
33161bb76ff1Sjsg #if defined(CONFIG_DRM_AMD_DC_SI)
3317fb4d8502Sjsg 		return amdgpu_dc > 0;
33181bb76ff1Sjsg #else
33191bb76ff1Sjsg 		return false;
3320ad8b1aafSjsg #endif
33211bb76ff1Sjsg 	case CHIP_BONAIRE:
33221bb76ff1Sjsg 	case CHIP_KAVERI:
33231bb76ff1Sjsg 	case CHIP_KABINI:
33241bb76ff1Sjsg 	case CHIP_MULLINS:
33251bb76ff1Sjsg 		/*
33261bb76ff1Sjsg 		 * We have systems in the wild with these ASICs that require
33271bb76ff1Sjsg 		 * VGA support which is not supported with DC.
33281bb76ff1Sjsg 		 *
33291bb76ff1Sjsg 		 * Fallback to the non-DC driver here by default so as not to
33301bb76ff1Sjsg 		 * cause regressions.
33311bb76ff1Sjsg 		 */
33321bb76ff1Sjsg 		return amdgpu_dc > 0;
33331bb76ff1Sjsg 	default:
3334fb4d8502Sjsg 		return amdgpu_dc != 0;
33351bb76ff1Sjsg #else
3336fb4d8502Sjsg 	default:
3337c349dbc7Sjsg 		if (amdgpu_dc > 0)
3338f005ef32Sjsg 			DRM_INFO_ONCE("Display Core has been requested via kernel parameter but isn't supported by ASIC, ignoring\n");
3339fb4d8502Sjsg 		return false;
33401bb76ff1Sjsg #endif
3341fb4d8502Sjsg 	}
3342fb4d8502Sjsg }
3343fb4d8502Sjsg 
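/*
 * Example (illustrative): the checks above key off the amdgpu_dc module
 * parameter, so on the legacy-display ASICs listed above DC can be opted
 * into explicitly, e.g. by booting with amdgpu.dc=1, while amdgpu.dc=0
 * keeps the non-DC display code paths.
 */
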
3344fb4d8502Sjsg /**
3345fb4d8502Sjsg  * amdgpu_device_has_dc_support - check if dc is supported
3346fb4d8502Sjsg  *
3347ad8b1aafSjsg  * @adev: amdgpu_device pointer
3348fb4d8502Sjsg  *
3349fb4d8502Sjsg  * Returns true for supported, false for not supported
3350fb4d8502Sjsg  */
amdgpu_device_has_dc_support(struct amdgpu_device * adev)3351fb4d8502Sjsg bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3352fb4d8502Sjsg {
3353f005ef32Sjsg 	if (adev->enable_virtual_display ||
33545ca02815Sjsg 	    (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
3355fb4d8502Sjsg 		return false;
3356fb4d8502Sjsg 
3357fb4d8502Sjsg 	return amdgpu_device_asic_has_dc_support(adev->asic_type);
3358fb4d8502Sjsg }
3359fb4d8502Sjsg 
amdgpu_device_xgmi_reset_func(struct work_struct * __work)3360c349dbc7Sjsg static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3361c349dbc7Sjsg {
3362c349dbc7Sjsg 	struct amdgpu_device *adev =
3363c349dbc7Sjsg 		container_of(__work, struct amdgpu_device, xgmi_reset_work);
3364ad8b1aafSjsg 	struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
3365c349dbc7Sjsg 
3366c349dbc7Sjsg 	/* It's a bug to not have a hive within this function */
3367c349dbc7Sjsg 	if (WARN_ON(!hive))
3368c349dbc7Sjsg 		return;
3369c349dbc7Sjsg 
3370c349dbc7Sjsg 	/*
3371c349dbc7Sjsg 	 * Use task barrier to synchronize all xgmi reset works across the
3372c349dbc7Sjsg 	 * hive. task_barrier_enter and task_barrier_exit will block
3373c349dbc7Sjsg 	 * until all the threads running the xgmi reset works reach
3374c349dbc7Sjsg 	 * those points. task_barrier_full will do both blocks.
3375c349dbc7Sjsg 	 */
3376c349dbc7Sjsg 	if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3377c349dbc7Sjsg 
3378c349dbc7Sjsg 		task_barrier_enter(&hive->tb);
3379ad8b1aafSjsg 		adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
3380c349dbc7Sjsg 
3381c349dbc7Sjsg 		if (adev->asic_reset_res)
3382c349dbc7Sjsg 			goto fail;
3383c349dbc7Sjsg 
3384c349dbc7Sjsg 		task_barrier_exit(&hive->tb);
3385ad8b1aafSjsg 		adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
3386c349dbc7Sjsg 
3387c349dbc7Sjsg 		if (adev->asic_reset_res)
3388c349dbc7Sjsg 			goto fail;
3389c349dbc7Sjsg 
33901bb76ff1Sjsg 		if (adev->mmhub.ras && adev->mmhub.ras->ras_block.hw_ops &&
33911bb76ff1Sjsg 		    adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
33921bb76ff1Sjsg 			adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(adev);
3393c349dbc7Sjsg 	} else {
3394c349dbc7Sjsg 
3395c349dbc7Sjsg 		task_barrier_full(&hive->tb);
3396c349dbc7Sjsg 		adev->asic_reset_res =  amdgpu_asic_reset(adev);
3397c349dbc7Sjsg 	}
3398c349dbc7Sjsg 
3399c349dbc7Sjsg fail:
3400c349dbc7Sjsg 	if (adev->asic_reset_res)
3401c349dbc7Sjsg 		DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
3402ad8b1aafSjsg 			 adev->asic_reset_res, adev_to_drm(adev)->unique);
3403ad8b1aafSjsg 	amdgpu_put_xgmi_hive(hive);
3404c349dbc7Sjsg }
3405c349dbc7Sjsg 
amdgpu_device_get_job_timeout_settings(struct amdgpu_device * adev)3406c349dbc7Sjsg static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3407c349dbc7Sjsg {
3408c349dbc7Sjsg 	char *input = amdgpu_lockup_timeout;
3409c349dbc7Sjsg 	char *timeout_setting = NULL;
3410c349dbc7Sjsg 	int index = 0;
3411c349dbc7Sjsg 	long timeout;
3412c349dbc7Sjsg 	int ret = 0;
3413c349dbc7Sjsg 
3414c349dbc7Sjsg 	/*
34155ca02815Sjsg 	 * By default the timeout for non-compute jobs is 10000 ms
34165ca02815Sjsg 	 * and 60000 ms for compute jobs.
3417c349dbc7Sjsg 	 * In SR-IOV or passthrough mode, the timeout for compute
3418ad8b1aafSjsg 	 * jobs is 60000 ms by default.
3419c349dbc7Sjsg 	 */
3420c349dbc7Sjsg 	adev->gfx_timeout = msecs_to_jiffies(10000);
3421c349dbc7Sjsg 	adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
34225ca02815Sjsg 	if (amdgpu_sriov_vf(adev))
34235ca02815Sjsg 		adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
34245ca02815Sjsg 					msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
3425c349dbc7Sjsg 	else
34265ca02815Sjsg 		adev->compute_timeout =  msecs_to_jiffies(60000);
3427c349dbc7Sjsg 
3428c349dbc7Sjsg #ifdef notyet
3429c349dbc7Sjsg 	if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
3430c349dbc7Sjsg 		while ((timeout_setting = strsep(&input, ",")) &&
3431c349dbc7Sjsg 				strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
3432c349dbc7Sjsg 			ret = kstrtol(timeout_setting, 0, &timeout);
3433c349dbc7Sjsg 			if (ret)
3434c349dbc7Sjsg 				return ret;
3435c349dbc7Sjsg 
3436c349dbc7Sjsg 			if (timeout == 0) {
3437c349dbc7Sjsg 				index++;
3438c349dbc7Sjsg 				continue;
3439c349dbc7Sjsg 			} else if (timeout < 0) {
3440c349dbc7Sjsg 				timeout = MAX_SCHEDULE_TIMEOUT;
34411bb76ff1Sjsg 				dev_warn(adev->dev, "lockup timeout disabled");
34421bb76ff1Sjsg 				add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
3443c349dbc7Sjsg 			} else {
3444c349dbc7Sjsg 				timeout = msecs_to_jiffies(timeout);
3445c349dbc7Sjsg 			}
3446c349dbc7Sjsg 
3447c349dbc7Sjsg 			switch (index++) {
3448c349dbc7Sjsg 			case 0:
3449c349dbc7Sjsg 				adev->gfx_timeout = timeout;
3450c349dbc7Sjsg 				break;
3451c349dbc7Sjsg 			case 1:
3452c349dbc7Sjsg 				adev->compute_timeout = timeout;
3453c349dbc7Sjsg 				break;
3454c349dbc7Sjsg 			case 2:
3455c349dbc7Sjsg 				adev->sdma_timeout = timeout;
3456c349dbc7Sjsg 				break;
3457c349dbc7Sjsg 			case 3:
3458c349dbc7Sjsg 				adev->video_timeout = timeout;
3459c349dbc7Sjsg 				break;
3460c349dbc7Sjsg 			default:
3461c349dbc7Sjsg 				break;
3462c349dbc7Sjsg 			}
3463c349dbc7Sjsg 		}
3464c349dbc7Sjsg 		/*
3465c349dbc7Sjsg 		 * There is only one value specified and
3466c349dbc7Sjsg 		 * it should apply to all non-compute jobs.
3467c349dbc7Sjsg 		 */
3468c349dbc7Sjsg 		if (index == 1) {
3469c349dbc7Sjsg 			adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
3470c349dbc7Sjsg 			if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3471c349dbc7Sjsg 				adev->compute_timeout = adev->gfx_timeout;
3472c349dbc7Sjsg 		}
3473c349dbc7Sjsg 	}
3474c349dbc7Sjsg #endif
3475c349dbc7Sjsg 
3476c349dbc7Sjsg 	return ret;
3477c349dbc7Sjsg }
3478c349dbc7Sjsg 
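/*
 * Example (matching the parsing above): amdgpu_lockup_timeout accepts up to
 * four comma-separated values in milliseconds, applied in the order gfx,
 * compute, sdma, video; 0 keeps the default and a negative value disables
 * the timeout, e.g.:
 *
 *	amdgpu.lockup_timeout=10000,60000,10000,10000
 */
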
34791bb76ff1Sjsg /**
34801bb76ff1Sjsg  * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
34811bb76ff1Sjsg  *
34821bb76ff1Sjsg  * @adev: amdgpu_device pointer
34831bb76ff1Sjsg  *
34841bb76ff1Sjsg  * RAM is direct mapped to the GPU if the IOMMU is not enabled or is in pass-through mode
34851bb76ff1Sjsg  */
amdgpu_device_check_iommu_direct_map(struct amdgpu_device * adev)34861bb76ff1Sjsg static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
34871bb76ff1Sjsg {
34881bb76ff1Sjsg #ifdef notyet
34891bb76ff1Sjsg 	struct iommu_domain *domain;
34901bb76ff1Sjsg 
34911bb76ff1Sjsg 	domain = iommu_get_domain_for_dev(adev->dev);
34921bb76ff1Sjsg 	if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
34931bb76ff1Sjsg #endif
34941bb76ff1Sjsg 		adev->ram_is_direct_mapped = true;
34951bb76ff1Sjsg }
34961bb76ff1Sjsg 
3497ad8b1aafSjsg static const struct attribute *amdgpu_dev_attributes[] = {
3498ad8b1aafSjsg 	&dev_attr_pcie_replay_count.attr,
3499ad8b1aafSjsg 	NULL
3500ad8b1aafSjsg };
3501ad8b1aafSjsg 
amdgpu_device_set_mcbp(struct amdgpu_device * adev)3502f005ef32Sjsg static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
3503f005ef32Sjsg {
3504f005ef32Sjsg 	if (amdgpu_mcbp == 1)
3505f005ef32Sjsg 		adev->gfx.mcbp = true;
3506f005ef32Sjsg 	else if (amdgpu_mcbp == 0)
3507f005ef32Sjsg 		adev->gfx.mcbp = false;
3508f005ef32Sjsg 
3509f005ef32Sjsg 	if (amdgpu_sriov_vf(adev))
3510f005ef32Sjsg 		adev->gfx.mcbp = true;
3511f005ef32Sjsg 
3512f005ef32Sjsg 	if (adev->gfx.mcbp)
3513f005ef32Sjsg 		DRM_INFO("MCBP is enabled\n");
3514f005ef32Sjsg }
3515f005ef32Sjsg 
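/*
 * Summary of the policy above (illustrative): the amdgpu_mcbp module
 * parameter forces mid-command-buffer preemption on (1) or off (0), any
 * other value leaves the existing default untouched, and SR-IOV VFs always
 * enable it; e.g. booting with amdgpu.mcbp=1 forces it on.
 */
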
3516fb4d8502Sjsg /**
3517fb4d8502Sjsg  * amdgpu_device_init - initialize the driver
3518fb4d8502Sjsg  *
3519fb4d8502Sjsg  * @adev: amdgpu_device pointer
3520fb4d8502Sjsg  * @flags: driver flags
3521fb4d8502Sjsg  *
3522fb4d8502Sjsg  * Initializes the driver info and hw (all asics).
3523fb4d8502Sjsg  * Returns 0 for success or an error on failure.
3524fb4d8502Sjsg  * Called at driver startup.
3525fb4d8502Sjsg  */
amdgpu_device_init(struct amdgpu_device * adev,uint32_t flags)3526fb4d8502Sjsg int amdgpu_device_init(struct amdgpu_device *adev,
3527fb4d8502Sjsg 		       uint32_t flags)
3528fb4d8502Sjsg {
3529ad8b1aafSjsg 	struct drm_device *ddev = adev_to_drm(adev);
3530ad8b1aafSjsg 	struct pci_dev *pdev = adev->pdev;
3531fb4d8502Sjsg 	int r, i;
35325ca02815Sjsg 	bool px = false;
3533fb4d8502Sjsg 	u32 max_MBps;
35343abae83eSjsg 	int tmp;
3535fb4d8502Sjsg 
3536fb4d8502Sjsg 	adev->shutdown = false;
3537fb4d8502Sjsg 	adev->flags = flags;
3538c349dbc7Sjsg 
3539c349dbc7Sjsg 	if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3540c349dbc7Sjsg 		adev->asic_type = amdgpu_force_asic_type;
3541c349dbc7Sjsg 	else
3542fb4d8502Sjsg 		adev->asic_type = flags & AMD_ASIC_MASK;
3543c349dbc7Sjsg 
3544fb4d8502Sjsg 	adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
3545fb4d8502Sjsg 	if (amdgpu_emu_mode == 1)
3546c349dbc7Sjsg 		adev->usec_timeout *= 10;
3547fb4d8502Sjsg 	adev->gmc.gart_size = 512 * 1024 * 1024;
3548fb4d8502Sjsg 	adev->accel_working = false;
3549fb4d8502Sjsg 	adev->num_rings = 0;
35501bb76ff1Sjsg 	RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
3551fb4d8502Sjsg 	adev->mman.buffer_funcs = NULL;
3552fb4d8502Sjsg 	adev->mman.buffer_funcs_ring = NULL;
3553fb4d8502Sjsg 	adev->vm_manager.vm_pte_funcs = NULL;
3554c349dbc7Sjsg 	adev->vm_manager.vm_pte_num_scheds = 0;
3555fb4d8502Sjsg 	adev->gmc.gmc_funcs = NULL;
35565ca02815Sjsg 	adev->harvest_ip_mask = 0x0;
3557fb4d8502Sjsg 	adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
3558fb4d8502Sjsg 	bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
3559fb4d8502Sjsg 
3560fb4d8502Sjsg 	adev->smc_rreg = &amdgpu_invalid_rreg;
3561fb4d8502Sjsg 	adev->smc_wreg = &amdgpu_invalid_wreg;
3562fb4d8502Sjsg 	adev->pcie_rreg = &amdgpu_invalid_rreg;
3563fb4d8502Sjsg 	adev->pcie_wreg = &amdgpu_invalid_wreg;
3564f005ef32Sjsg 	adev->pcie_rreg_ext = &amdgpu_invalid_rreg_ext;
3565f005ef32Sjsg 	adev->pcie_wreg_ext = &amdgpu_invalid_wreg_ext;
3566fb4d8502Sjsg 	adev->pciep_rreg = &amdgpu_invalid_rreg;
3567fb4d8502Sjsg 	adev->pciep_wreg = &amdgpu_invalid_wreg;
3568c349dbc7Sjsg 	adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3569c349dbc7Sjsg 	adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
3570fb4d8502Sjsg 	adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3571fb4d8502Sjsg 	adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3572fb4d8502Sjsg 	adev->didt_rreg = &amdgpu_invalid_rreg;
3573fb4d8502Sjsg 	adev->didt_wreg = &amdgpu_invalid_wreg;
3574fb4d8502Sjsg 	adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3575fb4d8502Sjsg 	adev->gc_cac_wreg = &amdgpu_invalid_wreg;
3576fb4d8502Sjsg 	adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3577fb4d8502Sjsg 	adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3578fb4d8502Sjsg 
357946154ddeSjsg 	DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3580fb4d8502Sjsg 		 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3581fb4d8502Sjsg 		 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
3582fb4d8502Sjsg 
3583fb4d8502Sjsg 	/* mutex initialization is all done here so we
3584f005ef32Sjsg 	 * can recall functions without having locking issues
3585f005ef32Sjsg 	 */
3586fb4d8502Sjsg 	rw_init(&adev->firmware.mutex, "agfw");
3587fb4d8502Sjsg 	rw_init(&adev->pm.mutex, "agpm");
3588fb4d8502Sjsg 	rw_init(&adev->gfx.gpu_clock_mutex, "gfxclk");
3589fb4d8502Sjsg 	rw_init(&adev->srbm_mutex, "srbm");
3590fb4d8502Sjsg 	rw_init(&adev->gfx.pipe_reserve_mutex, "pipers");
3591c349dbc7Sjsg 	rw_init(&adev->gfx.gfx_off_mutex, "gfxoff");
3592f005ef32Sjsg 	rw_init(&adev->gfx.partition_mutex, "gfxpar");
3593fb4d8502Sjsg 	rw_init(&adev->grbm_idx_mutex, "grbmidx");
3594fb4d8502Sjsg 	rw_init(&adev->mn_lock, "agpumn");
3595fb4d8502Sjsg 	rw_init(&adev->virt.vf_errors.lock, "vferr");
3596fb4d8502Sjsg 	hash_init(adev->mn_hash);
3597c349dbc7Sjsg 	rw_init(&adev->psp.mutex, "agpsp");
3598c349dbc7Sjsg 	rw_init(&adev->notifier_lock, "agnf");
35991bb76ff1Sjsg 	rw_init(&adev->pm.stable_pstate_ctx_lock, "agps");
36001bb76ff1Sjsg 	rw_init(&adev->benchmark_mutex, "agbm");
3601fb4d8502Sjsg 
36021bb76ff1Sjsg 	amdgpu_device_init_apu_flags(adev);
36035ca02815Sjsg 
3604c349dbc7Sjsg 	r = amdgpu_device_check_arguments(adev);
3605c349dbc7Sjsg 	if (r)
3606c349dbc7Sjsg 		return r;
3607fb4d8502Sjsg 
3608fb4d8502Sjsg 	mtx_init(&adev->mmio_idx_lock, IPL_TTY);
3609fb4d8502Sjsg 	mtx_init(&adev->smc_idx_lock, IPL_TTY);
3610fb4d8502Sjsg 	mtx_init(&adev->pcie_idx_lock, IPL_TTY);
3611fb4d8502Sjsg 	mtx_init(&adev->uvd_ctx_idx_lock, IPL_TTY);
3612fb4d8502Sjsg 	mtx_init(&adev->didt_idx_lock, IPL_TTY);
3613fb4d8502Sjsg 	mtx_init(&adev->gc_cac_idx_lock, IPL_TTY);
3614fb4d8502Sjsg 	mtx_init(&adev->se_cac_idx_lock, IPL_TTY);
3615fb4d8502Sjsg 	mtx_init(&adev->audio_endpt_idx_lock, IPL_TTY);
361663b35fb2Sjsg 	mtx_init(&adev->mm_stats.lock, IPL_NONE);
3617fb4d8502Sjsg 
3618fb4d8502Sjsg 	INIT_LIST_HEAD(&adev->shadow_list);
3619fb4d8502Sjsg 	rw_init(&adev->shadow_list_lock, "sdwlst");
3620fb4d8502Sjsg 
36215ca02815Sjsg 	INIT_LIST_HEAD(&adev->reset_list);
36225ca02815Sjsg 
36231bb76ff1Sjsg 	INIT_LIST_HEAD(&adev->ras_list);
36241bb76ff1Sjsg 
3625c349dbc7Sjsg 	INIT_DELAYED_WORK(&adev->delayed_init_work,
3626c349dbc7Sjsg 			  amdgpu_device_delayed_init_work_handler);
3627c349dbc7Sjsg 	INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3628c349dbc7Sjsg 			  amdgpu_device_delay_enable_gfx_off);
3629fb4d8502Sjsg 
3630c349dbc7Sjsg 	INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3631c349dbc7Sjsg 
3632c349dbc7Sjsg 	adev->gfx.gfx_off_req_count = 1;
36331bb76ff1Sjsg 	adev->gfx.gfx_off_residency = 0;
36341bb76ff1Sjsg 	adev->gfx.gfx_off_entrycount = 0;
3635ad8b1aafSjsg 	adev->pm.ac_power = power_supply_is_system_supplied() > 0;
3636ad8b1aafSjsg 
3637ad8b1aafSjsg 	atomic_set(&adev->throttling_logging_enabled, 1);
3638ad8b1aafSjsg 	/*
3639ad8b1aafSjsg 	 * If throttling continues, logging will be performed every minute
3640ad8b1aafSjsg 	 * to avoid log flooding. "-1" is subtracted since the thermal
3641ad8b1aafSjsg 	 * throttling interrupt comes every second. Thus, the total logging
3642ad8b1aafSjsg 	 * interval is 59 seconds (ratelimited printk interval) + 1 (waiting
3643ad8b1aafSjsg 	 * for throttling interrupt) = 60 seconds.
3644ad8b1aafSjsg 	 */
3645ad8b1aafSjsg 	ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3646ad8b1aafSjsg 	ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3647fb4d8502Sjsg 
3648fb4d8502Sjsg #ifdef __linux__
3649fb4d8502Sjsg 	/* Registers mapping */
3650fb4d8502Sjsg 	/* TODO: block userspace mapping of io register */
3651fb4d8502Sjsg 	if (adev->asic_type >= CHIP_BONAIRE) {
3652fb4d8502Sjsg 		adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3653fb4d8502Sjsg 		adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3654fb4d8502Sjsg 	} else {
3655fb4d8502Sjsg 		adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3656fb4d8502Sjsg 		adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3657fb4d8502Sjsg 	}
365833b7da62Sjsg #endif
3659fb4d8502Sjsg 
36605ca02815Sjsg 	for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
36615ca02815Sjsg 		atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
36625ca02815Sjsg 
366333b7da62Sjsg #ifdef __linux__
3664fb4d8502Sjsg 	adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
3665f005ef32Sjsg 	if (!adev->rmmio)
3666fb4d8502Sjsg 		return -ENOMEM;
3667fb4d8502Sjsg #endif
3668fb4d8502Sjsg 	DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
3669f005ef32Sjsg 	DRM_INFO("register mmio size: %u\n", (unsigned int)adev->rmmio_size);
3670c349dbc7Sjsg 
36711bb76ff1Sjsg 	/*
36721bb76ff1Sjsg 	 * Reset domain needs to be present early, before the XGMI hive is discovered
36731bb76ff1Sjsg 	 * (if any) and initialized, to use the reset sem and in_gpu reset flag
36741bb76ff1Sjsg 	 * early on during init and before calling to RREG32.
36751bb76ff1Sjsg 	 */
36761bb76ff1Sjsg 	adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
36771bb76ff1Sjsg 	if (!adev->reset_domain)
36781bb76ff1Sjsg 		return -ENOMEM;
3679c349dbc7Sjsg 
3680ad8b1aafSjsg 	/* detect hw virtualization here */
3681ad8b1aafSjsg 	amdgpu_detect_virtualization(adev);
3682ad8b1aafSjsg 
3683f005ef32Sjsg 	amdgpu_device_get_pcie_info(adev);
3684f005ef32Sjsg 
3685ad8b1aafSjsg 	r = amdgpu_device_get_job_timeout_settings(adev);
3686c349dbc7Sjsg 	if (r) {
3687ad8b1aafSjsg 		dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
368843dd1d00Sjsg 		return r;
3689c349dbc7Sjsg 	}
3690c349dbc7Sjsg 
3691fb4d8502Sjsg 	/* early init functions */
3692fb4d8502Sjsg 	r = amdgpu_device_ip_early_init(adev);
3693fb4d8502Sjsg 	if (r)
369443dd1d00Sjsg 		return r;
3695c349dbc7Sjsg 
3696f005ef32Sjsg 	amdgpu_device_set_mcbp(adev);
3697f005ef32Sjsg 
3698269b8745Sjsg 	/* Get rid of things like offb */
3699269b8745Sjsg 	r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
3700269b8745Sjsg 	if (r)
3701269b8745Sjsg 		return r;
3702269b8745Sjsg 
37031bb76ff1Sjsg 	/* Enable TMZ based on IP_VERSION */
37041bb76ff1Sjsg 	amdgpu_gmc_tmz_set(adev);
37051bb76ff1Sjsg 
37061bb76ff1Sjsg 	amdgpu_gmc_noretry_set(adev);
37071bb76ff1Sjsg 	/* Need to get xgmi info early to decide the reset behavior */
37081bb76ff1Sjsg 	if (adev->gmc.xgmi.supported) {
37091bb76ff1Sjsg 		r = adev->gfxhub.funcs->get_xgmi_info(adev);
37101bb76ff1Sjsg 		if (r)
37111bb76ff1Sjsg 			return r;
37121bb76ff1Sjsg 	}
37131bb76ff1Sjsg 
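	/* Note for this port: the PCIe root-port atomics probe below is
	 * compiled out (#ifdef notyet), so only gfx9 and newer APUs are
	 * flagged as supporting atomics; every other device ends up with
	 * have_atomics_support = false.
	 */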
37141bb76ff1Sjsg 	/* enable PCIE atomic ops */
37151bb76ff1Sjsg #ifdef notyet
3716f005ef32Sjsg 	if (amdgpu_sriov_vf(adev)) {
3717f005ef32Sjsg 		if (adev->virt.fw_reserve.p_pf2vf)
37181bb76ff1Sjsg 			adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
37191bb76ff1Sjsg 						      adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
37201bb76ff1Sjsg 				(PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3721e8543b3dSjsg 	/* APUs with gfx9 onwards don't rely on PCIe atomics; their internal
3722e8543b3dSjsg 	 * path natively supports atomics, so set have_atomics_support to true.
3723e8543b3dSjsg 	 */
3724f005ef32Sjsg 	} else if ((adev->flags & AMD_IS_APU) &&
3725f005ef32Sjsg 		   (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))) {
3726e8543b3dSjsg 		adev->have_atomics_support = true;
3727f005ef32Sjsg 	} else {
37281bb76ff1Sjsg 		adev->have_atomics_support =
37291bb76ff1Sjsg 			!pci_enable_atomic_ops_to_root(adev->pdev,
37301bb76ff1Sjsg 					  PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
37311bb76ff1Sjsg 					  PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3732f005ef32Sjsg 	}
3733f005ef32Sjsg 
37341bb76ff1Sjsg 	if (!adev->have_atomics_support)
37351bb76ff1Sjsg 		dev_info(adev->dev, "PCIE atomic ops is not supported\n");
37361bb76ff1Sjsg #else
3737e8543b3dSjsg 	/* APUs with gfx9 onwards don't rely on PCIe atomics; their internal
3738e8543b3dSjsg 	 * path natively supports atomics, so set have_atomics_support to true.
3739e8543b3dSjsg 	 */
3740e8543b3dSjsg 	if ((adev->flags & AMD_IS_APU) &&
3741e8543b3dSjsg 		(adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0)))
3742e8543b3dSjsg 		adev->have_atomics_support = true;
3743e8543b3dSjsg 	else
37441bb76ff1Sjsg 		adev->have_atomics_support = false;
37451bb76ff1Sjsg #endif
37461bb76ff1Sjsg 
3747c349dbc7Sjsg 	/* doorbell bar mapping and doorbell index init */
3748f005ef32Sjsg 	amdgpu_doorbell_init(adev);
3749c349dbc7Sjsg 
3750fb4d8502Sjsg 	if (amdgpu_emu_mode == 1) {
3751fb4d8502Sjsg 		/* post the asic on emulation mode */
3752fb4d8502Sjsg 		emu_soc_asic_init(adev);
3753fb4d8502Sjsg 		goto fence_driver_init;
3754fb4d8502Sjsg 	}
3755fb4d8502Sjsg 
37565ca02815Sjsg 	amdgpu_reset_init(adev);
37575ca02815Sjsg 
3758fb4d8502Sjsg 	/* detect if we are with an SRIOV vbios */
3759f005ef32Sjsg 	if (adev->bios)
3760fb4d8502Sjsg 		amdgpu_device_detect_sriov_bios(adev);
3761fb4d8502Sjsg 
3762c349dbc7Sjsg 	/* check if we need to reset the asic
3763c349dbc7Sjsg 	 *  E.g., driver was not cleanly unloaded previously, etc.
3764c349dbc7Sjsg 	 */
3765c349dbc7Sjsg 	if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
37665ca02815Sjsg 		if (adev->gmc.xgmi.num_physical_nodes) {
37675ca02815Sjsg 			dev_info(adev->dev, "Pending hive reset.\n");
37685ca02815Sjsg 			adev->gmc.xgmi.pending_reset = true;
37695ca02815Sjsg 			/* Only need to init the necessary blocks for SMU to handle the reset */
37705ca02815Sjsg 			for (i = 0; i < adev->num_ip_blocks; i++) {
37715ca02815Sjsg 				if (!adev->ip_blocks[i].status.valid)
37725ca02815Sjsg 					continue;
37735ca02815Sjsg 				if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
37745ca02815Sjsg 				      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
37755ca02815Sjsg 				      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
37765ca02815Sjsg 				      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
37775ca02815Sjsg 					DRM_DEBUG("IP %s disabled for hw_init.\n",
37785ca02815Sjsg 						adev->ip_blocks[i].version->funcs->name);
37795ca02815Sjsg 					adev->ip_blocks[i].status.hw = true;
37805ca02815Sjsg 				}
37815ca02815Sjsg 			}
37825ca02815Sjsg 		} else {
37833abae83eSjsg 			tmp = amdgpu_reset_method;
37843abae83eSjsg 			/* It should do a default reset when loading or reloading the driver,
37853abae83eSjsg 			 * regardless of the module parameter reset_method.
37863abae83eSjsg 			 */
37873abae83eSjsg 			amdgpu_reset_method = AMD_RESET_METHOD_NONE;
3788c349dbc7Sjsg 			r = amdgpu_asic_reset(adev);
37893abae83eSjsg 			amdgpu_reset_method = tmp;
3790c349dbc7Sjsg 			if (r) {
3791c349dbc7Sjsg 				dev_err(adev->dev, "asic reset on init failed\n");
3792c349dbc7Sjsg 				goto failed;
3793c349dbc7Sjsg 			}
3794c349dbc7Sjsg 		}
37955ca02815Sjsg 	}
3796c349dbc7Sjsg 
3797fb4d8502Sjsg 	/* Post card if necessary */
3798fb4d8502Sjsg 	if (amdgpu_device_need_post(adev)) {
3799fb4d8502Sjsg 		if (!adev->bios) {
3800fb4d8502Sjsg 			dev_err(adev->dev, "no vBIOS found\n");
3801fb4d8502Sjsg 			r = -EINVAL;
3802fb4d8502Sjsg 			goto failed;
3803fb4d8502Sjsg 		}
3804fb4d8502Sjsg 		DRM_INFO("GPU posting now...\n");
3805ad8b1aafSjsg 		r = amdgpu_device_asic_init(adev);
3806fb4d8502Sjsg 		if (r) {
3807fb4d8502Sjsg 			dev_err(adev->dev, "gpu post error!\n");
3808fb4d8502Sjsg 			goto failed;
3809fb4d8502Sjsg 		}
3810fb4d8502Sjsg 	}
3811fb4d8502Sjsg 
3812f005ef32Sjsg 	if (adev->bios) {
3813fb4d8502Sjsg 		if (adev->is_atom_fw) {
3814fb4d8502Sjsg 			/* Initialize clocks */
3815fb4d8502Sjsg 			r = amdgpu_atomfirmware_get_clock_info(adev);
3816fb4d8502Sjsg 			if (r) {
3817fb4d8502Sjsg 				dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
3818fb4d8502Sjsg 				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3819fb4d8502Sjsg 				goto failed;
3820fb4d8502Sjsg 			}
3821fb4d8502Sjsg 		} else {
3822fb4d8502Sjsg 			/* Initialize clocks */
3823fb4d8502Sjsg 			r = amdgpu_atombios_get_clock_info(adev);
3824fb4d8502Sjsg 			if (r) {
3825fb4d8502Sjsg 				dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
3826fb4d8502Sjsg 				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3827fb4d8502Sjsg 				goto failed;
3828fb4d8502Sjsg 			}
3829fb4d8502Sjsg 			/* init i2c buses */
3830fb4d8502Sjsg 			if (!amdgpu_device_has_dc_support(adev))
3831fb4d8502Sjsg 				amdgpu_atombios_i2c_init(adev);
3832fb4d8502Sjsg 		}
3833f005ef32Sjsg 	}
3834fb4d8502Sjsg 
3835fb4d8502Sjsg fence_driver_init:
3836fb4d8502Sjsg 	/* Fence driver */
38375ca02815Sjsg 	r = amdgpu_fence_driver_sw_init(adev);
3838fb4d8502Sjsg 	if (r) {
38395ca02815Sjsg 		dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
3840fb4d8502Sjsg 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
3841fb4d8502Sjsg 		goto failed;
3842fb4d8502Sjsg 	}
3843fb4d8502Sjsg 
3844fb4d8502Sjsg 	/* init the mode config */
3845ad8b1aafSjsg 	drm_mode_config_init(adev_to_drm(adev));
3846fb4d8502Sjsg 
3847fb4d8502Sjsg 	r = amdgpu_device_ip_init(adev);
3848fb4d8502Sjsg 	if (r) {
3849fb4d8502Sjsg 		dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
3850fb4d8502Sjsg 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
38515ca02815Sjsg 		goto release_ras_con;
3852fb4d8502Sjsg 	}
3853fb4d8502Sjsg 
38545ca02815Sjsg 	amdgpu_fence_driver_hw_init(adev);
38555ca02815Sjsg 
3856ad8b1aafSjsg 	dev_info(adev->dev,
3857ad8b1aafSjsg 		"SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
3858c349dbc7Sjsg 			adev->gfx.config.max_shader_engines,
3859c349dbc7Sjsg 			adev->gfx.config.max_sh_per_se,
3860c349dbc7Sjsg 			adev->gfx.config.max_cu_per_sh,
3861c349dbc7Sjsg 			adev->gfx.cu_info.number);
3862c349dbc7Sjsg 
386346154ddeSjsg #ifdef __OpenBSD__
386446154ddeSjsg {
386546154ddeSjsg 	const char *chip_name;
38661bb76ff1Sjsg 	uint32_t version = adev->ip_versions[GC_HWIP][0];
38671bb76ff1Sjsg 	int maj, min, rev;
386846154ddeSjsg 
386946154ddeSjsg 	switch (adev->asic_type) {
387046154ddeSjsg 	case CHIP_RAVEN:
3871ad8b1aafSjsg 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
387246154ddeSjsg 			chip_name = "RAVEN2";
3873ad8b1aafSjsg 		else if (adev->apu_flags & AMD_APU_IS_PICASSO)
387446154ddeSjsg 			chip_name = "PICASSO";
387546154ddeSjsg 		else
387646154ddeSjsg 			chip_name = "RAVEN";
387746154ddeSjsg 		break;
3878ad8b1aafSjsg 	case CHIP_RENOIR:
3879ad8b1aafSjsg 		if (adev->apu_flags & AMD_APU_IS_RENOIR)
3880ad8b1aafSjsg 			chip_name = "RENOIR";
3881ad8b1aafSjsg 		else
3882ad8b1aafSjsg 			chip_name = "GREEN_SARDINE";
3883ad8b1aafSjsg 		break;
388446154ddeSjsg 	default:
388546154ddeSjsg 		chip_name = amdgpu_asic_name[adev->asic_type];
388646154ddeSjsg 	}
38871bb76ff1Sjsg 
38881bb76ff1Sjsg 	printf("%s: %s", adev->self.dv_xname, chip_name);
38891bb76ff1Sjsg 	/* show graphics/compute ip block version, not set on < GFX9 */
38901bb76ff1Sjsg 	if (version) {
38911bb76ff1Sjsg 		maj = IP_VERSION_MAJ(version);
38921bb76ff1Sjsg 		min = IP_VERSION_MIN(version);
38931bb76ff1Sjsg 		rev = IP_VERSION_REV(version);
38941bb76ff1Sjsg 		printf(" GC %d.%d.%d", maj, min, rev);
38951bb76ff1Sjsg 	}
38961bb76ff1Sjsg 	printf(" %d CU rev 0x%02x\n", adev->gfx.cu_info.number, adev->rev_id);
389746154ddeSjsg }
389846154ddeSjsg #endif
389946154ddeSjsg 
3900fb4d8502Sjsg 	adev->accel_working = true;
3901fb4d8502Sjsg 
3902fb4d8502Sjsg 	amdgpu_vm_check_compute_bug(adev);
3903fb4d8502Sjsg 
3904fb4d8502Sjsg 	/* Initialize the buffer migration limit. */
3905fb4d8502Sjsg 	if (amdgpu_moverate >= 0)
3906fb4d8502Sjsg 		max_MBps = amdgpu_moverate;
3907fb4d8502Sjsg 	else
3908fb4d8502Sjsg 		max_MBps = 8; /* Allow 8 MB/s. */
3909fb4d8502Sjsg 	/* Get a log2 for easy divisions. */
3910fb4d8502Sjsg 	adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
3911fb4d8502Sjsg 
3912f005ef32Sjsg 	r = amdgpu_atombios_sysfs_init(adev);
3913f005ef32Sjsg 	if (r)
3914f005ef32Sjsg 		drm_err(&adev->ddev,
3915f005ef32Sjsg 			"registering atombios sysfs failed (%d).\n", r);
3916f005ef32Sjsg 
3917fb4d8502Sjsg 	r = amdgpu_pm_sysfs_init(adev);
3918f005ef32Sjsg 	if (r)
3919f005ef32Sjsg 		DRM_ERROR("registering pm sysfs failed (%d).\n", r);
3920fb4d8502Sjsg 
3921c349dbc7Sjsg 	r = amdgpu_ucode_sysfs_init(adev);
3922c349dbc7Sjsg 	if (r) {
3923c349dbc7Sjsg 		adev->ucode_sysfs_en = false;
3924c349dbc7Sjsg 		DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
3925c349dbc7Sjsg 	} else
3926c349dbc7Sjsg 		adev->ucode_sysfs_en = true;
3927fb4d8502Sjsg 
3928c349dbc7Sjsg 	/*
3929c349dbc7Sjsg 	 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
3930c349dbc7Sjsg 	 * Otherwise the mgpu fan boost feature will be skipped because the
3931c349dbc7Sjsg 	 * gpu instance count would be too low.
3932c349dbc7Sjsg 	 */
3933c349dbc7Sjsg 	amdgpu_register_gpu_instance(adev);
3934c349dbc7Sjsg 
3935fb4d8502Sjsg 	/* enable clockgating, etc. after ib tests, etc. since some blocks require
3936fb4d8502Sjsg 	 * explicit gating rather than handling it automatically.
3937fb4d8502Sjsg 	 */
39385ca02815Sjsg 	if (!adev->gmc.xgmi.pending_reset) {
3939fb4d8502Sjsg 		r = amdgpu_device_ip_late_init(adev);
3940fb4d8502Sjsg 		if (r) {
3941fb4d8502Sjsg 			dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
3942fb4d8502Sjsg 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
39435ca02815Sjsg 			goto release_ras_con;
3944fb4d8502Sjsg 		}
3945c349dbc7Sjsg 		/* must succeed. */
3946c349dbc7Sjsg 		amdgpu_ras_resume(adev);
3947c349dbc7Sjsg 		queue_delayed_work(system_wq, &adev->delayed_init_work,
3948c349dbc7Sjsg 				   msecs_to_jiffies(AMDGPU_RESUME_MS));
39495ca02815Sjsg 	}
3950c349dbc7Sjsg 
395134683186Sjsg 	if (amdgpu_sriov_vf(adev)) {
395234683186Sjsg 		amdgpu_virt_release_full_gpu(adev, true);
3953ad8b1aafSjsg 		flush_delayed_work(&adev->delayed_init_work);
395434683186Sjsg 	}
3955ad8b1aafSjsg 
3956ad8b1aafSjsg 	r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
3957ad8b1aafSjsg 	if (r)
3958ad8b1aafSjsg 		dev_err(adev->dev, "Could not create amdgpu device attr\n");
3959c349dbc7Sjsg 
3960f005ef32Sjsg 	amdgpu_fru_sysfs_init(adev);
3961f005ef32Sjsg 
3962c349dbc7Sjsg 	if (IS_ENABLED(CONFIG_PERF_EVENTS))
3963c349dbc7Sjsg 		r = amdgpu_pmu_init(adev);
3964c349dbc7Sjsg 	if (r)
3965c349dbc7Sjsg 		dev_err(adev->dev, "amdgpu_pmu_init failed\n");
3966c349dbc7Sjsg 
3967ad8b1aafSjsg 	/* Keep the stored PCI config space at hand for restore after a sudden PCI error */
3968ad8b1aafSjsg 	if (amdgpu_device_cache_pci_state(adev->pdev))
3969ad8b1aafSjsg 		pci_restore_state(pdev);
3970ad8b1aafSjsg 
39715ca02815Sjsg 	/* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
39725ca02815Sjsg 	/* this will fail for cards that aren't VGA class devices, just
3973f005ef32Sjsg 	 * ignore it
3974f005ef32Sjsg 	 */
39755ca02815Sjsg #ifdef notyet
39765ca02815Sjsg 	if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
39775ca02815Sjsg 		vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
39785ca02815Sjsg #endif
39795ca02815Sjsg 
398078c2b773Sjsg 	px = amdgpu_device_supports_px(ddev);
398178c2b773Sjsg 
3982997286d4Sjsg 	if (px || (!dev_is_removable(&adev->pdev->dev) &&
398378c2b773Sjsg 				apple_gmux_detect(NULL, NULL)))
39845ca02815Sjsg 		vga_switcheroo_register_client(adev->pdev,
39855ca02815Sjsg 					       &amdgpu_switcheroo_ops, px);
398678c2b773Sjsg 
398778c2b773Sjsg 	if (px)
39885ca02815Sjsg 		vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
39895ca02815Sjsg 
39905ca02815Sjsg 	if (adev->gmc.xgmi.pending_reset)
39915ca02815Sjsg 		queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
39925ca02815Sjsg 				   msecs_to_jiffies(AMDGPU_RESUME_MS));
39935ca02815Sjsg 
39941bb76ff1Sjsg 	amdgpu_device_check_iommu_direct_map(adev);
39951bb76ff1Sjsg 
3996fb4d8502Sjsg 	return 0;
3997fb4d8502Sjsg 
39985ca02815Sjsg release_ras_con:
399934683186Sjsg 	if (amdgpu_sriov_vf(adev))
400034683186Sjsg 		amdgpu_virt_release_full_gpu(adev, true);
400134683186Sjsg 
400234683186Sjsg 	/* failed in exclusive mode due to timeout */
400334683186Sjsg 	if (amdgpu_sriov_vf(adev) &&
400434683186Sjsg 		!amdgpu_sriov_runtime(adev) &&
400534683186Sjsg 		amdgpu_virt_mmio_blocked(adev) &&
400634683186Sjsg 		!amdgpu_virt_wait_reset(adev)) {
400734683186Sjsg 		dev_err(adev->dev, "VF exclusive mode timeout\n");
400834683186Sjsg 		/* Don't send request since VF is inactive. */
400934683186Sjsg 		adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
401034683186Sjsg 		adev->virt.ops = NULL;
401134683186Sjsg 		r = -EAGAIN;
401234683186Sjsg 	}
40135ca02815Sjsg 	amdgpu_release_ras_context(adev);
40145ca02815Sjsg 
4015fb4d8502Sjsg failed:
4016fb4d8502Sjsg 	amdgpu_vf_error_trans_all(adev);
4017fb4d8502Sjsg 
4018fb4d8502Sjsg 	return r;
4019fb4d8502Sjsg }
4020fb4d8502Sjsg 
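/**
 * amdgpu_device_unmap_mmio - unmap all MMIO mappings of the device
 *
 * @adev: amdgpu_device pointer
 *
 * Clears any CPU mappings pointing at the device and unmaps the doorbell,
 * register and VRAM BARs so that nothing can touch the hardware afterwards,
 * e.g. once the device has been unplugged.
 */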
40215ca02815Sjsg static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
40225ca02815Sjsg {
40235ca02815Sjsg 	STUB();
40245ca02815Sjsg #ifdef notyet
4025f005ef32Sjsg 
40265ca02815Sjsg 	/* Clear all CPU mappings pointing to this device */
40275ca02815Sjsg 	unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
40285ca02815Sjsg #endif
40295ca02815Sjsg 
40305ca02815Sjsg 	/* Unmap all mapped bars - Doorbell, registers and VRAM */
4031f005ef32Sjsg 	amdgpu_doorbell_fini(adev);
40325ca02815Sjsg 
40335ca02815Sjsg #ifdef __linux__
40345ca02815Sjsg 	iounmap(adev->rmmio);
40355ca02815Sjsg 	adev->rmmio = NULL;
40365ca02815Sjsg 	if (adev->mman.aper_base_kaddr)
40375ca02815Sjsg 		iounmap(adev->mman.aper_base_kaddr);
40385ca02815Sjsg 	adev->mman.aper_base_kaddr = NULL;
40395ca02815Sjsg #else
40405ca02815Sjsg 	if (adev->rmmio_size > 0)
40415ca02815Sjsg 		bus_space_unmap(adev->rmmio_bst, adev->rmmio_bsh,
40425ca02815Sjsg 		    adev->rmmio_size);
40435ca02815Sjsg 	adev->rmmio_size = 0;
40445ca02815Sjsg 	adev->rmmio = NULL;
40455ca02815Sjsg 	if (adev->mman.aper_base_kaddr)
40465ca02815Sjsg 		bus_space_unmap(adev->memt, adev->mman.aper_bsh,
40475ca02815Sjsg 		    adev->gmc.visible_vram_size);
40485ca02815Sjsg 	adev->mman.aper_base_kaddr = NULL;
40495ca02815Sjsg #endif
40505ca02815Sjsg 
40515ca02815Sjsg 	/* Memory manager related */
4052f005ef32Sjsg 	if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
40535ca02815Sjsg #ifdef __linux__
40545ca02815Sjsg 		arch_phys_wc_del(adev->gmc.vram_mtrr);
40555ca02815Sjsg 		arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
40565ca02815Sjsg #else
40575ca02815Sjsg 		drm_mtrr_del(0, adev->gmc.aper_base, adev->gmc.aper_size, DRM_MTRR_WC);
40585ca02815Sjsg #endif
40595ca02815Sjsg 	}
40605ca02815Sjsg }
40615ca02815Sjsg 
4062fb4d8502Sjsg /**
40631bb76ff1Sjsg  * amdgpu_device_fini_hw - tear down the driver
4064fb4d8502Sjsg  *
4065fb4d8502Sjsg  * @adev: amdgpu_device pointer
4066fb4d8502Sjsg  *
4067fb4d8502Sjsg  * Tear down the driver info (all asics).
4068fb4d8502Sjsg  * Called at driver shutdown.
4069fb4d8502Sjsg  */
40705ca02815Sjsg void amdgpu_device_fini_hw(struct amdgpu_device *adev)
4071fb4d8502Sjsg {
4072ad8b1aafSjsg 	dev_info(adev->dev, "amdgpu: finishing device.\n");
4073c349dbc7Sjsg 	flush_delayed_work(&adev->delayed_init_work);
4074fb4d8502Sjsg 	adev->shutdown = true;
4075c349dbc7Sjsg 
4076c349dbc7Sjsg 	/* make sure the IB test has finished before entering exclusive mode
4077c349dbc7Sjsg 	 * to avoid preemption during the IB test
4078f005ef32Sjsg 	 */
4079ad8b1aafSjsg 	if (amdgpu_sriov_vf(adev)) {
4080c349dbc7Sjsg 		amdgpu_virt_request_full_gpu(adev, false);
4081ad8b1aafSjsg 		amdgpu_virt_fini_data_exchange(adev);
4082ad8b1aafSjsg 	}
4083c349dbc7Sjsg 
4084fb4d8502Sjsg 	/* disable all interrupts */
4085fb4d8502Sjsg 	amdgpu_irq_disable_all(adev);
4086fb4d8502Sjsg 	if (adev->mode_info.mode_config_initialized) {
40875ca02815Sjsg 		if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
4088ad8b1aafSjsg 			drm_helper_force_disable_all(adev_to_drm(adev));
4089fb4d8502Sjsg 		else
4090ad8b1aafSjsg 			drm_atomic_helper_shutdown(adev_to_drm(adev));
4091fb4d8502Sjsg 	}
40925ca02815Sjsg 	amdgpu_fence_driver_hw_fini(adev);
40935ca02815Sjsg 
4094f005ef32Sjsg 	if (adev->mman.initialized)
4095f005ef32Sjsg 		drain_workqueue(adev->mman.bdev.wq);
40961bb76ff1Sjsg 
4097f005ef32Sjsg 	if (adev->pm.sysfs_initialized)
4098fb4d8502Sjsg 		amdgpu_pm_sysfs_fini(adev);
40995ca02815Sjsg 	if (adev->ucode_sysfs_en)
41005ca02815Sjsg 		amdgpu_ucode_sysfs_fini(adev);
41015ca02815Sjsg 	sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
4102f005ef32Sjsg 	amdgpu_fru_sysfs_fini(adev);
41035ca02815Sjsg 
41041bb76ff1Sjsg 	/* disable ras feature must before hw fini */
41051bb76ff1Sjsg 	amdgpu_ras_pre_fini(adev);
41061bb76ff1Sjsg 
41071bb76ff1Sjsg 	amdgpu_device_ip_fini_early(adev);
41085ca02815Sjsg 
41095ca02815Sjsg 	amdgpu_irq_fini_hw(adev);
41105ca02815Sjsg 
41111bb76ff1Sjsg 	if (adev->mman.initialized)
41121bb76ff1Sjsg 		ttm_device_clear_dma_mappings(&adev->mman.bdev);
41135ca02815Sjsg 
41145ca02815Sjsg 	amdgpu_gart_dummy_page_fini(adev);
41155ca02815Sjsg 
4116e63de9fbSjsg 	if (drm_dev_is_unplugged(adev_to_drm(adev)))
41175ca02815Sjsg 		amdgpu_device_unmap_mmio(adev);
41181bb76ff1Sjsg 
41195ca02815Sjsg }
41205ca02815Sjsg 
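/**
 * amdgpu_device_fini_sw - tear down the driver software state
 *
 * @adev: amdgpu_device pointer
 *
 * Frees the software side of the driver: fence driver, IP blocks, i2c buses,
 * the BIOS copy, MMIO/doorbell mappings and the reset domain.  Runs after
 * amdgpu_device_fini_hw() has quiesced the hardware.
 */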
41215ca02815Sjsg void amdgpu_device_fini_sw(struct amdgpu_device *adev)
41225ca02815Sjsg {
41231bb76ff1Sjsg 	int idx;
412478c2b773Sjsg 	bool px;
41251bb76ff1Sjsg 
41265ca02815Sjsg 	amdgpu_fence_driver_sw_fini(adev);
4127ad8b1aafSjsg 	amdgpu_device_ip_fini(adev);
4128f005ef32Sjsg 	amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
4129fb4d8502Sjsg 	adev->accel_working = false;
41301bb76ff1Sjsg 	dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
41315ca02815Sjsg 
41325ca02815Sjsg 	amdgpu_reset_fini(adev);
41335ca02815Sjsg 
4134fb4d8502Sjsg 	/* free i2c buses */
4135fb4d8502Sjsg 	if (!amdgpu_device_has_dc_support(adev))
4136fb4d8502Sjsg 		amdgpu_i2c_fini(adev);
4137fb4d8502Sjsg 
4138fb4d8502Sjsg 	if (amdgpu_emu_mode != 1)
4139fb4d8502Sjsg 		amdgpu_atombios_fini(adev);
4140fb4d8502Sjsg 
4141fb4d8502Sjsg 	kfree(adev->bios);
4142fb4d8502Sjsg 	adev->bios = NULL;
414378c2b773Sjsg 
414478c2b773Sjsg 	px = amdgpu_device_supports_px(adev_to_drm(adev));
414578c2b773Sjsg 
4146997286d4Sjsg 	if (px || (!dev_is_removable(&adev->pdev->dev) &&
414778c2b773Sjsg 				apple_gmux_detect(NULL, NULL)))
4148fb4d8502Sjsg 		vga_switcheroo_unregister_client(adev->pdev);
414978c2b773Sjsg 
415078c2b773Sjsg 	if (px)
4151fb4d8502Sjsg 		vga_switcheroo_fini_domain_pm_ops(adev->dev);
415278c2b773Sjsg 
41535ca02815Sjsg 	if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
41545ca02815Sjsg 		vga_client_unregister(adev->pdev);
4155fb4d8502Sjsg 
41561bb76ff1Sjsg 	if (drm_dev_enter(adev_to_drm(adev), &idx)) {
41571bb76ff1Sjsg #ifdef __linux__
41581bb76ff1Sjsg 		iounmap(adev->rmmio);
41591bb76ff1Sjsg 		adev->rmmio = NULL;
41601bb76ff1Sjsg #else
41611bb76ff1Sjsg 		if (adev->rmmio_size > 0)
41621bb76ff1Sjsg 			bus_space_unmap(adev->rmmio_bst, adev->rmmio_bsh,
41631bb76ff1Sjsg 			    adev->rmmio_size);
41641bb76ff1Sjsg 		adev->rmmio_size = 0;
41651bb76ff1Sjsg 		adev->rmmio = NULL;
41661bb76ff1Sjsg #endif
4167f005ef32Sjsg 		amdgpu_doorbell_fini(adev);
41681bb76ff1Sjsg 		drm_dev_exit(idx);
41691bb76ff1Sjsg 	}
41701bb76ff1Sjsg 
4171c349dbc7Sjsg 	if (IS_ENABLED(CONFIG_PERF_EVENTS))
4172c349dbc7Sjsg 		amdgpu_pmu_fini(adev);
4173ad8b1aafSjsg 	if (adev->mman.discovery_bin)
4174c349dbc7Sjsg 		amdgpu_discovery_fini(adev);
41755ca02815Sjsg 
41761bb76ff1Sjsg 	amdgpu_reset_put_reset_domain(adev->reset_domain);
41771bb76ff1Sjsg 	adev->reset_domain = NULL;
41781bb76ff1Sjsg 
41795ca02815Sjsg 	kfree(adev->pci_state);
41805ca02815Sjsg 
4181fb4d8502Sjsg }
4182fb4d8502Sjsg 
41838e01f7deSjsg /**
41848e01f7deSjsg  * amdgpu_device_evict_resources - evict device resources
41858e01f7deSjsg  * @adev: amdgpu device object
41868e01f7deSjsg  *
41878e01f7deSjsg  * Evicts all ttm device resources (vram BOs, gart table) from the lru list
41888e01f7deSjsg  * of the vram memory type. Mainly used for evicting device resources
41898e01f7deSjsg  * at suspend time.
41908e01f7deSjsg  *
41918e01f7deSjsg  */
41921bb76ff1Sjsg static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
41938e01f7deSjsg {
41941bb76ff1Sjsg 	int ret;
41951bb76ff1Sjsg 
41963a693bf3Sjsg 	/* No need to evict vram on APUs for suspend to ram or s2idle */
41973a693bf3Sjsg 	if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
41981bb76ff1Sjsg 		return 0;
41998e01f7deSjsg 
42001bb76ff1Sjsg 	ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
42011bb76ff1Sjsg 	if (ret)
42028e01f7deSjsg 		DRM_WARN("evicting device resources failed\n");
42031bb76ff1Sjsg 	return ret;
42048e01f7deSjsg }
4205fb4d8502Sjsg 
4206fb4d8502Sjsg /*
4207fb4d8502Sjsg  * Suspend & resume.
4208fb4d8502Sjsg  */
4209fb4d8502Sjsg /**
421036668b15Sjsg  * amdgpu_device_prepare - prepare for device suspend
421136668b15Sjsg  *
421236668b15Sjsg  * @dev: drm dev pointer
421336668b15Sjsg  *
421436668b15Sjsg  * Prepare to put the hw in the suspend state (all asics).
421536668b15Sjsg  * Returns 0 for success or an error on failure.
421636668b15Sjsg  * Called at driver suspend.
421736668b15Sjsg  */
421836668b15Sjsg int amdgpu_device_prepare(struct drm_device *dev)
421936668b15Sjsg {
422036668b15Sjsg 	struct amdgpu_device *adev = drm_to_adev(dev);
4221064f5254Sjsg 	int i, r;
422236668b15Sjsg 
422336668b15Sjsg 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
422436668b15Sjsg 		return 0;
422536668b15Sjsg 
422636668b15Sjsg 	/* Evict the majority of BOs before starting suspend sequence */
422736668b15Sjsg 	r = amdgpu_device_evict_resources(adev);
422836668b15Sjsg 	if (r)
422936668b15Sjsg 		return r;
423036668b15Sjsg 
4231*4a4ef11eSjsg 	flush_delayed_work(&adev->gfx.gfx_off_delay_work);
4232*4a4ef11eSjsg 
4233064f5254Sjsg 	for (i = 0; i < adev->num_ip_blocks; i++) {
4234064f5254Sjsg 		if (!adev->ip_blocks[i].status.valid)
4235064f5254Sjsg 			continue;
4236064f5254Sjsg 		if (!adev->ip_blocks[i].version->funcs->prepare_suspend)
4237064f5254Sjsg 			continue;
4238064f5254Sjsg 		r = adev->ip_blocks[i].version->funcs->prepare_suspend((void *)adev);
4239064f5254Sjsg 		if (r)
4240064f5254Sjsg 			return r;
4241064f5254Sjsg 	}
4242064f5254Sjsg 
424336668b15Sjsg 	return 0;
424436668b15Sjsg }
424536668b15Sjsg 
424636668b15Sjsg /**
4247fb4d8502Sjsg  * amdgpu_device_suspend - initiate device suspend
4248fb4d8502Sjsg  *
4249fb4d8502Sjsg  * @dev: drm dev pointer
4250fb4d8502Sjsg  * @fbcon : notify the fbdev of suspend
4251fb4d8502Sjsg  *
4252fb4d8502Sjsg  * Puts the hw in the suspend state (all asics).
4253fb4d8502Sjsg  * Returns 0 for success or an error on failure.
4254fb4d8502Sjsg  * Called at driver suspend.
4255fb4d8502Sjsg  */
4256c349dbc7Sjsg int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
4257fb4d8502Sjsg {
42585ca02815Sjsg 	struct amdgpu_device *adev = drm_to_adev(dev);
42591bb76ff1Sjsg 	int r = 0;
4260fb4d8502Sjsg 
42616c3f7e80Sjsg 	if (adev->shutdown)
42626c3f7e80Sjsg 		return 0;
4263fb4d8502Sjsg 
4264fb4d8502Sjsg #ifdef notyet
4265fb4d8502Sjsg 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4266fb4d8502Sjsg 		return 0;
4267fb4d8502Sjsg #endif
4268fb4d8502Sjsg 
4269c349dbc7Sjsg 	adev->in_suspend = true;
42705ca02815Sjsg 
42711bb76ff1Sjsg 	if (amdgpu_sriov_vf(adev)) {
42721bb76ff1Sjsg 		amdgpu_virt_fini_data_exchange(adev);
42731bb76ff1Sjsg 		r = amdgpu_virt_request_full_gpu(adev, false);
42741bb76ff1Sjsg 		if (r)
42751bb76ff1Sjsg 			return r;
42761bb76ff1Sjsg 	}
42771bb76ff1Sjsg 
42785ca02815Sjsg 	if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
42795ca02815Sjsg 		DRM_WARN("smart shift update failed\n");
42805ca02815Sjsg 
4281fb4d8502Sjsg 	if (fbcon)
42821bb76ff1Sjsg 		drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
4283fb4d8502Sjsg 
4284c349dbc7Sjsg 	cancel_delayed_work_sync(&adev->delayed_init_work);
4285c349dbc7Sjsg 
4286c349dbc7Sjsg 	amdgpu_ras_suspend(adev);
4287fb4d8502Sjsg 
42885ca02815Sjsg 	amdgpu_device_ip_suspend_phase1(adev);
4289fb4d8502Sjsg 
42905ca02815Sjsg 	if (!adev->in_s0ix)
42915ca02815Sjsg 		amdgpu_amdkfd_suspend(adev, adev->in_runpm);
4292c349dbc7Sjsg 
42931bb76ff1Sjsg 	r = amdgpu_device_evict_resources(adev);
42941bb76ff1Sjsg 	if (r)
42951bb76ff1Sjsg 		return r;
4296fb4d8502Sjsg 
42975ca02815Sjsg 	amdgpu_fence_driver_hw_fini(adev);
4298fb4d8502Sjsg 
42995ca02815Sjsg 	amdgpu_device_ip_suspend_phase2(adev);
43001bb76ff1Sjsg 
43011bb76ff1Sjsg 	if (amdgpu_sriov_vf(adev))
43021bb76ff1Sjsg 		amdgpu_virt_release_full_gpu(adev, false);
4303fb4d8502Sjsg 
4304fb4d8502Sjsg 	return 0;
4305fb4d8502Sjsg }
4306fb4d8502Sjsg 
4307fb4d8502Sjsg /**
4308fb4d8502Sjsg  * amdgpu_device_resume - initiate device resume
4309fb4d8502Sjsg  *
4310fb4d8502Sjsg  * @dev: drm dev pointer
4311fb4d8502Sjsg  * @fbcon : notify the fbdev of resume
4312fb4d8502Sjsg  *
4313fb4d8502Sjsg  * Bring the hw back to operating state (all asics).
4314fb4d8502Sjsg  * Returns 0 for success or an error on failure.
4315fb4d8502Sjsg  * Called at driver resume.
4316fb4d8502Sjsg  */
4317c349dbc7Sjsg int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
4318fb4d8502Sjsg {
4319ad8b1aafSjsg 	struct amdgpu_device *adev = drm_to_adev(dev);
4320fb4d8502Sjsg 	int r = 0;
4321fb4d8502Sjsg 
43221bb76ff1Sjsg 	if (amdgpu_sriov_vf(adev)) {
43231bb76ff1Sjsg 		r = amdgpu_virt_request_full_gpu(adev, true);
43241bb76ff1Sjsg 		if (r)
43251bb76ff1Sjsg 			return r;
43261bb76ff1Sjsg 	}
43271bb76ff1Sjsg 
4328fb4d8502Sjsg #ifdef notyet
4329fb4d8502Sjsg 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4330fb4d8502Sjsg 		return 0;
4331fb4d8502Sjsg #endif
4332fb4d8502Sjsg 
43335ca02815Sjsg 	if (adev->in_s0ix)
43341bb76ff1Sjsg 		amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
43355ca02815Sjsg 
4336fb4d8502Sjsg 	/* post card */
4337fb4d8502Sjsg 	if (amdgpu_device_need_post(adev)) {
4338ad8b1aafSjsg 		r = amdgpu_device_asic_init(adev);
4339fb4d8502Sjsg 		if (r)
4340ad8b1aafSjsg 			dev_err(adev->dev, "amdgpu asic init failed\n");
4341fb4d8502Sjsg 	}
4342fb4d8502Sjsg 
4343fb4d8502Sjsg 	r = amdgpu_device_ip_resume(adev);
43441bb76ff1Sjsg 
4345fb4d8502Sjsg 	if (r) {
4346ad8b1aafSjsg 		dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
4347f005ef32Sjsg 		goto exit;
4348fb4d8502Sjsg 	}
43495ca02815Sjsg 	amdgpu_fence_driver_hw_init(adev);
4350fb4d8502Sjsg 
4351fb4d8502Sjsg 	r = amdgpu_device_ip_late_init(adev);
4352fb4d8502Sjsg 	if (r)
4353f005ef32Sjsg 		goto exit;
4354fb4d8502Sjsg 
4355c349dbc7Sjsg 	queue_delayed_work(system_wq, &adev->delayed_init_work,
4356c349dbc7Sjsg 			   msecs_to_jiffies(AMDGPU_RESUME_MS));
4357c349dbc7Sjsg 
43585ca02815Sjsg 	if (!adev->in_s0ix) {
43595ca02815Sjsg 		r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
4360fb4d8502Sjsg 		if (r)
4361f005ef32Sjsg 			goto exit;
43625ca02815Sjsg 	}
4363fb4d8502Sjsg 
4364f005ef32Sjsg exit:
4365f005ef32Sjsg 	if (amdgpu_sriov_vf(adev)) {
4366f005ef32Sjsg 		amdgpu_virt_init_data_exchange(adev);
4367f005ef32Sjsg 		amdgpu_virt_release_full_gpu(adev, true);
4368f005ef32Sjsg 	}
4369f005ef32Sjsg 
4370f005ef32Sjsg 	if (r)
4371f005ef32Sjsg 		return r;
4372f005ef32Sjsg 
4373fb4d8502Sjsg 	/* Make sure IB tests flushed */
4374c349dbc7Sjsg 	flush_delayed_work(&adev->delayed_init_work);
4375fb4d8502Sjsg 
43765ca02815Sjsg 	if (fbcon)
43771bb76ff1Sjsg 		drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
4378fb4d8502Sjsg 
4379c349dbc7Sjsg 	amdgpu_ras_resume(adev);
4380c349dbc7Sjsg 
4381f005ef32Sjsg 	if (adev->mode_info.num_crtc) {
4382fb4d8502Sjsg 		/*
4383fb4d8502Sjsg 		 * Most of the connector probing functions try to acquire runtime pm
4384fb4d8502Sjsg 		 * refs to ensure that the GPU is powered on when connector polling is
4385fb4d8502Sjsg 		 * performed. Since we're calling this from a runtime PM callback,
4386fb4d8502Sjsg 		 * trying to acquire rpm refs will cause us to deadlock.
4387fb4d8502Sjsg 		 *
4388fb4d8502Sjsg 		 * Since we're guaranteed to be holding the rpm lock, it's safe to
4389fb4d8502Sjsg 		 * temporarily disable the rpm helpers so this doesn't deadlock us.
4390fb4d8502Sjsg 		 */
4391fb4d8502Sjsg #if defined(CONFIG_PM) && defined(__linux__)
4392fb4d8502Sjsg 		dev->dev->power.disable_depth++;
4393fb4d8502Sjsg #endif
4394f005ef32Sjsg 		if (!adev->dc_enabled)
4395fb4d8502Sjsg 			drm_helper_hpd_irq_event(dev);
4396fb4d8502Sjsg 		else
4397fb4d8502Sjsg 			drm_kms_helper_hotplug_event(dev);
4398fb4d8502Sjsg #if defined(CONFIG_PM) && defined(__linux__)
4399fb4d8502Sjsg 		dev->dev->power.disable_depth--;
4400fb4d8502Sjsg #endif
4401f005ef32Sjsg 	}
4402c349dbc7Sjsg 	adev->in_suspend = false;
4403c349dbc7Sjsg 
440473029064Sjsg 	if (adev->enable_mes)
440573029064Sjsg 		amdgpu_mes_self_test(adev);
440673029064Sjsg 
44075ca02815Sjsg 	if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
44085ca02815Sjsg 		DRM_WARN("smart shift update failed\n");
44095ca02815Sjsg 
4410fb4d8502Sjsg 	return 0;
4411fb4d8502Sjsg }
4412fb4d8502Sjsg 
4413fb4d8502Sjsg /**
4414fb4d8502Sjsg  * amdgpu_device_ip_check_soft_reset - did soft reset succeed
4415fb4d8502Sjsg  *
4416fb4d8502Sjsg  * @adev: amdgpu_device pointer
4417fb4d8502Sjsg  *
4418fb4d8502Sjsg  * The list of all the hardware IPs that make up the asic is walked and
4419fb4d8502Sjsg  * the check_soft_reset callbacks are run.  check_soft_reset determines
4420fb4d8502Sjsg  * if the asic is still hung or not.
4421fb4d8502Sjsg  * Returns true if any of the IPs are still in a hung state, false if not.
4422fb4d8502Sjsg  */
4423fb4d8502Sjsg static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
4424fb4d8502Sjsg {
4425fb4d8502Sjsg 	int i;
4426fb4d8502Sjsg 	bool asic_hang = false;
4427fb4d8502Sjsg 
4428fb4d8502Sjsg 	if (amdgpu_sriov_vf(adev))
4429fb4d8502Sjsg 		return true;
4430fb4d8502Sjsg 
4431fb4d8502Sjsg 	if (amdgpu_asic_need_full_reset(adev))
4432fb4d8502Sjsg 		return true;
4433fb4d8502Sjsg 
4434fb4d8502Sjsg 	for (i = 0; i < adev->num_ip_blocks; i++) {
4435fb4d8502Sjsg 		if (!adev->ip_blocks[i].status.valid)
4436fb4d8502Sjsg 			continue;
4437fb4d8502Sjsg 		if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4438fb4d8502Sjsg 			adev->ip_blocks[i].status.hang =
4439fb4d8502Sjsg 				adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
4440fb4d8502Sjsg 		if (adev->ip_blocks[i].status.hang) {
4441ad8b1aafSjsg 			dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
4442fb4d8502Sjsg 			asic_hang = true;
4443fb4d8502Sjsg 		}
4444fb4d8502Sjsg 	}
4445fb4d8502Sjsg 	return asic_hang;
4446fb4d8502Sjsg }
4447fb4d8502Sjsg 
4448fb4d8502Sjsg /**
4449fb4d8502Sjsg  * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
4450fb4d8502Sjsg  *
4451fb4d8502Sjsg  * @adev: amdgpu_device pointer
4452fb4d8502Sjsg  *
4453fb4d8502Sjsg  * The list of all the hardware IPs that make up the asic is walked and the
4454fb4d8502Sjsg  * pre_soft_reset callbacks are run if the block is hung.  pre_soft_reset
4455fb4d8502Sjsg  * handles any IP specific hardware or software state changes that are
4456fb4d8502Sjsg  * necessary for a soft reset to succeed.
4457fb4d8502Sjsg  * Returns 0 on success, negative error code on failure.
4458fb4d8502Sjsg  */
4459fb4d8502Sjsg static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
4460fb4d8502Sjsg {
4461fb4d8502Sjsg 	int i, r = 0;
4462fb4d8502Sjsg 
4463fb4d8502Sjsg 	for (i = 0; i < adev->num_ip_blocks; i++) {
4464fb4d8502Sjsg 		if (!adev->ip_blocks[i].status.valid)
4465fb4d8502Sjsg 			continue;
4466fb4d8502Sjsg 		if (adev->ip_blocks[i].status.hang &&
4467fb4d8502Sjsg 		    adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4468fb4d8502Sjsg 			r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
4469fb4d8502Sjsg 			if (r)
4470fb4d8502Sjsg 				return r;
4471fb4d8502Sjsg 		}
4472fb4d8502Sjsg 	}
4473fb4d8502Sjsg 
4474fb4d8502Sjsg 	return 0;
4475fb4d8502Sjsg }
4476fb4d8502Sjsg 
4477fb4d8502Sjsg /**
4478fb4d8502Sjsg  * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
4479fb4d8502Sjsg  *
4480fb4d8502Sjsg  * @adev: amdgpu_device pointer
4481fb4d8502Sjsg  *
4482fb4d8502Sjsg  * Some hardware IPs cannot be soft reset.  If they are hung, a full gpu
4483fb4d8502Sjsg  * reset is necessary to recover.
4484fb4d8502Sjsg  * Returns true if a full asic reset is required, false if not.
4485fb4d8502Sjsg  */
4486fb4d8502Sjsg static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
4487fb4d8502Sjsg {
4488fb4d8502Sjsg 	int i;
4489fb4d8502Sjsg 
4490fb4d8502Sjsg 	if (amdgpu_asic_need_full_reset(adev))
4491fb4d8502Sjsg 		return true;
4492fb4d8502Sjsg 
4493fb4d8502Sjsg 	for (i = 0; i < adev->num_ip_blocks; i++) {
4494fb4d8502Sjsg 		if (!adev->ip_blocks[i].status.valid)
4495fb4d8502Sjsg 			continue;
4496fb4d8502Sjsg 		if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
4497fb4d8502Sjsg 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
4498fb4d8502Sjsg 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
4499fb4d8502Sjsg 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
4500fb4d8502Sjsg 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
4501fb4d8502Sjsg 			if (adev->ip_blocks[i].status.hang) {
4502ad8b1aafSjsg 				dev_info(adev->dev, "Some block need full reset!\n");
4503fb4d8502Sjsg 				return true;
4504fb4d8502Sjsg 			}
4505fb4d8502Sjsg 		}
4506fb4d8502Sjsg 	}
4507fb4d8502Sjsg 	return false;
4508fb4d8502Sjsg }
4509fb4d8502Sjsg 
4510fb4d8502Sjsg /**
4511fb4d8502Sjsg  * amdgpu_device_ip_soft_reset - do a soft reset
4512fb4d8502Sjsg  *
4513fb4d8502Sjsg  * @adev: amdgpu_device pointer
4514fb4d8502Sjsg  *
4515fb4d8502Sjsg  * The list of all the hardware IPs that make up the asic is walked and the
4516fb4d8502Sjsg  * soft_reset callbacks are run if the block is hung.  soft_reset handles any
4517fb4d8502Sjsg  * IP specific hardware or software state changes that are necessary to soft
4518fb4d8502Sjsg  * reset the IP.
4519fb4d8502Sjsg  * Returns 0 on success, negative error code on failure.
4520fb4d8502Sjsg  */
4521fb4d8502Sjsg static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
4522fb4d8502Sjsg {
4523fb4d8502Sjsg 	int i, r = 0;
4524fb4d8502Sjsg 
4525fb4d8502Sjsg 	for (i = 0; i < adev->num_ip_blocks; i++) {
4526fb4d8502Sjsg 		if (!adev->ip_blocks[i].status.valid)
4527fb4d8502Sjsg 			continue;
4528fb4d8502Sjsg 		if (adev->ip_blocks[i].status.hang &&
4529fb4d8502Sjsg 		    adev->ip_blocks[i].version->funcs->soft_reset) {
4530fb4d8502Sjsg 			r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
4531fb4d8502Sjsg 			if (r)
4532fb4d8502Sjsg 				return r;
4533fb4d8502Sjsg 		}
4534fb4d8502Sjsg 	}
4535fb4d8502Sjsg 
4536fb4d8502Sjsg 	return 0;
4537fb4d8502Sjsg }
4538fb4d8502Sjsg 
4539fb4d8502Sjsg /**
4540fb4d8502Sjsg  * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4541fb4d8502Sjsg  *
4542fb4d8502Sjsg  * @adev: amdgpu_device pointer
4543fb4d8502Sjsg  *
4544fb4d8502Sjsg  * The list of all the hardware IPs that make up the asic is walked and the
4545fb4d8502Sjsg  * post_soft_reset callbacks are run if the asic was hung.  post_soft_reset
4546fb4d8502Sjsg  * handles any IP specific hardware or software state changes that are
4547fb4d8502Sjsg  * necessary after the IP has been soft reset.
4548fb4d8502Sjsg  * Returns 0 on success, negative error code on failure.
4549fb4d8502Sjsg  */
4550fb4d8502Sjsg static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
4551fb4d8502Sjsg {
4552fb4d8502Sjsg 	int i, r = 0;
4553fb4d8502Sjsg 
4554fb4d8502Sjsg 	for (i = 0; i < adev->num_ip_blocks; i++) {
4555fb4d8502Sjsg 		if (!adev->ip_blocks[i].status.valid)
4556fb4d8502Sjsg 			continue;
4557fb4d8502Sjsg 		if (adev->ip_blocks[i].status.hang &&
4558fb4d8502Sjsg 		    adev->ip_blocks[i].version->funcs->post_soft_reset)
4559fb4d8502Sjsg 			r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
4560fb4d8502Sjsg 		if (r)
4561fb4d8502Sjsg 			return r;
4562fb4d8502Sjsg 	}
4563fb4d8502Sjsg 
4564fb4d8502Sjsg 	return 0;
4565fb4d8502Sjsg }
4566fb4d8502Sjsg 
4567fb4d8502Sjsg /**
4568c349dbc7Sjsg  * amdgpu_device_recover_vram - Recover some VRAM contents
4569fb4d8502Sjsg  *
4570fb4d8502Sjsg  * @adev: amdgpu_device pointer
4571fb4d8502Sjsg  *
4572fb4d8502Sjsg  * Restores the contents of VRAM buffers from the shadows in GTT.  Used to
4573fb4d8502Sjsg  * restore things like GPUVM page tables after a GPU reset where
4574fb4d8502Sjsg  * the contents of VRAM might be lost.
4575c349dbc7Sjsg  *
4576c349dbc7Sjsg  * Returns:
4577c349dbc7Sjsg  * 0 on success, negative error code on failure.
4578fb4d8502Sjsg  */
4579c349dbc7Sjsg static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
4580fb4d8502Sjsg {
4581fb4d8502Sjsg 	struct dma_fence *fence = NULL, *next = NULL;
4582c349dbc7Sjsg 	struct amdgpu_bo *shadow;
45835ca02815Sjsg 	struct amdgpu_bo_vm *vmbo;
4584c349dbc7Sjsg 	long r = 1, tmo;
4585fb4d8502Sjsg 
4586fb4d8502Sjsg 	if (amdgpu_sriov_runtime(adev))
4587fb4d8502Sjsg 		tmo = msecs_to_jiffies(8000);
4588fb4d8502Sjsg 	else
4589fb4d8502Sjsg 		tmo = msecs_to_jiffies(100);
4590fb4d8502Sjsg 
4591ad8b1aafSjsg 	dev_info(adev->dev, "recover vram bo from shadow start\n");
4592fb4d8502Sjsg 	mutex_lock(&adev->shadow_list_lock);
45935ca02815Sjsg 	list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
45945f70b624Sjsg 		/* If vm is compute context or adev is APU, shadow will be NULL */
45955f70b624Sjsg 		if (!vmbo->shadow)
45965f70b624Sjsg 			continue;
45975f70b624Sjsg 		shadow = vmbo->shadow;
45985f70b624Sjsg 
4599c349dbc7Sjsg 		/* No need to recover an evicted BO */
46005ca02815Sjsg 		if (shadow->tbo.resource->mem_type != TTM_PL_TT ||
46015ca02815Sjsg 		    shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
46025ca02815Sjsg 		    shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM)
4603c349dbc7Sjsg 			continue;
4604c349dbc7Sjsg 
4605c349dbc7Sjsg 		r = amdgpu_bo_restore_shadow(shadow, &next);
4606c349dbc7Sjsg 		if (r)
4607c349dbc7Sjsg 			break;
4608c349dbc7Sjsg 
4609fb4d8502Sjsg 		if (fence) {
4610c349dbc7Sjsg 			tmo = dma_fence_wait_timeout(fence, false, tmo);
4611fb4d8502Sjsg 			dma_fence_put(fence);
4612fb4d8502Sjsg 			fence = next;
4613c349dbc7Sjsg 			if (tmo == 0) {
4614c349dbc7Sjsg 				r = -ETIMEDOUT;
4615c349dbc7Sjsg 				break;
4616c349dbc7Sjsg 			} else if (tmo < 0) {
4617c349dbc7Sjsg 				r = tmo;
4618fb4d8502Sjsg 				break;
4619fb4d8502Sjsg 			}
4620c349dbc7Sjsg 		} else {
4621fb4d8502Sjsg 			fence = next;
4622fb4d8502Sjsg 		}
4623c349dbc7Sjsg 	}
4624fb4d8502Sjsg 	mutex_unlock(&adev->shadow_list_lock);
4625fb4d8502Sjsg 
4626c349dbc7Sjsg 	if (fence)
4627c349dbc7Sjsg 		tmo = dma_fence_wait_timeout(fence, false, tmo);
4628fb4d8502Sjsg 	dma_fence_put(fence);
4629fb4d8502Sjsg 
4630c349dbc7Sjsg 	if (r < 0 || tmo <= 0) {
4631ad8b1aafSjsg 		dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
4632c349dbc7Sjsg 		return -EIO;
4633c349dbc7Sjsg 	}
4634c349dbc7Sjsg 
4635ad8b1aafSjsg 	dev_info(adev->dev, "recover vram bo from shadow done\n");
4636c349dbc7Sjsg 	return 0;
4637fb4d8502Sjsg }
4638fb4d8502Sjsg 
4639fb4d8502Sjsg 
4640fb4d8502Sjsg /**
4641fb4d8502Sjsg  * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
4642fb4d8502Sjsg  *
4643ad8b1aafSjsg  * @adev: amdgpu_device pointer
4644fb4d8502Sjsg  * @from_hypervisor: request from hypervisor
4645fb4d8502Sjsg  *
4646fb4d8502Sjsg  * Do a VF FLR and reinitialize the ASIC.
4647fb4d8502Sjsg  * Returns 0 on success, negative error code on failure.
4648fb4d8502Sjsg  */
4649fb4d8502Sjsg static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
4650fb4d8502Sjsg 				     bool from_hypervisor)
4651fb4d8502Sjsg {
4652fb4d8502Sjsg 	int r;
46531bb76ff1Sjsg 	struct amdgpu_hive_info *hive = NULL;
46541bb76ff1Sjsg 	int retry_limit = 0;
46551bb76ff1Sjsg 
46561bb76ff1Sjsg retry:
46571bb76ff1Sjsg 	amdgpu_amdkfd_pre_reset(adev);
4658fb4d8502Sjsg 
4659fb4d8502Sjsg 	if (from_hypervisor)
4660fb4d8502Sjsg 		r = amdgpu_virt_request_full_gpu(adev, true);
4661fb4d8502Sjsg 	else
4662fb4d8502Sjsg 		r = amdgpu_virt_reset_gpu(adev);
4663fb4d8502Sjsg 	if (r)
4664fb4d8502Sjsg 		return r;
4665f005ef32Sjsg 	amdgpu_irq_gpu_reset_resume_helper(adev);
4666f005ef32Sjsg 
4667f005ef32Sjsg 	/* some sw clean up VF needs to do before recover */
4668f005ef32Sjsg 	/* some SW cleanup the VF needs to do before recovery */
4669fb4d8502Sjsg 
4670fb4d8502Sjsg 	/* Resume IP prior to SMC */
4671fb4d8502Sjsg 	r = amdgpu_device_ip_reinit_early_sriov(adev);
4672fb4d8502Sjsg 	if (r)
4673fb4d8502Sjsg 		goto error;
4674fb4d8502Sjsg 
4675c349dbc7Sjsg 	amdgpu_virt_init_data_exchange(adev);
4676fb4d8502Sjsg 
4677c349dbc7Sjsg 	r = amdgpu_device_fw_loading(adev);
4678c349dbc7Sjsg 	if (r)
4679c349dbc7Sjsg 		return r;
4680c349dbc7Sjsg 
4681fb4d8502Sjsg 	/* now we are okay to resume SMC/CP/SDMA */
4682fb4d8502Sjsg 	r = amdgpu_device_ip_reinit_late_sriov(adev);
4683fb4d8502Sjsg 	if (r)
4684fb4d8502Sjsg 		goto error;
4685fb4d8502Sjsg 
46861bb76ff1Sjsg 	hive = amdgpu_get_xgmi_hive(adev);
46871bb76ff1Sjsg 	/* Update PSP FW topology after reset */
46881bb76ff1Sjsg 	if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
46891bb76ff1Sjsg 		r = amdgpu_xgmi_update_topology(hive, adev);
46901bb76ff1Sjsg 
46911bb76ff1Sjsg 	if (hive)
46921bb76ff1Sjsg 		amdgpu_put_xgmi_hive(hive);
46931bb76ff1Sjsg 
46941bb76ff1Sjsg 	if (!r) {
4695fb4d8502Sjsg 		r = amdgpu_ib_ring_tests(adev);
46961bb76ff1Sjsg 
4697c349dbc7Sjsg 		amdgpu_amdkfd_post_reset(adev);
46981bb76ff1Sjsg 	}
4699fb4d8502Sjsg 
4700fb4d8502Sjsg error:
4701fb4d8502Sjsg 	if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
4702c349dbc7Sjsg 		amdgpu_inc_vram_lost(adev);
4703c349dbc7Sjsg 		r = amdgpu_device_recover_vram(adev);
4704fb4d8502Sjsg 	}
47055ca02815Sjsg 	amdgpu_virt_release_full_gpu(adev, true);
4706fb4d8502Sjsg 
47071bb76ff1Sjsg 	if (AMDGPU_RETRY_SRIOV_RESET(r)) {
47081bb76ff1Sjsg 		if (retry_limit < AMDGPU_MAX_RETRY_LIMIT) {
47091bb76ff1Sjsg 			retry_limit++;
47101bb76ff1Sjsg 			goto retry;
47111bb76ff1Sjsg 		} else
47121bb76ff1Sjsg 			DRM_ERROR("GPU reset retry is beyond the retry limit\n");
47131bb76ff1Sjsg 	}
47141bb76ff1Sjsg 
4715fb4d8502Sjsg 	return r;
4716fb4d8502Sjsg }
4717fb4d8502Sjsg 
4718fb4d8502Sjsg /**
4719ad8b1aafSjsg  * amdgpu_device_has_job_running - check if there is any job in the pending list
4720ad8b1aafSjsg  *
4721ad8b1aafSjsg  * @adev: amdgpu_device pointer
4722ad8b1aafSjsg  *
4723ad8b1aafSjsg  * Check if any ring still has a job in its pending list.
4724ad8b1aafSjsg  */
4725ad8b1aafSjsg bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
4726ad8b1aafSjsg {
4727ad8b1aafSjsg 	int i;
4728ad8b1aafSjsg 	struct drm_sched_job *job;
4729ad8b1aafSjsg 
4730ad8b1aafSjsg 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4731ad8b1aafSjsg 		struct amdgpu_ring *ring = adev->rings[i];
4732ad8b1aafSjsg 
4733ad8b1aafSjsg 		if (!ring || !ring->sched.thread)
4734ad8b1aafSjsg 			continue;
4735ad8b1aafSjsg 
4736ad8b1aafSjsg 		spin_lock(&ring->sched.job_list_lock);
47375ca02815Sjsg 		job = list_first_entry_or_null(&ring->sched.pending_list,
47385ca02815Sjsg 					       struct drm_sched_job, list);
4739ad8b1aafSjsg 		spin_unlock(&ring->sched.job_list_lock);
4740ad8b1aafSjsg 		if (job)
4741ad8b1aafSjsg 			return true;
4742ad8b1aafSjsg 	}
4743ad8b1aafSjsg 	return false;
4744ad8b1aafSjsg }
4745ad8b1aafSjsg 
4746ad8b1aafSjsg /**
4747c349dbc7Sjsg  * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
4748fb4d8502Sjsg  *
4749ad8b1aafSjsg  * @adev: amdgpu_device pointer
4750fb4d8502Sjsg  *
4751c349dbc7Sjsg  * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
4752c349dbc7Sjsg  * a hung GPU.
4753fb4d8502Sjsg  */
4754c349dbc7Sjsg bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
4755fb4d8502Sjsg {
47561bb76ff1Sjsg 
47571bb76ff1Sjsg 	if (amdgpu_gpu_recovery == 0)
47581bb76ff1Sjsg 		goto disabled;
47591bb76ff1Sjsg 
4760f005ef32Sjsg 	/* Skip soft reset check in fatal error mode */
4761f005ef32Sjsg 	if (!amdgpu_ras_is_poison_mode_supported(adev))
4762f005ef32Sjsg 		return true;
4763fb4d8502Sjsg 
4764c349dbc7Sjsg 	if (amdgpu_sriov_vf(adev))
4765c349dbc7Sjsg 		return true;
4766c349dbc7Sjsg 
4767c349dbc7Sjsg 	if (amdgpu_gpu_recovery == -1) {
4768c349dbc7Sjsg 		switch (adev->asic_type) {
47691bb76ff1Sjsg #ifdef CONFIG_DRM_AMDGPU_SI
47701bb76ff1Sjsg 		case CHIP_VERDE:
47711bb76ff1Sjsg 		case CHIP_TAHITI:
47721bb76ff1Sjsg 		case CHIP_PITCAIRN:
47731bb76ff1Sjsg 		case CHIP_OLAND:
47741bb76ff1Sjsg 		case CHIP_HAINAN:
47751bb76ff1Sjsg #endif
47761bb76ff1Sjsg #ifdef CONFIG_DRM_AMDGPU_CIK
47771bb76ff1Sjsg 		case CHIP_KAVERI:
47781bb76ff1Sjsg 		case CHIP_KABINI:
47791bb76ff1Sjsg 		case CHIP_MULLINS:
47801bb76ff1Sjsg #endif
47811bb76ff1Sjsg 		case CHIP_CARRIZO:
47821bb76ff1Sjsg 		case CHIP_STONEY:
47831bb76ff1Sjsg 		case CHIP_CYAN_SKILLFISH:
4784c349dbc7Sjsg 			goto disabled;
47851bb76ff1Sjsg 		default:
47861bb76ff1Sjsg 			break;
4787c349dbc7Sjsg 		}
4788c349dbc7Sjsg 	}
4789c349dbc7Sjsg 
4790c349dbc7Sjsg 	return true;
4791c349dbc7Sjsg 
4792c349dbc7Sjsg disabled:
4793ad8b1aafSjsg 		dev_info(adev->dev, "GPU recovery disabled.\n");
4794c349dbc7Sjsg 		return false;
4795fb4d8502Sjsg }
4796fb4d8502Sjsg 
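/**
 * amdgpu_device_mode1_reset - perform a full (mode1) ASIC reset
 *
 * @adev: amdgpu_device pointer
 *
 * Disables bus mastering, caches the PCI state, triggers a mode1 reset via
 * the SMU or PSP and waits for the memory controller to report a sane
 * memsize again.
 * Returns 0 on success, negative error code on failure.
 */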
47975ca02815Sjsg int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
4798c349dbc7Sjsg {
47995ca02815Sjsg 	u32 i;
48005ca02815Sjsg 	int ret = 0;
4801fb4d8502Sjsg 
48025ca02815Sjsg 	amdgpu_atombios_scratch_regs_engine_hung(adev, true);
48035ca02815Sjsg 
48045ca02815Sjsg 	dev_info(adev->dev, "GPU mode1 reset\n");
48055ca02815Sjsg 
48065ca02815Sjsg 	/* disable BM */
48075ca02815Sjsg 	pci_clear_master(adev->pdev);
48085ca02815Sjsg 
48095ca02815Sjsg 	amdgpu_device_cache_pci_state(adev->pdev);
48105ca02815Sjsg 
48115ca02815Sjsg 	if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
48125ca02815Sjsg 		dev_info(adev->dev, "GPU smu mode1 reset\n");
48135ca02815Sjsg 		ret = amdgpu_dpm_mode1_reset(adev);
48145ca02815Sjsg 	} else {
48155ca02815Sjsg 		dev_info(adev->dev, "GPU psp mode1 reset\n");
48165ca02815Sjsg 		ret = psp_gpu_reset(adev);
48175ca02815Sjsg 	}
48185ca02815Sjsg 
48195ca02815Sjsg 	if (ret)
4820f005ef32Sjsg 		goto mode1_reset_failed;
48215ca02815Sjsg 
48225ca02815Sjsg 	amdgpu_device_load_pci_state(adev->pdev);
4823f005ef32Sjsg 	ret = amdgpu_psp_wait_for_bootloader(adev);
4824f005ef32Sjsg 	if (ret)
4825f005ef32Sjsg 		goto mode1_reset_failed;
48265ca02815Sjsg 
48275ca02815Sjsg 	/* wait for asic to come out of reset */
48285ca02815Sjsg 	for (i = 0; i < adev->usec_timeout; i++) {
48295ca02815Sjsg 		u32 memsize = adev->nbio.funcs->get_memsize(adev);
48305ca02815Sjsg 
48315ca02815Sjsg 		if (memsize != 0xffffffff)
48325ca02815Sjsg 			break;
48335ca02815Sjsg 		udelay(1);
48345ca02815Sjsg 	}
48355ca02815Sjsg 
4836f005ef32Sjsg 	if (i >= adev->usec_timeout) {
4837f005ef32Sjsg 		ret = -ETIMEDOUT;
4838f005ef32Sjsg 		goto mode1_reset_failed;
4839f005ef32Sjsg 	}
4840f005ef32Sjsg 
48415ca02815Sjsg 	amdgpu_atombios_scratch_regs_engine_hung(adev, false);
4842f005ef32Sjsg 
4843f005ef32Sjsg 	return 0;
4844f005ef32Sjsg 
4845f005ef32Sjsg mode1_reset_failed:
4846f005ef32Sjsg 	dev_err(adev->dev, "GPU mode1 reset failed\n");
48475ca02815Sjsg 	return ret;
48485ca02815Sjsg }
48495ca02815Sjsg 
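
/**
 * amdgpu_device_pre_asic_reset - prepare a device for ASIC reset
 *
 * @adev: amdgpu_device pointer
 * @reset_context: common reset context
 *
 * Temporarily disables fence interrupt processing, force-completes the
 * outstanding fences, bumps the karma of the guilty job (if any), then
 * tries an IP soft reset and, if a full reset is still required, suspends
 * the IP blocks and updates the reset context flags accordingly.
 * Returns 0 on success, negative error code on failure.
 */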
48505ca02815Sjsg int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
48515ca02815Sjsg 				 struct amdgpu_reset_context *reset_context)
48525ca02815Sjsg {
48531bb76ff1Sjsg 	int i, r = 0;
48545ca02815Sjsg 	struct amdgpu_job *job = NULL;
48555ca02815Sjsg 	bool need_full_reset =
48565ca02815Sjsg 		test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
48575ca02815Sjsg 
48585ca02815Sjsg 	if (reset_context->reset_req_dev == adev)
48595ca02815Sjsg 		job = reset_context->job;
4860ad8b1aafSjsg 
4861ad8b1aafSjsg 	if (amdgpu_sriov_vf(adev)) {
4862ad8b1aafSjsg 		/* stop the data exchange thread */
4863ad8b1aafSjsg 		amdgpu_virt_fini_data_exchange(adev);
4864ad8b1aafSjsg 	}
4865ad8b1aafSjsg 
48661bb76ff1Sjsg 	amdgpu_fence_driver_isr_toggle(adev, true);
48671bb76ff1Sjsg 
4868fb4d8502Sjsg 	/* block all schedulers and reset given job's ring */
4869fb4d8502Sjsg 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4870fb4d8502Sjsg 		struct amdgpu_ring *ring = adev->rings[i];
4871fb4d8502Sjsg 
4872fb4d8502Sjsg 		if (!ring || !ring->sched.thread)
4873fb4d8502Sjsg 			continue;
4874fb4d8502Sjsg 
4875f005ef32Sjsg 		/* Clear the job fences from the fence driver so force_completion
4876f005ef32Sjsg 		 * leaves only the NULL and vm flush fences in the fence driver.
4877f005ef32Sjsg 		 */
48781bb76ff1Sjsg 		amdgpu_fence_driver_clear_job_fences(ring);
48795ca02815Sjsg 
4880fb4d8502Sjsg 		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
4881fb4d8502Sjsg 		amdgpu_fence_driver_force_completion(ring);
4882fb4d8502Sjsg 	}
4883fb4d8502Sjsg 
48841bb76ff1Sjsg 	amdgpu_fence_driver_isr_toggle(adev, false);
48851bb76ff1Sjsg 
48865ca02815Sjsg 	if (job && job->vm)
4887c349dbc7Sjsg 		drm_sched_increase_karma(&job->base);
4888c349dbc7Sjsg 
48895ca02815Sjsg 	r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
48905ca02815Sjsg 	/* If reset handler not implemented, continue; otherwise return */
4891f005ef32Sjsg 	if (r == -EOPNOTSUPP)
48925ca02815Sjsg 		r = 0;
48935ca02815Sjsg 	else
48945ca02815Sjsg 		return r;
48955ca02815Sjsg 
4896c349dbc7Sjsg 	/* Don't suspend on bare metal if we are not going to HW reset the ASIC */
4897c349dbc7Sjsg 	if (!amdgpu_sriov_vf(adev)) {
4898c349dbc7Sjsg 
4899c349dbc7Sjsg 		if (!need_full_reset)
4900c349dbc7Sjsg 			need_full_reset = amdgpu_device_ip_need_full_reset(adev);
4901c349dbc7Sjsg 
4902f005ef32Sjsg 		if (!need_full_reset && amdgpu_gpu_recovery &&
4903f005ef32Sjsg 		    amdgpu_device_ip_check_soft_reset(adev)) {
4904c349dbc7Sjsg 			amdgpu_device_ip_pre_soft_reset(adev);
4905c349dbc7Sjsg 			r = amdgpu_device_ip_soft_reset(adev);
4906c349dbc7Sjsg 			amdgpu_device_ip_post_soft_reset(adev);
4907c349dbc7Sjsg 			if (r || amdgpu_device_ip_check_soft_reset(adev)) {
4908ad8b1aafSjsg 				dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
4909c349dbc7Sjsg 				need_full_reset = true;
4910c349dbc7Sjsg 			}
4911c349dbc7Sjsg 		}
4912c349dbc7Sjsg 
4913c349dbc7Sjsg 		if (need_full_reset)
4914c349dbc7Sjsg 			r = amdgpu_device_ip_suspend(adev);
49155ca02815Sjsg 		if (need_full_reset)
49165ca02815Sjsg 			set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
49175ca02815Sjsg 		else
49185ca02815Sjsg 			clear_bit(AMDGPU_NEED_FULL_RESET,
49195ca02815Sjsg 				  &reset_context->flags);
4920c349dbc7Sjsg 	}
4921c349dbc7Sjsg 
4922c349dbc7Sjsg 	return r;
4923c349dbc7Sjsg }
4924c349dbc7Sjsg 
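/* Snapshot the configured reset-dump registers so their values can be
 * traced and included in the device coredump taken after a reset.
 */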
49251bb76ff1Sjsg static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
49261bb76ff1Sjsg {
49271bb76ff1Sjsg 	int i;
49281bb76ff1Sjsg 
49291bb76ff1Sjsg 	lockdep_assert_held(&adev->reset_domain->sem);
49301bb76ff1Sjsg 
49311bb76ff1Sjsg 	for (i = 0; i < adev->num_regs; i++) {
49321bb76ff1Sjsg 		adev->reset_dump_reg_value[i] = RREG32(adev->reset_dump_reg_list[i]);
49331bb76ff1Sjsg 		trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i],
49341bb76ff1Sjsg 					     adev->reset_dump_reg_value[i]);
49351bb76ff1Sjsg 	}
49361bb76ff1Sjsg 
49371bb76ff1Sjsg 	return 0;
49381bb76ff1Sjsg }
49391bb76ff1Sjsg 
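/*
 * With CONFIG_DEV_COREDUMP enabled, a device coredump describing the reset
 * (kernel version, timestamp, offending process, whether VRAM was lost and
 * the captured register dump) is handed to the devcoredump framework so it
 * can be read from user space.
 */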
49401bb76ff1Sjsg #ifdef CONFIG_DEV_COREDUMP
49411bb76ff1Sjsg static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset,
49421bb76ff1Sjsg 		size_t count, void *data, size_t datalen)
49431bb76ff1Sjsg {
49441bb76ff1Sjsg 	struct drm_printer p;
49451bb76ff1Sjsg 	struct amdgpu_device *adev = data;
49461bb76ff1Sjsg 	struct drm_print_iterator iter;
49471bb76ff1Sjsg 	int i;
49481bb76ff1Sjsg 
49491bb76ff1Sjsg 	iter.data = buffer;
49501bb76ff1Sjsg 	iter.offset = 0;
49511bb76ff1Sjsg 	iter.start = offset;
49521bb76ff1Sjsg 	iter.remain = count;
49531bb76ff1Sjsg 
49541bb76ff1Sjsg 	p = drm_coredump_printer(&iter);
49551bb76ff1Sjsg 
49561bb76ff1Sjsg 	drm_printf(&p, "**** AMDGPU Device Coredump ****\n");
49571bb76ff1Sjsg 	drm_printf(&p, "kernel: " UTS_RELEASE "\n");
49581bb76ff1Sjsg 	drm_printf(&p, "module: " KBUILD_MODNAME "\n");
49591bb76ff1Sjsg 	drm_printf(&p, "time: %lld.%09ld\n", adev->reset_time.tv_sec, adev->reset_time.tv_nsec);
49601bb76ff1Sjsg 	if (adev->reset_task_info.pid)
49611bb76ff1Sjsg 		drm_printf(&p, "process_name: %s PID: %d\n",
49621bb76ff1Sjsg 			   adev->reset_task_info.process_name,
49631bb76ff1Sjsg 			   adev->reset_task_info.pid);
49641bb76ff1Sjsg 
49651bb76ff1Sjsg 	if (adev->reset_vram_lost)
49661bb76ff1Sjsg 		drm_printf(&p, "VRAM is lost due to GPU reset!\n");
49671bb76ff1Sjsg 	if (adev->num_regs) {
49681bb76ff1Sjsg 		drm_printf(&p, "AMDGPU register dumps:\nOffset:     Value:\n");
49691bb76ff1Sjsg 
49701bb76ff1Sjsg 		for (i = 0; i < adev->num_regs; i++)
49711bb76ff1Sjsg 			drm_printf(&p, "0x%08x: 0x%08x\n",
49721bb76ff1Sjsg 				   adev->reset_dump_reg_list[i],
49731bb76ff1Sjsg 				   adev->reset_dump_reg_value[i]);
49741bb76ff1Sjsg 	}
49751bb76ff1Sjsg 
49761bb76ff1Sjsg 	return count - iter.remain;
49771bb76ff1Sjsg }
49781bb76ff1Sjsg 
49791bb76ff1Sjsg static void amdgpu_devcoredump_free(void *data)
49801bb76ff1Sjsg {
49811bb76ff1Sjsg }
49821bb76ff1Sjsg 
49831bb76ff1Sjsg static void amdgpu_reset_capture_coredumpm(struct amdgpu_device *adev)
49841bb76ff1Sjsg {
49851bb76ff1Sjsg 	struct drm_device *dev = adev_to_drm(adev);
49861bb76ff1Sjsg 
49871bb76ff1Sjsg 	ktime_get_ts64(&adev->reset_time);
4988f005ef32Sjsg 	dev_coredumpm(dev->dev, THIS_MODULE, adev, 0, GFP_NOWAIT,
49891bb76ff1Sjsg 		      amdgpu_devcoredump_read, amdgpu_devcoredump_free);
49901bb76ff1Sjsg }
49911bb76ff1Sjsg #endif
49921bb76ff1Sjsg 
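/**
 * amdgpu_do_asic_reset - perform the actual ASIC reset
 *
 * @device_list_handle: list of devices to reset (the whole hive for XGMI)
 * @reset_context: common reset context
 *
 * Tries the dedicated reset handler first; otherwise falls back to the
 * default path and performs a full ASIC reset on every device in the list,
 * run in parallel across an XGMI hive, before the IP blocks are brought
 * back up.
 * Returns 0 on success, negative error code on failure.
 */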
49935ca02815Sjsg int amdgpu_do_asic_reset(struct list_head *device_list_handle,
49945ca02815Sjsg 			 struct amdgpu_reset_context *reset_context)
4995c349dbc7Sjsg {
4996c349dbc7Sjsg 	struct amdgpu_device *tmp_adev = NULL;
49975ca02815Sjsg 	bool need_full_reset, skip_hw_reset, vram_lost = false;
4998c349dbc7Sjsg 	int r = 0;
49991bb76ff1Sjsg 	bool gpu_reset_for_dev_remove = false;
5000c349dbc7Sjsg 
50015ca02815Sjsg 	/* Try reset handler method first */
50025ca02815Sjsg 	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
50035ca02815Sjsg 				    reset_list);
50041bb76ff1Sjsg 	amdgpu_reset_reg_dumps(tmp_adev);
50051bb76ff1Sjsg 
50061bb76ff1Sjsg 	reset_context->reset_device_list = device_list_handle;
50075ca02815Sjsg 	r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
50085ca02815Sjsg 	/* If reset handler not implemented, continue; otherwise return */
5009f005ef32Sjsg 	if (r == -EOPNOTSUPP)
50105ca02815Sjsg 		r = 0;
50115ca02815Sjsg 	else
50125ca02815Sjsg 		return r;
50135ca02815Sjsg 
50145ca02815Sjsg 	/* Reset handler not implemented, use the default method */
50155ca02815Sjsg 	need_full_reset =
50165ca02815Sjsg 		test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
50175ca02815Sjsg 	skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
50185ca02815Sjsg 
50191bb76ff1Sjsg 	gpu_reset_for_dev_remove =
50201bb76ff1Sjsg 		test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
50211bb76ff1Sjsg 			test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
50221bb76ff1Sjsg 
5023c349dbc7Sjsg 	/*
50245ca02815Sjsg 	 * ASIC reset has to be done on all XGMI hive nodes ASAP
5025c349dbc7Sjsg 	 * to allow proper links negotiation in FW (within 1 sec)
5026c349dbc7Sjsg 	 */
5027ad8b1aafSjsg 	if (!skip_hw_reset && need_full_reset) {
50285ca02815Sjsg 		list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5029c349dbc7Sjsg 			/* For XGMI run all resets in parallel to speed up the process */
5030c349dbc7Sjsg 			if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
50315ca02815Sjsg 				tmp_adev->gmc.xgmi.pending_reset = false;
5032c349dbc7Sjsg 				if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
5033c349dbc7Sjsg 					r = -EALREADY;
5034c349dbc7Sjsg 			} else
5035c349dbc7Sjsg 				r = amdgpu_asic_reset(tmp_adev);
5036c349dbc7Sjsg 
5037c349dbc7Sjsg 			if (r) {
5038ad8b1aafSjsg 				dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
5039ad8b1aafSjsg 					 r, adev_to_drm(tmp_adev)->unique);
5040c349dbc7Sjsg 				break;
5041c349dbc7Sjsg 			}
5042c349dbc7Sjsg 		}
5043c349dbc7Sjsg 
5044c349dbc7Sjsg 		/* For XGMI wait for all resets to complete before proceed */
5045c349dbc7Sjsg 		if (!r) {
50465ca02815Sjsg 			list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5047c349dbc7Sjsg 				if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5048c349dbc7Sjsg 					flush_work(&tmp_adev->xgmi_reset_work);
5049c349dbc7Sjsg 					r = tmp_adev->asic_reset_res;
5050c349dbc7Sjsg 					if (r)
5051c349dbc7Sjsg 						break;
5052c349dbc7Sjsg 				}
5053c349dbc7Sjsg 			}
5054c349dbc7Sjsg 		}
5055c349dbc7Sjsg 	}
5056c349dbc7Sjsg 
5057c349dbc7Sjsg 	if (!r && amdgpu_ras_intr_triggered()) {
50585ca02815Sjsg 		list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
50591bb76ff1Sjsg 			if (tmp_adev->mmhub.ras && tmp_adev->mmhub.ras->ras_block.hw_ops &&
50601bb76ff1Sjsg 			    tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
50611bb76ff1Sjsg 				tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(tmp_adev);
5062c349dbc7Sjsg 		}
5063c349dbc7Sjsg 
5064c349dbc7Sjsg 		amdgpu_ras_intr_cleared();
5065c349dbc7Sjsg 	}
5066c349dbc7Sjsg 
50671bb76ff1Sjsg 	/* Since the mode1 reset affects base ip blocks, the
50681bb76ff1Sjsg 	 * phase1 ip blocks need to be resumed. Otherwise there
50691bb76ff1Sjsg 	 * will be a BIOS signature error and the psp bootloader
50701bb76ff1Sjsg 	 * can't load kdb on the next amdgpu install.
50711bb76ff1Sjsg 	 */
50721bb76ff1Sjsg 	if (gpu_reset_for_dev_remove) {
50731bb76ff1Sjsg 		list_for_each_entry(tmp_adev, device_list_handle, reset_list)
50741bb76ff1Sjsg 			amdgpu_device_ip_resume_phase1(tmp_adev);
50751bb76ff1Sjsg 
50761bb76ff1Sjsg 		goto end;
50771bb76ff1Sjsg 	}
50781bb76ff1Sjsg 
50795ca02815Sjsg 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5080c349dbc7Sjsg 		if (need_full_reset) {
5081c349dbc7Sjsg 			/* post card */
50825ca02815Sjsg 			r = amdgpu_device_asic_init(tmp_adev);
50835ca02815Sjsg 			if (r) {
5084ad8b1aafSjsg 				dev_warn(tmp_adev->dev, "asic atom init failed!");
50855ca02815Sjsg 			} else {
5086c349dbc7Sjsg 				dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
5087e7ede0e6Sjsg 
5088c349dbc7Sjsg 				r = amdgpu_device_ip_resume_phase1(tmp_adev);
5089c349dbc7Sjsg 				if (r)
5090c349dbc7Sjsg 					goto out;
5091c349dbc7Sjsg 
5092c349dbc7Sjsg 				vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
50931bb76ff1Sjsg #ifdef CONFIG_DEV_COREDUMP
50941bb76ff1Sjsg 				tmp_adev->reset_vram_lost = vram_lost;
50951bb76ff1Sjsg 				memset(&tmp_adev->reset_task_info, 0,
50961bb76ff1Sjsg 						sizeof(tmp_adev->reset_task_info));
50971bb76ff1Sjsg 				if (reset_context->job && reset_context->job->vm)
50981bb76ff1Sjsg 					tmp_adev->reset_task_info =
50991bb76ff1Sjsg 						reset_context->job->vm->task_info;
51001bb76ff1Sjsg 				amdgpu_reset_capture_coredumpm(tmp_adev);
51011bb76ff1Sjsg #endif
5102c349dbc7Sjsg 				if (vram_lost) {
5103c349dbc7Sjsg 					DRM_INFO("VRAM is lost due to GPU reset!\n");
5104c349dbc7Sjsg 					amdgpu_inc_vram_lost(tmp_adev);
5105c349dbc7Sjsg 				}
5106c349dbc7Sjsg 
5107c349dbc7Sjsg 				r = amdgpu_device_fw_loading(tmp_adev);
5108c349dbc7Sjsg 				if (r)
5109c349dbc7Sjsg 					return r;
5110c349dbc7Sjsg 
5111c349dbc7Sjsg 				r = amdgpu_device_ip_resume_phase2(tmp_adev);
5112c349dbc7Sjsg 				if (r)
5113c349dbc7Sjsg 					goto out;
5114c349dbc7Sjsg 
5115c349dbc7Sjsg 				if (vram_lost)
5116c349dbc7Sjsg 					amdgpu_device_fill_reset_magic(tmp_adev);
5117c349dbc7Sjsg 
5118c349dbc7Sjsg 				/*
5119c349dbc7Sjsg 				 * Add this ASIC as tracked since the reset has
5120c349dbc7Sjsg 				 * already completed successfully.
5121c349dbc7Sjsg 				 */
5122c349dbc7Sjsg 				amdgpu_register_gpu_instance(tmp_adev);
5123c349dbc7Sjsg 
51245ca02815Sjsg 				if (!reset_context->hive &&
51255ca02815Sjsg 				    tmp_adev->gmc.xgmi.num_physical_nodes > 1)
51265ca02815Sjsg 					amdgpu_xgmi_add_device(tmp_adev);
51275ca02815Sjsg 
5128c349dbc7Sjsg 				r = amdgpu_device_ip_late_init(tmp_adev);
5129c349dbc7Sjsg 				if (r)
5130c349dbc7Sjsg 					goto out;
5131c349dbc7Sjsg 
51321bb76ff1Sjsg 				drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);
5133c349dbc7Sjsg 
5134ad8b1aafSjsg 				/*
5135ad8b1aafSjsg 				 * The GPU enters a bad state once the number of
5136ad8b1aafSjsg 				 * faulty pages reported by ECC reaches the
5137ad8b1aafSjsg 				 * threshold, and RAS recovery is scheduled next.
5138ad8b1aafSjsg 				 * So add a check here to break recovery if the
5139ad8b1aafSjsg 				 * bad page threshold has indeed been exceeded,
5140ad8b1aafSjsg 				 * and remind the user to retire this GPU or set
5141ad8b1aafSjsg 				 * a bigger bad_page_threshold value the next
5142ad8b1aafSjsg 				 * time the driver is probed.
5143ad8b1aafSjsg 				 */
51445ca02815Sjsg 				if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
5145c349dbc7Sjsg 					/* must succeed. */
5146c349dbc7Sjsg 					amdgpu_ras_resume(tmp_adev);
5147ad8b1aafSjsg 				} else {
5148ad8b1aafSjsg 					r = -EINVAL;
5149ad8b1aafSjsg 					goto out;
5150ad8b1aafSjsg 				}
5151c349dbc7Sjsg 
5152c349dbc7Sjsg 				/* Update PSP FW topology after reset */
51535ca02815Sjsg 				if (reset_context->hive &&
51545ca02815Sjsg 				    tmp_adev->gmc.xgmi.num_physical_nodes > 1)
51555ca02815Sjsg 					r = amdgpu_xgmi_update_topology(
51565ca02815Sjsg 						reset_context->hive, tmp_adev);
5157c349dbc7Sjsg 			}
5158c349dbc7Sjsg 		}
5159c349dbc7Sjsg 
5160c349dbc7Sjsg out:
5161c349dbc7Sjsg 		if (!r) {
5162c349dbc7Sjsg 			amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
5163c349dbc7Sjsg 			r = amdgpu_ib_ring_tests(tmp_adev);
5164c349dbc7Sjsg 			if (r) {
5165c349dbc7Sjsg 				dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
5166c349dbc7Sjsg 				need_full_reset = true;
5167c349dbc7Sjsg 				r = -EAGAIN;
5168c349dbc7Sjsg 				goto end;
5169c349dbc7Sjsg 			}
5170c349dbc7Sjsg 		}
5171c349dbc7Sjsg 
5172c349dbc7Sjsg 		if (!r)
5173c349dbc7Sjsg 			r = amdgpu_device_recover_vram(tmp_adev);
5174fb4d8502Sjsg 		else
5175c349dbc7Sjsg 			tmp_adev->asic_reset_res = r;
5176c349dbc7Sjsg 	}
5177c349dbc7Sjsg 
5178c349dbc7Sjsg end:
51795ca02815Sjsg 	if (need_full_reset)
51805ca02815Sjsg 		set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
51815ca02815Sjsg 	else
51825ca02815Sjsg 		clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5183c349dbc7Sjsg 	return r;
5184c349dbc7Sjsg }
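
/*
 * Reset flow summary: amdgpu_do_asic_reset() first offers the reset to a
 * vendor/SOC specific handler via amdgpu_reset_perform_reset() and only
 * falls back to the generic sequence (parallel ASIC reset across an XGMI
 * hive, ATOM re-post, phase1/phase2 IP resume, firmware loading, late init
 * and VRAM recovery) when that handler returns -EOPNOTSUPP.
 *
 * Minimal sketch of how a caller prepares the arguments; this mirrors what
 * amdgpu_pci_slot_reset() below does and is illustration, not a new API:
 *
 *   struct amdgpu_reset_context reset_context;
 *   struct list_head device_list;
 *
 *   memset(&reset_context, 0, sizeof(reset_context));
 *   INIT_LIST_HEAD(&device_list);
 *   list_add_tail(&adev->reset_list, &device_list);
 *
 *   reset_context.method = AMD_RESET_METHOD_NONE;   // let the ASIC pick
 *   reset_context.reset_req_dev = adev;
 *   set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
 *
 *   r = amdgpu_do_asic_reset(&device_list, &reset_context);
 */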
5185c349dbc7Sjsg 
51861bb76ff1Sjsg static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
5187c349dbc7Sjsg {
5188c349dbc7Sjsg 
5189c349dbc7Sjsg 	switch (amdgpu_asic_reset_method(adev)) {
5190c349dbc7Sjsg 	case AMD_RESET_METHOD_MODE1:
5191c349dbc7Sjsg 		adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
5192c349dbc7Sjsg 		break;
5193c349dbc7Sjsg 	case AMD_RESET_METHOD_MODE2:
5194c349dbc7Sjsg 		adev->mp1_state = PP_MP1_STATE_RESET;
5195c349dbc7Sjsg 		break;
5196c349dbc7Sjsg 	default:
5197c349dbc7Sjsg 		adev->mp1_state = PP_MP1_STATE_NONE;
5198c349dbc7Sjsg 		break;
5199c349dbc7Sjsg 	}
5200c349dbc7Sjsg 
5202c349dbc7Sjsg }
5203c349dbc7Sjsg 
52041bb76ff1Sjsg static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
5205c349dbc7Sjsg {
5206c349dbc7Sjsg 	amdgpu_vf_error_trans_all(adev);
5207c349dbc7Sjsg 	adev->mp1_state = PP_MP1_STATE_NONE;
52085ca02815Sjsg }
52095ca02815Sjsg 
5210ad8b1aafSjsg static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
5211ad8b1aafSjsg {
5212ad8b1aafSjsg 	STUB();
5213ad8b1aafSjsg #ifdef notyet
5214ad8b1aafSjsg 	struct pci_dev *p = NULL;
5215ad8b1aafSjsg 
5216ad8b1aafSjsg 	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5217ad8b1aafSjsg 			adev->pdev->bus->number, 1);
5218ad8b1aafSjsg 	if (p) {
5219ad8b1aafSjsg 		pm_runtime_enable(&(p->dev));
5220ad8b1aafSjsg 		pm_runtime_resume(&(p->dev));
5221ad8b1aafSjsg 	}
5222ad8b1aafSjsg #endif
5223ad8b1aafSjsg }
5224ad8b1aafSjsg 
5225ad8b1aafSjsg static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
5226ad8b1aafSjsg {
5227ad8b1aafSjsg 	enum amd_reset_method reset_method;
5228ad8b1aafSjsg 	struct pci_dev *p = NULL;
5229ad8b1aafSjsg 	u64 expires;
5230ad8b1aafSjsg 
5231ad8b1aafSjsg 	/*
5232ad8b1aafSjsg 	 * For now, only BACO and mode1 reset are confirmed to
5233ad8b1aafSjsg 	 * suffer from the audio issue when audio is not properly suspended.
5234ad8b1aafSjsg 	 */
5235ad8b1aafSjsg 	reset_method = amdgpu_asic_reset_method(adev);
5236ad8b1aafSjsg 	if ((reset_method != AMD_RESET_METHOD_BACO) &&
5237ad8b1aafSjsg 	     (reset_method != AMD_RESET_METHOD_MODE1))
5238ad8b1aafSjsg 		return -EINVAL;
5239ad8b1aafSjsg 
5240ad8b1aafSjsg 	STUB();
5241ad8b1aafSjsg 	return -ENOSYS;
5242ad8b1aafSjsg #ifdef notyet
5243ad8b1aafSjsg 
5244ad8b1aafSjsg 	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5245ad8b1aafSjsg 			adev->pdev->bus->number, 1);
5246ad8b1aafSjsg 	if (!p)
5247ad8b1aafSjsg 		return -ENODEV;
5248ad8b1aafSjsg 
5249ad8b1aafSjsg 	expires = pm_runtime_autosuspend_expiration(&(p->dev));
5250ad8b1aafSjsg 	if (!expires)
5251ad8b1aafSjsg 		/*
5252ad8b1aafSjsg 		 * If we cannot get the audio device autosuspend delay,
5253ad8b1aafSjsg 		 * a fixed 4s interval is used. Since 3s is the audio
5254ad8b1aafSjsg 		 * controller's default autosuspend delay setting, the
5255ad8b1aafSjsg 		 * 4s used here is guaranteed to cover it.
5256ad8b1aafSjsg 		 */
5257ad8b1aafSjsg 		expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
5258ad8b1aafSjsg 
5259ad8b1aafSjsg 	while (!pm_runtime_status_suspended(&(p->dev))) {
5260ad8b1aafSjsg 		if (!pm_runtime_suspend(&(p->dev)))
5261ad8b1aafSjsg 			break;
5262ad8b1aafSjsg 
5263ad8b1aafSjsg 		if (expires < ktime_get_mono_fast_ns()) {
5264ad8b1aafSjsg 			dev_warn(adev->dev, "failed to suspend display audio\n");
52651bb76ff1Sjsg 			pci_dev_put(p);
5266ad8b1aafSjsg 			/* TODO: abort the succeeding gpu reset? */
5267ad8b1aafSjsg 			return -ETIMEDOUT;
5268ad8b1aafSjsg 		}
5269ad8b1aafSjsg 	}
5270ad8b1aafSjsg 
5271ad8b1aafSjsg 	pm_runtime_disable(&(p->dev));
5272ad8b1aafSjsg 
52731bb76ff1Sjsg 	pci_dev_put(p);
5274ad8b1aafSjsg 	return 0;
5275ad8b1aafSjsg #endif
5276c349dbc7Sjsg }
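
/*
 * The loop above keeps calling pm_runtime_suspend() on the audio function
 * (function 1 of the GPU) until it reports suspended, giving up once the
 * controller's own autosuspend deadline (or the conservative 4 s fallback)
 * has passed.  On OpenBSD the body is stubbed out and returns -ENOSYS, so
 * audio_suspended stays false in the caller and the matching resume is
 * skipped as well.
 */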
5277c349dbc7Sjsg 
52781bb76ff1Sjsg static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
52791bb76ff1Sjsg {
52801bb76ff1Sjsg 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
52811bb76ff1Sjsg 
52821bb76ff1Sjsg #if defined(CONFIG_DEBUG_FS)
52831bb76ff1Sjsg 	if (!amdgpu_sriov_vf(adev))
52841bb76ff1Sjsg 		cancel_work(&adev->reset_work);
52851bb76ff1Sjsg #endif
52861bb76ff1Sjsg 
52871bb76ff1Sjsg 	if (adev->kfd.dev)
52881bb76ff1Sjsg 		cancel_work(&adev->kfd.reset_work);
52891bb76ff1Sjsg 
52901bb76ff1Sjsg 	if (amdgpu_sriov_vf(adev))
52911bb76ff1Sjsg 		cancel_work(&adev->virt.flr_work);
52921bb76ff1Sjsg 
52931bb76ff1Sjsg 	if (con && adev->ras_enabled)
52941bb76ff1Sjsg 		cancel_work(&con->recovery_work);
52951bb76ff1Sjsg 
52961bb76ff1Sjsg }
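
/*
 * Only the "out of band" reset sources are cancelled here: the debugfs
 * reset work, the KFD reset work, the SR-IOV FLR work and the RAS recovery
 * work.  Resets that came in through the DRM scheduler do not need to be
 * cancelled because drm_sched_stop(), invoked from
 * amdgpu_device_gpu_recover() before this helper runs, already parks those
 * schedulers.
 */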
52971bb76ff1Sjsg 
5298c349dbc7Sjsg /**
5299c349dbc7Sjsg  * amdgpu_device_gpu_recover - reset the asic and recover scheduler
5300c349dbc7Sjsg  *
5301ad8b1aafSjsg  * @adev: amdgpu_device pointer
5302c349dbc7Sjsg  * @job: which job trigger hang
5303f005ef32Sjsg  * @reset_context: amdgpu reset context pointer
5304c349dbc7Sjsg  *
5305c349dbc7Sjsg  * Attempt to reset the GPU if it has hung (all ASICs).
5306c349dbc7Sjsg  * Attempts a soft reset or a full reset and reinitializes the ASIC.
5307c349dbc7Sjsg  * Returns 0 for success or an error on failure.
5308c349dbc7Sjsg  */
5309c349dbc7Sjsg 
5310c349dbc7Sjsg int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
53111bb76ff1Sjsg 			      struct amdgpu_job *job,
53121bb76ff1Sjsg 			      struct amdgpu_reset_context *reset_context)
5313c349dbc7Sjsg {
5314c349dbc7Sjsg 	struct list_head device_list, *device_list_handle =  NULL;
5315ad8b1aafSjsg 	bool job_signaled = false;
5316c349dbc7Sjsg 	struct amdgpu_hive_info *hive = NULL;
5317c349dbc7Sjsg 	struct amdgpu_device *tmp_adev = NULL;
5318c349dbc7Sjsg 	int i, r = 0;
5319ad8b1aafSjsg 	bool need_emergency_restart = false;
5320ad8b1aafSjsg 	bool audio_suspended = false;
53211bb76ff1Sjsg 	bool gpu_reset_for_dev_remove = false;
53225ca02815Sjsg 
53231bb76ff1Sjsg 	gpu_reset_for_dev_remove =
53241bb76ff1Sjsg 			test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
53251bb76ff1Sjsg 				test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5326ad8b1aafSjsg 
5327ad8b1aafSjsg 	/*
5328ad8b1aafSjsg 	 * Special case: RAS triggered and full reset isn't supported
5329ad8b1aafSjsg 	 */
5330ad8b1aafSjsg 	need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
5331c349dbc7Sjsg 
5332c349dbc7Sjsg 	/*
5333c349dbc7Sjsg 	 * Flush RAM to disk so that after reboot
5334c349dbc7Sjsg 	 * the user can read log and see why the system rebooted.
5335c349dbc7Sjsg 	 */
5336aa9b4d72Sjsg 	if (need_emergency_restart && amdgpu_ras_get_context(adev) &&
5337aa9b4d72Sjsg 		amdgpu_ras_get_context(adev)->reboot) {
5338c349dbc7Sjsg 		DRM_WARN("Emergency reboot.");
5339c349dbc7Sjsg 
5340c349dbc7Sjsg #ifdef notyet
5341c349dbc7Sjsg 		ksys_sync_helper();
5342c349dbc7Sjsg 		emergency_restart();
5343c349dbc7Sjsg #else
5344c349dbc7Sjsg 		panic("emergency_restart");
5345c349dbc7Sjsg #endif
5346c349dbc7Sjsg 	}
5347c349dbc7Sjsg 
5348c349dbc7Sjsg 	dev_info(adev->dev, "GPU %s begin!\n",
5349ad8b1aafSjsg 		need_emergency_restart ? "jobs stop":"reset");
5350c349dbc7Sjsg 
53511bb76ff1Sjsg 	if (!amdgpu_sriov_vf(adev))
5352ad8b1aafSjsg 		hive = amdgpu_get_xgmi_hive(adev);
53531bb76ff1Sjsg 	if (hive)
5354ad8b1aafSjsg 		mutex_lock(&hive->hive_lock);
5355c349dbc7Sjsg 
53561bb76ff1Sjsg 	reset_context->job = job;
53571bb76ff1Sjsg 	reset_context->hive = hive;
5358c349dbc7Sjsg 	/*
5359ad8b1aafSjsg 	 * Build list of devices to reset.
5360ad8b1aafSjsg 	 * In case we are in XGMI hive mode, resort the device list
5361ad8b1aafSjsg 	 * to put adev in the 1st position.
5362c349dbc7Sjsg 	 */
5363ad8b1aafSjsg 	INIT_LIST_HEAD(&device_list);
53641bb76ff1Sjsg 	if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
53651bb76ff1Sjsg 		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
53665ca02815Sjsg 			list_add_tail(&tmp_adev->reset_list, &device_list);
53671bb76ff1Sjsg 			if (gpu_reset_for_dev_remove && adev->shutdown)
53681bb76ff1Sjsg 				tmp_adev->shutdown = true;
53691bb76ff1Sjsg 		}
53705ca02815Sjsg 		if (!list_is_first(&adev->reset_list, &device_list))
53715ca02815Sjsg 			list_rotate_to_front(&adev->reset_list, &device_list);
53725ca02815Sjsg 		device_list_handle = &device_list;
5373c349dbc7Sjsg 	} else {
53745ca02815Sjsg 		list_add_tail(&adev->reset_list, &device_list);
5375c349dbc7Sjsg 		device_list_handle = &device_list;
5376c349dbc7Sjsg 	}
5377c349dbc7Sjsg 
53781bb76ff1Sjsg 	/* We need to lock reset domain only once both for XGMI and single device */
53791bb76ff1Sjsg 	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
53801bb76ff1Sjsg 				    reset_list);
53811bb76ff1Sjsg 	amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
53821bb76ff1Sjsg 
5383c349dbc7Sjsg 	/* block all schedulers and reset given job's ring */
53845ca02815Sjsg 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
53851bb76ff1Sjsg 
53861bb76ff1Sjsg 		amdgpu_device_set_mp1_state(tmp_adev);
53871bb76ff1Sjsg 
5388ad8b1aafSjsg 		/*
5389ad8b1aafSjsg 		 * Try to put the audio codec into suspend state
5390ad8b1aafSjsg 		 * before the GPU reset starts.
5391ad8b1aafSjsg 		 *
5392ad8b1aafSjsg 		 * Because the power domain of the graphics device is
5393ad8b1aafSjsg 		 * shared with the AZ power domain, we may otherwise
5394ad8b1aafSjsg 		 * change the audio hardware from behind the audio
5395ad8b1aafSjsg 		 * driver's back, which triggers audio codec errors.
5397ad8b1aafSjsg 		 */
5398ad8b1aafSjsg 		if (!amdgpu_device_suspend_display_audio(tmp_adev))
5399ad8b1aafSjsg 			audio_suspended = true;
5400ad8b1aafSjsg 
5401af8ed3f7Sjsg 		amdgpu_ras_set_error_query_ready(tmp_adev, false);
5402ad8b1aafSjsg 
5403ad8b1aafSjsg 		cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
5404ad8b1aafSjsg 
5405c349dbc7Sjsg 		if (!amdgpu_sriov_vf(tmp_adev))
5406c349dbc7Sjsg 			amdgpu_amdkfd_pre_reset(tmp_adev);
5407c349dbc7Sjsg 
5408c349dbc7Sjsg 		/*
5409c349dbc7Sjsg 		 * Mark these ASICs to be reset as untracked first,
5410c349dbc7Sjsg 		 * and add them back after the reset completes.
5411c349dbc7Sjsg 		 */
5412c349dbc7Sjsg 		amdgpu_unregister_gpu_instance(tmp_adev);
5413c349dbc7Sjsg 
54141bb76ff1Sjsg 		drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);
5415c349dbc7Sjsg 
5416c349dbc7Sjsg 		/* disable ras on ALL IPs */
5417ad8b1aafSjsg 		if (!need_emergency_restart &&
5418c349dbc7Sjsg 		      amdgpu_device_ip_need_full_reset(tmp_adev))
5419c349dbc7Sjsg 			amdgpu_ras_suspend(tmp_adev);
5420fb4d8502Sjsg 
5421fb4d8502Sjsg 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5422c349dbc7Sjsg 			struct amdgpu_ring *ring = tmp_adev->rings[i];
5423fb4d8502Sjsg 
5424fb4d8502Sjsg 			if (!ring || !ring->sched.thread)
5425fb4d8502Sjsg 				continue;
5426fb4d8502Sjsg 
5427c349dbc7Sjsg 			drm_sched_stop(&ring->sched, job ? &job->base : NULL);
5428c349dbc7Sjsg 
5429ad8b1aafSjsg 			if (need_emergency_restart)
5430c349dbc7Sjsg 				amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
5431c349dbc7Sjsg 		}
54325ca02815Sjsg 		atomic_inc(&tmp_adev->gpu_reset_counter);
5433c349dbc7Sjsg 	}
5434c349dbc7Sjsg 
5435ad8b1aafSjsg 	if (need_emergency_restart)
5436c349dbc7Sjsg 		goto skip_sched_resume;
5437c349dbc7Sjsg 
5438c349dbc7Sjsg 	/*
5439c349dbc7Sjsg 	 * Must check guilty signal here since after this point all old
5440c349dbc7Sjsg 	 * HW fences are force signaled.
5441c349dbc7Sjsg 	 *
5442c349dbc7Sjsg 	 * job->base holds a reference to parent fence
5443fb4d8502Sjsg 	 */
54441bb76ff1Sjsg 	if (job && dma_fence_is_signaled(&job->hw_fence)) {
5445c349dbc7Sjsg 		job_signaled = true;
5446c349dbc7Sjsg 		dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
5447c349dbc7Sjsg 		goto skip_hw_reset;
5448fb4d8502Sjsg 	}
5449fb4d8502Sjsg 
5450c349dbc7Sjsg retry:	/* Rest of adevs pre asic reset from XGMI hive. */
54515ca02815Sjsg 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
54521bb76ff1Sjsg 		if (gpu_reset_for_dev_remove) {
54531bb76ff1Sjsg 			/* Workaround for ASICs that need to disable SMC first */
54541bb76ff1Sjsg 			amdgpu_device_smu_fini_early(tmp_adev);
54551bb76ff1Sjsg 		}
54561bb76ff1Sjsg 		r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
5457c349dbc7Sjsg 		/*TODO Should we stop ?*/
5458c349dbc7Sjsg 		if (r) {
5459ad8b1aafSjsg 			dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
5460ad8b1aafSjsg 				  r, adev_to_drm(tmp_adev)->unique);
5461c349dbc7Sjsg 			tmp_adev->asic_reset_res = r;
5462c349dbc7Sjsg 		}
54631bb76ff1Sjsg 
54641bb76ff1Sjsg 		/*
54651bb76ff1Sjsg 		 * Drop all pending non scheduler resets. Scheduler resets
54661bb76ff1Sjsg 		 * were already dropped during drm_sched_stop
54671bb76ff1Sjsg 		 */
54681bb76ff1Sjsg 		amdgpu_device_stop_pending_resets(tmp_adev);
5469c349dbc7Sjsg 	}
5470c349dbc7Sjsg 
5471c349dbc7Sjsg 	/* Actual ASIC resets if needed.*/
54721bb76ff1Sjsg 	/* Host driver will handle XGMI hive reset for SRIOV */
5473c349dbc7Sjsg 	if (amdgpu_sriov_vf(adev)) {
5474c349dbc7Sjsg 		r = amdgpu_device_reset_sriov(adev, job ? false : true);
5475c349dbc7Sjsg 		if (r)
5476c349dbc7Sjsg 			adev->asic_reset_res = r;
54771bb76ff1Sjsg 
5478f005ef32Sjsg 		/* Aldebaran and gfx_11_0_3 support RAS in SRIOV, so RAS needs to be resumed during reset */
5479f005ef32Sjsg 		if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ||
5480f005ef32Sjsg 		    adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 3))
54811bb76ff1Sjsg 			amdgpu_ras_resume(adev);
5482c349dbc7Sjsg 	} else {
54831bb76ff1Sjsg 		r = amdgpu_do_asic_reset(device_list_handle, reset_context);
5484c349dbc7Sjsg 		if (r && r == -EAGAIN)
5485c349dbc7Sjsg 			goto retry;
54861bb76ff1Sjsg 
54871bb76ff1Sjsg 		if (!r && gpu_reset_for_dev_remove)
54881bb76ff1Sjsg 			goto recover_end;
5489c349dbc7Sjsg 	}
5490c349dbc7Sjsg 
5491c349dbc7Sjsg skip_hw_reset:
5492c349dbc7Sjsg 
5493c349dbc7Sjsg 	/* Post ASIC reset for all devs .*/
54945ca02815Sjsg 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
54955ca02815Sjsg 
5496c349dbc7Sjsg 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5497c349dbc7Sjsg 			struct amdgpu_ring *ring = tmp_adev->rings[i];
5498c349dbc7Sjsg 
5499c349dbc7Sjsg 			if (!ring || !ring->sched.thread)
5500c349dbc7Sjsg 				continue;
5501c349dbc7Sjsg 
5502f005ef32Sjsg 			drm_sched_start(&ring->sched, true);
5503c349dbc7Sjsg 		}
5504c349dbc7Sjsg 
55051bb76ff1Sjsg 		if (adev->enable_mes && adev->ip_versions[GC_HWIP][0] != IP_VERSION(11, 0, 3))
55061bb76ff1Sjsg 			amdgpu_mes_self_test(tmp_adev);
55071bb76ff1Sjsg 
5508f005ef32Sjsg 		if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)
5509ad8b1aafSjsg 			drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
5510c349dbc7Sjsg 
55111bb76ff1Sjsg 		if (tmp_adev->asic_reset_res)
55121bb76ff1Sjsg 			r = tmp_adev->asic_reset_res;
55131bb76ff1Sjsg 
5514c349dbc7Sjsg 		tmp_adev->asic_reset_res = 0;
5515fb4d8502Sjsg 
5516fb4d8502Sjsg 		if (r) {
5517fb4d8502Sjsg 			/* bad news, how to tell it to userspace ? */
5518c349dbc7Sjsg 			dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
5519c349dbc7Sjsg 			amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
5520fb4d8502Sjsg 		} else {
5521c349dbc7Sjsg 			dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
55225ca02815Sjsg 			if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
55235ca02815Sjsg 				DRM_WARN("smart shift update failed\n");
5524c349dbc7Sjsg 		}
5525fb4d8502Sjsg 	}
5526fb4d8502Sjsg 
5527c349dbc7Sjsg skip_sched_resume:
55285ca02815Sjsg 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5529c349dbc7Sjsg 		/* unlock kfd: SRIOV would do it separately */
5530ad8b1aafSjsg 		if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
5531c349dbc7Sjsg 			amdgpu_amdkfd_post_reset(tmp_adev);
55325ca02815Sjsg 
55335ca02815Sjsg 		/* kfd_post_reset will do nothing if the kfd device is not initialized,
55345ca02815Sjsg 		 * so bring up kfd here if it was not initialized before
55355ca02815Sjsg 		 */
55365ca02815Sjsg 		if (!adev->kfd.init_complete)
55375ca02815Sjsg 			amdgpu_amdkfd_device_init(adev);
55385ca02815Sjsg 
5539ad8b1aafSjsg 		if (audio_suspended)
5540ad8b1aafSjsg 			amdgpu_device_resume_display_audio(tmp_adev);
55411bb76ff1Sjsg 
55421bb76ff1Sjsg 		amdgpu_device_unset_mp1_state(tmp_adev);
5543f005ef32Sjsg 
5544f005ef32Sjsg 		amdgpu_ras_set_error_query_ready(tmp_adev, true);
5545c349dbc7Sjsg 	}
5546c349dbc7Sjsg 
55471bb76ff1Sjsg recover_end:
55481bb76ff1Sjsg 	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
55491bb76ff1Sjsg 					    reset_list);
55501bb76ff1Sjsg 	amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
55511bb76ff1Sjsg 
5552ad8b1aafSjsg 	if (hive) {
5553ad8b1aafSjsg 		mutex_unlock(&hive->hive_lock);
5554ad8b1aafSjsg 		amdgpu_put_xgmi_hive(hive);
5555ad8b1aafSjsg 	}
5556c349dbc7Sjsg 
55571bb76ff1Sjsg 	if (r)
5558c349dbc7Sjsg 		dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
55591bb76ff1Sjsg 
55601bb76ff1Sjsg 	atomic_set(&adev->reset_domain->reset_res, r);
5561fb4d8502Sjsg 	return r;
5562fb4d8502Sjsg }
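
/*
 * A hedged sketch of how recovery is normally driven.  The usual entry
 * point is the job timeout handler (see amdgpu_job.c), which builds a
 * reset context and calls this function; the exact fields used may vary
 * between driver versions:
 *
 *   struct amdgpu_reset_context reset_context;
 *
 *   memset(&reset_context, 0, sizeof(reset_context));
 *   reset_context.method = AMD_RESET_METHOD_NONE;
 *   reset_context.reset_req_dev = adev;
 *   clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
 *
 *   r = amdgpu_device_gpu_recover(ring->adev, job, &reset_context);
 *
 * Internally the function serializes against concurrent resets via the
 * reset domain lock, stops every scheduler, resets the ASIC through
 * amdgpu_do_asic_reset() and finally restarts the schedulers.
 */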
5563fb4d8502Sjsg 
5564fb4d8502Sjsg /**
5565fb4d8502Sjsg  * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
5566fb4d8502Sjsg  *
5567fb4d8502Sjsg  * @adev: amdgpu_device pointer
5568fb4d8502Sjsg  *
5569fb4d8502Sjsg  * Fetches and stores in the driver the PCIE capabilities (gen speed
5570fb4d8502Sjsg  * and lanes) of the slot the device is in. Handles APUs and
5571fb4d8502Sjsg  * virtualized environments where PCIE config space may not be available.
5572fb4d8502Sjsg  */
5573fb4d8502Sjsg static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
5574fb4d8502Sjsg {
5575fb4d8502Sjsg 	struct pci_dev *pdev;
5576c349dbc7Sjsg 	enum pci_bus_speed speed_cap, platform_speed_cap;
5577c349dbc7Sjsg 	enum pcie_link_width platform_link_width;
5578fb4d8502Sjsg 
5579fb4d8502Sjsg 	if (amdgpu_pcie_gen_cap)
5580fb4d8502Sjsg 		adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
5581fb4d8502Sjsg 
5582fb4d8502Sjsg 	if (amdgpu_pcie_lane_cap)
5583fb4d8502Sjsg 		adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
5584fb4d8502Sjsg 
5585fb4d8502Sjsg 	/* covers APUs as well */
5586f005ef32Sjsg 	if (pci_is_root_bus(adev->pdev->bus) && !amdgpu_passthrough(adev)) {
5587fb4d8502Sjsg 		if (adev->pm.pcie_gen_mask == 0)
5588fb4d8502Sjsg 			adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
5589fb4d8502Sjsg 		if (adev->pm.pcie_mlw_mask == 0)
5590fb4d8502Sjsg 			adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
5591fb4d8502Sjsg 		return;
5592fb4d8502Sjsg 	}
5593fb4d8502Sjsg 
5594c349dbc7Sjsg 	if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
5595c349dbc7Sjsg 		return;
5596c349dbc7Sjsg 
5597c349dbc7Sjsg 	pcie_bandwidth_available(adev->pdev, NULL,
5598c349dbc7Sjsg 				 &platform_speed_cap, &platform_link_width);
5599c349dbc7Sjsg 
5600fb4d8502Sjsg 	if (adev->pm.pcie_gen_mask == 0) {
5601fb4d8502Sjsg 		/* asic caps */
5602fb4d8502Sjsg 		pdev = adev->pdev;
5603fb4d8502Sjsg 		speed_cap = pcie_get_speed_cap(pdev);
5604fb4d8502Sjsg 		if (speed_cap == PCI_SPEED_UNKNOWN) {
5605fb4d8502Sjsg 			adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5606fb4d8502Sjsg 						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5607fb4d8502Sjsg 						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5608fb4d8502Sjsg 		} else {
56095ca02815Sjsg 			if (speed_cap == PCIE_SPEED_32_0GT)
56105ca02815Sjsg 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
56115ca02815Sjsg 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
56125ca02815Sjsg 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
56135ca02815Sjsg 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
56145ca02815Sjsg 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
56155ca02815Sjsg 			else if (speed_cap == PCIE_SPEED_16_0GT)
5616fb4d8502Sjsg 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5617fb4d8502Sjsg 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5618fb4d8502Sjsg 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5619fb4d8502Sjsg 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
5620fb4d8502Sjsg 			else if (speed_cap == PCIE_SPEED_8_0GT)
5621fb4d8502Sjsg 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5622fb4d8502Sjsg 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5623fb4d8502Sjsg 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5624fb4d8502Sjsg 			else if (speed_cap == PCIE_SPEED_5_0GT)
5625fb4d8502Sjsg 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5626fb4d8502Sjsg 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
5627fb4d8502Sjsg 			else
5628fb4d8502Sjsg 				adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
5629fb4d8502Sjsg 		}
5630fb4d8502Sjsg 		/* platform caps */
5631c349dbc7Sjsg 		if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5632fb4d8502Sjsg 			adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5633fb4d8502Sjsg 						   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5634fb4d8502Sjsg 		} else {
56355ca02815Sjsg 			if (platform_speed_cap == PCIE_SPEED_32_0GT)
56365ca02815Sjsg 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
56375ca02815Sjsg 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
56385ca02815Sjsg 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
56395ca02815Sjsg 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
56405ca02815Sjsg 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
56415ca02815Sjsg 			else if (platform_speed_cap == PCIE_SPEED_16_0GT)
5642fb4d8502Sjsg 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5643fb4d8502Sjsg 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5644fb4d8502Sjsg 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5645fb4d8502Sjsg 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
5646c349dbc7Sjsg 			else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5647fb4d8502Sjsg 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5648fb4d8502Sjsg 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5649fb4d8502Sjsg 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
5650c349dbc7Sjsg 			else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5651fb4d8502Sjsg 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5652fb4d8502Sjsg 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5653fb4d8502Sjsg 			else
5654fb4d8502Sjsg 				adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
5655fb4d8502Sjsg 
5656fb4d8502Sjsg 		}
5657fb4d8502Sjsg 	}
5658fb4d8502Sjsg 	if (adev->pm.pcie_mlw_mask == 0) {
5659c349dbc7Sjsg 		if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5660fb4d8502Sjsg 			adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
5661fb4d8502Sjsg 		} else {
5662c349dbc7Sjsg 			switch (platform_link_width) {
5663fb4d8502Sjsg 			case PCIE_LNK_X32:
5664fb4d8502Sjsg 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
5665fb4d8502Sjsg 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5666fb4d8502Sjsg 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5667fb4d8502Sjsg 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5668fb4d8502Sjsg 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5669fb4d8502Sjsg 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5670fb4d8502Sjsg 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5671fb4d8502Sjsg 				break;
5672fb4d8502Sjsg 			case PCIE_LNK_X16:
5673fb4d8502Sjsg 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5674fb4d8502Sjsg 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5675fb4d8502Sjsg 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5676fb4d8502Sjsg 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5677fb4d8502Sjsg 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5678fb4d8502Sjsg 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5679fb4d8502Sjsg 				break;
5680fb4d8502Sjsg 			case PCIE_LNK_X12:
5681fb4d8502Sjsg 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5682fb4d8502Sjsg 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5683fb4d8502Sjsg 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5684fb4d8502Sjsg 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5685fb4d8502Sjsg 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5686fb4d8502Sjsg 				break;
5687fb4d8502Sjsg 			case PCIE_LNK_X8:
5688fb4d8502Sjsg 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5689fb4d8502Sjsg 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5690fb4d8502Sjsg 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5691fb4d8502Sjsg 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5692fb4d8502Sjsg 				break;
5693fb4d8502Sjsg 			case PCIE_LNK_X4:
5694fb4d8502Sjsg 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5695fb4d8502Sjsg 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5696fb4d8502Sjsg 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5697fb4d8502Sjsg 				break;
5698fb4d8502Sjsg 			case PCIE_LNK_X2:
5699fb4d8502Sjsg 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5700fb4d8502Sjsg 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5701fb4d8502Sjsg 				break;
5702fb4d8502Sjsg 			case PCIE_LNK_X1:
5703fb4d8502Sjsg 				adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
5704fb4d8502Sjsg 				break;
5705fb4d8502Sjsg 			default:
5706fb4d8502Sjsg 				break;
5707fb4d8502Sjsg 			}
5708fb4d8502Sjsg 		}
5709fb4d8502Sjsg 	}
5710fb4d8502Sjsg }
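
/*
 * The pcie_gen_mask/pcie_mlw_mask computed above are consumed by the
 * power-management code when it builds PCIe DPM levels.  A hedged example
 * of the kind of check made against these masks (illustrative only, not a
 * quote of existing code):
 *
 *   if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)
 *           max_gen = 3;    // platform allows 8.0 GT/s
 *   else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2)
 *           max_gen = 2;    // platform allows 5.0 GT/s
 *
 * The CAIL_ASIC_PCIE_* bits describe what the ASIC itself supports, while
 * the CAIL_PCIE_* bits describe what the platform/slot supports; the
 * amdgpu_pcie_gen_cap/amdgpu_pcie_lane_cap module parameters override both.
 */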
5711fb4d8502Sjsg 
57121bb76ff1Sjsg /**
57131bb76ff1Sjsg  * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
57141bb76ff1Sjsg  *
57151bb76ff1Sjsg  * @adev: amdgpu_device pointer
57161bb76ff1Sjsg  * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
57171bb76ff1Sjsg  *
57181bb76ff1Sjsg  * Return true if @peer_adev can access (DMA) @adev through the PCIe
57191bb76ff1Sjsg  * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
57201bb76ff1Sjsg  * @peer_adev.
57211bb76ff1Sjsg  */
57221bb76ff1Sjsg bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
57231bb76ff1Sjsg 				      struct amdgpu_device *peer_adev)
57241bb76ff1Sjsg {
57251bb76ff1Sjsg #ifdef CONFIG_HSA_AMD_P2P
57261bb76ff1Sjsg 	uint64_t address_mask = peer_adev->dev->dma_mask ?
57271bb76ff1Sjsg 		~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
57281bb76ff1Sjsg 	resource_size_t aper_limit =
57291bb76ff1Sjsg 		adev->gmc.aper_base + adev->gmc.aper_size - 1;
57301bb76ff1Sjsg 	bool p2p_access =
57311bb76ff1Sjsg 		!adev->gmc.xgmi.connected_to_cpu &&
57321bb76ff1Sjsg 		!(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
57331bb76ff1Sjsg 
57341bb76ff1Sjsg 	return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
57351bb76ff1Sjsg 		adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
57361bb76ff1Sjsg 		!(adev->gmc.aper_base & address_mask ||
57371bb76ff1Sjsg 		  aper_limit & address_mask));
57381bb76ff1Sjsg #else
57391bb76ff1Sjsg 	return false;
57401bb76ff1Sjsg #endif
57411bb76ff1Sjsg }
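
/*
 * Worked example for the address-mask check above (illustrative numbers):
 * if the peer limits DMA to 40 bits, dma_mask = (1ULL << 40) - 1 and
 * address_mask keeps only bits 40..63.  An aperture spanning
 * 0x38_0000_0000 .. 0x38_FFFF_FFFF (a 4 GB large BAR below 2^40) has no
 * bits set above the mask, so peer access is allowed; an aperture placed at
 * or above 2^40 would fail.  In addition the whole of VRAM must be CPU
 * visible (real_vram_size == visible_vram_size, i.e. a resizable/large BAR
 * configuration), pci_p2pdma_distance() must report a usable P2P path, and
 * the pcie_p2p module parameter must be enabled.
 */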
57421bb76ff1Sjsg 
5743c349dbc7Sjsg int amdgpu_device_baco_enter(struct drm_device *dev)
5744c349dbc7Sjsg {
5745ad8b1aafSjsg 	struct amdgpu_device *adev = drm_to_adev(dev);
5746c349dbc7Sjsg 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
5747c349dbc7Sjsg 
5748f005ef32Sjsg 	if (!amdgpu_device_supports_baco(dev))
5749c349dbc7Sjsg 		return -ENOTSUPP;
5750c349dbc7Sjsg 
57515ca02815Sjsg 	if (ras && adev->ras_enabled &&
57525ca02815Sjsg 	    adev->nbio.funcs->enable_doorbell_interrupt)
5753c349dbc7Sjsg 		adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
5754c349dbc7Sjsg 
5755c349dbc7Sjsg 	return amdgpu_dpm_baco_enter(adev);
5756c349dbc7Sjsg }
5757c349dbc7Sjsg 
5758c349dbc7Sjsg int amdgpu_device_baco_exit(struct drm_device *dev)
5759c349dbc7Sjsg {
5760ad8b1aafSjsg 	struct amdgpu_device *adev = drm_to_adev(dev);
5761c349dbc7Sjsg 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
5762c349dbc7Sjsg 	int ret = 0;
5763c349dbc7Sjsg 
5764f005ef32Sjsg 	if (!amdgpu_device_supports_baco(dev))
5765c349dbc7Sjsg 		return -ENOTSUPP;
5766c349dbc7Sjsg 
5767c349dbc7Sjsg 	ret = amdgpu_dpm_baco_exit(adev);
5768c349dbc7Sjsg 	if (ret)
5769c349dbc7Sjsg 		return ret;
5770c349dbc7Sjsg 
57715ca02815Sjsg 	if (ras && adev->ras_enabled &&
57725ca02815Sjsg 	    adev->nbio.funcs->enable_doorbell_interrupt)
5773c349dbc7Sjsg 		adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
5774c349dbc7Sjsg 
57755ca02815Sjsg 	if (amdgpu_passthrough(adev) &&
57765ca02815Sjsg 	    adev->nbio.funcs->clear_doorbell_interrupt)
57775ca02815Sjsg 		adev->nbio.funcs->clear_doorbell_interrupt(adev);
57785ca02815Sjsg 
5779c349dbc7Sjsg 	return 0;
5780c349dbc7Sjsg }
5781ad8b1aafSjsg 
5782ad8b1aafSjsg /**
5783ad8b1aafSjsg  * amdgpu_pci_error_detected - Called when a PCI error is detected.
5784ad8b1aafSjsg  * @pdev: PCI device struct
5785ad8b1aafSjsg  * @state: PCI channel state
5786ad8b1aafSjsg  *
5787ad8b1aafSjsg  * Description: Called when a PCI error is detected.
5788ad8b1aafSjsg  *
5789ad8b1aafSjsg  * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
5790ad8b1aafSjsg  */
5791ad8b1aafSjsg pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
5792ad8b1aafSjsg {
5793ad8b1aafSjsg 	STUB();
5794ad8b1aafSjsg 	return 0;
5795ad8b1aafSjsg #ifdef notyet
5796ad8b1aafSjsg 	struct drm_device *dev = pci_get_drvdata(pdev);
5797ad8b1aafSjsg 	struct amdgpu_device *adev = drm_to_adev(dev);
5798ad8b1aafSjsg 	int i;
5799ad8b1aafSjsg 
5800ad8b1aafSjsg 	DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
5801ad8b1aafSjsg 
5802ad8b1aafSjsg 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
5803ad8b1aafSjsg 		DRM_WARN("No support for XGMI hive yet...");
5804ad8b1aafSjsg 		return PCI_ERS_RESULT_DISCONNECT;
5805ad8b1aafSjsg 	}
5806ad8b1aafSjsg 
58075ca02815Sjsg 	adev->pci_channel_state = state;
58085ca02815Sjsg 
5809ad8b1aafSjsg 	switch (state) {
5810ad8b1aafSjsg 	case pci_channel_io_normal:
5811ad8b1aafSjsg 		return PCI_ERS_RESULT_CAN_RECOVER;
5812ad8b1aafSjsg 	/* Fatal error, prepare for slot reset */
5813ad8b1aafSjsg 	case pci_channel_io_frozen:
5814ad8b1aafSjsg 		/*
58151bb76ff1Sjsg 		 * Locking adev->reset_domain->sem will prevent any external access
5816ad8b1aafSjsg 		 * to GPU during PCI error recovery
5817ad8b1aafSjsg 		 */
58181bb76ff1Sjsg 		amdgpu_device_lock_reset_domain(adev->reset_domain);
58191bb76ff1Sjsg 		amdgpu_device_set_mp1_state(adev);
5820ad8b1aafSjsg 
5821ad8b1aafSjsg 		/*
5822ad8b1aafSjsg 		 * Block any work scheduling as we do for regular GPU reset
5823ad8b1aafSjsg 		 * for the duration of the recovery
5824ad8b1aafSjsg 		 */
5825ad8b1aafSjsg 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5826ad8b1aafSjsg 			struct amdgpu_ring *ring = adev->rings[i];
5827ad8b1aafSjsg 
5828ad8b1aafSjsg 			if (!ring || !ring->sched.thread)
5829ad8b1aafSjsg 				continue;
5830ad8b1aafSjsg 
5831ad8b1aafSjsg 			drm_sched_stop(&ring->sched, NULL);
5832ad8b1aafSjsg 		}
58335ca02815Sjsg 		atomic_inc(&adev->gpu_reset_counter);
5834ad8b1aafSjsg 		return PCI_ERS_RESULT_NEED_RESET;
5835ad8b1aafSjsg 	case pci_channel_io_perm_failure:
5836ad8b1aafSjsg 		/* Permanent error, prepare for device removal */
5837ad8b1aafSjsg 		return PCI_ERS_RESULT_DISCONNECT;
5838ad8b1aafSjsg 	}
5839ad8b1aafSjsg 
5840ad8b1aafSjsg 	return PCI_ERS_RESULT_NEED_RESET;
5841ad8b1aafSjsg #endif
5842ad8b1aafSjsg }
5843ad8b1aafSjsg 
5844ad8b1aafSjsg /**
5845ad8b1aafSjsg  * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
5846ad8b1aafSjsg  * @pdev: pointer to PCI device
5847ad8b1aafSjsg  */
5848ad8b1aafSjsg pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
5849ad8b1aafSjsg {
5850ad8b1aafSjsg 
5851ad8b1aafSjsg 	DRM_INFO("PCI error: mmio enabled callback!!\n");
5852ad8b1aafSjsg 
5853ad8b1aafSjsg 	/* TODO - dump whatever for debugging purposes */
5854ad8b1aafSjsg 
5855ad8b1aafSjsg 	/* This is called only if amdgpu_pci_error_detected returns
5856ad8b1aafSjsg 	 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
5857ad8b1aafSjsg 	 * works, no need to reset slot.
5858ad8b1aafSjsg 	 */
5859ad8b1aafSjsg 
5860ad8b1aafSjsg 	return PCI_ERS_RESULT_RECOVERED;
5861ad8b1aafSjsg }
5862ad8b1aafSjsg 
5863ad8b1aafSjsg /**
5864ad8b1aafSjsg  * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
5865ad8b1aafSjsg  * @pdev: PCI device struct
5866ad8b1aafSjsg  *
5867ad8b1aafSjsg  * Description: This routine is called by the pci error recovery
5868ad8b1aafSjsg  * code after the PCI slot has been reset, just before we
5869ad8b1aafSjsg  * should resume normal operations.
5870ad8b1aafSjsg  */
5871ad8b1aafSjsg pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
5872ad8b1aafSjsg {
5873ad8b1aafSjsg 	STUB();
5874ad8b1aafSjsg 	return PCI_ERS_RESULT_RECOVERED;
5875ad8b1aafSjsg #ifdef notyet
5876ad8b1aafSjsg 	struct drm_device *dev = pci_get_drvdata(pdev);
5877ad8b1aafSjsg 	struct amdgpu_device *adev = drm_to_adev(dev);
5878ad8b1aafSjsg 	int r, i;
58795ca02815Sjsg 	struct amdgpu_reset_context reset_context;
5880ad8b1aafSjsg 	u32 memsize;
5881ad8b1aafSjsg 	struct list_head device_list;
5882ad8b1aafSjsg 
5883ad8b1aafSjsg 	DRM_INFO("PCI error: slot reset callback!!\n");
5884ad8b1aafSjsg 
58855ca02815Sjsg 	memset(&reset_context, 0, sizeof(reset_context));
58865ca02815Sjsg 
5887ad8b1aafSjsg 	INIT_LIST_HEAD(&device_list);
58885ca02815Sjsg 	list_add_tail(&adev->reset_list, &device_list);
5889ad8b1aafSjsg 
5890ad8b1aafSjsg 	/* wait for asic to come out of reset */
5891ad8b1aafSjsg 	drm_msleep(500);
5892ad8b1aafSjsg 
5893ad8b1aafSjsg 	/* Restore PCI confspace */
5894ad8b1aafSjsg 	amdgpu_device_load_pci_state(pdev);
5895ad8b1aafSjsg 
5896ad8b1aafSjsg 	/* confirm  ASIC came out of reset */
5897ad8b1aafSjsg 	for (i = 0; i < adev->usec_timeout; i++) {
5898ad8b1aafSjsg 		memsize = amdgpu_asic_get_config_memsize(adev);
5899ad8b1aafSjsg 
5900ad8b1aafSjsg 		if (memsize != 0xffffffff)
5901ad8b1aafSjsg 			break;
5902ad8b1aafSjsg 		udelay(1);
5903ad8b1aafSjsg 	}
5904ad8b1aafSjsg 	if (memsize == 0xffffffff) {
5905ad8b1aafSjsg 		r = -ETIME;
5906ad8b1aafSjsg 		goto out;
5907ad8b1aafSjsg 	}
5908ad8b1aafSjsg 
59095ca02815Sjsg 	reset_context.method = AMD_RESET_METHOD_NONE;
59105ca02815Sjsg 	reset_context.reset_req_dev = adev;
59115ca02815Sjsg 	set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
59125ca02815Sjsg 	set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
59135ca02815Sjsg 
59145ca02815Sjsg 	adev->no_hw_access = true;
59155ca02815Sjsg 	r = amdgpu_device_pre_asic_reset(adev, &reset_context);
59165ca02815Sjsg 	adev->no_hw_access = false;
5917ad8b1aafSjsg 	if (r)
5918ad8b1aafSjsg 		goto out;
5919ad8b1aafSjsg 
59205ca02815Sjsg 	r = amdgpu_do_asic_reset(&device_list, &reset_context);
5921ad8b1aafSjsg 
5922ad8b1aafSjsg out:
5923ad8b1aafSjsg 	if (!r) {
5924ad8b1aafSjsg 		if (amdgpu_device_cache_pci_state(adev->pdev))
5925ad8b1aafSjsg 			pci_restore_state(adev->pdev);
5926ad8b1aafSjsg 
5927ad8b1aafSjsg 		DRM_INFO("PCIe error recovery succeeded\n");
5928ad8b1aafSjsg 	} else {
5929ad8b1aafSjsg 		DRM_ERROR("PCIe error recovery failed, err:%d", r);
59301bb76ff1Sjsg 		amdgpu_device_unset_mp1_state(adev);
59311bb76ff1Sjsg 		amdgpu_device_unlock_reset_domain(adev->reset_domain);
5932ad8b1aafSjsg 	}
5933ad8b1aafSjsg 
5934ad8b1aafSjsg 	return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
5935ad8b1aafSjsg #endif
5936ad8b1aafSjsg }
5937ad8b1aafSjsg 
5938ad8b1aafSjsg /**
5939ad8b1aafSjsg  * amdgpu_pci_resume() - resume normal ops after PCI reset
5940ad8b1aafSjsg  * @pdev: pointer to PCI device
5941ad8b1aafSjsg  *
5942ad8b1aafSjsg  * Called when the error recovery driver tells us that it's
59435ca02815Sjsg  * OK to resume normal operation.
5944ad8b1aafSjsg  */
5945ad8b1aafSjsg void amdgpu_pci_resume(struct pci_dev *pdev)
5946ad8b1aafSjsg {
5947ad8b1aafSjsg 	STUB();
5948ad8b1aafSjsg #ifdef notyet
5949ad8b1aafSjsg 	struct drm_device *dev = pci_get_drvdata(pdev);
5950ad8b1aafSjsg 	struct amdgpu_device *adev = drm_to_adev(dev);
5951ad8b1aafSjsg 	int i;
5952ad8b1aafSjsg 
5953ad8b1aafSjsg 
5954ad8b1aafSjsg 	DRM_INFO("PCI error: resume callback!!\n");
5955ad8b1aafSjsg 
59565ca02815Sjsg 	/* Only continue execution for the case of pci_channel_io_frozen */
59575ca02815Sjsg 	if (adev->pci_channel_state != pci_channel_io_frozen)
59585ca02815Sjsg 		return;
59595ca02815Sjsg 
5960ad8b1aafSjsg 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5961ad8b1aafSjsg 		struct amdgpu_ring *ring = adev->rings[i];
5962ad8b1aafSjsg 
5963ad8b1aafSjsg 		if (!ring || !ring->sched.thread)
5964ad8b1aafSjsg 			continue;
5965ad8b1aafSjsg 
5966ad8b1aafSjsg 		drm_sched_start(&ring->sched, true);
5967ad8b1aafSjsg 	}
5968ad8b1aafSjsg 
59691bb76ff1Sjsg 	amdgpu_device_unset_mp1_state(adev);
59701bb76ff1Sjsg 	amdgpu_device_unlock_reset_domain(adev->reset_domain);
5971ad8b1aafSjsg #endif
5972ad8b1aafSjsg }
5973ad8b1aafSjsg 
5974ad8b1aafSjsg bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
5975ad8b1aafSjsg {
5976ad8b1aafSjsg 	return false;
5977ad8b1aafSjsg #ifdef notyet
5978ad8b1aafSjsg 	struct drm_device *dev = pci_get_drvdata(pdev);
5979ad8b1aafSjsg 	struct amdgpu_device *adev = drm_to_adev(dev);
5980ad8b1aafSjsg 	int r;
5981ad8b1aafSjsg 
5982ad8b1aafSjsg 	r = pci_save_state(pdev);
5983ad8b1aafSjsg 	if (!r) {
5984ad8b1aafSjsg 		kfree(adev->pci_state);
5985ad8b1aafSjsg 
5986ad8b1aafSjsg 		adev->pci_state = pci_store_saved_state(pdev);
5987ad8b1aafSjsg 
5988ad8b1aafSjsg 		if (!adev->pci_state) {
5989ad8b1aafSjsg 			DRM_ERROR("Failed to store PCI saved state");
5990ad8b1aafSjsg 			return false;
5991ad8b1aafSjsg 		}
5992ad8b1aafSjsg 	} else {
5993ad8b1aafSjsg 		DRM_WARN("Failed to save PCI state, err:%d\n", r);
5994ad8b1aafSjsg 		return false;
5995ad8b1aafSjsg 	}
5996ad8b1aafSjsg 
5997ad8b1aafSjsg 	return true;
5998ad8b1aafSjsg #endif
5999ad8b1aafSjsg }
6000ad8b1aafSjsg 
6001ad8b1aafSjsg bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
6002ad8b1aafSjsg {
6003ad8b1aafSjsg 	STUB();
6004ad8b1aafSjsg 	return false;
6005ad8b1aafSjsg #ifdef notyet
6006ad8b1aafSjsg 	struct drm_device *dev = pci_get_drvdata(pdev);
6007ad8b1aafSjsg 	struct amdgpu_device *adev = drm_to_adev(dev);
6008ad8b1aafSjsg 	int r;
6009ad8b1aafSjsg 
6010ad8b1aafSjsg 	if (!adev->pci_state)
6011ad8b1aafSjsg 		return false;
6012ad8b1aafSjsg 
6013ad8b1aafSjsg 	r = pci_load_saved_state(pdev, adev->pci_state);
6014ad8b1aafSjsg 
6015ad8b1aafSjsg 	if (!r) {
6016ad8b1aafSjsg 		pci_restore_state(pdev);
6017ad8b1aafSjsg 	} else {
6018ad8b1aafSjsg 		DRM_WARN("Failed to load PCI state, err:%d\n", r);
6019ad8b1aafSjsg 		return false;
6020ad8b1aafSjsg 	}
6021ad8b1aafSjsg 
6022ad8b1aafSjsg 	return true;
6023ad8b1aafSjsg #endif
6024ad8b1aafSjsg }
6025ad8b1aafSjsg 
60265ca02815Sjsg void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
60275ca02815Sjsg 		struct amdgpu_ring *ring)
60285ca02815Sjsg {
60295ca02815Sjsg #ifdef CONFIG_X86_64
603016c07370Sjsg 	if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
60315ca02815Sjsg 		return;
60325ca02815Sjsg #endif
60335ca02815Sjsg 	if (adev->gmc.xgmi.connected_to_cpu)
60345ca02815Sjsg 		return;
6035ad8b1aafSjsg 
60365ca02815Sjsg 	if (ring && ring->funcs->emit_hdp_flush)
60375ca02815Sjsg 		amdgpu_ring_emit_hdp_flush(ring);
60385ca02815Sjsg 	else
60395ca02815Sjsg 		amdgpu_asic_flush_hdp(adev, ring);
60405ca02815Sjsg }
60415ca02815Sjsg 
60425ca02815Sjsg void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
60435ca02815Sjsg 		struct amdgpu_ring *ring)
60445ca02815Sjsg {
60455ca02815Sjsg #ifdef CONFIG_X86_64
604616c07370Sjsg 	if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
60475ca02815Sjsg 		return;
60485ca02815Sjsg #endif
60495ca02815Sjsg 	if (adev->gmc.xgmi.connected_to_cpu)
60505ca02815Sjsg 		return;
60515ca02815Sjsg 
60525ca02815Sjsg 	amdgpu_asic_invalidate_hdp(adev, ring);
60535ca02815Sjsg }
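
/*
 * HDP (Host Data Path) ordering helpers: amdgpu_device_flush_hdp() is used
 * after the CPU writes memory that the GPU is about to read (flushes the
 * HDP write cache), while amdgpu_device_invalidate_hdp() is used before the
 * CPU reads memory the GPU has written.  Both are skipped on bare-metal
 * APUs and on GPUs whose XGMI link is connected to the CPU, where HDP is
 * not in the access path.  For the flush, passing a ring emits the flush as
 * a ring packet; passing NULL performs it immediately through MMIO.
 */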
60541bb76ff1Sjsg 
60551bb76ff1Sjsg int amdgpu_in_reset(struct amdgpu_device *adev)
60561bb76ff1Sjsg {
60571bb76ff1Sjsg 	return atomic_read(&adev->reset_domain->in_gpu_reset);
60581bb76ff1Sjsg }
60591bb76ff1Sjsg 
60601bb76ff1Sjsg /**
60611bb76ff1Sjsg  * amdgpu_device_halt() - bring hardware to some kind of halt state
60621bb76ff1Sjsg  *
60631bb76ff1Sjsg  * @adev: amdgpu_device pointer
60641bb76ff1Sjsg  *
60651bb76ff1Sjsg  * Bring hardware to some kind of halt state so that no one can touch it
60661bb76ff1Sjsg  * any more. It helps to maintain the error context when an error occurs.
60671bb76ff1Sjsg  * Compared to a simple hang, the system will stay stable at least for SSH
60681bb76ff1Sjsg  * access. Then it should be trivial to inspect the hardware state and
60691bb76ff1Sjsg  * see what's going on. Implemented as follows:
60701bb76ff1Sjsg  *
60711bb76ff1Sjsg  * 1. drm_dev_unplug() makes device inaccessible to user space(IOCTLs, etc),
60721bb76ff1Sjsg  *    clears all CPU mappings to device, disallows remappings through page faults
60731bb76ff1Sjsg  * 2. amdgpu_irq_disable_all() disables all interrupts
60741bb76ff1Sjsg  * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
60751bb76ff1Sjsg  * 4. set adev->no_hw_access to avoid potential crashes after step 5
60761bb76ff1Sjsg  * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
60771bb76ff1Sjsg  * 6. pci_disable_device() and pci_wait_for_pending_transaction()
60781bb76ff1Sjsg  *    flush any in flight DMA operations
60791bb76ff1Sjsg  */
60801bb76ff1Sjsg void amdgpu_device_halt(struct amdgpu_device *adev)
60811bb76ff1Sjsg {
60821bb76ff1Sjsg 	struct pci_dev *pdev = adev->pdev;
60831bb76ff1Sjsg 	struct drm_device *ddev = adev_to_drm(adev);
60841bb76ff1Sjsg 
6085f005ef32Sjsg 	amdgpu_xcp_dev_unplug(adev);
60861bb76ff1Sjsg 	drm_dev_unplug(ddev);
60871bb76ff1Sjsg 
60881bb76ff1Sjsg 	amdgpu_irq_disable_all(adev);
60891bb76ff1Sjsg 
60901bb76ff1Sjsg 	amdgpu_fence_driver_hw_fini(adev);
60911bb76ff1Sjsg 
60921bb76ff1Sjsg 	adev->no_hw_access = true;
60931bb76ff1Sjsg 
60941bb76ff1Sjsg 	amdgpu_device_unmap_mmio(adev);
60951bb76ff1Sjsg 
60961bb76ff1Sjsg 	pci_disable_device(pdev);
60971bb76ff1Sjsg 	pci_wait_for_pending_transaction(pdev);
60981bb76ff1Sjsg }
60991bb76ff1Sjsg 
61001bb76ff1Sjsg u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
61011bb76ff1Sjsg 				u32 reg)
61021bb76ff1Sjsg {
61031bb76ff1Sjsg 	unsigned long flags, address, data;
61041bb76ff1Sjsg 	u32 r;
61051bb76ff1Sjsg 
61061bb76ff1Sjsg 	address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
61071bb76ff1Sjsg 	data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
61081bb76ff1Sjsg 
61091bb76ff1Sjsg 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
61101bb76ff1Sjsg 	WREG32(address, reg * 4);
61111bb76ff1Sjsg 	(void)RREG32(address);
61121bb76ff1Sjsg 	r = RREG32(data);
61131bb76ff1Sjsg 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
61141bb76ff1Sjsg 	return r;
61151bb76ff1Sjsg }
61161bb76ff1Sjsg 
61171bb76ff1Sjsg void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
61181bb76ff1Sjsg 				u32 reg, u32 v)
61191bb76ff1Sjsg {
61201bb76ff1Sjsg 	unsigned long flags, address, data;
61211bb76ff1Sjsg 
61221bb76ff1Sjsg 	address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
61231bb76ff1Sjsg 	data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
61241bb76ff1Sjsg 
61251bb76ff1Sjsg 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
61261bb76ff1Sjsg 	WREG32(address, reg * 4);
61271bb76ff1Sjsg 	(void)RREG32(address);
61281bb76ff1Sjsg 	WREG32(data, v);
61291bb76ff1Sjsg 	(void)RREG32(data);
61301bb76ff1Sjsg 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
61311bb76ff1Sjsg }
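
/*
 * The two helpers above implement the usual index/data pair access for PCIe
 * port (NBIO) registers: the byte offset (reg * 4) is written to the index
 * register, read back to post the write, and then the data register is read
 * or written, all under adev->pcie_idx_lock.  Hedged usage sketch (the
 * register offset is made up for illustration):
 *
 *   u32 val;
 *
 *   val = amdgpu_device_pcie_port_rreg(adev, 0x12);        // read port reg
 *   amdgpu_device_pcie_port_wreg(adev, 0x12, val | 0x1);   // set bit 0
 */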
61321bb76ff1Sjsg 
61331bb76ff1Sjsg /**
61341bb76ff1Sjsg  * amdgpu_device_switch_gang - switch to a new gang
61351bb76ff1Sjsg  * @adev: amdgpu_device pointer
61361bb76ff1Sjsg  * @gang: the gang to switch to
61371bb76ff1Sjsg  *
61381bb76ff1Sjsg  * Try to switch to a new gang.
61391bb76ff1Sjsg  * Returns: NULL if we switched to the new gang or a reference to the current
61401bb76ff1Sjsg  * gang leader.
61411bb76ff1Sjsg  */
61421bb76ff1Sjsg struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
61431bb76ff1Sjsg 					    struct dma_fence *gang)
61441bb76ff1Sjsg {
61451bb76ff1Sjsg 	struct dma_fence *old = NULL;
61461bb76ff1Sjsg 
61471bb76ff1Sjsg 	do {
61481bb76ff1Sjsg 		dma_fence_put(old);
61491bb76ff1Sjsg 		rcu_read_lock();
61501bb76ff1Sjsg 		old = dma_fence_get_rcu_safe(&adev->gang_submit);
61511bb76ff1Sjsg 		rcu_read_unlock();
61521bb76ff1Sjsg 
61531bb76ff1Sjsg 		if (old == gang)
61541bb76ff1Sjsg 			break;
61551bb76ff1Sjsg 
61561bb76ff1Sjsg 		if (!dma_fence_is_signaled(old))
61571bb76ff1Sjsg 			return old;
61581bb76ff1Sjsg 
61591bb76ff1Sjsg 	} while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
61601bb76ff1Sjsg 			 old, gang) != old);
61611bb76ff1Sjsg 
61621bb76ff1Sjsg 	dma_fence_put(old);
61631bb76ff1Sjsg 	return NULL;
61641bb76ff1Sjsg }
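
/*
 * The cmpxchg loop above is a lock-free publish: a new gang-submit fence is
 * only installed once the previous gang leader has signaled; otherwise the
 * old fence is returned so the caller can order behind it and try again.
 * Hedged usage sketch, loosely following the command-submission path:
 *
 *   struct dma_fence *old = amdgpu_device_switch_gang(adev, gang_fence);
 *
 *   if (old) {
 *           // previous gang still running: wait on it (or add it as a
 *           // dependency) before retrying the switch
 *           dma_fence_wait(old, false);
 *           dma_fence_put(old);
 *   }
 */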
61651bb76ff1Sjsg 
61661bb76ff1Sjsg bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
61671bb76ff1Sjsg {
61681bb76ff1Sjsg 	switch (adev->asic_type) {
61691bb76ff1Sjsg #ifdef CONFIG_DRM_AMDGPU_SI
61701bb76ff1Sjsg 	case CHIP_HAINAN:
61711bb76ff1Sjsg #endif
61721bb76ff1Sjsg 	case CHIP_TOPAZ:
61731bb76ff1Sjsg 		/* chips with no display hardware */
61741bb76ff1Sjsg 		return false;
61751bb76ff1Sjsg #ifdef CONFIG_DRM_AMDGPU_SI
61761bb76ff1Sjsg 	case CHIP_TAHITI:
61771bb76ff1Sjsg 	case CHIP_PITCAIRN:
61781bb76ff1Sjsg 	case CHIP_VERDE:
61791bb76ff1Sjsg 	case CHIP_OLAND:
61801bb76ff1Sjsg #endif
61811bb76ff1Sjsg #ifdef CONFIG_DRM_AMDGPU_CIK
61821bb76ff1Sjsg 	case CHIP_BONAIRE:
61831bb76ff1Sjsg 	case CHIP_HAWAII:
61841bb76ff1Sjsg 	case CHIP_KAVERI:
61851bb76ff1Sjsg 	case CHIP_KABINI:
61861bb76ff1Sjsg 	case CHIP_MULLINS:
61871bb76ff1Sjsg #endif
61881bb76ff1Sjsg 	case CHIP_TONGA:
61891bb76ff1Sjsg 	case CHIP_FIJI:
61901bb76ff1Sjsg 	case CHIP_POLARIS10:
61911bb76ff1Sjsg 	case CHIP_POLARIS11:
61921bb76ff1Sjsg 	case CHIP_POLARIS12:
61931bb76ff1Sjsg 	case CHIP_VEGAM:
61941bb76ff1Sjsg 	case CHIP_CARRIZO:
61951bb76ff1Sjsg 	case CHIP_STONEY:
61961bb76ff1Sjsg 		/* chips with display hardware */
61971bb76ff1Sjsg 		return true;
61981bb76ff1Sjsg 	default:
61991bb76ff1Sjsg 		/* IP discovery */
62001bb76ff1Sjsg 		if (!adev->ip_versions[DCE_HWIP][0] ||
62011bb76ff1Sjsg 		    (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
62021bb76ff1Sjsg 			return false;
62031bb76ff1Sjsg 		return true;
62041bb76ff1Sjsg 	}
62051bb76ff1Sjsg }
6206f005ef32Sjsg 
6207f005ef32Sjsg uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
6208f005ef32Sjsg 		uint32_t inst, uint32_t reg_addr, char reg_name[],
6209f005ef32Sjsg 		uint32_t expected_value, uint32_t mask)
6210f005ef32Sjsg {
6211f005ef32Sjsg 	uint32_t ret = 0;
6212f005ef32Sjsg 	uint32_t old_ = 0;
6213f005ef32Sjsg 	uint32_t tmp_ = RREG32(reg_addr);
6214f005ef32Sjsg 	uint32_t loop = adev->usec_timeout;
6215f005ef32Sjsg 
6216f005ef32Sjsg 	while ((tmp_ & (mask)) != (expected_value)) {
6217f005ef32Sjsg 		if (old_ != tmp_) {
6218f005ef32Sjsg 			loop = adev->usec_timeout;
6219f005ef32Sjsg 			old_ = tmp_;
6220f005ef32Sjsg 		} else
6221f005ef32Sjsg 			udelay(1);
6222f005ef32Sjsg 		tmp_ = RREG32(reg_addr);
6223f005ef32Sjsg 		loop--;
6224f005ef32Sjsg 		if (!loop) {
6225f005ef32Sjsg 			DRM_WARN("Register(%d) [%s] failed to reach value 0x%08x != 0x%08x\n",
6226f005ef32Sjsg 				  inst, reg_name, (uint32_t)expected_value,
6227f005ef32Sjsg 				  (uint32_t)(tmp_ & (mask)));
6228f005ef32Sjsg 			ret = -ETIMEDOUT;
6229f005ef32Sjsg 			break;
6230f005ef32Sjsg 		}
6231f005ef32Sjsg 	}
6232f005ef32Sjsg 	return ret;
6233f005ef32Sjsg }
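
/*
 * amdgpu_device_wait_on_rreg() polls a register until (value & mask)
 * matches the expected value, restarting the usec timeout whenever the
 * register value changes, and returns -ETIMEDOUT (stored in a uint32_t)
 * once it stops making progress.  Hedged usage sketch; the register and
 * mask names are made up for illustration:
 *
 *   // wait until a (hypothetical) STATUS register reports idle
 *   if (amdgpu_device_wait_on_rreg(adev, 0, regSTATUS, "STATUS",
 *                                  STATUS__IDLE_MASK, STATUS__IDLE_MASK))
 *           dev_err(adev->dev, "block did not go idle\n");
 */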
6234