/*	$NetBSD: amdgpu_gmc_v9_0.c,v 1.5 2021/12/19 12:31:45 riastradh Exp $	*/

/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: amdgpu_gmc_v9_0.c,v 1.5 2021/12/19 12:31:45 riastradh Exp $");

#include <linux/firmware.h>
#include <linux/pci.h>

#include <drm/drm_cache.h>

#include "amdgpu.h"
#include "gmc_v9_0.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_gem.h"

#include "hdp/hdp_4_0_offset.h"
#include "hdp/hdp_4_0_sh_mask.h"
#include "gc/gc_9_0_sh_mask.h"
#include "dce/dce_12_0_offset.h"
#include "dce/dce_12_0_sh_mask.h"
#include "vega10_enum.h"
#include "mmhub/mmhub_1_0_offset.h"
#include "athub/athub_1_0_sh_mask.h"
#include "athub/athub_1_0_offset.h"
#include "oss/osssys_4_0_offset.h"

#include "soc15.h"
#include "soc15d.h"
#include "soc15_common.h"
#include "umc/umc_6_0_sh_mask.h"

#include "gfxhub_v1_0.h"
#include "mmhub_v1_0.h"
#include "athub_v1_0.h"
#include "gfxhub_v1_1.h"
#include "mmhub_v9_4.h"
#include "umc_v6_1.h"
#include "umc_v6_0.h"

#include "ivsrcid/vmc/irqsrcs_vmc_1_0.h"

#include "amdgpu_ras.h"
#include "amdgpu_xgmi.h"

/* add these here since we already include dce12 headers and these are for DCN */
#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION                                                          0x055d
#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_BASE_IDX                                                 2
#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_WIDTH__SHIFT                                        0x0
#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_HEIGHT__SHIFT                                       0x10
#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_WIDTH_MASK                                          0x00003FFFL
#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_HEIGHT_MASK                                         0x3FFF0000L

/* XXX Move this macro to VEGA10 header file, which is like vid.h for VI.*/
#define AMDGPU_NUM_OF_VMIDS			8

static const u32 golden_settings_vega10_hdp[] =
{
	0xf64, 0x0fffffff, 0x00000000,
	0xf65, 0x0fffffff, 0x00000000,
	0xf66, 0x0fffffff, 0x00000000,
	0xf67, 0x0fffffff, 0x00000000,
	0xf68, 0x0fffffff, 0x00000000,
	0xf6a, 0x0fffffff, 0x00000000,
	0xf6b, 0x0fffffff, 0x00000000,
	0xf6c, 0x0fffffff, 0x00000000,
	0xf6d, 0x0fffffff, 0x00000000,
	0xf6e, 0x0fffffff, 0x00000000,
};

static const struct soc15_reg_golden golden_settings_mmhub_1_0_0[] =
{
	SOC15_REG_GOLDEN_VALUE(MMHUB, 0, mmDAGB1_WRCLI2, 0x00000007, 0xfe5fe0fa),
	SOC15_REG_GOLDEN_VALUE(MMHUB, 0, mmMMEA1_DRAM_WR_CLI2GRP_MAP0, 0x00000030, 0x55555565)
};

static const struct soc15_reg_golden golden_settings_athub_1_0_0[] =
{
	SOC15_REG_GOLDEN_VALUE(ATHUB, 0, mmRPB_ARB_CNTL, 0x0000ff00, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(ATHUB, 0, mmRPB_ARB_CNTL2, 0x00ff00ff, 0x00080008)
};

static const uint32_t ecc_umc_mcumc_ctrl_addrs[] = {
	(0x000143c0 + 0x00000000),
	(0x000143c0 + 0x00000800),
	(0x000143c0 + 0x00001000),
	(0x000143c0 + 0x00001800),
	(0x000543c0 + 0x00000000),
	(0x000543c0 + 0x00000800),
	(0x000543c0 + 0x00001000),
	(0x000543c0 + 0x00001800),
	(0x000943c0 + 0x00000000),
	(0x000943c0 + 0x00000800),
	(0x000943c0 + 0x00001000),
	(0x000943c0 + 0x00001800),
	(0x000d43c0 + 0x00000000),
	(0x000d43c0 + 0x00000800),
	(0x000d43c0 + 0x00001000),
	(0x000d43c0 + 0x00001800),
	(0x001143c0 + 0x00000000),
	(0x001143c0 + 0x00000800),
	(0x001143c0 + 0x00001000),
	(0x001143c0 + 0x00001800),
	(0x001543c0 + 0x00000000),
	(0x001543c0 + 0x00000800),
	(0x001543c0 + 0x00001000),
	(0x001543c0 + 0x00001800),
	(0x001943c0 + 0x00000000),
	(0x001943c0 + 0x00000800),
	(0x001943c0 + 0x00001000),
	(0x001943c0 + 0x00001800),
	(0x001d43c0 + 0x00000000),
	(0x001d43c0 + 0x00000800),
	(0x001d43c0 + 0x00001000),
	(0x001d43c0 + 0x00001800),
};

static const uint32_t ecc_umc_mcumc_ctrl_mask_addrs[] = {
	(0x000143e0 + 0x00000000),
	(0x000143e0 + 0x00000800),
	(0x000143e0 + 0x00001000),
	(0x000143e0 + 0x00001800),
	(0x000543e0 + 0x00000000),
	(0x000543e0 + 0x00000800),
	(0x000543e0 + 0x00001000),
	(0x000543e0 + 0x00001800),
	(0x000943e0 + 0x00000000),
	(0x000943e0 + 0x00000800),
	(0x000943e0 + 0x00001000),
	(0x000943e0 + 0x00001800),
	(0x000d43e0 + 0x00000000),
	(0x000d43e0 + 0x00000800),
	(0x000d43e0 + 0x00001000),
	(0x000d43e0 + 0x00001800),
	(0x001143e0 + 0x00000000),
	(0x001143e0 + 0x00000800),
	(0x001143e0 + 0x00001000),
	(0x001143e0 + 0x00001800),
	(0x001543e0 + 0x00000000),
	(0x001543e0 + 0x00000800),
	(0x001543e0 + 0x00001000),
	(0x001543e0 + 0x00001800),
	(0x001943e0 + 0x00000000),
	(0x001943e0 + 0x00000800),
	(0x001943e0 + 0x00001000),
	(0x001943e0 + 0x00001800),
	(0x001d43e0 + 0x00000000),
	(0x001d43e0 + 0x00000800),
	(0x001d43e0 + 0x00001000),
	(0x001d43e0 + 0x00001800),
};

static const uint32_t ecc_umc_mcumc_status_addrs[] __unused = {
	(0x000143c2 + 0x00000000),
	(0x000143c2 + 0x00000800),
	(0x000143c2 + 0x00001000),
	(0x000143c2 + 0x00001800),
	(0x000543c2 + 0x00000000),
	(0x000543c2 + 0x00000800),
	(0x000543c2 + 0x00001000),
	(0x000543c2 + 0x00001800),
	(0x000943c2 + 0x00000000),
	(0x000943c2 + 0x00000800),
	(0x000943c2 + 0x00001000),
	(0x000943c2 + 0x00001800),
	(0x000d43c2 + 0x00000000),
	(0x000d43c2 + 0x00000800),
	(0x000d43c2 + 0x00001000),
	(0x000d43c2 + 0x00001800),
	(0x001143c2 + 0x00000000),
	(0x001143c2 + 0x00000800),
	(0x001143c2 + 0x00001000),
	(0x001143c2 + 0x00001800),
	(0x001543c2 + 0x00000000),
	(0x001543c2 + 0x00000800),
	(0x001543c2 + 0x00001000),
	(0x001543c2 + 0x00001800),
	(0x001943c2 + 0x00000000),
	(0x001943c2 + 0x00000800),
	(0x001943c2 + 0x00001000),
	(0x001943c2 + 0x00001800),
	(0x001d43c2 + 0x00000000),
	(0x001d43c2 + 0x00000800),
	(0x001d43c2 + 0x00001000),
	(0x001d43c2 + 0x00001800),
};

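/**
 * gmc_v9_0_ecc_interrupt_state - enable/disable the UMC ECC interrupt
 *
 * @adev: amdgpu_device pointer
 * @src: interrupt source this state change applies to
 * @type: interrupt type (unused)
 * @state: interrupt state to program
 *
 * Sets or clears the low control bits (0x7f) in each UMC MCUMC ctrl and
 * ctrl-mask register listed above.  On ASICs newer than VEGA10/VEGA12 this
 * programming is done by the PSP bootloader, so nothing is done here.
 */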
static int gmc_v9_0_ecc_interrupt_state(struct amdgpu_device *adev,
		struct amdgpu_irq_src *src,
		unsigned type,
		enum amdgpu_interrupt_state state)
{
	u32 bits, i, tmp, reg;

	/* Devices newer than VEGA10/12 shall have these programming
	 * sequences performed by PSP BL */
	if (adev->asic_type >= CHIP_VEGA20)
		return 0;

	bits = 0x7f;

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_addrs); i++) {
			reg = ecc_umc_mcumc_ctrl_addrs[i];
			tmp = RREG32(reg);
			tmp &= ~bits;
			WREG32(reg, tmp);
		}
		for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_mask_addrs); i++) {
			reg = ecc_umc_mcumc_ctrl_mask_addrs[i];
			tmp = RREG32(reg);
			tmp &= ~bits;
			WREG32(reg, tmp);
		}
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_addrs); i++) {
			reg = ecc_umc_mcumc_ctrl_addrs[i];
			tmp = RREG32(reg);
			tmp |= bits;
			WREG32(reg, tmp);
		}
		for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_mask_addrs); i++) {
			reg = ecc_umc_mcumc_ctrl_mask_addrs[i];
			tmp = RREG32(reg);
			tmp |= bits;
			WREG32(reg, tmp);
		}
		break;
	default:
		break;
	}

	return 0;
}

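/**
 * gmc_v9_0_vm_fault_interrupt_state - enable/disable VM fault interrupts
 *
 * @adev: amdgpu_device pointer
 * @src: interrupt source this state change applies to
 * @type: interrupt type (unused)
 * @state: interrupt state to program
 *
 * Sets or clears the protection fault interrupt-enable bits in the
 * VM_CONTEXT*_CNTL registers of all sixteen VM contexts on every vmhub.
 */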
static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
					struct amdgpu_irq_src *src,
					unsigned type,
					enum amdgpu_interrupt_state state)
{
	struct amdgpu_vmhub *hub;
	u32 tmp, reg, bits, i, j;

	bits = VM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		VM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		VM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		VM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		VM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		VM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		VM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		for (j = 0; j < adev->num_vmhubs; j++) {
			hub = &adev->vmhub[j];
			for (i = 0; i < 16; i++) {
				reg = hub->vm_context0_cntl + i;
				tmp = RREG32(reg);
				tmp &= ~bits;
				WREG32(reg, tmp);
			}
		}
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		for (j = 0; j < adev->num_vmhubs; j++) {
			hub = &adev->vmhub[j];
			for (i = 0; i < 16; i++) {
				reg = hub->vm_context0_cntl + i;
				tmp = RREG32(reg);
				tmp |= bits;
				WREG32(reg, tmp);
			}
		}
	default:
		break;
	}

	return 0;
}

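/**
 * gmc_v9_0_process_interrupt - handle a VM protection fault interrupt
 *
 * @adev: amdgpu_device pointer
 * @source: interrupt source the fault came from
 * @entry: decoded interrupt vector entry
 *
 * Reassembles the faulting address from the IV entry, lets the fault filter
 * and retry handling deal with recoverable retry faults, and otherwise logs
 * a rate-limited report including the VM_L2_PROTECTION_FAULT_STATUS fields.
 */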
static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
				struct amdgpu_irq_src *source,
				struct amdgpu_iv_entry *entry)
{
	struct amdgpu_vmhub *hub;
	bool retry_fault = !!(entry->src_data[1] & 0x80);
	uint32_t status = 0;
	u64 addr;
	char hub_name[10];

	addr = (u64)entry->src_data[0] << 12;
	addr |= ((u64)entry->src_data[1] & 0xf) << 44;

	if (retry_fault && amdgpu_gmc_filter_faults(adev, addr, entry->pasid,
						    entry->timestamp))
		return 1; /* This also prevents sending it to KFD */

	if (entry->client_id == SOC15_IH_CLIENTID_VMC) {
		snprintf(hub_name, sizeof(hub_name), "mmhub0");
		hub = &adev->vmhub[AMDGPU_MMHUB_0];
	} else if (entry->client_id == SOC15_IH_CLIENTID_VMC1) {
		snprintf(hub_name, sizeof(hub_name), "mmhub1");
		hub = &adev->vmhub[AMDGPU_MMHUB_1];
	} else {
		snprintf(hub_name, sizeof(hub_name), "gfxhub0");
		hub = &adev->vmhub[AMDGPU_GFXHUB_0];
	}

	/* If it's the first fault for this address, process it normally */
	if (retry_fault && !in_interrupt() &&
	    amdgpu_vm_handle_fault(adev, entry->pasid, addr))
		return 1; /* This also prevents sending it to KFD */

	if (!amdgpu_sriov_vf(adev)) {
		/*
		 * Issue a dummy read to wait for the status register to
		 * be updated to avoid reading an incorrect value due to
		 * the new fast GRBM interface.
		 */
		if (entry->vmid_src == AMDGPU_GFXHUB_0)
			RREG32(hub->vm_l2_pro_fault_status);

		status = RREG32(hub->vm_l2_pro_fault_status);
		WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
	}

	if (printk_ratelimit()) {
		struct amdgpu_task_info task_info;

		memset(&task_info, 0, sizeof(struct amdgpu_task_info));
		amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);

		dev_err(adev->dev,
			"[%s] %s page fault (src_id:%u ring:%u vmid:%u "
			"pasid:%u, for process %s pid %d thread %s pid %d)\n",
			hub_name, retry_fault ? "retry" : "no-retry",
			entry->src_id, entry->ring_id, entry->vmid,
			entry->pasid, task_info.process_name, task_info.tgid,
			task_info.task_name, task_info.pid);
		dev_err(adev->dev, "  in page starting at address 0x%016"PRIx64" from client %d\n",
			addr, entry->client_id);
		if (!amdgpu_sriov_vf(adev)) {
			dev_err(adev->dev,
				"VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
				status);
			dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
				REG_GET_FIELD(status,
				VM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS));
			dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
				REG_GET_FIELD(status,
				VM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR));
			dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
				REG_GET_FIELD(status,
				VM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS));
			dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
				REG_GET_FIELD(status,
				VM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR));
			dev_err(adev->dev, "\t RW: 0x%lx\n",
				REG_GET_FIELD(status,
				VM_L2_PROTECTION_FAULT_STATUS, RW));

		}
	}

	return 0;
}

static const struct amdgpu_irq_src_funcs gmc_v9_0_irq_funcs = {
	.set = gmc_v9_0_vm_fault_interrupt_state,
	.process = gmc_v9_0_process_interrupt,
};


static const struct amdgpu_irq_src_funcs gmc_v9_0_ecc_funcs = {
	.set = gmc_v9_0_ecc_interrupt_state,
	.process = amdgpu_umc_process_ecc_irq,
};

static void gmc_v9_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gmc.vm_fault.num_types = 1;
	adev->gmc.vm_fault.funcs = &gmc_v9_0_irq_funcs;

	if (!amdgpu_sriov_vf(adev)) {
		adev->gmc.ecc_irq.num_types = 1;
		adev->gmc.ecc_irq.funcs = &gmc_v9_0_ecc_funcs;
	}
}

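/**
 * gmc_v9_0_get_invalidate_req - build a VM_INVALIDATE_ENG0_REQ value
 *
 * @vmid: vm instance to invalidate
 * @flush_type: the flush type to request
 *
 * Returns the register value that invalidates the L1/L2 PTEs and PDEs
 * for @vmid with the requested flush type.
 */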
static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid,
					uint32_t flush_type)
{
	u32 req = 0;

	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ,
			    PER_VMID_INVALIDATE_REQ, 1 << vmid);
	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type);
	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ,
			    CLEAR_PROTECTION_FAULT_STATUS_ADDR,	0);

	return req;
}

/**
 * gmc_v9_0_use_invalidate_semaphore - decide whether to use the invalidation semaphore
 *
 * @adev: amdgpu_device pointer
 * @vmhub: vmhub type
 *
 */
static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev,
				       uint32_t vmhub)
{
	return ((vmhub == AMDGPU_MMHUB_0 ||
		 vmhub == AMDGPU_MMHUB_1) &&
		(!amdgpu_sriov_vf(adev)) &&
		(!(adev->asic_type == CHIP_RAVEN &&
		   adev->rev_id < 0x8 &&
		   adev->pdev->device == 0x15d8)));
}

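/**
 * gmc_v9_0_get_atc_vmid_pasid_mapping_info - look up the PASID bound to a VMID
 *
 * @adev: amdgpu_device pointer
 * @vmid: vm instance to query
 * @p_pasid: returns the PASID currently mapped to @vmid
 *
 * Returns true if the ATC VMID/PASID mapping is valid.
 */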
static bool gmc_v9_0_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
					uint8_t vmid, uint16_t *p_pasid)
{
	uint32_t value;

	value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
		     + vmid);
	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;

	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
}

/*
 * GART
 * VMID 0 is the physical GPU addresses as used by the kernel.
 * VMIDs 1-15 are used for userspace clients and are handled
 * by the amdgpu vm/hsa code.
 */

/**
 * gmc_v9_0_flush_gpu_tlb - tlb flush with certain type
 *
 * @adev: amdgpu_device pointer
 * @vmid: vm instance to flush
 * @vmhub: which hub to flush
 * @flush_type: the flush type
 *
 * Flush the TLB for the requested page table using certain type.
 */
static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
					uint32_t vmhub, uint32_t flush_type)
{
	bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub);
	const unsigned eng = 17;
	u32 j, inv_req, tmp;
	struct amdgpu_vmhub *hub;

	BUG_ON(vmhub >= adev->num_vmhubs);

	hub = &adev->vmhub[vmhub];
	inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type);

	/* This is necessary for a HW workaround under SRIOV as well
	 * as GFXOFF under bare metal
	 */
	if (adev->gfx.kiq.ring.sched.ready &&
			(amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
			!adev->in_gpu_reset) {
		uint32_t req = hub->vm_inv_eng0_req + eng;
		uint32_t ack = hub->vm_inv_eng0_ack + eng;

		amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
				1 << vmid);
		return;
	}

	spin_lock(&adev->gmc.invalidate_lock);

	/*
	 * The gpuvm invalidate acknowledge state may be lost across a
	 * power-gating off cycle, so acquire a semaphore before the
	 * invalidation and release it afterwards to avoid entering the
	 * power gated state while an invalidation is outstanding.
	 */

	/* TODO: still need to debug the semaphore path for GFXHUB as well. */
	if (use_semaphore) {
		for (j = 0; j < adev->usec_timeout; j++) {
			/* a read return value of 1 means semaphore acquire */
			tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng);
			if (tmp & 0x1)
				break;
			udelay(1);
		}

		if (j >= adev->usec_timeout)
			DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
	}

	WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, inv_req);

	/*
	 * Issue a dummy read to wait for the ACK register to be cleared
	 * to avoid a false ACK due to the new fast GRBM interface.
	 */
	if (vmhub == AMDGPU_GFXHUB_0)
		RREG32_NO_KIQ(hub->vm_inv_eng0_req + eng);

	for (j = 0; j < adev->usec_timeout; j++) {
		tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng);
		if (tmp & (1 << vmid))
			break;
		udelay(1);
	}

	/* TODO: still need to debug the semaphore path for GFXHUB as well. */
	if (use_semaphore)
		/*
		 * release the semaphore after invalidation;
		 * writing 0 releases it
		 */
		WREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng, 0);

	spin_unlock(&adev->gmc.invalidate_lock);

	if (j < adev->usec_timeout)
		return;

	DRM_ERROR("Timeout waiting for VM flush ACK!\n");
}

/**
 * gmc_v9_0_flush_gpu_tlb_pasid - tlb flush via pasid
 *
 * @adev: amdgpu_device pointer
 * @pasid: pasid to be flushed
 * @flush_type: the flush type
 * @all_hub: flush all hubs or only the gfxhub
 *
 * Flush the TLB for the requested pasid.
 */
static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
					uint16_t pasid, uint32_t flush_type,
					bool all_hub)
{
	int vmid, i;
	signed long r;
	uint32_t seq;
	uint16_t queried_pasid;
	bool ret;
	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;

	if (adev->in_gpu_reset)
		return -EIO;

	if (ring->sched.ready) {
		spin_lock(&adev->gfx.kiq.ring_lock);
		/* 2 dwords flush + 8 dwords fence */
		amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8);
		kiq->pmf->kiq_invalidate_tlbs(ring,
					pasid, flush_type, all_hub);
		amdgpu_fence_emit_polling(ring, &seq);
		amdgpu_ring_commit(ring);
		spin_unlock(&adev->gfx.kiq.ring_lock);
		r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
		if (r < 1) {
			DRM_ERROR("wait for kiq fence error: %ld.\n", r);
			return -ETIME;
		}

		return 0;
	}

	for (vmid = 1; vmid < 16; vmid++) {

		ret = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
				&queried_pasid);
		if (ret && queried_pasid == pasid) {
			if (all_hub) {
				for (i = 0; i < adev->num_vmhubs; i++)
					gmc_v9_0_flush_gpu_tlb(adev, vmid,
							i, flush_type);
			} else {
				gmc_v9_0_flush_gpu_tlb(adev, vmid,
						AMDGPU_GFXHUB_0, flush_type);
			}
			break;
		}
	}

	return 0;

}

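/**
 * gmc_v9_0_emit_flush_gpu_tlb - emit a TLB flush on a ring
 *
 * @ring: ring to emit the flush on
 * @vmid: vm instance to flush
 * @pd_addr: address of the page directory to use
 *
 * Emits the register writes that set the page directory base for @vmid on
 * the ring's vmhub and trigger the invalidation, wrapping the sequence in
 * the invalidation semaphore where required.  Returns @pd_addr.
 */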
static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
					    unsigned vmid, uint64_t pd_addr)
{
	bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(ring->adev, ring->funcs->vmhub);
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_vmhub *hub = &adev->vmhub[ring->funcs->vmhub];
	uint32_t req = gmc_v9_0_get_invalidate_req(vmid, 0);
	unsigned eng = ring->vm_inv_eng;

	/*
	 * The gpuvm invalidate acknowledge state may be lost across a
	 * power-gating off cycle, so acquire a semaphore before the
	 * invalidation and release it afterwards to avoid entering the
	 * power gated state while an invalidation is outstanding.
	 */

	/* TODO: still need to debug the semaphore path for GFXHUB as well. */
	if (use_semaphore)
		/* a read return value of 1 means semaphore acquire */
		amdgpu_ring_emit_reg_wait(ring,
					  hub->vm_inv_eng0_sem + eng, 0x1, 0x1);

	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid),
			      lower_32_bits(pd_addr));

	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
			      upper_32_bits(pd_addr));

	amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng,
					    hub->vm_inv_eng0_ack + eng,
					    req, 1 << vmid);

	/* TODO: still need to debug the semaphore path for GFXHUB as well. */
	if (use_semaphore)
		/*
		 * release the semaphore after invalidation;
		 * writing 0 releases it
		 */
		amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem + eng, 0);

	return pd_addr;
}

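/**
 * gmc_v9_0_emit_pasid_mapping - update the VMID to PASID LUT from a ring
 *
 * @ring: ring to emit the write on
 * @vmid: vm instance being mapped
 * @pasid: pasid to associate with @vmid
 *
 * Writes the IH VMID-to-PASID lookup table entry for the ring's vmhub;
 * mmhub1 has no LUT register, so nothing is emitted for it.
 */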
static void gmc_v9_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid,
					unsigned pasid)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t reg;

	/* Do nothing because there's no lut register for mmhub1. */
	if (ring->funcs->vmhub == AMDGPU_MMHUB_1)
		return;

	if (ring->funcs->vmhub == AMDGPU_GFXHUB_0)
		reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid;
	else
		reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid;

	amdgpu_ring_emit_wreg(ring, reg, pasid);
}

/*
 * PTE format on VEGA 10:
 * 63:59 reserved
 * 58:57 mtype
 * 56 F
 * 55 L
 * 54 P
 * 53 SW
 * 52 T
 * 50:48 reserved
 * 47:12 4k physical page base address
 * 11:7 fragment
 * 6 write
 * 5 read
 * 4 exe
 * 3 Z
 * 2 snooped
 * 1 system
 * 0 valid
 *
 * PDE format on VEGA 10:
 * 63:59 block fragment size
 * 58:55 reserved
 * 54 P
 * 53:48 reserved
 * 47:6 physical base address of PD or PTE
 * 5:3 reserved
 * 2 C
 * 1 system
 * 0 valid
 */

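/**
 * gmc_v9_0_map_mtype - translate the UAPI memory type into PTE bits
 *
 * @adev: amdgpu_device pointer
 * @flags: AMDGPU_VM_MTYPE_* mapping flag
 *
 * Returns the matching AMDGPU_PTE_MTYPE_VG10 encoding, defaulting to NC.
 */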
static uint64_t gmc_v9_0_map_mtype(struct amdgpu_device *adev, uint32_t flags)
{
	switch (flags) {
	case AMDGPU_VM_MTYPE_DEFAULT:
		return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
	case AMDGPU_VM_MTYPE_NC:
		return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
	case AMDGPU_VM_MTYPE_WC:
		return AMDGPU_PTE_MTYPE_VG10(MTYPE_WC);
	case AMDGPU_VM_MTYPE_RW:
		return AMDGPU_PTE_MTYPE_VG10(MTYPE_RW);
	case AMDGPU_VM_MTYPE_CC:
		return AMDGPU_PTE_MTYPE_VG10(MTYPE_CC);
	case AMDGPU_VM_MTYPE_UC:
		return AMDGPU_PTE_MTYPE_VG10(MTYPE_UC);
	default:
		return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
	}
}

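/**
 * gmc_v9_0_get_vm_pde - adjust a PDE address and flags for the hardware
 *
 * @adev: amdgpu_device pointer
 * @level: page directory level the entry belongs to
 * @addr: PDE address, fixed up in place
 * @flags: PDE flags, fixed up in place
 *
 * Converts VRAM addresses to MC addresses and, when further translation is
 * enabled, sets the block fragment size (PDB1) or the translate-further
 * bit (PDB0).
 */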
static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level,
				uint64_t *addr, uint64_t *flags)
{
	if (!(*flags & AMDGPU_PDE_PTE) && !(*flags & AMDGPU_PTE_SYSTEM))
		*addr = adev->vm_manager.vram_base_offset + *addr -
			adev->gmc.vram_start;
	BUG_ON(*addr & 0xFFFF00000000003FULL);

	if (!adev->gmc.translate_further)
		return;

	if (level == AMDGPU_VM_PDB1) {
		/* Set the block fragment size */
		if (!(*flags & AMDGPU_PDE_PTE))
			*flags |= AMDGPU_PDE_BFS(0x9);

	} else if (level == AMDGPU_VM_PDB0) {
		if (*flags & AMDGPU_PDE_PTE)
			*flags &= ~AMDGPU_PDE_PTE;
		else
			*flags |= AMDGPU_PTE_TF;
	}
}

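/**
 * gmc_v9_0_get_vm_pte - adjust PTE flags for a mapping
 *
 * @adev: amdgpu_device pointer
 * @mapping: the BO VA mapping the PTEs belong to
 * @flags: PTE flags, fixed up in place
 *
 * Carries over the executable and mtype bits from the mapping, marks PRT
 * mappings invalid, and forces snooping for XGMI-remote VRAM on Arcturus.
 */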
static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev,
				struct amdgpu_bo_va_mapping *mapping,
				uint64_t *flags)
{
	*flags &= ~AMDGPU_PTE_EXECUTABLE;
	*flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;

	*flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK;
	*flags |= mapping->flags & AMDGPU_PTE_MTYPE_VG10_MASK;

	if (mapping->flags & AMDGPU_PTE_PRT) {
		*flags |= AMDGPU_PTE_PRT;
		*flags &= ~AMDGPU_PTE_VALID;
	}

	if (adev->asic_type == CHIP_ARCTURUS &&
	    !(*flags & AMDGPU_PTE_SYSTEM) &&
	    mapping->bo_va->is_xgmi)
		*flags |= AMDGPU_PTE_SNOOPED;
}

static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = {
	.flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb,
	.flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid,
	.emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb,
	.emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping,
	.map_mtype = gmc_v9_0_map_mtype,
	.get_vm_pde = gmc_v9_0_get_vm_pde,
	.get_vm_pte = gmc_v9_0_get_vm_pte
};

static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device *adev)
{
	adev->gmc.gmc_funcs = &gmc_v9_0_gmc_funcs;
}

static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
		adev->umc.funcs = &umc_v6_0_funcs;
		break;
	case CHIP_VEGA20:
		adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM;
		adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM;
		adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
		adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_VG20;
		adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0];
		adev->umc.funcs = &umc_v6_1_funcs;
		break;
	case CHIP_ARCTURUS:
		adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM;
		adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM;
		adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
		adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_ARCT;
		adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0];
		adev->umc.funcs = &umc_v6_1_funcs;
		break;
	default:
		break;
	}
}

static void gmc_v9_0_set_mmhub_funcs(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA20:
		adev->mmhub.funcs = &mmhub_v1_0_funcs;
		break;
	case CHIP_ARCTURUS:
		adev->mmhub.funcs = &mmhub_v9_4_funcs;
		break;
	default:
		break;
	}
}

static int gmc_v9_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	gmc_v9_0_set_gmc_funcs(adev);
	gmc_v9_0_set_irq_funcs(adev);
	gmc_v9_0_set_umc_funcs(adev);
	gmc_v9_0_set_mmhub_funcs(adev);

	adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
	adev->gmc.shared_aperture_end =
		adev->gmc.shared_aperture_start + (4ULL << 30) - 1;
	adev->gmc.private_aperture_start = 0x1000000000000000ULL;
	adev->gmc.private_aperture_end =
		adev->gmc.private_aperture_start + (4ULL << 30) - 1;

	return 0;
}

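/**
 * gmc_v9_0_keep_stolen_memory - whether the stolen VRAM must stay reserved
 *
 * @adev: amdgpu_device pointer
 *
 * Returns true on ASICs where the pre-OS ("stolen") VRAM reservation has to
 * be kept for the lifetime of the driver; see the TODO note in the function
 * body.
 */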
static bool gmc_v9_0_keep_stolen_memory(struct amdgpu_device *adev)
{

	/*
	 * TODO:
	 * Currently there is a bug where some memory client outside
	 * of the driver writes to first 8M of VRAM on S3 resume,
	 * this overrides GART which by default gets placed in first 8M and
	 * causes VM_FAULTS once GTT is accessed.
	 * Keep the stolen memory reservation until this is solved.
	 * Also check code in gmc_v9_0_get_vbios_fb_size and gmc_v9_0_late_init
	 */
	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_RAVEN:
	case CHIP_ARCTURUS:
	case CHIP_RENOIR:
		return true;
	case CHIP_VEGA12:
	case CHIP_VEGA20:
	default:
		return false;
	}
}

static int gmc_v9_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	if (!gmc_v9_0_keep_stolen_memory(adev))
		amdgpu_bo_late_init(adev);

	r = amdgpu_gmc_allocate_vm_inv_eng(adev);
	if (r)
		return r;
	/* Check if ecc is available */
	if (!amdgpu_sriov_vf(adev)) {
		switch (adev->asic_type) {
		case CHIP_VEGA10:
		case CHIP_VEGA20:
		case CHIP_ARCTURUS:
			r = amdgpu_atomfirmware_mem_ecc_supported(adev);
			if (!r) {
				DRM_INFO("ECC is not present.\n");
				if (adev->df.funcs->enable_ecc_force_par_wr_rmw)
					adev->df.funcs->enable_ecc_force_par_wr_rmw(adev, false);
			} else {
				DRM_INFO("ECC is active.\n");
			}

			r = amdgpu_atomfirmware_sram_ecc_supported(adev);
			if (!r) {
				DRM_INFO("SRAM ECC is not present.\n");
			} else {
				DRM_INFO("SRAM ECC is active.\n");
			}
			break;
		default:
			break;
		}
	}

	r = amdgpu_gmc_ras_late_init(adev);
	if (r)
		return r;

	return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
}

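/**
 * gmc_v9_0_vram_gtt_location - place VRAM, GART and AGP in the MC space
 *
 * @adev: amdgpu_device pointer
 * @mc: memory controller parameters to fill in
 *
 * Queries the FB location from the mmhub, adds the XGMI node offset, lets
 * the common GMC helpers lay out the VRAM, GART and AGP apertures, and
 * derives vm_manager.vram_base_offset from the gfxhub FB offset.
 */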
static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev,
					struct amdgpu_gmc *mc)
{
	u64 base = 0;

	if (adev->asic_type == CHIP_ARCTURUS)
		base = mmhub_v9_4_get_fb_location(adev);
	else if (!amdgpu_sriov_vf(adev))
		base = mmhub_v1_0_get_fb_location(adev);

	/* add the xgmi offset of the physical node */
	base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
	amdgpu_gmc_vram_location(adev, mc, base);
	amdgpu_gmc_gart_location(adev, mc);
	amdgpu_gmc_agp_location(adev, mc);
	/* base offset of vram pages */
	adev->vm_manager.vram_base_offset = gfxhub_v1_0_get_mc_fb_offset(adev);

	/* XXX: add the xgmi offset of the physical node? */
	adev->vm_manager.vram_base_offset +=
		adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
}

/**
 * gmc_v9_0_mc_init - initialize the memory controller driver params
 *
 * @adev: amdgpu_device pointer
 *
 * Look up the amount of vram, vram width, and decide how to place
 * vram and gart within the GPU's physical address space.
 * Returns 0 for success.
 */
static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
{
	int r;

	/* get_memsize() returns the VRAM size in MB; convert to bytes */
	adev->gmc.mc_vram_size =
		adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL;
	adev->gmc.real_vram_size = adev->gmc.mc_vram_size;

	if (!(adev->flags & AMD_IS_APU)) {
		r = amdgpu_device_resize_fb_bar(adev);
		if (r)
			return r;
	}
	adev->gmc.aper_base = pci_resource_start(adev->pdev, 0);
	adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);

#ifdef __NetBSD__
	adev->gmc.aper_tag = adev->pdev->pd_pa.pa_memt;
#endif

#ifdef CONFIG_X86_64
	if (adev->flags & AMD_IS_APU) {
		adev->gmc.aper_base = gfxhub_v1_0_get_mc_fb_offset(adev);
		adev->gmc.aper_size = adev->gmc.real_vram_size;
	}
#endif
	/* In case the PCI BAR is larger than the actual amount of vram */
	adev->gmc.visible_vram_size = adev->gmc.aper_size;
	if (adev->gmc.visible_vram_size > adev->gmc.real_vram_size)
		adev->gmc.visible_vram_size = adev->gmc.real_vram_size;

	/* set the gart size */
	if (amdgpu_gart_size == -1) {
		switch (adev->asic_type) {
		case CHIP_VEGA10:  /* all engines support GPUVM */
		case CHIP_VEGA12:  /* all engines support GPUVM */
		case CHIP_VEGA20:
		case CHIP_ARCTURUS:
		default:
			adev->gmc.gart_size = 512ULL << 20;
			break;
		case CHIP_RAVEN:   /* DCE SG support */
		case CHIP_RENOIR:
			adev->gmc.gart_size = 1024ULL << 20;
			break;
		}
	} else {
		adev->gmc.gart_size = (u64)amdgpu_gart_size << 20;
	}

	gmc_v9_0_vram_gtt_location(adev, &adev->gmc);

	return 0;
}

static int gmc_v9_0_gart_init(struct amdgpu_device *adev)
{
	int r;

	if (adev->gart.bo) {
		WARN(1, "VEGA10 PCIE GART already initialized\n");
		return 0;
	}
	/* Initialize common gart structure */
	r = amdgpu_gart_init(adev);
	if (r)
		return r;
	adev->gart.table_size = adev->gart.num_gpu_pages * 8;
	adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_VG10(MTYPE_UC) |
				 AMDGPU_PTE_EXECUTABLE;
	return amdgpu_gart_table_vram_alloc(adev);
}

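/**
 * gmc_v9_0_get_vbios_fb_size - size of the pre-OS framebuffer to reserve
 *
 * @adev: amdgpu_device pointer
 *
 * Returns the amount of VRAM to keep reserved for the pre-OS (VBIOS)
 * framebuffer: a fixed 9 MB when stolen memory must be kept or VGA emulation
 * is enabled, otherwise the size of the active display viewport, or 0 if the
 * pre-OS buffer would use up most of VRAM.
 */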
static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
{
	u32 d1vga_control;
	unsigned size;

	/*
	 * TODO Remove once GART corruption is resolved
	 * Check related code in gmc_v9_0_sw_fini
	 */
	if (gmc_v9_0_keep_stolen_memory(adev))
		return 9 * 1024 * 1024;

	d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL);
	if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
		size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */
	} else {
		u32 viewport;

		switch (adev->asic_type) {
		case CHIP_RAVEN:
		case CHIP_RENOIR:
			viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION);
			size = (REG_GET_FIELD(viewport,
					      HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT) *
				REG_GET_FIELD(viewport,
					      HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_WIDTH) *
				4);
			break;
		case CHIP_VEGA10:
		case CHIP_VEGA12:
		case CHIP_VEGA20:
		default:
			viewport = RREG32_SOC15(DCE, 0, mmSCL0_VIEWPORT_SIZE);
			size = (REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, VIEWPORT_HEIGHT) *
				REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, VIEWPORT_WIDTH) *
				4);
			break;
		}
	}
	/* return 0 if the pre-OS buffer uses up most of vram */
	if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024))
		return 0;

	return size;
}

static int gmc_v9_0_sw_init(void *handle)
{
	int r, vram_width = 0, vram_type = 0, vram_vendor = 0;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	gfxhub_v1_0_init(adev);
	if (adev->asic_type == CHIP_ARCTURUS)
		mmhub_v9_4_init(adev);
	else
		mmhub_v1_0_init(adev);

	spin_lock_init(&adev->gmc.invalidate_lock);

	r = amdgpu_atomfirmware_get_vram_info(adev,
		&vram_width, &vram_type, &vram_vendor);
	if (amdgpu_sriov_vf(adev))
		/* For Vega10 SR-IOV, vram_width can't be read from ATOM as RAVEN,
		 * and the DF related registers are not readable; hardcoding seems
		 * to be the only way to set the correct vram_width
		 */
		adev->gmc.vram_width = 2048;
	else if (amdgpu_emu_mode != 1)
		adev->gmc.vram_width = vram_width;

	if (!adev->gmc.vram_width) {
		int chansize, numchan;

		/* hbm memory channel size */
		if (adev->flags & AMD_IS_APU)
			chansize = 64;
		else
			chansize = 128;

		numchan = adev->df.funcs->get_hbm_channel_number(adev);
		adev->gmc.vram_width = numchan * chansize;
	}

	adev->gmc.vram_type = vram_type;
	adev->gmc.vram_vendor = vram_vendor;
	switch (adev->asic_type) {
	case CHIP_RAVEN:
		adev->num_vmhubs = 2;

		if (adev->rev_id == 0x0 || adev->rev_id == 0x1) {
			amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
		} else {
			/* vm_size is 128TB + 512GB for legacy 3-level page support */
			amdgpu_vm_adjust_size(adev, 128 * 1024 + 512, 9, 2, 48);
			adev->gmc.translate_further =
				adev->vm_manager.num_level > 1;
		}
		break;
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
	case CHIP_RENOIR:
		adev->num_vmhubs = 2;

		/*
		 * For 4-level page table support the vm size is 256TB (48 bit),
		 * the maximum size supported by Vega10, with block size 512 (9 bit)
		 */
		/* SR-IOV restricts max_pfn to below AMDGPU_GMC_HOLE */
		if (amdgpu_sriov_vf(adev))
			amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 47);
		else
			amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
		break;
	case CHIP_ARCTURUS:
		adev->num_vmhubs = 3;

		/* Keep the vm size the same as Vega20 */
		amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
		break;
	default:
		break;
	}

	/* This interrupt is VMC page fault.*/
	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC, VMC_1_0__SRCID__VM_FAULT,
				&adev->gmc.vm_fault);
	if (r)
		return r;

	if (adev->asic_type == CHIP_ARCTURUS) {
		r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC1, VMC_1_0__SRCID__VM_FAULT,
					&adev->gmc.vm_fault);
		if (r)
			return r;
	}

	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UTCL2, UTCL2_1_0__SRCID__FAULT,
				&adev->gmc.vm_fault);

	if (r)
		return r;

	if (!amdgpu_sriov_vf(adev)) {
		/* interrupt sent to DF. */
		r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DF, 0,
				      &adev->gmc.ecc_irq);
		if (r)
			return r;
	}

	/* Set the internal MC address mask
	 * This is the max address of the GPU's
	 * internal address space.
	 */
	adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */

#ifdef __NetBSD__
	r = drm_limit_dma_space(adev->ddev, 0, DMA_BIT_MASK(44));
#else
	r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44));
#endif
	if (r) {
		printk(KERN_WARNING "amdgpu: No suitable DMA available.\n");
		return r;
	}
	adev->need_swiotlb = drm_need_swiotlb(44);

	if (adev->gmc.xgmi.supported) {
		r = gfxhub_v1_1_get_xgmi_info(adev);
		if (r)
			return r;
	}

	r = gmc_v9_0_mc_init(adev);
	if (r)
		return r;

	adev->gmc.stolen_size = gmc_v9_0_get_vbios_fb_size(adev);

	/* Memory manager */
	r = amdgpu_bo_init(adev);
	if (r)
		return r;

	r = gmc_v9_0_gart_init(adev);
	if (r)
		return r;

	/*
	 * number of VMs
	 * VMID 0 is reserved for System
	 * amdgpu graphics/compute will use VMIDs 1-7
	 * amdkfd will use VMIDs 8-15
	 */
	adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids = AMDGPU_NUM_OF_VMIDS;
	adev->vm_manager.id_mgr[AMDGPU_MMHUB_0].num_ids = AMDGPU_NUM_OF_VMIDS;
	adev->vm_manager.id_mgr[AMDGPU_MMHUB_1].num_ids = AMDGPU_NUM_OF_VMIDS;

	amdgpu_vm_manager_init(adev);

	return 0;
}

static int gmc_v9_0_sw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	void *stolen_vga_buf;

	amdgpu_gmc_ras_fini(adev);
	amdgpu_gem_force_release(adev);
	amdgpu_vm_manager_fini(adev);

	if (gmc_v9_0_keep_stolen_memory(adev))
		amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, &stolen_vga_buf);

	amdgpu_gart_table_vram_free(adev);
	amdgpu_bo_fini(adev);
	amdgpu_gart_fini(adev);

	spin_lock_destroy(&adev->gmc.invalidate_lock);

	return 0;
}

static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev)
{

	switch (adev->asic_type) {
	case CHIP_VEGA10:
		if (amdgpu_sriov_vf(adev))
			break;
		/* fall through */
	case CHIP_VEGA20:
		soc15_program_register_sequence(adev,
						golden_settings_mmhub_1_0_0,
						ARRAY_SIZE(golden_settings_mmhub_1_0_0));
		soc15_program_register_sequence(adev,
						golden_settings_athub_1_0_0,
						ARRAY_SIZE(golden_settings_athub_1_0_0));
		break;
	case CHIP_VEGA12:
		break;
	case CHIP_RAVEN:
		/* TODO for renoir */
		soc15_program_register_sequence(adev,
						golden_settings_athub_1_0_0,
						ARRAY_SIZE(golden_settings_athub_1_0_0));
		break;
	default:
		break;
	}
}

/**
 * gmc_v9_0_gart_enable - gart enable
 *
 * @adev: amdgpu_device pointer
 */
static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
{
	int r;

	if (adev->gart.bo == NULL) {
		dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = amdgpu_gart_table_vram_pin(adev);
	if (r)
		return r;

	r = gfxhub_v1_0_gart_enable(adev);
	if (r)
		return r;

	if (adev->asic_type == CHIP_ARCTURUS)
		r = mmhub_v9_4_gart_enable(adev);
	else
		r = mmhub_v1_0_gart_enable(adev);
	if (r)
		return r;

	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(adev->gmc.gart_size >> 20),
		 (unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo));
	adev->gart.ready = true;
	return 0;
}

static int gmc_v9_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool value;
	int r, i;
	u32 tmp;

	/* The sequence of these two function calls matters.*/
	gmc_v9_0_init_golden_registers(adev);

	if (adev->mode_info.num_crtc) {
		if (adev->asic_type != CHIP_ARCTURUS) {
			/* Lockout access through VGA aperture*/
			WREG32_FIELD15(DCE, 0, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1);

			/* disable VGA render */
			WREG32_FIELD15(DCE, 0, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0);
		}
	}

	amdgpu_device_program_register_sequence(adev,
						golden_settings_vega10_hdp,
						ARRAY_SIZE(golden_settings_vega10_hdp));

	switch (adev->asic_type) {
	case CHIP_RAVEN:
		/* TODO for renoir */
		mmhub_v1_0_update_power_gating(adev, true);
		break;
	case CHIP_ARCTURUS:
		WREG32_FIELD15(HDP, 0, HDP_MMHUB_CNTL, HDP_MMHUB_GCC, 1);
		break;
	default:
		break;
	}

	WREG32_FIELD15(HDP, 0, HDP_MISC_CNTL, FLUSH_INVALIDATE_CACHE, 1);

	tmp = RREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL);
	WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp);

	WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE, (adev->gmc.vram_start >> 8));
	WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE_HI, (adev->gmc.vram_start >> 40));

	/* After HDP is initialized, flush HDP.*/
	adev->nbio.funcs->hdp_flush(adev, NULL);

	if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS)
		value = false;
	else
		value = true;

	if (!amdgpu_sriov_vf(adev)) {
		gfxhub_v1_0_set_fault_enable_default(adev, value);
		if (adev->asic_type == CHIP_ARCTURUS)
			mmhub_v9_4_set_fault_enable_default(adev, value);
		else
			mmhub_v1_0_set_fault_enable_default(adev, value);
	}
	for (i = 0; i < adev->num_vmhubs; ++i)
		gmc_v9_0_flush_gpu_tlb(adev, 0, i, 0);

	if (adev->umc.funcs && adev->umc.funcs->init_registers)
		adev->umc.funcs->init_registers(adev);

	r = gmc_v9_0_gart_enable(adev);

	return r;
}

/**
 * gmc_v9_0_gart_disable - gart disable
 *
 * @adev: amdgpu_device pointer
 *
 * This disables all VM page tables.
 */
static void gmc_v9_0_gart_disable(struct amdgpu_device *adev)
{
	gfxhub_v1_0_gart_disable(adev);
	if (adev->asic_type == CHIP_ARCTURUS)
		mmhub_v9_4_gart_disable(adev);
	else
		mmhub_v1_0_gart_disable(adev);
	amdgpu_gart_table_vram_unpin(adev);
}

static int gmc_v9_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev)) {
		/* full access mode, so don't touch any GMC register */
		DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
		return 0;
	}

	amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0);
	amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
	gmc_v9_0_gart_disable(adev);

	return 0;
}

static int gmc_v9_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return gmc_v9_0_hw_fini(adev);
}

static int gmc_v9_0_resume(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = gmc_v9_0_hw_init(adev);
	if (r)
		return r;

	amdgpu_vmid_reset_all(adev);

	return 0;
}

static bool gmc_v9_0_is_idle(void *handle)
{
	/* MC is always ready in GMC v9.*/
	return true;
}

static int gmc_v9_0_wait_for_idle(void *handle)
{
	/* There is no need to wait for MC idle in GMC v9.*/
	return 0;
}

static int gmc_v9_0_soft_reset(void *handle)
{
	/* XXX for emulation.*/
	return 0;
}

static int gmc_v9_0_set_clockgating_state(void *handle,
					enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (adev->asic_type == CHIP_ARCTURUS)
		mmhub_v9_4_set_clockgating(adev, state);
	else
		mmhub_v1_0_set_clockgating(adev, state);

	athub_v1_0_set_clockgating(adev, state);

	return 0;
}

static void gmc_v9_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (adev->asic_type == CHIP_ARCTURUS)
		mmhub_v9_4_get_clockgating(adev, flags);
	else
		mmhub_v1_0_get_clockgating(adev, flags);

	athub_v1_0_get_clockgating(adev, flags);
}

static int gmc_v9_0_set_powergating_state(void *handle,
					enum amd_powergating_state state)
{
	return 0;
}

const struct amd_ip_funcs gmc_v9_0_ip_funcs = {
	.name = "gmc_v9_0",
	.early_init = gmc_v9_0_early_init,
	.late_init = gmc_v9_0_late_init,
	.sw_init = gmc_v9_0_sw_init,
	.sw_fini = gmc_v9_0_sw_fini,
	.hw_init = gmc_v9_0_hw_init,
	.hw_fini = gmc_v9_0_hw_fini,
	.suspend = gmc_v9_0_suspend,
	.resume = gmc_v9_0_resume,
	.is_idle = gmc_v9_0_is_idle,
	.wait_for_idle = gmc_v9_0_wait_for_idle,
	.soft_reset = gmc_v9_0_soft_reset,
	.set_clockgating_state = gmc_v9_0_set_clockgating_state,
	.set_powergating_state = gmc_v9_0_set_powergating_state,
	.get_clockgating_state = gmc_v9_0_get_clockgating_state,
};

const struct amdgpu_ip_block_version gmc_v9_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GMC,
	.major = 9,
	.minor = 0,
	.rev = 0,
	.funcs = &gmc_v9_0_ip_funcs,
};