/* $NetBSD: amdgpu_gmc_v9_0.c,v 1.5 2021/12/19 12:31:45 riastradh Exp $ */

/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: amdgpu_gmc_v9_0.c,v 1.5 2021/12/19 12:31:45 riastradh Exp $");

#include <linux/firmware.h>
#include <linux/pci.h>

#include <drm/drm_cache.h>

#include "amdgpu.h"
#include "gmc_v9_0.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_gem.h"

#include "hdp/hdp_4_0_offset.h"
#include "hdp/hdp_4_0_sh_mask.h"
#include "gc/gc_9_0_sh_mask.h"
#include "dce/dce_12_0_offset.h"
#include "dce/dce_12_0_sh_mask.h"
#include "vega10_enum.h"
#include "mmhub/mmhub_1_0_offset.h"
#include "athub/athub_1_0_sh_mask.h"
#include "athub/athub_1_0_offset.h"
#include "oss/osssys_4_0_offset.h"

#include "soc15.h"
#include "soc15d.h"
#include "soc15_common.h"
#include "umc/umc_6_0_sh_mask.h"

#include "gfxhub_v1_0.h"
#include "mmhub_v1_0.h"
#include "athub_v1_0.h"
#include "gfxhub_v1_1.h"
#include "mmhub_v9_4.h"
#include "umc_v6_1.h"
#include "umc_v6_0.h"

#include "ivsrcid/vmc/irqsrcs_vmc_1_0.h"

#include "amdgpu_ras.h"
#include "amdgpu_xgmi.h"

/* add these here since we already include dce12 headers and these are for DCN */
#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION 0x055d
#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_BASE_IDX 2
#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_WIDTH__SHIFT 0x0
#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_HEIGHT__SHIFT 0x10
#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_WIDTH_MASK 0x00003FFFL
#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_HEIGHT_MASK 0x3FFF0000L

/* XXX Move this macro to VEGA10 header file, which is like vid.h for VI.*/
#define AMDGPU_NUM_OF_VMIDS 8

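/*
 * Each golden setting below is a { register offset, AND mask, OR value }
 * triple consumed by amdgpu_device_program_register_sequence(): the AND-mask
 * bits are cleared from the current register value before the OR value is
 * written on top.
 */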
static const u32 golden_settings_vega10_hdp[] =
{
        0xf64, 0x0fffffff, 0x00000000,
        0xf65, 0x0fffffff, 0x00000000,
        0xf66, 0x0fffffff, 0x00000000,
        0xf67, 0x0fffffff, 0x00000000,
        0xf68, 0x0fffffff, 0x00000000,
        0xf6a, 0x0fffffff, 0x00000000,
        0xf6b, 0x0fffffff, 0x00000000,
        0xf6c, 0x0fffffff, 0x00000000,
        0xf6d, 0x0fffffff, 0x00000000,
        0xf6e, 0x0fffffff, 0x00000000,
};

static const struct soc15_reg_golden golden_settings_mmhub_1_0_0[] =
{
        SOC15_REG_GOLDEN_VALUE(MMHUB, 0, mmDAGB1_WRCLI2, 0x00000007, 0xfe5fe0fa),
        SOC15_REG_GOLDEN_VALUE(MMHUB, 0, mmMMEA1_DRAM_WR_CLI2GRP_MAP0, 0x00000030, 0x55555565)
};

static const struct soc15_reg_golden golden_settings_athub_1_0_0[] =
{
        SOC15_REG_GOLDEN_VALUE(ATHUB, 0, mmRPB_ARB_CNTL, 0x0000ff00, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(ATHUB, 0, mmRPB_ARB_CNTL2, 0x00ff00ff, 0x00080008)
};

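/*
 * ECC control/mask/status registers for what appear to be eight UMC
 * instances (bases 0x40000 apart), each with four channel instances
 * (offsets 0x800 apart).
 */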
static const uint32_t ecc_umc_mcumc_ctrl_addrs[] = {
        (0x000143c0 + 0x00000000),
        (0x000143c0 + 0x00000800),
        (0x000143c0 + 0x00001000),
        (0x000143c0 + 0x00001800),
        (0x000543c0 + 0x00000000),
        (0x000543c0 + 0x00000800),
        (0x000543c0 + 0x00001000),
        (0x000543c0 + 0x00001800),
        (0x000943c0 + 0x00000000),
        (0x000943c0 + 0x00000800),
        (0x000943c0 + 0x00001000),
        (0x000943c0 + 0x00001800),
        (0x000d43c0 + 0x00000000),
        (0x000d43c0 + 0x00000800),
        (0x000d43c0 + 0x00001000),
        (0x000d43c0 + 0x00001800),
        (0x001143c0 + 0x00000000),
        (0x001143c0 + 0x00000800),
        (0x001143c0 + 0x00001000),
        (0x001143c0 + 0x00001800),
        (0x001543c0 + 0x00000000),
        (0x001543c0 + 0x00000800),
        (0x001543c0 + 0x00001000),
        (0x001543c0 + 0x00001800),
        (0x001943c0 + 0x00000000),
        (0x001943c0 + 0x00000800),
        (0x001943c0 + 0x00001000),
        (0x001943c0 + 0x00001800),
        (0x001d43c0 + 0x00000000),
        (0x001d43c0 + 0x00000800),
        (0x001d43c0 + 0x00001000),
        (0x001d43c0 + 0x00001800),
};

static const uint32_t ecc_umc_mcumc_ctrl_mask_addrs[] = {
        (0x000143e0 + 0x00000000),
        (0x000143e0 + 0x00000800),
        (0x000143e0 + 0x00001000),
        (0x000143e0 + 0x00001800),
        (0x000543e0 + 0x00000000),
        (0x000543e0 + 0x00000800),
        (0x000543e0 + 0x00001000),
        (0x000543e0 + 0x00001800),
        (0x000943e0 + 0x00000000),
        (0x000943e0 + 0x00000800),
        (0x000943e0 + 0x00001000),
        (0x000943e0 + 0x00001800),
        (0x000d43e0 + 0x00000000),
        (0x000d43e0 + 0x00000800),
        (0x000d43e0 + 0x00001000),
        (0x000d43e0 + 0x00001800),
        (0x001143e0 + 0x00000000),
        (0x001143e0 + 0x00000800),
        (0x001143e0 + 0x00001000),
        (0x001143e0 + 0x00001800),
        (0x001543e0 + 0x00000000),
        (0x001543e0 + 0x00000800),
        (0x001543e0 + 0x00001000),
        (0x001543e0 + 0x00001800),
        (0x001943e0 + 0x00000000),
        (0x001943e0 + 0x00000800),
        (0x001943e0 + 0x00001000),
        (0x001943e0 + 0x00001800),
        (0x001d43e0 + 0x00000000),
        (0x001d43e0 + 0x00000800),
        (0x001d43e0 + 0x00001000),
        (0x001d43e0 + 0x00001800),
};

static const uint32_t ecc_umc_mcumc_status_addrs[] __unused = {
        (0x000143c2 + 0x00000000),
        (0x000143c2 + 0x00000800),
        (0x000143c2 + 0x00001000),
        (0x000143c2 + 0x00001800),
        (0x000543c2 + 0x00000000),
        (0x000543c2 + 0x00000800),
        (0x000543c2 + 0x00001000),
        (0x000543c2 + 0x00001800),
        (0x000943c2 + 0x00000000),
        (0x000943c2 + 0x00000800),
        (0x000943c2 + 0x00001000),
        (0x000943c2 + 0x00001800),
        (0x000d43c2 + 0x00000000),
        (0x000d43c2 + 0x00000800),
        (0x000d43c2 + 0x00001000),
        (0x000d43c2 + 0x00001800),
        (0x001143c2 + 0x00000000),
        (0x001143c2 + 0x00000800),
        (0x001143c2 + 0x00001000),
        (0x001143c2 + 0x00001800),
        (0x001543c2 + 0x00000000),
        (0x001543c2 + 0x00000800),
        (0x001543c2 + 0x00001000),
        (0x001543c2 + 0x00001800),
        (0x001943c2 + 0x00000000),
        (0x001943c2 + 0x00000800),
        (0x001943c2 + 0x00001000),
        (0x001943c2 + 0x00001800),
        (0x001d43c2 + 0x00000000),
        (0x001d43c2 + 0x00000800),
        (0x001d43c2 + 0x00001000),
        (0x001d43c2 + 0x00001800),
};

static int gmc_v9_0_ecc_interrupt_state(struct amdgpu_device *adev,
                                        struct amdgpu_irq_src *src,
                                        unsigned type,
                                        enum amdgpu_interrupt_state state)
{
        u32 bits, i, tmp, reg;

        /* Devices newer than VEGA10/12 shall have these programming
           sequences performed by PSP BL */
        if (adev->asic_type >= CHIP_VEGA20)
                return 0;

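        /*
         * Assumption: the low seven bits of each EccCtrl register are the
         * per-channel ECC interrupt enables toggled below.
         */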
        bits = 0x7f;

        switch (state) {
        case AMDGPU_IRQ_STATE_DISABLE:
                for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_addrs); i++) {
                        reg = ecc_umc_mcumc_ctrl_addrs[i];
                        tmp = RREG32(reg);
                        tmp &= ~bits;
                        WREG32(reg, tmp);
                }
                for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_mask_addrs); i++) {
                        reg = ecc_umc_mcumc_ctrl_mask_addrs[i];
                        tmp = RREG32(reg);
                        tmp &= ~bits;
                        WREG32(reg, tmp);
                }
                break;
        case AMDGPU_IRQ_STATE_ENABLE:
                for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_addrs); i++) {
                        reg = ecc_umc_mcumc_ctrl_addrs[i];
                        tmp = RREG32(reg);
                        tmp |= bits;
                        WREG32(reg, tmp);
                }
                for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_mask_addrs); i++) {
                        reg = ecc_umc_mcumc_ctrl_mask_addrs[i];
                        tmp = RREG32(reg);
                        tmp |= bits;
                        WREG32(reg, tmp);
                }
                break;
        default:
                break;
        }

        return 0;
}

static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
                                             struct amdgpu_irq_src *src,
                                             unsigned type,
                                             enum amdgpu_interrupt_state state)
{
        struct amdgpu_vmhub *hub;
        u32 tmp, reg, bits, i, j;

        bits = VM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
                VM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
                VM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
                VM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
                VM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
                VM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
                VM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;

        switch (state) {
        case AMDGPU_IRQ_STATE_DISABLE:
                for (j = 0; j < adev->num_vmhubs; j++) {
                        hub = &adev->vmhub[j];
                        for (i = 0; i < 16; i++) {
                                reg = hub->vm_context0_cntl + i;
                                tmp = RREG32(reg);
                                tmp &= ~bits;
                                WREG32(reg, tmp);
                        }
                }
                break;
        case AMDGPU_IRQ_STATE_ENABLE:
                for (j = 0; j < adev->num_vmhubs; j++) {
                        hub = &adev->vmhub[j];
                        for (i = 0; i < 16; i++) {
                                reg = hub->vm_context0_cntl + i;
                                tmp = RREG32(reg);
                                tmp |= bits;
                                WREG32(reg, tmp);
                        }
                }
                break;
        default:
                break;
        }

        return 0;
}

static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
                                      struct amdgpu_irq_src *source,
                                      struct amdgpu_iv_entry *entry)
{
        struct amdgpu_vmhub *hub;
        bool retry_fault = !!(entry->src_data[1] & 0x80);
        uint32_t status = 0;
        u64 addr;
        char hub_name[10];

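        /*
         * Reassemble the faulting address: src_data[0] carries bits 43:12
         * of the page address and the low nibble of src_data[1] carries
         * bits 47:44.
         */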
        addr = (u64)entry->src_data[0] << 12;
        addr |= ((u64)entry->src_data[1] & 0xf) << 44;

        if (retry_fault && amdgpu_gmc_filter_faults(adev, addr, entry->pasid,
                                                    entry->timestamp))
                return 1; /* This also prevents sending it to KFD */

        if (entry->client_id == SOC15_IH_CLIENTID_VMC) {
                snprintf(hub_name, sizeof(hub_name), "mmhub0");
                hub = &adev->vmhub[AMDGPU_MMHUB_0];
        } else if (entry->client_id == SOC15_IH_CLIENTID_VMC1) {
                snprintf(hub_name, sizeof(hub_name), "mmhub1");
                hub = &adev->vmhub[AMDGPU_MMHUB_1];
        } else {
                snprintf(hub_name, sizeof(hub_name), "gfxhub0");
                hub = &adev->vmhub[AMDGPU_GFXHUB_0];
        }

        /* If it's the first fault for this address, process it normally */
        if (retry_fault && !in_interrupt() &&
            amdgpu_vm_handle_fault(adev, entry->pasid, addr))
                return 1; /* This also prevents sending it to KFD */

        if (!amdgpu_sriov_vf(adev)) {
                /*
                 * Issue a dummy read to wait for the status register to
                 * be updated to avoid reading an incorrect value due to
                 * the new fast GRBM interface.
                 */
                if (entry->vmid_src == AMDGPU_GFXHUB_0)
                        RREG32(hub->vm_l2_pro_fault_status);

                status = RREG32(hub->vm_l2_pro_fault_status);
                WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
        }

        if (printk_ratelimit()) {
                struct amdgpu_task_info task_info;

                memset(&task_info, 0, sizeof(struct amdgpu_task_info));
                amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);

                dev_err(adev->dev,
                        "[%s] %s page fault (src_id:%u ring:%u vmid:%u "
                        "pasid:%u, for process %s pid %d thread %s pid %d)\n",
                        hub_name, retry_fault ? "retry" : "no-retry",
                        entry->src_id, entry->ring_id, entry->vmid,
                        entry->pasid, task_info.process_name, task_info.tgid,
                        task_info.task_name, task_info.pid);
                dev_err(adev->dev, "  in page starting at address 0x%016"PRIx64" from client %d\n",
                        addr, entry->client_id);
                if (!amdgpu_sriov_vf(adev)) {
                        dev_err(adev->dev,
                                "VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
                                status);
                        dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
                                REG_GET_FIELD(status,
                                VM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS));
                        dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
                                REG_GET_FIELD(status,
                                VM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR));
                        dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
                                REG_GET_FIELD(status,
                                VM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS));
                        dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
                                REG_GET_FIELD(status,
                                VM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR));
                        dev_err(adev->dev, "\t RW: 0x%lx\n",
                                REG_GET_FIELD(status,
                                VM_L2_PROTECTION_FAULT_STATUS, RW));

                }
        }

        return 0;
}

static const struct amdgpu_irq_src_funcs gmc_v9_0_irq_funcs = {
        .set = gmc_v9_0_vm_fault_interrupt_state,
        .process = gmc_v9_0_process_interrupt,
};


static const struct amdgpu_irq_src_funcs gmc_v9_0_ecc_funcs = {
        .set = gmc_v9_0_ecc_interrupt_state,
        .process = amdgpu_umc_process_ecc_irq,
};

static void gmc_v9_0_set_irq_funcs(struct amdgpu_device *adev)
{
        adev->gmc.vm_fault.num_types = 1;
        adev->gmc.vm_fault.funcs = &gmc_v9_0_irq_funcs;

        if (!amdgpu_sriov_vf(adev)) {
                adev->gmc.ecc_irq.num_types = 1;
                adev->gmc.ecc_irq.funcs = &gmc_v9_0_ecc_funcs;
        }
}

static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid,
                                            uint32_t flush_type)
{
        u32 req = 0;

        req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ,
                            PER_VMID_INVALIDATE_REQ, 1 << vmid);
        req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type);
        req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
        req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
        req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
        req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
        req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
        req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ,
                            CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);

        return req;
}

/**
 * gmc_v9_0_use_invalidate_semaphore - judge whether to use semaphore
 *
 * @adev: amdgpu_device pointer
 * @vmhub: vmhub type
 *
 */
static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev,
                                              uint32_t vmhub)
{
        return ((vmhub == AMDGPU_MMHUB_0 ||
                 vmhub == AMDGPU_MMHUB_1) &&
                (!amdgpu_sriov_vf(adev)) &&
                (!(adev->asic_type == CHIP_RAVEN &&
                   adev->rev_id < 0x8 &&
                   adev->pdev->device == 0x15d8)));
}

static bool gmc_v9_0_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
                                                     uint8_t vmid, uint16_t *p_pasid)
{
        uint32_t value;

        value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
                       + vmid);
        *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;

        return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
}

/*
 * GART
 * VMID 0 is the physical GPU addresses as used by the kernel.
 * VMIDs 1-15 are used for userspace clients and are handled
 * by the amdgpu vm/hsa code.
 */

/**
 * gmc_v9_0_flush_gpu_tlb - tlb flush with certain type
 *
 * @adev: amdgpu_device pointer
 * @vmid: vm instance to flush
 * @vmhub: which hub to flush
 * @flush_type: the flush type
 *
 * Flush the TLB for the requested page table using certain type.
 */
static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
                                   uint32_t vmhub, uint32_t flush_type)
{
        bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub);
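        /*
         * Engine 17 is presumably reserved for this MMIO flush path; the
         * per-ring invalidation engines are handed out separately in
         * amdgpu_gmc_allocate_vm_inv_eng().
         */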
        const unsigned eng = 17;
        u32 j, inv_req, tmp;
        struct amdgpu_vmhub *hub;

        BUG_ON(vmhub >= adev->num_vmhubs);

        hub = &adev->vmhub[vmhub];
        inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type);

        /* This is necessary for a HW workaround under SRIOV as well
         * as GFXOFF under bare metal
         */
        if (adev->gfx.kiq.ring.sched.ready &&
            (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
            !adev->in_gpu_reset) {
                uint32_t req = hub->vm_inv_eng0_req + eng;
                uint32_t ack = hub->vm_inv_eng0_ack + eng;

                amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
                                                   1 << vmid);
                return;
        }

        spin_lock(&adev->gmc.invalidate_lock);

        /*
         * The GPUVM invalidate acknowledge state may be lost across a
         * power-gating cycle, so acquire the semaphore before the
         * invalidation and release it afterward to avoid entering a
         * power-gated state while the invalidation is in flight.
         */

        /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
        if (use_semaphore) {
                for (j = 0; j < adev->usec_timeout; j++) {
                        /* a read return value of 1 means semaphore acquire */
                        tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng);
                        if (tmp & 0x1)
                                break;
                        udelay(1);
                }

                if (j >= adev->usec_timeout)
                        DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
        }

        WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, inv_req);

        /*
         * Issue a dummy read to wait for the ACK register to be cleared
         * to avoid a false ACK due to the new fast GRBM interface.
         */
        if (vmhub == AMDGPU_GFXHUB_0)
                RREG32_NO_KIQ(hub->vm_inv_eng0_req + eng);

        for (j = 0; j < adev->usec_timeout; j++) {
                tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng);
                if (tmp & (1 << vmid))
                        break;
                udelay(1);
        }

        /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
        if (use_semaphore)
                /*
                 * add semaphore release after invalidation,
                 * write with 0 means semaphore release
                 */
                WREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng, 0);

        spin_unlock(&adev->gmc.invalidate_lock);

        if (j < adev->usec_timeout)
                return;

        DRM_ERROR("Timeout waiting for VM flush ACK!\n");
}

/**
 * gmc_v9_0_flush_gpu_tlb_pasid - tlb flush via pasid
 *
 * @adev: amdgpu_device pointer
 * @pasid: pasid to be flushed
 * @flush_type: the flush type
 * @all_hub: flush all hubs
 *
 * Flush the TLB for the requested pasid.
 */
static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
                                        uint16_t pasid, uint32_t flush_type,
                                        bool all_hub)
{
        int vmid, i;
        signed long r;
        uint32_t seq;
        uint16_t queried_pasid;
        bool ret;
        struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
        struct amdgpu_kiq *kiq = &adev->gfx.kiq;

        if (adev->in_gpu_reset)
                return -EIO;

        if (ring->sched.ready) {
                spin_lock(&adev->gfx.kiq.ring_lock);
                /* 2 dwords flush + 8 dwords fence */
                amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8);
                kiq->pmf->kiq_invalidate_tlbs(ring,
                                              pasid, flush_type, all_hub);
                amdgpu_fence_emit_polling(ring, &seq);
                amdgpu_ring_commit(ring);
                spin_unlock(&adev->gfx.kiq.ring_lock);
                r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
                if (r < 1) {
                        DRM_ERROR("wait for kiq fence error: %ld.\n", r);
                        return -ETIME;
                }

                return 0;
        }

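        /*
         * No KIQ available: scan the ATC VMID->PASID mappings and flush any
         * VMID currently bound to this pasid directly through MMIO.
         */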
        for (vmid = 1; vmid < 16; vmid++) {

                ret = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
                                                               &queried_pasid);
                if (ret && queried_pasid == pasid) {
                        if (all_hub) {
                                for (i = 0; i < adev->num_vmhubs; i++)
                                        gmc_v9_0_flush_gpu_tlb(adev, vmid,
                                                               i, flush_type);
                        } else {
                                gmc_v9_0_flush_gpu_tlb(adev, vmid,
                                                       AMDGPU_GFXHUB_0, flush_type);
                        }
                        break;
                }
        }

        return 0;
}

static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
                                            unsigned vmid, uint64_t pd_addr)
{
        bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(ring->adev, ring->funcs->vmhub);
        struct amdgpu_device *adev = ring->adev;
        struct amdgpu_vmhub *hub = &adev->vmhub[ring->funcs->vmhub];
        uint32_t req = gmc_v9_0_get_invalidate_req(vmid, 0);
        unsigned eng = ring->vm_inv_eng;

        /*
         * The GPUVM invalidate acknowledge state may be lost across a
         * power-gating cycle, so acquire the semaphore before the
         * invalidation and release it afterward to avoid entering a
         * power-gated state while the invalidation is in flight.
         */

        /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
        if (use_semaphore)
                /* a read return value of 1 means semaphore acquire */
                amdgpu_ring_emit_reg_wait(ring,
                                          hub->vm_inv_eng0_sem + eng, 0x1, 0x1);

        amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid),
                              lower_32_bits(pd_addr));

        amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
                              upper_32_bits(pd_addr));

        amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng,
                                            hub->vm_inv_eng0_ack + eng,
                                            req, 1 << vmid);

        /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
        if (use_semaphore)
                /*
                 * add semaphore release after invalidation,
                 * write with 0 means semaphore release
                 */
                amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem + eng, 0);

        return pd_addr;
}

static void gmc_v9_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid,
                                        unsigned pasid)
{
        struct amdgpu_device *adev = ring->adev;
        uint32_t reg;

        /* Do nothing because there's no lut register for mmhub1. */
        if (ring->funcs->vmhub == AMDGPU_MMHUB_1)
                return;

        if (ring->funcs->vmhub == AMDGPU_GFXHUB_0)
                reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid;
        else
                reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid;

        amdgpu_ring_emit_wreg(ring, reg, pasid);
}

/*
 * PTE format on VEGA 10:
 * 63:59 reserved
 * 58:57 mtype
 * 56 F
 * 55 L
 * 54 P
 * 53 SW
 * 52 T
 * 50:48 reserved
 * 47:12 4k physical page base address
 * 11:7 fragment
 * 6 write
 * 5 read
 * 4 exe
 * 3 Z
 * 2 snooped
 * 1 system
 * 0 valid
 *
 * PDE format on VEGA 10:
 * 63:59 block fragment size
 * 58:55 reserved
 * 54 P
 * 53:48 reserved
 * 47:6 physical base address of PD or PTE
 * 5:3 reserved
 * 2 C
 * 1 system
 * 0 valid
 */

static uint64_t gmc_v9_0_map_mtype(struct amdgpu_device *adev, uint32_t flags)
{
        switch (flags) {
        case AMDGPU_VM_MTYPE_DEFAULT:
                return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
        case AMDGPU_VM_MTYPE_NC:
                return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
        case AMDGPU_VM_MTYPE_WC:
                return AMDGPU_PTE_MTYPE_VG10(MTYPE_WC);
        case AMDGPU_VM_MTYPE_RW:
                return AMDGPU_PTE_MTYPE_VG10(MTYPE_RW);
        case AMDGPU_VM_MTYPE_CC:
                return AMDGPU_PTE_MTYPE_VG10(MTYPE_CC);
        case AMDGPU_VM_MTYPE_UC:
                return AMDGPU_PTE_MTYPE_VG10(MTYPE_UC);
        default:
                return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
        }
}

static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level,
                                uint64_t *addr, uint64_t *flags)
{
        if (!(*flags & AMDGPU_PDE_PTE) && !(*flags & AMDGPU_PTE_SYSTEM))
                *addr = adev->vm_manager.vram_base_offset + *addr -
                        adev->gmc.vram_start;
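        /* The mask enforces a 64-byte aligned address within 48 bits. */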
        BUG_ON(*addr & 0xFFFF00000000003FULL);

        if (!adev->gmc.translate_further)
                return;

        if (level == AMDGPU_VM_PDB1) {
                /* Set the block fragment size */
                if (!(*flags & AMDGPU_PDE_PTE))
                        *flags |= AMDGPU_PDE_BFS(0x9);

        } else if (level == AMDGPU_VM_PDB0) {
                if (*flags & AMDGPU_PDE_PTE)
                        *flags &= ~AMDGPU_PDE_PTE;
                else
                        *flags |= AMDGPU_PTE_TF;
        }
}

static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev,
                                struct amdgpu_bo_va_mapping *mapping,
                                uint64_t *flags)
{
        *flags &= ~AMDGPU_PTE_EXECUTABLE;
        *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;

        *flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK;
        *flags |= mapping->flags & AMDGPU_PTE_MTYPE_VG10_MASK;

        if (mapping->flags & AMDGPU_PTE_PRT) {
                *flags |= AMDGPU_PTE_PRT;
                *flags &= ~AMDGPU_PTE_VALID;
        }

        if (adev->asic_type == CHIP_ARCTURUS &&
            !(*flags & AMDGPU_PTE_SYSTEM) &&
            mapping->bo_va->is_xgmi)
                *flags |= AMDGPU_PTE_SNOOPED;
}

static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = {
        .flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb,
        .flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid,
        .emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb,
        .emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping,
        .map_mtype = gmc_v9_0_map_mtype,
        .get_vm_pde = gmc_v9_0_get_vm_pde,
        .get_vm_pte = gmc_v9_0_get_vm_pte
};

static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device *adev)
{
        adev->gmc.gmc_funcs = &gmc_v9_0_gmc_funcs;
}

static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev)
{
        switch (adev->asic_type) {
        case CHIP_VEGA10:
                adev->umc.funcs = &umc_v6_0_funcs;
                break;
        case CHIP_VEGA20:
                adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM;
                adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM;
                adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
                adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_VG20;
                adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0];
                adev->umc.funcs = &umc_v6_1_funcs;
                break;
        case CHIP_ARCTURUS:
                adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM;
                adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM;
                adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
                adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_ARCT;
                adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0];
                adev->umc.funcs = &umc_v6_1_funcs;
                break;
        default:
                break;
        }
}

static void gmc_v9_0_set_mmhub_funcs(struct amdgpu_device *adev)
{
        switch (adev->asic_type) {
        case CHIP_VEGA20:
                adev->mmhub.funcs = &mmhub_v1_0_funcs;
                break;
        case CHIP_ARCTURUS:
                adev->mmhub.funcs = &mmhub_v9_4_funcs;
                break;
        default:
                break;
        }
}

static int gmc_v9_0_early_init(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        gmc_v9_0_set_gmc_funcs(adev);
        gmc_v9_0_set_irq_funcs(adev);
        gmc_v9_0_set_umc_funcs(adev);
        gmc_v9_0_set_mmhub_funcs(adev);

        adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
        adev->gmc.shared_aperture_end =
                adev->gmc.shared_aperture_start + (4ULL << 30) - 1;
        adev->gmc.private_aperture_start = 0x1000000000000000ULL;
        adev->gmc.private_aperture_end =
                adev->gmc.private_aperture_start + (4ULL << 30) - 1;

        return 0;
}

static bool gmc_v9_0_keep_stolen_memory(struct amdgpu_device *adev)
{

        /*
         * TODO:
         * Currently there is a bug where some memory client outside
         * of the driver writes to first 8M of VRAM on S3 resume,
         * this overrides GART which by default gets placed in first 8M and
         * causes VM_FAULTS once GTT is accessed.
         * Keep the stolen memory reservation while this is not solved.
         * Also check code in gmc_v9_0_get_vbios_fb_size and gmc_v9_0_late_init
         */
        switch (adev->asic_type) {
        case CHIP_VEGA10:
        case CHIP_RAVEN:
        case CHIP_ARCTURUS:
        case CHIP_RENOIR:
                return true;
        case CHIP_VEGA12:
        case CHIP_VEGA20:
        default:
                return false;
        }
}

static int gmc_v9_0_late_init(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int r;

        if (!gmc_v9_0_keep_stolen_memory(adev))
                amdgpu_bo_late_init(adev);

        r = amdgpu_gmc_allocate_vm_inv_eng(adev);
        if (r)
                return r;
        /* Check if ecc is available */
        if (!amdgpu_sriov_vf(adev)) {
                switch (adev->asic_type) {
                case CHIP_VEGA10:
                case CHIP_VEGA20:
                case CHIP_ARCTURUS:
                        r = amdgpu_atomfirmware_mem_ecc_supported(adev);
                        if (!r) {
                                DRM_INFO("ECC is not present.\n");
                                if (adev->df.funcs->enable_ecc_force_par_wr_rmw)
                                        adev->df.funcs->enable_ecc_force_par_wr_rmw(adev, false);
                        } else {
                                DRM_INFO("ECC is active.\n");
                        }

                        r = amdgpu_atomfirmware_sram_ecc_supported(adev);
                        if (!r) {
                                DRM_INFO("SRAM ECC is not present.\n");
                        } else {
                                DRM_INFO("SRAM ECC is active.\n");
                        }
                        break;
                default:
                        break;
                }
        }

        r = amdgpu_gmc_ras_late_init(adev);
        if (r)
                return r;

        return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
}

static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev,
                                       struct amdgpu_gmc *mc)
{
        u64 base = 0;

        if (adev->asic_type == CHIP_ARCTURUS)
                base = mmhub_v9_4_get_fb_location(adev);
        else if (!amdgpu_sriov_vf(adev))
                base = mmhub_v1_0_get_fb_location(adev);

        /* add the xgmi offset of the physical node */
        base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
        amdgpu_gmc_vram_location(adev, mc, base);
        amdgpu_gmc_gart_location(adev, mc);
        amdgpu_gmc_agp_location(adev, mc);
        /* base offset of vram pages */
        adev->vm_manager.vram_base_offset = gfxhub_v1_0_get_mc_fb_offset(adev);

        /* XXX: add the xgmi offset of the physical node? */
        adev->vm_manager.vram_base_offset +=
                adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
}

/**
 * gmc_v9_0_mc_init - initialize the memory controller driver params
 *
 * @adev: amdgpu_device pointer
 *
 * Look up the amount of vram, vram width, and decide how to place
 * vram and gart within the GPU's physical address space.
 * Returns 0 for success.
 */
static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
{
        int r;

        /* get_memsize() reports the VRAM size in MB; convert to bytes */
        adev->gmc.mc_vram_size =
                adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL;
        adev->gmc.real_vram_size = adev->gmc.mc_vram_size;

        if (!(adev->flags & AMD_IS_APU)) {
                r = amdgpu_device_resize_fb_bar(adev);
                if (r)
                        return r;
        }
        adev->gmc.aper_base = pci_resource_start(adev->pdev, 0);
        adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);

#ifdef __NetBSD__
        adev->gmc.aper_tag = adev->pdev->pd_pa.pa_memt;
#endif

#ifdef CONFIG_X86_64
        if (adev->flags & AMD_IS_APU) {
                adev->gmc.aper_base = gfxhub_v1_0_get_mc_fb_offset(adev);
                adev->gmc.aper_size = adev->gmc.real_vram_size;
        }
#endif
        /* In case the PCI BAR is larger than the actual amount of vram */
        adev->gmc.visible_vram_size = adev->gmc.aper_size;
        if (adev->gmc.visible_vram_size > adev->gmc.real_vram_size)
                adev->gmc.visible_vram_size = adev->gmc.real_vram_size;

        /* set the gart size */
        if (amdgpu_gart_size == -1) {
                switch (adev->asic_type) {
                case CHIP_VEGA10:  /* all engines support GPUVM */
                case CHIP_VEGA12:  /* all engines support GPUVM */
                case CHIP_VEGA20:
                case CHIP_ARCTURUS:
                default:
                        adev->gmc.gart_size = 512ULL << 20;
                        break;
                case CHIP_RAVEN:   /* DCE SG support */
                case CHIP_RENOIR:
                        adev->gmc.gart_size = 1024ULL << 20;
                        break;
                }
        } else {
                adev->gmc.gart_size = (u64)amdgpu_gart_size << 20;
        }

        gmc_v9_0_vram_gtt_location(adev, &adev->gmc);

        return 0;
}

static int gmc_v9_0_gart_init(struct amdgpu_device *adev)
{
        int r;

        if (adev->gart.bo) {
                WARN(1, "VEGA10 PCIE GART already initialized\n");
                return 0;
        }
        /* Initialize common gart structure */
        r = amdgpu_gart_init(adev);
        if (r)
                return r;
        adev->gart.table_size = adev->gart.num_gpu_pages * 8;
        adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_VG10(MTYPE_UC) |
                                    AMDGPU_PTE_EXECUTABLE;
        return amdgpu_gart_table_vram_alloc(adev);
}

static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
{
        u32 d1vga_control;
        unsigned size;

        /*
         * TODO Remove once GART corruption is resolved
         * Check related code in gmc_v9_0_sw_fini
         */
        if (gmc_v9_0_keep_stolen_memory(adev))
                return 9 * 1024 * 1024;

        d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL);
        if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
                size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */
        } else {
                u32 viewport;

                switch (adev->asic_type) {
                case CHIP_RAVEN:
                case CHIP_RENOIR:
                        viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION);
                        size = (REG_GET_FIELD(viewport,
                                              HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT) *
                                REG_GET_FIELD(viewport,
                                              HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_WIDTH) *
                                4);
                        break;
                case CHIP_VEGA10:
                case CHIP_VEGA12:
                case CHIP_VEGA20:
                default:
                        viewport = RREG32_SOC15(DCE, 0, mmSCL0_VIEWPORT_SIZE);
                        size = (REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, VIEWPORT_HEIGHT) *
                                REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, VIEWPORT_WIDTH) *
                                4);
                        break;
                }
        }
        /* return 0 if the pre-OS buffer uses up most of vram */
        if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024))
                return 0;

        return size;
}

static int gmc_v9_0_sw_init(void *handle)
{
        int r, vram_width = 0, vram_type = 0, vram_vendor = 0;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        gfxhub_v1_0_init(adev);
        if (adev->asic_type == CHIP_ARCTURUS)
                mmhub_v9_4_init(adev);
        else
                mmhub_v1_0_init(adev);

        spin_lock_init(&adev->gmc.invalidate_lock);

        r = amdgpu_atomfirmware_get_vram_info(adev,
                &vram_width, &vram_type, &vram_vendor);
        if (amdgpu_sriov_vf(adev))
                /*
                 * For Vega10 SR-IOV, vram_width can't be read from ATOM as
                 * on RAVEN, and the DF related registers are not readable;
                 * hardcoding seems to be the only way to set the correct
                 * vram_width.
                 */
                adev->gmc.vram_width = 2048;
        else if (amdgpu_emu_mode != 1)
                adev->gmc.vram_width = vram_width;

        if (!adev->gmc.vram_width) {
                int chansize, numchan;

                /* hbm memory channel size */
                if (adev->flags & AMD_IS_APU)
                        chansize = 64;
                else
                        chansize = 128;

                numchan = adev->df.funcs->get_hbm_channel_number(adev);
                adev->gmc.vram_width = numchan * chansize;
        }

        adev->gmc.vram_type = vram_type;
        adev->gmc.vram_vendor = vram_vendor;
        switch (adev->asic_type) {
        case CHIP_RAVEN:
                adev->num_vmhubs = 2;

                if (adev->rev_id == 0x0 || adev->rev_id == 0x1) {
                        amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
                } else {
                        /* vm_size is 128TB + 512GB for legacy 3-level page support */
                        amdgpu_vm_adjust_size(adev, 128 * 1024 + 512, 9, 2, 48);
                        adev->gmc.translate_further =
                                adev->vm_manager.num_level > 1;
                }
                break;
        case CHIP_VEGA10:
        case CHIP_VEGA12:
        case CHIP_VEGA20:
        case CHIP_RENOIR:
                adev->num_vmhubs = 2;

                /*
                 * To fulfill 4-level page support,
                 * vm size is 256TB (48bit), maximum size of Vega10,
                 * block size 512 (9bit)
                 */
                /* sriov restrict max_pfn below AMDGPU_GMC_HOLE */
                if (amdgpu_sriov_vf(adev))
                        amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 47);
                else
                        amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
                break;
        case CHIP_ARCTURUS:
                adev->num_vmhubs = 3;

                /* Keep the vm size same with Vega20 */
                amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
                break;
        default:
                break;
        }

        /* This interrupt is VMC page fault.*/
        r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC, VMC_1_0__SRCID__VM_FAULT,
                              &adev->gmc.vm_fault);
        if (r)
                return r;

        if (adev->asic_type == CHIP_ARCTURUS) {
                r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC1, VMC_1_0__SRCID__VM_FAULT,
                                      &adev->gmc.vm_fault);
                if (r)
                        return r;
        }

        r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UTCL2, UTCL2_1_0__SRCID__FAULT,
                              &adev->gmc.vm_fault);

        if (r)
                return r;

        if (!amdgpu_sriov_vf(adev)) {
                /* interrupt sent to DF. */
                r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DF, 0,
                                      &adev->gmc.ecc_irq);
                if (r)
                        return r;
        }

        /* Set the internal MC address mask
         * This is the max address of the GPU's
         * internal address space.
         */
        adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */

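        /*
         * System memory addressed by the GPU (GART pages, ring buffers,
         * page tables) is assumed to be limited to 44-bit physical
         * addresses on these parts, hence the 44-bit DMA mask below.
         */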
#ifdef __NetBSD__
        r = drm_limit_dma_space(adev->ddev, 0, DMA_BIT_MASK(44));
#else
        r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44));
#endif
        if (r) {
                printk(KERN_WARNING "amdgpu: No suitable DMA available.\n");
                return r;
        }
        adev->need_swiotlb = drm_need_swiotlb(44);

        if (adev->gmc.xgmi.supported) {
                r = gfxhub_v1_1_get_xgmi_info(adev);
                if (r)
                        return r;
        }

        r = gmc_v9_0_mc_init(adev);
        if (r)
                return r;

        adev->gmc.stolen_size = gmc_v9_0_get_vbios_fb_size(adev);

        /* Memory manager */
        r = amdgpu_bo_init(adev);
        if (r)
                return r;

        r = gmc_v9_0_gart_init(adev);
        if (r)
                return r;

        /*
         * number of VMs
         * VMID 0 is reserved for System
         * amdgpu graphics/compute will use VMIDs 1-7
         * amdkfd will use VMIDs 8-15
         */
        adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids = AMDGPU_NUM_OF_VMIDS;
        adev->vm_manager.id_mgr[AMDGPU_MMHUB_0].num_ids = AMDGPU_NUM_OF_VMIDS;
        adev->vm_manager.id_mgr[AMDGPU_MMHUB_1].num_ids = AMDGPU_NUM_OF_VMIDS;

        amdgpu_vm_manager_init(adev);

        return 0;
}

static int gmc_v9_0_sw_fini(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        void *stolen_vga_buf;

        amdgpu_gmc_ras_fini(adev);
        amdgpu_gem_force_release(adev);
        amdgpu_vm_manager_fini(adev);

        if (gmc_v9_0_keep_stolen_memory(adev))
                amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, &stolen_vga_buf);

        amdgpu_gart_table_vram_free(adev);
        amdgpu_bo_fini(adev);
        amdgpu_gart_fini(adev);

        spin_lock_destroy(&adev->gmc.invalidate_lock);

        return 0;
}

static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev)
{

        switch (adev->asic_type) {
        case CHIP_VEGA10:
                if (amdgpu_sriov_vf(adev))
                        break;
                /* fall through */
        case CHIP_VEGA20:
                soc15_program_register_sequence(adev,
                                                golden_settings_mmhub_1_0_0,
                                                ARRAY_SIZE(golden_settings_mmhub_1_0_0));
                soc15_program_register_sequence(adev,
                                                golden_settings_athub_1_0_0,
                                                ARRAY_SIZE(golden_settings_athub_1_0_0));
                break;
        case CHIP_VEGA12:
                break;
        case CHIP_RAVEN:
                /* TODO for renoir */
                soc15_program_register_sequence(adev,
                                                golden_settings_athub_1_0_0,
                                                ARRAY_SIZE(golden_settings_athub_1_0_0));
                break;
        default:
                break;
        }
}

/**
 * gmc_v9_0_gart_enable - gart enable
 *
 * @adev: amdgpu_device pointer
 */
static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
{
        int r;

        if (adev->gart.bo == NULL) {
                dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
                return -EINVAL;
        }
        r = amdgpu_gart_table_vram_pin(adev);
        if (r)
                return r;

        r = gfxhub_v1_0_gart_enable(adev);
        if (r)
                return r;

        if (adev->asic_type == CHIP_ARCTURUS)
                r = mmhub_v9_4_gart_enable(adev);
        else
                r = mmhub_v1_0_gart_enable(adev);
        if (r)
                return r;

        DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
                 (unsigned)(adev->gmc.gart_size >> 20),
                 (unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo));
        adev->gart.ready = true;
        return 0;
}

static int gmc_v9_0_hw_init(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        bool value;
        int r, i;
        u32 tmp;

        /* The sequence of these two function calls matters.*/
        gmc_v9_0_init_golden_registers(adev);

        if (adev->mode_info.num_crtc) {
                if (adev->asic_type != CHIP_ARCTURUS) {
                        /* Lockout access through VGA aperture*/
                        WREG32_FIELD15(DCE, 0, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1);

                        /* disable VGA render */
                        WREG32_FIELD15(DCE, 0, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0);
                }
        }

        amdgpu_device_program_register_sequence(adev,
                                                golden_settings_vega10_hdp,
                                                ARRAY_SIZE(golden_settings_vega10_hdp));

        switch (adev->asic_type) {
        case CHIP_RAVEN:
                /* TODO for renoir */
                mmhub_v1_0_update_power_gating(adev, true);
                break;
        case CHIP_ARCTURUS:
                WREG32_FIELD15(HDP, 0, HDP_MMHUB_CNTL, HDP_MMHUB_GCC, 1);
                break;
        default:
                break;
        }

        WREG32_FIELD15(HDP, 0, HDP_MISC_CNTL, FLUSH_INVALIDATE_CACHE, 1);

        tmp = RREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL);
        WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp);

        WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE, (adev->gmc.vram_start >> 8));
        WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE_HI, (adev->gmc.vram_start >> 40));

        /* After HDP is initialized, flush HDP.*/
        adev->nbio.funcs->hdp_flush(adev, NULL);

        if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS)
                value = false;
        else
                value = true;

        if (!amdgpu_sriov_vf(adev)) {
                gfxhub_v1_0_set_fault_enable_default(adev, value);
                if (adev->asic_type == CHIP_ARCTURUS)
                        mmhub_v9_4_set_fault_enable_default(adev, value);
                else
                        mmhub_v1_0_set_fault_enable_default(adev, value);
        }
        for (i = 0; i < adev->num_vmhubs; ++i)
                gmc_v9_0_flush_gpu_tlb(adev, 0, i, 0);

        if (adev->umc.funcs && adev->umc.funcs->init_registers)
                adev->umc.funcs->init_registers(adev);

        r = gmc_v9_0_gart_enable(adev);

        return r;
}

/**
 * gmc_v9_0_gart_disable - gart disable
 *
 * @adev: amdgpu_device pointer
 *
 * This disables all VM page table.
 */
static void gmc_v9_0_gart_disable(struct amdgpu_device *adev)
{
        gfxhub_v1_0_gart_disable(adev);
        if (adev->asic_type == CHIP_ARCTURUS)
                mmhub_v9_4_gart_disable(adev);
        else
                mmhub_v1_0_gart_disable(adev);
        amdgpu_gart_table_vram_unpin(adev);
}

static int gmc_v9_0_hw_fini(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (amdgpu_sriov_vf(adev)) {
                /* full access mode, so don't touch any GMC register */
                DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
                return 0;
        }

        amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0);
        amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
        gmc_v9_0_gart_disable(adev);

        return 0;
}

static int gmc_v9_0_suspend(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        return gmc_v9_0_hw_fini(adev);
}

static int gmc_v9_0_resume(void *handle)
{
        int r;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        r = gmc_v9_0_hw_init(adev);
        if (r)
                return r;

        amdgpu_vmid_reset_all(adev);

        return 0;
}

static bool gmc_v9_0_is_idle(void *handle)
{
        /* MC is always ready in GMC v9.*/
        return true;
}

static int gmc_v9_0_wait_for_idle(void *handle)
{
        /* There is no need to wait for MC idle in GMC v9.*/
        return 0;
}

static int gmc_v9_0_soft_reset(void *handle)
{
        /* XXX for emulation.*/
        return 0;
}

static int gmc_v9_0_set_clockgating_state(void *handle,
                                          enum amd_clockgating_state state)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (adev->asic_type == CHIP_ARCTURUS)
                mmhub_v9_4_set_clockgating(adev, state);
        else
                mmhub_v1_0_set_clockgating(adev, state);

        athub_v1_0_set_clockgating(adev, state);

        return 0;
}

static void gmc_v9_0_get_clockgating_state(void *handle, u32 *flags)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (adev->asic_type == CHIP_ARCTURUS)
                mmhub_v9_4_get_clockgating(adev, flags);
        else
                mmhub_v1_0_get_clockgating(adev, flags);

        athub_v1_0_get_clockgating(adev, flags);
}

static int gmc_v9_0_set_powergating_state(void *handle,
                                          enum amd_powergating_state state)
{
        return 0;
}

const struct amd_ip_funcs gmc_v9_0_ip_funcs = {
        .name = "gmc_v9_0",
        .early_init = gmc_v9_0_early_init,
        .late_init = gmc_v9_0_late_init,
        .sw_init = gmc_v9_0_sw_init,
        .sw_fini = gmc_v9_0_sw_fini,
        .hw_init = gmc_v9_0_hw_init,
        .hw_fini = gmc_v9_0_hw_fini,
        .suspend = gmc_v9_0_suspend,
        .resume = gmc_v9_0_resume,
        .is_idle = gmc_v9_0_is_idle,
        .wait_for_idle = gmc_v9_0_wait_for_idle,
        .soft_reset = gmc_v9_0_soft_reset,
        .set_clockgating_state = gmc_v9_0_set_clockgating_state,
        .set_powergating_state = gmc_v9_0_set_powergating_state,
        .get_clockgating_state = gmc_v9_0_get_clockgating_state,
};

const struct amdgpu_ip_block_version gmc_v9_0_ip_block =
{
        .type = AMD_IP_BLOCK_TYPE_GMC,
        .major = 9,
        .minor = 0,
        .rev = 0,
        .funcs = &gmc_v9_0_ip_funcs,
};