/* $NetBSD: amdgpu_gmc.c,v 1.3 2021/12/19 12:02:39 riastradh Exp $ */

/*
 * Copyright 2018 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: amdgpu_gmc.c,v 1.3 2021/12/19 12:02:39 riastradh Exp $");

#include <linux/io-64-nonatomic-lo-hi.h>

#include "amdgpu.h"
#include "amdgpu_ras.h"
#include "amdgpu_xgmi.h"

#include <linux/nbsd-namespace.h>
/**
 * amdgpu_gmc_get_pde_for_bo - get the PDE for a BO
 *
 * @bo: the BO to get the PDE for
 * @level: the level in the PD hierarchy
 * @addr: resulting address
 * @flags: resulting flags
 *
 * Get the address and flags to be used for a PDE (Page Directory Entry).
 */
void amdgpu_gmc_get_pde_for_bo(struct amdgpu_bo *bo, int level,
			       uint64_t *addr, uint64_t *flags)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
	struct ttm_dma_tt *ttm;

	switch (bo->tbo.mem.mem_type) {
	case TTM_PL_TT:
		ttm = container_of(bo->tbo.ttm, struct ttm_dma_tt, ttm);
#ifdef __NetBSD__
		*addr = ttm->dma_address->dm_segs[0].ds_addr;
#else
		*addr = ttm->dma_address[0];
#endif
		break;
	case TTM_PL_VRAM:
		*addr = amdgpu_bo_gpu_offset(bo);
		break;
	default:
		*addr = 0;
		break;
	}
	*flags = amdgpu_ttm_tt_pde_flags(bo->tbo.ttm, &bo->tbo.mem);
	amdgpu_gmc_get_vm_pde(adev, level, addr, flags);
}

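/*
 * Illustrative sketch (not compiled): a typical caller resolving the
 * entry for a child page directory; "pd_bo", "parent_level", "pde" and
 * "flags" are hypothetical names for this example.
 */
#if 0
	uint64_t pde, flags;

	amdgpu_gmc_get_pde_for_bo(pd_bo, parent_level, &pde, &flags);
	pde |= flags;
#endif
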
/**
 * amdgpu_gmc_pd_addr - return the address of the root directory
 *
 * @bo: the BO of the page directory
 */
uint64_t amdgpu_gmc_pd_addr(struct amdgpu_bo *bo)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
	uint64_t pd_addr;

	/* TODO: move that into ASIC specific code */
	if (adev->asic_type >= CHIP_VEGA10) {
		uint64_t flags = AMDGPU_PTE_VALID;

		amdgpu_gmc_get_pde_for_bo(bo, -1, &pd_addr, &flags);
		pd_addr |= flags;
	} else {
		pd_addr = amdgpu_bo_gpu_offset(bo);
	}
	return pd_addr;
}

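/*
 * Illustrative sketch (not compiled): the VM code uses this to obtain
 * the value programmed as a hub's page table base, for example:
 *
 *	uint64_t pd_addr = amdgpu_gmc_pd_addr(vm->root.base.bo);
 *
 * On CHIP_VEGA10 and later the low bits of the result carry PDE flags
 * such as AMDGPU_PTE_VALID; older ASICs get the plain GPU offset.
 */
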
/**
 * amdgpu_gmc_set_pte_pde - update the page tables using the CPU
 *
 * @adev: amdgpu_device pointer
 * @cpu_pt_addr: cpu address of the page table
 * @gpu_page_idx: entry in the page table to update
 * @addr: destination address to write into the pte/pde
 * @flags: access flags
 *
 * Update the page tables using the CPU.
 */
int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr,
			   uint32_t gpu_page_idx, uint64_t addr,
			   uint64_t flags)
{
#ifndef __NetBSD__
	void __iomem *ptr = (void *)cpu_pt_addr;
#endif
	uint64_t value;

	/*
	 * The following is for PTE only. GART does not have PDEs.
	 */
	value = addr & 0x0000FFFFFFFFF000ULL;
	value |= flags;
#ifdef __NetBSD__
	/* Caller must issue appropriate bus_dmamap_sync before use. */
	((uint64_t *)cpu_pt_addr)[gpu_page_idx] = value;
#else
	writeq(value, ptr + (gpu_page_idx * 8));
#endif
	return 0;
}

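/*
 * Illustrative sketch (not compiled): filling a run of GART entries
 * through the CPU mapping of the page table; "pt_cpu", "first",
 * "npages", "dma_base" and "flags" are hypothetical names.  On NetBSD
 * the caller must also bus_dmamap_sync() the table before the GPU
 * uses it.
 */
#if 0
	uint32_t i;

	for (i = 0; i < npages; i++)
		amdgpu_gmc_set_pte_pde(adev, pt_cpu, first + i,
				       dma_base + (uint64_t)i * PAGE_SIZE,
				       flags);
#endif
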
/**
 * amdgpu_gmc_agp_addr - return the address in the AGP address space
 *
 * @bo: TTM BO which needs the address, must be in GTT domain
 *
 * Tries to figure out how to access the BO through the AGP aperture. Returns
 * AMDGPU_BO_INVALID_OFFSET if that is not possible.
 */
uint64_t amdgpu_gmc_agp_addr(struct ttm_buffer_object *bo)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
	struct ttm_dma_tt *ttm;
	resource_size_t addr;

	if (bo->num_pages != 1 || bo->ttm->caching_state == tt_cached)
		return AMDGPU_BO_INVALID_OFFSET;

	ttm = container_of(bo->ttm, struct ttm_dma_tt, ttm);
#ifdef __NetBSD__
	addr = ttm->dma_address->dm_segs[0].ds_addr;
#else
	addr = ttm->dma_address[0];
#endif
	if (addr + PAGE_SIZE >= adev->gmc.agp_size)
		return AMDGPU_BO_INVALID_OFFSET;

	return adev->gmc.agp_start + addr;
}

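/*
 * Worked example (hypothetical numbers): a single uncached GTT page
 * with DMA address 0x1000 and an agp_size of 16 GiB is reachable at
 * agp_start + 0x1000; a cached or multi-page BO gets
 * AMDGPU_BO_INVALID_OFFSET and has to be mapped through the GART
 * instead.
 */
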
/**
 * amdgpu_gmc_vram_location - try to find VRAM location
 *
 * @adev: amdgpu device structure holding all necessary information
 * @mc: memory controller structure holding memory information
 * @base: base address at which to put VRAM
 *
 * Function will try to place VRAM at the base address provided
 * as parameter.
 */
void amdgpu_gmc_vram_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc,
			      u64 base)
{
	uint64_t limit = (uint64_t)amdgpu_vram_limit << 20;

	mc->vram_start = base;
	mc->vram_end = mc->vram_start + mc->mc_vram_size - 1;
	if (limit && limit < mc->real_vram_size)
		mc->real_vram_size = limit;

	if (mc->xgmi.num_physical_nodes == 0) {
		mc->fb_start = mc->vram_start;
		mc->fb_end = mc->vram_end;
	}
	dev_info(adev->dev, "VRAM: %"PRIu64"M 0x%016"PRIX64" - 0x%016"PRIX64" (%"PRIu64"M used)\n",
		 mc->mc_vram_size >> 20, mc->vram_start,
		 mc->vram_end, mc->real_vram_size >> 20);
}

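/*
 * Worked example (hypothetical numbers): an 8 GiB board placed at
 * base 0 gets vram_start = 0x0 and vram_end = 0x1FFFFFFFF; booting
 * with amdgpu.vram_limit=4096 would clamp real_vram_size to 4 GiB
 * while leaving the aperture itself unchanged.
 */
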
/**
 * amdgpu_gmc_gart_location - try to find GART location
 *
 * @adev: amdgpu device structure holding all necessary information
 * @mc: memory controller structure holding memory information
 *
 * Function will try to place GART before or after VRAM.
 *
 * If the GART size is bigger than the space left, we adjust the GART size.
 * Thus the function will never fail.
 */
void amdgpu_gmc_gart_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
{
	const uint64_t four_gb = 0x100000000ULL;
	u64 size_af, size_bf;
	/* To avoid the hole, limit the max mc address to AMDGPU_GMC_HOLE_START */
	u64 max_mc_address = min(adev->gmc.mc_mask, AMDGPU_GMC_HOLE_START - 1);

	mc->gart_size += adev->pm.smu_prv_buffer_size;

	/* VCE doesn't like it when BOs cross a 4GB segment, so align
	 * the GART base on a 4GB boundary as well.
	 */
	size_bf = mc->fb_start;
	size_af = max_mc_address + 1 - ALIGN(mc->fb_end + 1, four_gb);

	if (mc->gart_size > max(size_bf, size_af)) {
		dev_warn(adev->dev, "limiting GART\n");
		mc->gart_size = max(size_bf, size_af);
	}

	if ((size_bf >= mc->gart_size && size_bf < size_af) ||
	    (size_af < mc->gart_size))
		mc->gart_start = 0;
	else
		mc->gart_start = max_mc_address - mc->gart_size + 1;

	mc->gart_start &= ~(four_gb - 1);
	mc->gart_end = mc->gart_start + mc->gart_size - 1;
	dev_info(adev->dev, "GART: %"PRIu64"M 0x%016"PRIX64" - 0x%016"PRIX64"\n",
		 mc->gart_size >> 20, mc->gart_start, mc->gart_end);
}

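/*
 * Worked example (hypothetical numbers): with the framebuffer at
 * [0, 8 GiB) and a 512 MiB GART, size_bf is 0 and size_af spans from
 * the 4 GiB-aligned end of the framebuffer up to max_mc_address, so
 * the GART lands at the top of that range and is then aligned down to
 * a 4 GiB boundary.
 */
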
/**
 * amdgpu_gmc_agp_location - try to find AGP location
 * @adev: amdgpu device structure holding all necessary information
 * @mc: memory controller structure holding memory information
 *
 * Function will try to find a place for the AGP BAR in the MC address
 * space.
 *
 * AGP BAR will be assigned the largest available hole in the address space.
 * Should be called after VRAM and GART locations are set up.
 */
void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
{
	const uint64_t sixteen_gb = 1ULL << 34;
	const uint64_t sixteen_gb_mask = ~(sixteen_gb - 1);
	u64 size_af, size_bf;

	if (amdgpu_sriov_vf(adev)) {
		mc->agp_start = 0xffffffffffff;
		mc->agp_end = 0x0;
		mc->agp_size = 0;

		return;
	}

	if (mc->fb_start > mc->gart_start) {
		size_bf = (mc->fb_start & sixteen_gb_mask) -
			ALIGN(mc->gart_end + 1, sixteen_gb);
		size_af = mc->mc_mask + 1 - ALIGN(mc->fb_end + 1, sixteen_gb);
	} else {
		size_bf = mc->fb_start & sixteen_gb_mask;
		size_af = (mc->gart_start & sixteen_gb_mask) -
			ALIGN(mc->fb_end + 1, sixteen_gb);
	}

	if (size_bf > size_af) {
		mc->agp_start = (mc->fb_start - size_bf) & sixteen_gb_mask;
		mc->agp_size = size_bf;
	} else {
		mc->agp_start = ALIGN(mc->fb_end + 1, sixteen_gb);
		mc->agp_size = size_af;
	}

	mc->agp_end = mc->agp_start + mc->agp_size - 1;
	dev_info(adev->dev, "AGP: %"PRIu64"M 0x%016"PRIX64" - 0x%016"PRIX64"\n",
		 mc->agp_size >> 20, mc->agp_start, mc->agp_end);
}

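/*
 * Worked example (hypothetical numbers): with the framebuffer at
 * [0, 8 GiB) and the GART near the top of the MC space, the else
 * branch applies: size_bf is 0 and size_af is the 16 GiB-aligned gap
 * between the framebuffer end and the GART start, which then becomes
 * the AGP aperture.
 */
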
/**
 * amdgpu_gmc_filter_faults - filter VM faults
 *
 * @adev: amdgpu device structure
 * @addr: address of the VM fault
 * @pasid: PASID of the process causing the fault
 * @timestamp: timestamp of the fault
 *
 * Returns:
 * True if the fault was filtered and should not be processed further.
 * False if the fault is a new one and needs to be handled.
 */
bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr,
			      uint16_t pasid, uint64_t timestamp)
{
	struct amdgpu_gmc *gmc = &adev->gmc;

	uint64_t stamp, key = addr << 4 | pasid;
	struct amdgpu_gmc_fault *fault;
	uint32_t hash;

	/* If we don't have space left in the ring buffer return immediately */
	stamp = max(timestamp, AMDGPU_GMC_FAULT_TIMEOUT + 1) -
		AMDGPU_GMC_FAULT_TIMEOUT;
	if (gmc->fault_ring[gmc->last_fault].timestamp >= stamp)
		return true;

	/* Try to find the fault in the hash */
	hash = hash_64(key, AMDGPU_GMC_FAULT_HASH_ORDER);
	fault = &gmc->fault_ring[gmc->fault_hash[hash].idx];
	while (fault->timestamp >= stamp) {
		uint64_t tmp;

		if (fault->key == key)
			return true;

		tmp = fault->timestamp;
		fault = &gmc->fault_ring[fault->next];

		/* Check if the entry was reused */
		if (fault->timestamp >= tmp)
			break;
	}

	/* Add the fault to the ring */
	fault = &gmc->fault_ring[gmc->last_fault];
	fault->key = key;
	fault->timestamp = timestamp;

	/* And update the hash */
	fault->next = gmc->fault_hash[hash].idx;
	gmc->fault_hash[hash].idx = gmc->last_fault++;
	return false;
}

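/*
 * Illustrative sketch (not compiled): a GMC interrupt handler would
 * consult the filter before doing any expensive fault processing;
 * "entry" stands in for a hypothetical decoded IV ring entry.
 */
#if 0
	if (amdgpu_gmc_filter_faults(adev, addr, entry->pasid,
				     entry->timestamp))
		return 1;	/* fault seen recently, drop the retry */
#endif
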
int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
{
	int r;

	if (adev->umc.funcs && adev->umc.funcs->ras_late_init) {
		r = adev->umc.funcs->ras_late_init(adev);
		if (r)
			return r;
	}

	if (adev->mmhub.funcs && adev->mmhub.funcs->ras_late_init) {
		r = adev->mmhub.funcs->ras_late_init(adev);
		if (r)
			return r;
	}

	return amdgpu_xgmi_ras_late_init(adev);
}

void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
{
	amdgpu_umc_ras_fini(adev);
	amdgpu_mmhub_ras_fini(adev);
	amdgpu_xgmi_ras_fini(adev);
}

/*
 * The latest engine allocation on gfx9/10 is:
 * Engine 2, 3: firmware
 * Engine 0, 1, 4~16: amdgpu ring,
 * subject to change when ring number changes
 * Engine 17: Gart flushes
 */
#define GFXHUB_FREE_VM_INV_ENGS_BITMAP		0x1FFF3
#define MMHUB_FREE_VM_INV_ENGS_BITMAP		0x1FFF3
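/* 0x1FFF3 has bits 0-1 and 4-16 set, matching the free engines above. */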

int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	unsigned vm_inv_engs[AMDGPU_MAX_VMHUBS] =
		{GFXHUB_FREE_VM_INV_ENGS_BITMAP, MMHUB_FREE_VM_INV_ENGS_BITMAP,
		GFXHUB_FREE_VM_INV_ENGS_BITMAP};
	unsigned i;
	unsigned vmhub, inv_eng;

	for (i = 0; i < adev->num_rings; ++i) {
		ring = adev->rings[i];
		vmhub = ring->funcs->vmhub;

		inv_eng = ffs(vm_inv_engs[vmhub]);
		if (!inv_eng) {
			dev_err(adev->dev, "no VM inv eng for ring %s\n",
				ring->name);
			return -EINVAL;
		}

		ring->vm_inv_eng = inv_eng - 1;
		vm_inv_engs[vmhub] &= ~(1 << ring->vm_inv_eng);

		dev_info(adev->dev, "ring %s uses VM inv eng %u on hub %u\n",
			 ring->name, ring->vm_inv_eng, ring->funcs->vmhub);
	}

	return 0;
}