xref: /linux/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c (revision 1e525507)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2021-2023 Intel Corporation
4  * Copyright (C) 2021-2022 Red Hat
5  */
6 
7 #include <drm/drm_managed.h>
8 #include <drm/drm_mm.h>
9 
10 #include <drm/ttm/ttm_device.h>
11 #include <drm/ttm/ttm_placement.h>
12 #include <drm/ttm/ttm_range_manager.h>
13 
14 #include <generated/xe_wa_oob.h>
15 
16 #include "regs/xe_gt_regs.h"
17 #include "regs/xe_regs.h"
18 #include "xe_bo.h"
19 #include "xe_device.h"
20 #include "xe_gt.h"
21 #include "xe_mmio.h"
22 #include "xe_res_cursor.h"
23 #include "xe_sriov.h"
24 #include "xe_ttm_stolen_mgr.h"
25 #include "xe_ttm_vram_mgr.h"
26 #include "xe_wa.h"
27 
/**
 * struct xe_ttm_stolen_mgr - TTM manager state for the stolen-memory region
 *
 * Extends the generic VRAM manager with the offsets needed to locate and
 * CPU-map the stolen (DSM) portion of memory.
 */
struct xe_ttm_stolen_mgr {
	/* Underlying VRAM manager; its .manager member is registered with TTM */
	struct xe_ttm_vram_mgr base;

	/* PCI base offset */
	resource_size_t io_base;
	/* GPU base offset */
	resource_size_t stolen_base;

	/* WC iomapping of the stolen region, or NULL when not CPU-accessible */
	void __iomem *mapping;
};
38 
/* Downcast from the embedded TTM resource manager back to our wrapper */
static inline struct xe_ttm_stolen_mgr *
to_stolen_mgr(struct ttm_resource_manager *man)
{
	return container_of(man, struct xe_ttm_stolen_mgr, base.manager);
}
44 
45 /**
46  * xe_ttm_stolen_cpu_access_needs_ggtt() - If we can't directly CPU access
47  * stolen, can we then fallback to mapping through the GGTT.
48  * @xe: xe device
49  *
50  * Some older integrated platforms don't support reliable CPU access for stolen,
51  * however on such hardware we can always use the mappable part of the GGTT for
52  * CPU access. Check if that's the case for this device.
53  */
54 bool xe_ttm_stolen_cpu_access_needs_ggtt(struct xe_device *xe)
55 {
56 	return GRAPHICS_VERx100(xe) < 1270 && !IS_DGFX(xe);
57 }
58 
/*
 * Detect the stolen region on discrete GPUs: DSM lives at the top of the root
 * tile's VRAM, with its base taken from the DSMBASE register. Returns the
 * usable stolen size in bytes, or 0 on failure. Also fills in
 * mgr->stolen_base and, when CPU-accessible, mgr->io_base.
 */
static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
{
	struct xe_tile *tile = xe_device_get_root_tile(xe);
	struct xe_gt *mmio = xe_root_mmio_gt(xe);
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	u64 stolen_size;
	u64 tile_offset;
	u64 tile_size;

	/* Offset of this tile's VRAM within the device-wide VRAM IO range */
	tile_offset = tile->mem.vram.io_start - xe->mem.vram.io_start;
	tile_size = tile->mem.vram.actual_physical_size;

	/* Use DSM base address instead for stolen memory */
	mgr->stolen_base = (xe_mmio_read64_2x32(mmio, DSMBASE) & BDSM_MASK) - tile_offset;
	/* A DSM base beyond the tile's VRAM means the register contents are bogus */
	if (drm_WARN_ON(&xe->drm, tile_size < mgr->stolen_base))
		return 0;

	/* Stolen occupies everything from the DSM base to the end of the tile */
	stolen_size = tile_size - mgr->stolen_base;

	/* Verify usage fits in the actual resource available */
	if (mgr->stolen_base + stolen_size <= pci_resource_len(pdev, LMEM_BAR))
		mgr->io_base = tile->mem.vram.io_start + mgr->stolen_base;

	/*
	 * There may be few KB of platform dependent reserved memory at the end
	 * of vram which is not part of the DSM. Such reserved memory portion is
	 * always less then DSM granularity so align down the stolen_size to DSM
	 * granularity to accommodate such reserve vram portion.
	 */
	return ALIGN_DOWN(stolen_size, SZ_1M);
}
90 
/*
 * Decode the WOPCM size from the STOLEN_RESERVED register.
 *
 * The WOPCM_SIZE field is an encoded exponent: values 0-3 map directly to
 * (1 << val) MiB (1M..8M); values 5-6 are first decremented, giving 16M/32M.
 * Value 4 and anything above 6 are not handled here (presumably reserved
 * encodings — TODO confirm against the hardware spec) and return 0, which
 * callers treat as a fatal detection failure.
 */
static u32 get_wopcm_size(struct xe_device *xe)
{
	u32 wopcm_size;
	u64 val;

	val = xe_mmio_read64_2x32(xe_root_mmio_gt(xe), STOLEN_RESERVED);
	val = REG_FIELD_GET64(WOPCM_SIZE_MASK, val);

	switch (val) {
	case 0x5 ... 0x6:
		val--;
		fallthrough;
	case 0x0 ... 0x3:
		wopcm_size = (1U << val) * SZ_1M;
		break;
	default:
		WARN(1, "Missing case wopcm_size=%llx\n", val);
		wopcm_size = 0;
	}

	return wopcm_size;
}
113 
/*
 * Detect the stolen region on integrated platforms (graphics >= 12.70),
 * where stolen sits behind BAR 2 after the 8M GGTT. Returns the usable
 * stolen size in bytes, or 0 on any detection failure. Fills in
 * mgr->stolen_base and mgr->io_base.
 */
static u32 detect_bar2_integrated(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
{
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	struct xe_gt *media_gt = xe_device_get_root_tile(xe)->media_gt;
	u32 stolen_size, wopcm_size;
	u32 ggc, gms;

	ggc = xe_mmio_read32(xe_root_mmio_gt(xe), GGC);

	/*
	 * Check GGMS: it should be fixed 0x3 (8MB), which corresponds to the
	 * GTT size
	 */
	if (drm_WARN_ON(&xe->drm, (ggc & GGMS_MASK) != GGMS_MASK))
		return 0;

	/*
	 * Graphics >= 1270 uses the offset to the GSMBASE as address in the
	 * PTEs, together with the DM flag being set. Previously there was no
	 * such flag so the address was the io_base.
	 *
	 * DSMBASE = GSMBASE + 8MB
	 */
	mgr->stolen_base = SZ_8M;
	mgr->io_base = pci_resource_start(pdev, 2) + mgr->stolen_base;

	/* return valid GMS value, -EIO if invalid */
	gms = REG_FIELD_GET(GMS_MASK, ggc);
	switch (gms) {
	/* Low encodings: multiples of 32M; high encodings (0xf0+): multiples of 4M */
	case 0x0 ... 0x04:
		stolen_size = gms * 32 * SZ_1M;
		break;
	case 0xf0 ... 0xfe:
		stolen_size = (gms - 0xf0 + 1) * 4 * SZ_1M;
		break;
	default:
		return 0;
	}

	/* Carve out the top of DSM as it contains the reserved WOPCM region */
	wopcm_size = get_wopcm_size(xe);
	if (drm_WARN_ON(&xe->drm, !wopcm_size))
		return 0;

	stolen_size -= wopcm_size;

	if (media_gt && XE_WA(media_gt, 14019821291)) {
		/* GSCPSMI base address; low 6 bits are not part of the address */
		u64 gscpsmi_base = xe_mmio_read64_2x32(media_gt, GSCPSMI_BASE)
			& ~GENMASK_ULL(5, 0);

		/*
		 * This workaround is primarily implemented by the BIOS.  We
		 * just need to figure out whether the BIOS has applied the
		 * workaround (meaning the programmed address falls within
		 * the DSM) and, if so, reserve that part of the DSM to
		 * prevent accidental reuse.  The DSM location should be just
		 * below the WOPCM.
		 */
		if (gscpsmi_base >= mgr->io_base &&
		    gscpsmi_base < mgr->io_base + stolen_size) {
			xe_gt_dbg(media_gt,
				  "Reserving %llu bytes of DSM for Wa_14019821291\n",
				  mgr->io_base + stolen_size - gscpsmi_base);
			stolen_size = gscpsmi_base - mgr->io_base;
		}
	}

	/* GGTT (8M) + stolen must fit within what BAR 2 actually exposes */
	if (drm_WARN_ON(&xe->drm, stolen_size + SZ_8M > pci_resource_len(pdev, 2)))
		return 0;

	return stolen_size;
}
186 
187 extern struct resource intel_graphics_stolen_res;
188 
189 static u64 detect_stolen(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
190 {
191 #ifdef CONFIG_X86
192 	/* Map into GGTT */
193 	mgr->io_base = pci_resource_start(to_pci_dev(xe->drm.dev), 2);
194 
195 	/* Stolen memory is x86 only */
196 	mgr->stolen_base = intel_graphics_stolen_res.start;
197 	return resource_size(&intel_graphics_stolen_res);
198 #else
199 	return 0;
200 #endif
201 }
202 
203 void xe_ttm_stolen_mgr_init(struct xe_device *xe)
204 {
205 	struct xe_ttm_stolen_mgr *mgr = drmm_kzalloc(&xe->drm, sizeof(*mgr), GFP_KERNEL);
206 	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
207 	u64 stolen_size, io_size, pgsize;
208 	int err;
209 
210 	if (IS_SRIOV_VF(xe))
211 		stolen_size = 0;
212 	else if (IS_DGFX(xe))
213 		stolen_size = detect_bar2_dgfx(xe, mgr);
214 	else if (GRAPHICS_VERx100(xe) >= 1270)
215 		stolen_size = detect_bar2_integrated(xe, mgr);
216 	else
217 		stolen_size = detect_stolen(xe, mgr);
218 
219 	if (!stolen_size) {
220 		drm_dbg_kms(&xe->drm, "No stolen memory support\n");
221 		return;
222 	}
223 
224 	pgsize = xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K;
225 	if (pgsize < PAGE_SIZE)
226 		pgsize = PAGE_SIZE;
227 
228 	/*
229 	 * We don't try to attempt partial visible support for stolen vram,
230 	 * since stolen is always at the end of vram, and the BAR size is pretty
231 	 * much always 256M, with small-bar.
232 	 */
233 	io_size = 0;
234 	if (mgr->io_base && !xe_ttm_stolen_cpu_access_needs_ggtt(xe))
235 		io_size = stolen_size;
236 
237 	err = __xe_ttm_vram_mgr_init(xe, &mgr->base, XE_PL_STOLEN, stolen_size,
238 				     io_size, pgsize);
239 	if (err) {
240 		drm_dbg_kms(&xe->drm, "Stolen mgr init failed: %i\n", err);
241 		return;
242 	}
243 
244 	drm_dbg_kms(&xe->drm, "Initialized stolen memory support with %llu bytes\n",
245 		    stolen_size);
246 
247 	if (io_size)
248 		mgr->mapping = devm_ioremap_wc(&pdev->dev, mgr->io_base, io_size);
249 }
250 
/**
 * xe_ttm_stolen_io_offset() - CPU-visible IO address for a stolen BO
 * @bo: buffer object placed in stolen memory
 * @offset: byte offset within the BO
 *
 * On platforms needing the GGTT fallback the address is formed from the BO's
 * GGTT address; otherwise it is the BO's physical position within the stolen
 * region plus the region's PCI IO base.
 */
u64 xe_ttm_stolen_io_offset(struct xe_bo *bo, u32 offset)
{
	struct xe_device *xe = xe_bo_device(bo);
	struct ttm_resource_manager *ttm_mgr = ttm_manager_type(&xe->ttm, XE_PL_STOLEN);
	struct xe_ttm_stolen_mgr *mgr = to_stolen_mgr(ttm_mgr);
	struct xe_res_cursor cur;

	/* Callers must not ask for an IO offset when stolen has no CPU window */
	XE_WARN_ON(!mgr->io_base);

	if (xe_ttm_stolen_cpu_access_needs_ggtt(xe))
		return mgr->io_base + xe_bo_ggtt_addr(bo) + offset;

	/* Walk the resource to the requested offset; 4096 is the lookup granule */
	xe_res_first(bo->ttm.resource, offset, 4096, &cur);
	return mgr->io_base + cur.start;
}
266 
/*
 * Fill in TTM bus placement info for a stolen resource that is directly
 * CPU-accessible through BAR 2 / the LMEM BAR. Returns 0 on success or
 * -EIO when stolen has no CPU-visible IO base.
 */
static int __xe_ttm_stolen_io_mem_reserve_bar2(struct xe_device *xe,
					       struct xe_ttm_stolen_mgr *mgr,
					       struct ttm_resource *mem)
{
	struct xe_res_cursor cur;

	if (!mgr->io_base)
		return -EIO;

	/* Start offset of the allocation within the stolen region */
	xe_res_first(mem, 0, 4096, &cur);
	mem->bus.offset = cur.start;

	/* Pre-mapped access below only makes sense for contiguous placements */
	drm_WARN_ON(&xe->drm, !(mem->placement & TTM_PL_FLAG_CONTIGUOUS));

	/* Note: bus.addr is computed before io_base is folded into bus.offset */
	if (mem->placement & TTM_PL_FLAG_CONTIGUOUS && mgr->mapping)
		mem->bus.addr = (u8 __force *)mgr->mapping + mem->bus.offset;

	mem->bus.offset += mgr->io_base;
	mem->bus.is_iomem = true;
	mem->bus.caching = ttm_write_combined;

	return 0;
}
290 
/*
 * Fill in TTM bus placement info for a stolen resource on legacy integrated
 * platforms, where CPU access must go through the BO's GGTT mapping.
 * Returns 0 on success or -EIO when the BO is not GGTT-mapped (or on
 * non-x86 builds, which should be unreachable).
 */
static int __xe_ttm_stolen_io_mem_reserve_stolen(struct xe_device *xe,
						 struct xe_ttm_stolen_mgr *mgr,
						 struct ttm_resource *mem)
{
#ifdef CONFIG_X86
	struct xe_bo *bo = ttm_to_xe_bo(mem->bo);

	/* This path is only for integrated platforms */
	XE_WARN_ON(IS_DGFX(xe));

	/* XXX: Require BO to be mapped to GGTT? */
	if (drm_WARN_ON(&xe->drm, !(bo->flags & XE_BO_CREATE_GGTT_BIT)))
		return -EIO;

	/* GGTT is always contiguously mapped */
	mem->bus.offset = xe_bo_ggtt_addr(bo) + mgr->io_base;

	mem->bus.is_iomem = true;
	mem->bus.caching = ttm_write_combined;

	return 0;
#else
	/* How is it even possible to get here without gen12 stolen? */
	drm_WARN_ON(&xe->drm, 1);
	return -EIO;
#endif
}
317 
318 int xe_ttm_stolen_io_mem_reserve(struct xe_device *xe, struct ttm_resource *mem)
319 {
320 	struct ttm_resource_manager *ttm_mgr = ttm_manager_type(&xe->ttm, XE_PL_STOLEN);
321 	struct xe_ttm_stolen_mgr *mgr = ttm_mgr ? to_stolen_mgr(ttm_mgr) : NULL;
322 
323 	if (!mgr || !mgr->io_base)
324 		return -EIO;
325 
326 	if (xe_ttm_stolen_cpu_access_needs_ggtt(xe))
327 		return __xe_ttm_stolen_io_mem_reserve_stolen(xe, mgr, mem);
328 	else
329 		return __xe_ttm_stolen_io_mem_reserve_bar2(xe, mgr, mem);
330 }
331 
332 u64 xe_ttm_stolen_gpu_offset(struct xe_device *xe)
333 {
334 	struct xe_ttm_stolen_mgr *mgr =
335 		to_stolen_mgr(ttm_manager_type(&xe->ttm, XE_PL_STOLEN));
336 
337 	return mgr->stolen_base;
338 }
339