1 /*	$NetBSD: i915_gpu_error.h,v 1.2 2021/12/18 23:45:28 riastradh Exp $	*/
2 
3 /*
4  * SPDX-License-Identifier: MIT
5  *
 * Copyright © 2008-2018 Intel Corporation
7  */
8 
9 #ifndef _I915_GPU_ERROR_H_
10 #define _I915_GPU_ERROR_H_
11 
12 #include <linux/atomic.h>
13 #include <linux/kref.h>
14 #include <linux/ktime.h>
15 #include <linux/sched.h>
16 
17 #include <drm/drm_mm.h>
18 
19 #include "gt/intel_engine.h"
20 #include "gt/uc/intel_uc_fw.h"
21 
22 #include "intel_device_info.h"
23 
24 #include "i915_gem.h"
25 #include "i915_gem_gtt.h"
26 #include "i915_params.h"
27 #include "i915_scheduler.h"
28 
29 struct drm_i915_private;
30 struct i915_vma_compress;
31 struct intel_engine_capture_vma;
32 struct intel_overlay_error_state;
33 struct intel_display_error_state;
34 
35 struct i915_vma_coredump {
36 	struct i915_vma_coredump *next;
37 
38 	char name[20];
39 
40 	u64 gtt_offset;
41 	u64 gtt_size;
42 	u32 gtt_page_sizes;
43 
44 	int num_pages;
45 	int page_count;
46 	int unused;
47 	u32 *pages[0];
48 };
49 
/*
 * Per-request state recorded at capture time; used for the entries of
 * intel_engine_coredump::execlist[] (one per execlist port).
 */
struct i915_request_coredump {
	unsigned long flags;
	pid_t pid;		/* pid of the submitter */
	u32 context;
	u32 seqno;
	u32 start;
	u32 head;		/* ring positions — presumably head/tail of the
				 * request in the ring; confirm against capture code */
	u32 tail;
	struct i915_sched_attr sched_attr;
};
60 
/*
 * Per-engine slice of a GPU error dump.  Instances are chained through
 * @next on intel_gt_coredump::engine.
 */
struct intel_engine_coredump {
	const struct intel_engine_cs *engine;

	bool simulated;
	u32 reset_count;

	/* position of active request inside the ring */
	u32 rq_head, rq_post, rq_tail;

	/*
	 * Register state: raw snapshot of the engine's registers at the
	 * time of the hang; field names follow the hardware register
	 * mnemonics.
	 */
	u32 ccid;
	u32 start;
	u32 tail;
	u32 head;
	u32 ctl;
	u32 mode;
	u32 hws;
	u32 ipeir;
	u32 ipehr;
	u32 bbstate;
	u32 instpm;
	u32 instps;
	u64 bbaddr;
	u64 acthd;
	u32 fault_reg;
	u64 faddr;
	u32 rc_psmi; /* sleep state */
	struct intel_instdone instdone;

	/* Snapshot of the GEM context that was running on the engine. */
	struct i915_gem_context_coredump {
		char comm[TASK_COMM_LEN];	/* submitting task's name */
		pid_t pid;			/* submitting task's pid */
		int active;
		int guilty;
		struct i915_sched_attr sched_attr;
	} context;

	/* Head of the list of captured VMA contents (batch, ring, ...). */
	struct i915_vma_coredump *vma;

	/* Requests occupying the execlist ports when the error occurred. */
	struct i915_request_coredump execlist[EXECLIST_MAX_PORTS];
	unsigned int num_ports;	/* valid entries in execlist[] */

	/* Address-space state for this engine's active VM. */
	struct {
		u32 gfx_mode;
		union {
			u64 pdp[4];	/* NOTE(review): presumably the four
					 * page-directory pointers — confirm */
			u32 pp_dir_base;
		};
	} vm_info;

	struct intel_engine_coredump *next;
};
113 
/*
 * Per-GT slice of a GPU error dump: global (non-engine-specific) register
 * state plus the list of per-engine captures.  Chained through @next on
 * i915_gpu_coredump::gt.
 */
struct intel_gt_coredump {
	const struct intel_gt *_gt;
	bool awake;
	bool simulated;

	/* Generic register state */
	u32 eir;
	u32 pgtbl_er;
	u32 ier;
	u32 gtier[6], ngtier;	/* ngtier = valid entries in gtier[] */
	u32 derrmr;
	u32 forcewake;
	u32 error; /* gen6+ */
	u32 err_int; /* gen7 */
	u32 fault_data0; /* gen8, gen9 */
	u32 fault_data1; /* gen8, gen9 */
	u32 done_reg;
	u32 gac_eco;
	u32 gam_ecochk;
	u32 gab_ctl;
	u32 gfx_mode;
	u32 gtt_cache;
	u32 aux_err; /* gen12 */
	u32 sfc_done[GEN12_SFC_DONE_MAX]; /* gen12 */
	u32 gam_done; /* gen12 */

	/* Fence register contents; nfence = valid entries in fence[]. */
	u32 nfence;
	u64 fence[I915_MAX_NUM_FENCES];

	/* Singly linked list of per-engine captures. */
	struct intel_engine_coredump *engine;

	/* Microcontroller (GuC/HuC) state; NULL when not captured. */
	struct intel_uc_coredump {
		struct intel_uc_fw guc_fw;
		struct intel_uc_fw huc_fw;
		struct i915_vma_coredump *guc_log;	/* captured GuC log buffer */
	} *uc;

	struct intel_gt_coredump *next;
};
153 
/*
 * Top-level GPU error dump.  Reference counted via @ref: acquire with
 * i915_gpu_coredump_get(), release with i915_gpu_coredump_put().
 */
struct i915_gpu_coredump {
	struct kref ref;	/* refcount; freed by __i915_gpu_coredump_free() */
	ktime_t time;
	ktime_t boottime;
	ktime_t uptime;
	unsigned long capture;

	struct drm_i915_private *i915;

	/* Singly linked list of per-GT captures. */
	struct intel_gt_coredump *gt;

	char error_msg[128];	/* one-line summary of the error */
	bool simulated;
	bool wakelock;
	bool suspended;
	int iommu;
	u32 reset_count;
	u32 suspend_count;

	/* Device/runtime identification frozen at capture time. */
	struct intel_device_info device_info;
	struct intel_runtime_info runtime_info;
	struct intel_driver_caps driver_caps;
	struct i915_params params;

	struct intel_overlay_error_state *overlay;
	struct intel_display_error_state *display;

	/* Serialized (text) form of the dump; see the _copy_to_buffer API. */
	struct scatterlist *sgl, *fit;
};
183 
/*
 * Error/reset bookkeeping embedded in the device-private structure:
 * holds the first captured error state and global reset counters.
 */
struct i915_gpu_error {
	/* For reset and error_state handling. */
	spinlock_t lock;
	/* Protected by the above dev->gpu_error.lock. */
	struct i915_gpu_coredump *first_error;

	atomic_t pending_fb_pin;

	/** Number of times the device has been reset (global) */
	atomic_t reset_count;

	/** Number of times an engine has been reset */
	atomic_t reset_engine_count[I915_NUM_ENGINES];
};
198 
/*
 * Output buffer used when formatting an error dump as text (see
 * i915_error_printf()).
 */
struct drm_i915_error_state_buf {
	struct drm_i915_private *i915;
	/* Scatterlist cursor over the output pages. */
	struct scatterlist *sgl, *cur, *end;

	char *buf;	/* current staging buffer */
	size_t bytes;	/* bytes written into buf so far */
	size_t size;	/* capacity of buf */
	loff_t iter;

	int err;	/* sticky error; 0 while formatting succeeds */
};
210 
211 #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
212 
/* printf-style append into an error-state buffer; format-checked. */
__printf(2, 3)
void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);

/* Capture a snapshot of the current GPU state. */
struct i915_gpu_coredump *i915_gpu_coredump(struct drm_i915_private *i915);
void i915_capture_error_state(struct drm_i915_private *i915);

/* Allocators for the dump hierarchy (gpu -> gt -> engine). */
struct i915_gpu_coredump *
i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp);

struct intel_gt_coredump *
intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp);

struct intel_engine_coredump *
intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp);

/* Record a request, then attach its captured VMA contents. */
struct intel_engine_capture_vma *
intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
				  struct i915_request *rq,
				  gfp_t gfp);

void intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
				   struct intel_engine_capture_vma *capture,
				   struct i915_vma_compress *compress);

/* Set up / tear down the compression state used while capturing VMAs. */
struct i915_vma_compress *
i915_vma_capture_prepare(struct intel_gt_coredump *gt);

void i915_vma_capture_finish(struct intel_gt_coredump *gt,
			     struct i915_vma_compress *compress);

void i915_error_state_store(struct i915_gpu_coredump *error);
244 
/*
 * Acquire an additional reference on @gpu and return it; release with
 * i915_gpu_coredump_put().  @gpu must be non-NULL.
 */
static inline struct i915_gpu_coredump *
i915_gpu_coredump_get(struct i915_gpu_coredump *gpu)
{
	kref_get(&gpu->ref);
	return gpu;
}
251 
/* Serialize @error as text into @buf starting at @offset; returns bytes
 * copied or a negative error code. */
ssize_t
i915_gpu_coredump_copy_to_buffer(struct i915_gpu_coredump *error,
				 char *buf, loff_t offset, size_t count);

/* kref release callback; do not call directly — use i915_gpu_coredump_put(). */
void __i915_gpu_coredump_free(struct kref *kref);
i915_gpu_coredump_put(struct i915_gpu_coredump * gpu)257 static inline void i915_gpu_coredump_put(struct i915_gpu_coredump *gpu)
258 {
259 	if (gpu)
260 		kref_put(&gpu->ref, __i915_gpu_coredump_free);
261 }
262 
/* Access/clear/disable the stored first_error state. */
struct i915_gpu_coredump *i915_first_error_state(struct drm_i915_private *i915);
void i915_reset_error_state(struct drm_i915_private *i915);
void i915_disable_error_state(struct drm_i915_private *i915, int err);
266 
267 #else
268 
i915_capture_error_state(struct drm_i915_private * i915)269 static inline void i915_capture_error_state(struct drm_i915_private *i915)
270 {
271 }
272 
273 static inline struct i915_gpu_coredump *
i915_gpu_coredump_alloc(struct drm_i915_private * i915,gfp_t gfp)274 i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp)
275 {
276 	return NULL;
277 }
278 
279 static inline struct intel_gt_coredump *
intel_gt_coredump_alloc(struct intel_gt * gt,gfp_t gfp)280 intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp)
281 {
282 	return NULL;
283 }
284 
285 static inline struct intel_engine_coredump *
intel_engine_coredump_alloc(struct intel_engine_cs * engine,gfp_t gfp)286 intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp)
287 {
288 	return NULL;
289 }
290 
291 static inline struct intel_engine_capture_vma *
intel_engine_coredump_add_request(struct intel_engine_coredump * ee,struct i915_request * rq,gfp_t gfp)292 intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
293 				  struct i915_request *rq,
294 				  gfp_t gfp)
295 {
296 	return NULL;
297 }
298 
299 static inline void
intel_engine_coredump_add_vma(struct intel_engine_coredump * ee,struct intel_engine_capture_vma * capture,struct i915_vma_compress * compress)300 intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
301 			      struct intel_engine_capture_vma *capture,
302 			      struct i915_vma_compress *compress)
303 {
304 }
305 
306 static inline struct i915_vma_compress *
i915_vma_capture_prepare(struct intel_gt_coredump * gt)307 i915_vma_capture_prepare(struct intel_gt_coredump *gt)
308 {
309 	return NULL;
310 }
311 
312 static inline void
i915_vma_capture_finish(struct intel_gt_coredump * gt,struct i915_vma_compress * compress)313 i915_vma_capture_finish(struct intel_gt_coredump *gt,
314 			struct i915_vma_compress *compress)
315 {
316 }
317 
318 static inline void
i915_error_state_store(struct i915_gpu_coredump * error)319 i915_error_state_store(struct i915_gpu_coredump *error)
320 {
321 }
322 
i915_gpu_coredump_put(struct i915_gpu_coredump * gpu)323 static inline void i915_gpu_coredump_put(struct i915_gpu_coredump *gpu)
324 {
325 }
326 
327 static inline struct i915_gpu_coredump *
i915_first_error_state(struct drm_i915_private * i915)328 i915_first_error_state(struct drm_i915_private *i915)
329 {
330 	return ERR_PTR(-ENODEV);
331 }
332 
i915_reset_error_state(struct drm_i915_private * i915)333 static inline void i915_reset_error_state(struct drm_i915_private *i915)
334 {
335 }
336 
i915_disable_error_state(struct drm_i915_private * i915,int err)337 static inline void i915_disable_error_state(struct drm_i915_private *i915,
338 					    int err)
339 {
340 }
341 
342 #endif /* IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) */
343 
344 #endif /* _I915_GPU_ERROR_H_ */
345