1 /* $NetBSD: i915_gpu_error.h,v 1.2 2021/12/18 23:45:28 riastradh Exp $ */
2
3 /*
4 * SPDX-License-Identifier: MIT
5 *
 * Copyright © 2008-2018 Intel Corporation
7 */
8
9 #ifndef _I915_GPU_ERROR_H_
10 #define _I915_GPU_ERROR_H_
11
12 #include <linux/atomic.h>
13 #include <linux/kref.h>
14 #include <linux/ktime.h>
15 #include <linux/sched.h>
16
17 #include <drm/drm_mm.h>
18
19 #include "gt/intel_engine.h"
20 #include "gt/uc/intel_uc_fw.h"
21
22 #include "intel_device_info.h"
23
24 #include "i915_gem.h"
25 #include "i915_gem_gtt.h"
26 #include "i915_params.h"
27 #include "i915_scheduler.h"
28
29 struct drm_i915_private;
30 struct i915_vma_compress;
31 struct intel_engine_capture_vma;
32 struct intel_overlay_error_state;
33 struct intel_display_error_state;
34
35 struct i915_vma_coredump {
36 struct i915_vma_coredump *next;
37
38 char name[20];
39
40 u64 gtt_offset;
41 u64 gtt_size;
42 u32 gtt_page_sizes;
43
44 int num_pages;
45 int page_count;
46 int unused;
47 u32 *pages[0];
48 };
49
/*
 * Snapshot of a single in-flight request at error time: its submitting
 * process, identifiers, and ring positions.
 */
struct i915_request_coredump {
	unsigned long flags;
	pid_t pid;		/* pid of the submitting process */
	u32 context;
	u32 seqno;
	/* Ring positions for this request at capture time. */
	u32 start;
	u32 head;
	u32 tail;
	struct i915_sched_attr sched_attr;	/* scheduling attributes (priority etc.) */
};
60
/*
 * Per-engine error capture: register state, the guilty context, the
 * captured vmas and the execlist ports at the time of the hang.
 * Linked into a list via @next (one entry per captured engine).
 */
struct intel_engine_coredump {
	const struct intel_engine_cs *engine;

	bool simulated;		/* capture triggered by a simulated/injected reset */
	u32 reset_count;

	/* position of active request inside the ring */
	u32 rq_head, rq_post, rq_tail;

	/* Register state */
	u32 ccid;
	u32 start;
	u32 tail;
	u32 head;
	u32 ctl;
	u32 mode;
	u32 hws;
	u32 ipeir;
	u32 ipehr;
	u32 bbstate;
	u32 instpm;
	u32 instps;
	u64 bbaddr;
	u64 acthd;
	u32 fault_reg;
	u64 faddr;
	u32 rc_psmi; /* sleep state */
	struct intel_instdone instdone;

	/* The context active on this engine when the error occurred. */
	struct i915_gem_context_coredump {
		char comm[TASK_COMM_LEN];	/* name of the owning task */
		pid_t pid;
		int active;
		int guilty;
		struct i915_sched_attr sched_attr;
	} context;

	/* List of captured vmas belonging to this engine's request. */
	struct i915_vma_coredump *vma;

	/* State of each execlist submission port at capture time. */
	struct i915_request_coredump execlist[EXECLIST_MAX_PORTS];
	unsigned int num_ports;

	/* Page-table/virtual-memory configuration of the active context. */
	struct {
		u32 gfx_mode;
		union {
			u64 pdp[4];	/* full-ppgtt page directory pointers */
			u32 pp_dir_base;
		};
	} vm_info;

	struct intel_engine_coredump *next;
};
113
/*
 * Per-GT error capture: global registers, fence registers, the list of
 * per-engine captures, and uc (GuC/HuC) firmware state.  Linked via
 * @next (one entry per captured GT).
 */
struct intel_gt_coredump {
	const struct intel_gt *_gt;
	bool awake;		/* GT was awake when the error was captured */
	bool simulated;

	/* Generic register state */
	u32 eir;
	u32 pgtbl_er;
	u32 ier;
	u32 gtier[6], ngtier;	/* GT interrupt enables; ngtier = valid entries */
	u32 derrmr;
	u32 forcewake;
	u32 error; /* gen6+ */
	u32 err_int; /* gen7 */
	u32 fault_data0; /* gen8, gen9 */
	u32 fault_data1; /* gen8, gen9 */
	u32 done_reg;
	u32 gac_eco;
	u32 gam_ecochk;
	u32 gab_ctl;
	u32 gfx_mode;
	u32 gtt_cache;
	u32 aux_err; /* gen12 */
	u32 sfc_done[GEN12_SFC_DONE_MAX]; /* gen12 */
	u32 gam_done; /* gen12 */

	/* Fence registers; nfence = number of valid entries in fence[]. */
	u32 nfence;
	u64 fence[I915_MAX_NUM_FENCES];

	/* Head of the per-engine capture list. */
	struct intel_engine_coredump *engine;

	/* Optional microcontroller (GuC/HuC) state; NULL when not captured. */
	struct intel_uc_coredump {
		struct intel_uc_fw guc_fw;
		struct intel_uc_fw huc_fw;
		struct i915_vma_coredump *guc_log;
	} *uc;

	struct intel_gt_coredump *next;
};
153
/*
 * Top-level GPU error state, reference counted via @ref.  Holds the
 * device-wide snapshot plus the list of per-GT captures, and the
 * scatterlist used to serialise the report for sysfs/debugfs readers.
 */
struct i915_gpu_coredump {
	struct kref ref;	/* use i915_gpu_coredump_get()/put() */
	/* Timestamps taken at capture. */
	ktime_t time;
	ktime_t boottime;
	ktime_t uptime;
	unsigned long capture;

	struct drm_i915_private *i915;

	/* Head of the per-GT capture list. */
	struct intel_gt_coredump *gt;

	char error_msg[128];	/* one-line summary of the error */
	bool simulated;
	bool wakelock;
	bool suspended;
	int iommu;
	u32 reset_count;
	u32 suspend_count;

	/* Device/driver configuration snapshotted with the error. */
	struct intel_device_info device_info;
	struct intel_runtime_info runtime_info;
	struct intel_driver_caps driver_caps;
	struct i915_params params;

	struct intel_overlay_error_state *overlay;
	struct intel_display_error_state *display;

	/* Serialised form of the report; fit caches the last-read chunk. */
	struct scatterlist *sgl, *fit;
};
183
/*
 * Per-device error bookkeeping: the stored first-error capture and
 * global/per-engine reset counters.
 */
struct i915_gpu_error {
	/* For reset and error_state handling. */
	spinlock_t lock;
	/* Protected by the above dev->gpu_error.lock. */
	struct i915_gpu_coredump *first_error;

	/* Count of in-flight framebuffer pins blocking capture teardown. */
	atomic_t pending_fb_pin;

	/** Number of times the device has been reset (global) */
	atomic_t reset_count;

	/** Number of times an engine has been reset */
	atomic_t reset_engine_count[I915_NUM_ENGINES];
};
198
/*
 * Growable output buffer used by i915_error_printf() to build the
 * textual error report, backed by a scatterlist of pages.
 */
struct drm_i915_error_state_buf {
	struct drm_i915_private *i915;
	struct scatterlist *sgl, *cur, *end;	/* backing pages: head, write pos, end */

	char *buf;	/* current page's mapping */
	size_t bytes;	/* bytes written into buf */
	size_t size;	/* capacity of buf */
	loff_t iter;

	int err;	/* first error seen while writing; sticky */
};
210
211 #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
212
/* printf-style append to an error-state buffer. */
__printf(2, 3)
void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);

/* Capture a full snapshot of the device; capture and store it, respectively. */
struct i915_gpu_coredump *i915_gpu_coredump(struct drm_i915_private *i915);
void i915_capture_error_state(struct drm_i915_private *i915);

/* Allocators for each level of the capture hierarchy. */
struct i915_gpu_coredump *
i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp);

struct intel_gt_coredump *
intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp);

struct intel_engine_coredump *
intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp);

/* Record a request (and the vmas it references) into an engine capture. */
struct intel_engine_capture_vma *
intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
				  struct i915_request *rq,
				  gfp_t gfp);

void intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
				   struct intel_engine_capture_vma *capture,
				   struct i915_vma_compress *compress);

/* Set up / tear down the compressor used while copying vma contents. */
struct i915_vma_compress *
i915_vma_capture_prepare(struct intel_gt_coredump *gt);

void i915_vma_capture_finish(struct intel_gt_coredump *gt,
			     struct i915_vma_compress *compress);

/* Publish a completed capture as the device's stored error state. */
void i915_error_state_store(struct i915_gpu_coredump *error);
244
245 static inline struct i915_gpu_coredump *
i915_gpu_coredump_get(struct i915_gpu_coredump * gpu)246 i915_gpu_coredump_get(struct i915_gpu_coredump *gpu)
247 {
248 kref_get(&gpu->ref);
249 return gpu;
250 }
251
252 ssize_t
253 i915_gpu_coredump_copy_to_buffer(struct i915_gpu_coredump *error,
254 char *buf, loff_t offset, size_t count);
255
256 void __i915_gpu_coredump_free(struct kref *kref);
i915_gpu_coredump_put(struct i915_gpu_coredump * gpu)257 static inline void i915_gpu_coredump_put(struct i915_gpu_coredump *gpu)
258 {
259 if (gpu)
260 kref_put(&gpu->ref, __i915_gpu_coredump_free);
261 }
262
/* Access, clear, or poison (with -err) the device's stored error state. */
struct i915_gpu_coredump *i915_first_error_state(struct drm_i915_private *i915);
void i915_reset_error_state(struct drm_i915_private *i915);
void i915_disable_error_state(struct drm_i915_private *i915, int err);
266
267 #else
268
i915_capture_error_state(struct drm_i915_private * i915)269 static inline void i915_capture_error_state(struct drm_i915_private *i915)
270 {
271 }
272
273 static inline struct i915_gpu_coredump *
i915_gpu_coredump_alloc(struct drm_i915_private * i915,gfp_t gfp)274 i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp)
275 {
276 return NULL;
277 }
278
279 static inline struct intel_gt_coredump *
intel_gt_coredump_alloc(struct intel_gt * gt,gfp_t gfp)280 intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp)
281 {
282 return NULL;
283 }
284
285 static inline struct intel_engine_coredump *
intel_engine_coredump_alloc(struct intel_engine_cs * engine,gfp_t gfp)286 intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp)
287 {
288 return NULL;
289 }
290
291 static inline struct intel_engine_capture_vma *
intel_engine_coredump_add_request(struct intel_engine_coredump * ee,struct i915_request * rq,gfp_t gfp)292 intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
293 struct i915_request *rq,
294 gfp_t gfp)
295 {
296 return NULL;
297 }
298
299 static inline void
intel_engine_coredump_add_vma(struct intel_engine_coredump * ee,struct intel_engine_capture_vma * capture,struct i915_vma_compress * compress)300 intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
301 struct intel_engine_capture_vma *capture,
302 struct i915_vma_compress *compress)
303 {
304 }
305
306 static inline struct i915_vma_compress *
i915_vma_capture_prepare(struct intel_gt_coredump * gt)307 i915_vma_capture_prepare(struct intel_gt_coredump *gt)
308 {
309 return NULL;
310 }
311
312 static inline void
i915_vma_capture_finish(struct intel_gt_coredump * gt,struct i915_vma_compress * compress)313 i915_vma_capture_finish(struct intel_gt_coredump *gt,
314 struct i915_vma_compress *compress)
315 {
316 }
317
318 static inline void
i915_error_state_store(struct i915_gpu_coredump * error)319 i915_error_state_store(struct i915_gpu_coredump *error)
320 {
321 }
322
i915_gpu_coredump_put(struct i915_gpu_coredump * gpu)323 static inline void i915_gpu_coredump_put(struct i915_gpu_coredump *gpu)
324 {
325 }
326
327 static inline struct i915_gpu_coredump *
i915_first_error_state(struct drm_i915_private * i915)328 i915_first_error_state(struct drm_i915_private *i915)
329 {
330 return ERR_PTR(-ENODEV);
331 }
332
i915_reset_error_state(struct drm_i915_private * i915)333 static inline void i915_reset_error_state(struct drm_i915_private *i915)
334 {
335 }
336
i915_disable_error_state(struct drm_i915_private * i915,int err)337 static inline void i915_disable_error_state(struct drm_i915_private *i915,
338 int err)
339 {
340 }
341
342 #endif /* IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) */
343
344 #endif /* _I915_GPU_ERROR_H_ */
345