/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2008-2018 Intel Corporation
 */
6
#ifndef _I915_GPU_ERROR_H_
#define _I915_GPU_ERROR_H_

#include <linux/atomic.h>
#include <linux/kref.h>
#include <linux/ktime.h>
#include <linux/sched.h>

#include <drm/drm_mm.h>

#include "gt/intel_engine.h"
#include "gt/intel_gt_types.h"
#include "gt/uc/intel_uc_fw.h"

#include "intel_device_info.h"

#include "i915_gem.h"
#include "i915_gem_gtt.h"
#include "i915_params.h"
#include "i915_scheduler.h"

struct drm_i915_private;
struct i915_vma_compress;
struct intel_engine_capture_vma;
struct intel_overlay_error_state;
struct intel_display_error_state;
/*
 * i915_vma_coredump - snapshot of a single GPU buffer (vma) taken at
 * error-capture time.
 *
 * Nodes are chained via @next into a per-engine capture list; the
 * captured page contents live in the trailing flexible array.
 */
struct i915_vma_coredump {
	/* Next vma snapshot in the same capture list. */
	struct i915_vma_coredump *next;

	/* Short human-readable tag identifying this buffer. */
	char name[20];

	/* Placement of the vma within the GTT at capture time. */
	u64 gtt_offset;
	u64 gtt_size;
	u32 gtt_page_sizes;

	int num_pages;	/* NOTE(review): presumably capacity of pages[] — confirm */
	int page_count;	/* NOTE(review): presumably pages actually stored — confirm */
	int unused;
	/* Per-page copies of the vma contents (flexible array member). */
	u32 *pages[];
};
48
/*
 * i915_request_coredump - per-request state recorded at error-capture
 * time: submitter pid, context/seqno identifiers, ring positions and
 * scheduling attributes.
 */
struct i915_request_coredump {
	unsigned long flags;
	pid_t pid;		/* pid of the submitting process */
	u32 context;
	u32 seqno;
	u32 head;
	u32 tail;
	struct i915_sched_attr sched_attr;
};
58
/*
 * intel_engine_coredump - per-engine snapshot taken on GPU error.
 *
 * Holds the engine's register state, details of the context that was
 * executing, the captured vma contents and the execlist ports at the
 * time of the hang.  Chained via @next into the owning GT coredump.
 */
struct intel_engine_coredump {
	const struct intel_engine_cs *engine;

	bool hung;
	bool simulated;
	u32 reset_count;

	/* position of active request inside the ring */
	u32 rq_head, rq_post, rq_tail;

	/* Register state */
	u32 ccid;
	u32 start;
	u32 tail;
	u32 head;
	u32 ctl;
	u32 mode;
	u32 hws;
	u32 ipeir;
	u32 ipehr;
	u32 esr;
	u32 bbstate;
	u32 instpm;
	u32 instps;
	u64 bbaddr;
	u64 acthd;
	u32 fault_reg;
	u64 faddr;
	u32 rc_psmi; /* sleep state */
	struct intel_instdone instdone;

	/* Snapshot of the GEM context active on this engine. */
	struct i915_gem_context_coredump {
		char comm[TASK_COMM_LEN];	/* name of the owning task */

		u64 total_runtime;
		u32 avg_runtime;

		pid_t pid;
		int active;
		int guilty;
		struct i915_sched_attr sched_attr;
	} context;

	/* Singly linked list of captured buffer (vma) snapshots. */
	struct i915_vma_coredump *vma;

	/* Requests occupying the execlist ports at capture time. */
	struct i915_request_coredump execlist[EXECLIST_MAX_PORTS];
	unsigned int num_ports;

	/* Page-table pointers: full-PPGTT pdp quad or legacy dir base. */
	struct {
		u32 gfx_mode;
		union {
			u64 pdp[4];
			u32 pp_dir_base;
		};
	} vm_info;

	/* Next engine snapshot in the same GT coredump. */
	struct intel_engine_coredump *next;
};
117
/*
 * intel_gt_coredump - GT-wide (non-engine-specific) error snapshot:
 * global registers, fence state, the list of per-engine snapshots and
 * uC (GuC/HuC) firmware state.  Chained via @next.
 */
struct intel_gt_coredump {
	const struct intel_gt *_gt;
	bool awake;
	bool simulated;

	struct intel_gt_info info;

	/* Generic register state */
	u32 eir;
	u32 pgtbl_er;
	u32 ier;
	u32 gtier[6], ngtier;
	u32 derrmr;
	u32 forcewake;
	u32 error; /* gen6+ */
	u32 err_int; /* gen7 */
	u32 fault_data0; /* gen8, gen9 */
	u32 fault_data1; /* gen8, gen9 */
	u32 done_reg;
	u32 gac_eco;
	u32 gam_ecochk;
	u32 gab_ctl;
	u32 gfx_mode;
	u32 gtt_cache;
	u32 aux_err; /* gen12 */
	u32 sfc_done[GEN12_SFC_DONE_MAX]; /* gen12 */
	u32 gam_done; /* gen12 */

	/* Fence register snapshot; NOTE(review): nfence presumably counts
	 * the valid entries in fence[] — confirm against the capture code. */
	u32 nfence;
	u64 fence[I915_MAX_NUM_FENCES];

	/* Head of the singly linked list of per-engine snapshots. */
	struct intel_engine_coredump *engine;

	/* uC firmware descriptors plus a capture of the GuC log buffer. */
	struct intel_uc_coredump {
		struct intel_uc_fw guc_fw;
		struct intel_uc_fw huc_fw;
		struct i915_vma_coredump *guc_log;
	} *uc;

	struct intel_gt_coredump *next;
};
159
/*
 * i915_gpu_coredump - top-level GPU error dump.
 *
 * Reference counted (see i915_gpu_coredump_get()/put()); carries the
 * capture timestamps, frozen device/driver information and the list of
 * per-GT snapshots.
 */
struct i915_gpu_coredump {
	struct kref ref;	/* last put frees via __i915_gpu_coredump_free() */
	ktime_t time;
	ktime_t boottime;
	ktime_t uptime;
	unsigned long capture;

	struct drm_i915_private *i915;

	/* Head of the per-GT snapshot list. */
	struct intel_gt_coredump *gt;

	char error_msg[128];
	bool simulated;
	bool wakelock;
	bool suspended;
	int iommu;
	u32 reset_count;
	u32 suspend_count;

	/* Device/driver state frozen at capture time. */
	struct intel_device_info device_info;
	struct intel_runtime_info runtime_info;
	struct intel_driver_caps driver_caps;
	struct i915_params params;

	struct intel_overlay_error_state *overlay;
	struct intel_display_error_state *display;

	/* NOTE(review): presumably the serialized (sg-list) form of the
	 * dump for read-out — confirm against i915_gpu_error.c. */
	struct scatterlist *sgl, *fit;
};
189
/*
 * i915_gpu_error - per-device error/reset bookkeeping: the first
 * captured dump plus global and per-engine reset counters.
 */
struct i915_gpu_error {
	/* For reset and error_state handling. */
	spinlock_t lock;
	/* Protected by the above dev->gpu_error.lock. */
	struct i915_gpu_coredump *first_error;

	atomic_t pending_fb_pin;

	/** Number of times the device has been reset (global) */
	atomic_t reset_count;

	/** Number of times an engine has been reset */
	atomic_t reset_engine_count[I915_NUM_ENGINES];
};
204
/*
 * drm_i915_error_state_buf - output cursor used while formatting a
 * coredump into text (see i915_error_printf()).
 */
struct drm_i915_error_state_buf {
	struct drm_i915_private *i915;
	/* Scatterlist output chain: head, current chunk and end. */
	struct scatterlist *sgl, *cur, *end;

	char *buf;	/* current output buffer */
	size_t bytes;	/* bytes written into buf so far */
	size_t size;	/* capacity of buf */
	loff_t iter;

	int err;	/* NOTE(review): presumably sticky error code — confirm */
};
216
#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)

__printf(2, 3)
void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);

struct i915_gpu_coredump *i915_gpu_coredump(struct intel_gt *gt,
					    intel_engine_mask_t engine_mask);
void i915_capture_error_state(struct intel_gt *gt,
			      intel_engine_mask_t engine_mask);

struct i915_gpu_coredump *
i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp);

struct intel_gt_coredump *
intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp);

struct intel_engine_coredump *
intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp);

struct intel_engine_capture_vma *
intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
				  struct i915_request *rq,
				  gfp_t gfp);

void intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
				   struct intel_engine_capture_vma *capture,
				   struct i915_vma_compress *compress);

struct i915_vma_compress *
i915_vma_capture_prepare(struct intel_gt_coredump *gt);

void i915_vma_capture_finish(struct intel_gt_coredump *gt,
			     struct i915_vma_compress *compress);

void i915_error_state_store(struct i915_gpu_coredump *error);

/*
 * i915_gpu_coredump_get - acquire a reference on a GPU coredump
 * @gpu: the coredump to pin; must not be NULL
 *
 * Returns @gpu with its refcount incremented, for convenient chaining.
 */
static inline struct i915_gpu_coredump *
i915_gpu_coredump_get(struct i915_gpu_coredump *gpu)
{
	kref_get(&gpu->ref);
	return gpu;
}

ssize_t
i915_gpu_coredump_copy_to_buffer(struct i915_gpu_coredump *error,
				 char *buf, loff_t offset, size_t count);

void __i915_gpu_coredump_free(struct kref *kref);
i915_gpu_coredump_put(struct i915_gpu_coredump * gpu)265 static inline void i915_gpu_coredump_put(struct i915_gpu_coredump *gpu)
266 {
267 if (gpu)
268 kref_put(&gpu->ref, __i915_gpu_coredump_free);
269 }

struct i915_gpu_coredump *i915_first_error_state(struct drm_i915_private *i915);
void i915_reset_error_state(struct drm_i915_private *i915);
void i915_disable_error_state(struct drm_i915_private *i915, int err);

#else

/* Error capture disabled (CONFIG_DRM_I915_CAPTURE_ERROR=n): no-op stub. */
static inline void
i915_capture_error_state(struct intel_gt *gt, intel_engine_mask_t engine_mask)
{
}
281
/* Error capture disabled: allocation stub; callers must tolerate NULL. */
static inline struct i915_gpu_coredump *
i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp)
{
	return NULL;
}
287
/* Error capture disabled: allocation stub; callers must tolerate NULL. */
static inline struct intel_gt_coredump *
intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp)
{
	return NULL;
}
293
/* Error capture disabled: allocation stub; callers must tolerate NULL. */
static inline struct intel_engine_coredump *
intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp)
{
	return NULL;
}
299
/* Error capture disabled: no request state is recorded. */
static inline struct intel_engine_capture_vma *
intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
				  struct i915_request *rq,
				  gfp_t gfp)
{
	return NULL;
}
307
/* Error capture disabled: no vma contents are recorded. */
static inline void
intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
			      struct intel_engine_capture_vma *capture,
			      struct i915_vma_compress *compress)
{
}
314
/* Error capture disabled: no compression context is needed. */
static inline struct i915_vma_compress *
i915_vma_capture_prepare(struct intel_gt_coredump *gt)
{
	return NULL;
}
320
/* Error capture disabled: nothing to tear down. */
static inline void
i915_vma_capture_finish(struct intel_gt_coredump *gt,
			struct i915_vma_compress *compress)
{
}
326
/* Error capture disabled: dumps are never stored. */
static inline void
i915_error_state_store(struct i915_gpu_coredump *error)
{
}
331
/* Error capture disabled: no dump can exist, so nothing to release. */
static inline void i915_gpu_coredump_put(struct i915_gpu_coredump *gpu)
{
}
335
/* Error capture disabled: report the facility as unavailable. */
static inline struct i915_gpu_coredump *
i915_first_error_state(struct drm_i915_private *i915)
{
	return ERR_PTR(-ENODEV);
}
341
/* Error capture disabled: no stored state to reset. */
static inline void i915_reset_error_state(struct drm_i915_private *i915)
{
}
345
/* Error capture disabled: no-op stub. */
static inline void i915_disable_error_state(struct drm_i915_private *i915,
					    int err)
{
}

#endif /* IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) */

#endif /* _I915_GPU_ERROR_H_ */