1 /*
2  * SPDX-License-Identifier: MIT
3  *
 * Copyright © 2008-2018 Intel Corporation
5  */
6 
7 #ifndef _I915_GPU_ERROR_H_
8 #define _I915_GPU_ERROR_H_
9 
10 #include <linux/atomic.h>
11 #include <linux/kref.h>
12 #include <linux/ktime.h>
13 #include <linux/sched.h>
14 
15 #include <drm/drm_mm.h>
16 
17 #include "gt/intel_engine.h"
18 #include "gt/intel_gt_types.h"
19 #include "gt/uc/intel_uc_fw.h"
20 
21 #include "intel_device_info.h"
22 
23 #include "i915_gem.h"
24 #include "i915_gem_gtt.h"
25 #include "i915_params.h"
26 #include "i915_scheduler.h"
27 
28 struct drm_i915_private;
29 struct i915_vma_compress;
30 struct intel_engine_capture_vma;
31 struct intel_overlay_error_state;
32 struct intel_display_error_state;
33 
/*
 * Snapshot of a single VMA (buffer) captured at error time.
 * Instances are chained into a singly-linked list via @next.
 */
struct i915_vma_coredump {
	struct i915_vma_coredump *next;

	/* Short human-readable label for this buffer (fixed size, may truncate) */
	char name[20];

	/* GTT placement of the captured buffer */
	u64 gtt_offset;
	u64 gtt_size;
	u32 gtt_page_sizes;

	/* NOTE(review): num_pages looks like allocated capacity and
	 * page_count like the number of valid entries in pages[] —
	 * confirm against the capture code in i915_gpu_error.c.
	 */
	int num_pages;
	int page_count;
	int unused;
	/* Flexible array of per-page copies of the buffer contents */
	u32 *pages[];
};
48 
/*
 * Minimal snapshot of an i915_request taken at error time:
 * identity (pid/context/seqno), ring positions and scheduling attributes.
 */
struct i915_request_coredump {
	unsigned long flags;
	pid_t pid;
	u32 context;
	u32 seqno;
	/* Ring head/tail for this request at capture time */
	u32 head;
	u32 tail;
	struct i915_sched_attr sched_attr;
};
58 
/*
 * Per-engine error state: register snapshot, the context that was
 * running, captured VMAs and the execlist ports at the time of hang.
 * Engines are chained via @next under intel_gt_coredump.
 */
struct intel_engine_coredump {
	const struct intel_engine_cs *engine;

	/* Was this engine the one flagged as hung? */
	bool hung;
	bool simulated;
	u32 reset_count;

	/* position of active request inside the ring */
	u32 rq_head, rq_post, rq_tail;

	/* Register state */
	u32 ccid;
	u32 start;
	u32 tail;
	u32 head;
	u32 ctl;
	u32 mode;
	u32 hws;
	u32 ipeir;
	u32 ipehr;
	u32 esr;
	u32 bbstate;
	u32 instpm;
	u32 instps;
	u64 bbaddr;
	u64 acthd;
	u32 fault_reg;
	u64 faddr;
	u32 rc_psmi; /* sleep state */
	struct intel_instdone instdone;

	/* Snapshot of the GEM context that owned the hanging request */
	struct i915_gem_context_coredump {
		/* Name of the process that owned the context */
		char comm[TASK_COMM_LEN];

		u64 total_runtime;
		u32 avg_runtime;

		pid_t pid;
		int active;
		int guilty;
		struct i915_sched_attr sched_attr;
	} context;

	/* List of captured buffers (ring, context image, batch, ...) */
	struct i915_vma_coredump *vma;

	/* Requests resident in the execlist ports; num_ports entries valid */
	struct i915_request_coredump execlist[EXECLIST_MAX_PORTS];
	unsigned int num_ports;

	/* Page-table state: full PDPs or a single page-directory base */
	struct {
		u32 gfx_mode;
		union {
			u64 pdp[4];
			u32 pp_dir_base;
		};
	} vm_info;

	struct intel_engine_coredump *next;
};
117 
/*
 * Per-GT error state: global (non-engine) registers, fences, the list
 * of engine dumps and optional uC (GuC/HuC) firmware state. Chained
 * via @next under i915_gpu_coredump.
 */
struct intel_gt_coredump {
	const struct intel_gt *_gt;
	bool awake;
	bool simulated;

	struct intel_gt_info info;

	/* Generic register state */
	u32 eir;
	u32 pgtbl_er;
	u32 ier;
	u32 gtier[6], ngtier;	/* ngtier = number of valid gtier[] entries */
	u32 derrmr;
	u32 forcewake;
	u32 error; /* gen6+ */
	u32 err_int; /* gen7 */
	u32 fault_data0; /* gen8, gen9 */
	u32 fault_data1; /* gen8, gen9 */
	u32 done_reg;
	u32 gac_eco;
	u32 gam_ecochk;
	u32 gab_ctl;
	u32 gfx_mode;
	u32 gtt_cache;
	u32 aux_err; /* gen12 */
	u32 sfc_done[GEN12_SFC_DONE_MAX]; /* gen12 */
	u32 gam_done; /* gen12 */

	/* Fence register snapshot; nfence entries valid */
	u32 nfence;
	u64 fence[I915_MAX_NUM_FENCES];

	/* Head of the per-engine dump list */
	struct intel_engine_coredump *engine;

	/* uC state, only captured when firmware is in use */
	struct intel_uc_coredump {
		struct intel_uc_fw guc_fw;
		struct intel_uc_fw huc_fw;
		struct i915_vma_coredump *guc_log;
	} *uc;

	struct intel_gt_coredump *next;
};
159 
/*
 * Top-level GPU error dump. Reference counted via @ref; lifetime is
 * managed with i915_gpu_coredump_get()/i915_gpu_coredump_put().
 */
struct i915_gpu_coredump {
	struct kref ref;
	/* Timestamps recorded when the error was captured */
	ktime_t time;
	ktime_t boottime;
	ktime_t uptime;
	unsigned long capture;

	struct drm_i915_private *i915;

	/* Head of the per-GT dump list (chained via intel_gt_coredump.next) */
	struct intel_gt_coredump *gt;

	char error_msg[128];
	bool simulated;
	bool wakelock;
	bool suspended;
	int iommu;
	u32 reset_count;
	u32 suspend_count;

	/* Device/runtime info and module parameters at capture time */
	struct intel_device_info device_info;
	struct intel_runtime_info runtime_info;
	struct intel_driver_caps driver_caps;
	struct i915_params params;

	struct intel_overlay_error_state *overlay;
	struct intel_display_error_state *display;

	/* NOTE(review): presumably the serialised (formatted) dump used by
	 * i915_gpu_coredump_copy_to_buffer() — confirm in i915_gpu_error.c.
	 */
	struct scatterlist *sgl, *fit;
};
189 
/*
 * Per-device error bookkeeping: the stored first error state and
 * global/per-engine reset counters.
 */
struct i915_gpu_error {
	/* For reset and error_state handling. */
	spinlock_t lock;
	/* Protected by the above dev->gpu_error.lock. */
	struct i915_gpu_coredump *first_error;

	atomic_t pending_fb_pin;

	/** Number of times the device has been reset (global) */
	atomic_t reset_count;

	/** Number of times an engine has been reset */
	atomic_t reset_engine_count[I915_NUM_ENGINES];
};
204 
/*
 * Output buffer used when formatting an error state into text
 * (see i915_error_printf()). Writes land in @buf and are flushed
 * into the scatterlist chain @sgl/@cur/@end.
 */
struct drm_i915_error_state_buf {
	struct drm_i915_private *i915;
	struct scatterlist *sgl, *cur, *end;

	char *buf;
	size_t bytes;	/* bytes used in @buf */
	size_t size;	/* capacity of @buf */
	loff_t iter;

	/* First error encountered while emitting; sticky */
	int err;
};
216 
217 #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
218 
219 __printf(2, 3)
220 void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);
221 
222 struct i915_gpu_coredump *i915_gpu_coredump(struct intel_gt *gt,
223 					    intel_engine_mask_t engine_mask);
224 void i915_capture_error_state(struct intel_gt *gt,
225 			      intel_engine_mask_t engine_mask);
226 
227 struct i915_gpu_coredump *
228 i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp);
229 
230 struct intel_gt_coredump *
231 intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp);
232 
233 struct intel_engine_coredump *
234 intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp);
235 
236 struct intel_engine_capture_vma *
237 intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
238 				  struct i915_request *rq,
239 				  gfp_t gfp);
240 
241 void intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
242 				   struct intel_engine_capture_vma *capture,
243 				   struct i915_vma_compress *compress);
244 
245 struct i915_vma_compress *
246 i915_vma_capture_prepare(struct intel_gt_coredump *gt);
247 
248 void i915_vma_capture_finish(struct intel_gt_coredump *gt,
249 			     struct i915_vma_compress *compress);
250 
251 void i915_error_state_store(struct i915_gpu_coredump *error);
252 
253 static inline struct i915_gpu_coredump *
i915_gpu_coredump_get(struct i915_gpu_coredump * gpu)254 i915_gpu_coredump_get(struct i915_gpu_coredump *gpu)
255 {
256 	kref_get(&gpu->ref);
257 	return gpu;
258 }
259 
260 ssize_t
261 i915_gpu_coredump_copy_to_buffer(struct i915_gpu_coredump *error,
262 				 char *buf, loff_t offset, size_t count);
263 
264 void __i915_gpu_coredump_free(struct kref *kref);
i915_gpu_coredump_put(struct i915_gpu_coredump * gpu)265 static inline void i915_gpu_coredump_put(struct i915_gpu_coredump *gpu)
266 {
267 	if (gpu)
268 		kref_put(&gpu->ref, __i915_gpu_coredump_free);
269 }
270 
271 struct i915_gpu_coredump *i915_first_error_state(struct drm_i915_private *i915);
272 void i915_reset_error_state(struct drm_i915_private *i915);
273 void i915_disable_error_state(struct drm_i915_private *i915, int err);
274 
275 #else
276 
/* Error capture compiled out: capturing an error state is a no-op. */
static inline void
i915_capture_error_state(struct intel_gt *gt, intel_engine_mask_t engine_mask)
{
}
281 
/* Error capture compiled out: no dump is ever allocated. */
static inline struct i915_gpu_coredump *
i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp)
{
	return NULL;
}
287 
/* Error capture compiled out: no GT dump is ever allocated. */
static inline struct intel_gt_coredump *
intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp)
{
	return NULL;
}
293 
/* Error capture compiled out: no engine dump is ever allocated. */
static inline struct intel_engine_coredump *
intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp)
{
	return NULL;
}
299 
/* Error capture compiled out: nothing to record for the request. */
static inline struct intel_engine_capture_vma *
intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
				  struct i915_request *rq,
				  gfp_t gfp)
{
	return NULL;
}
307 
/* Error capture compiled out: VMA capture is a no-op. */
static inline void
intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
			      struct intel_engine_capture_vma *capture,
			      struct i915_vma_compress *compress)
{
}
314 
/* Error capture compiled out: no compression state is needed. */
static inline struct i915_vma_compress *
i915_vma_capture_prepare(struct intel_gt_coredump *gt)
{
	return NULL;
}
320 
/* Error capture compiled out: nothing to tear down. */
static inline void
i915_vma_capture_finish(struct intel_gt_coredump *gt,
			struct i915_vma_compress *compress)
{
}
326 
/* Error capture compiled out: nothing to store. */
static inline void
i915_error_state_store(struct i915_gpu_coredump *error)
{
}
331 
/* Error capture compiled out: @gpu is always NULL, nothing to release. */
static inline void i915_gpu_coredump_put(struct i915_gpu_coredump *gpu)
{
}
335 
/* Error capture compiled out: report the facility as unavailable. */
static inline struct i915_gpu_coredump *
i915_first_error_state(struct drm_i915_private *i915)
{
	return ERR_PTR(-ENODEV);
}
341 
/* Error capture compiled out: no stored error state to clear. */
static inline void i915_reset_error_state(struct drm_i915_private *i915)
{
}
345 
/* Error capture compiled out: disabling capture is a no-op. */
static inline void i915_disable_error_state(struct drm_i915_private *i915,
					    int err)
{
}
350 
351 #endif /* IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) */
352 
353 #endif /* _I915_GPU_ERROR_H_ */
354