xref: /openbsd/sys/dev/pci/drm/amd/amdkfd/kfd_debug.c (revision f005ef32)
1*f005ef32Sjsg /*
2*f005ef32Sjsg  * Copyright 2023 Advanced Micro Devices, Inc.
3*f005ef32Sjsg  *
4*f005ef32Sjsg  * Permission is hereby granted, free of charge, to any person obtaining a
5*f005ef32Sjsg  * copy of this software and associated documentation files (the "Software"),
6*f005ef32Sjsg  * to deal in the Software without restriction, including without limitation
7*f005ef32Sjsg  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8*f005ef32Sjsg  * and/or sell copies of the Software, and to permit persons to whom the
9*f005ef32Sjsg  * Software is furnished to do so, subject to the following conditions:
10*f005ef32Sjsg  *
11*f005ef32Sjsg  * The above copyright notice and this permission notice shall be included in
12*f005ef32Sjsg  * all copies or substantial portions of the Software.
13*f005ef32Sjsg  *
14*f005ef32Sjsg  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15*f005ef32Sjsg  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16*f005ef32Sjsg  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17*f005ef32Sjsg  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18*f005ef32Sjsg  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19*f005ef32Sjsg  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20*f005ef32Sjsg  * OTHER DEALINGS IN THE SOFTWARE.
21*f005ef32Sjsg  */
22*f005ef32Sjsg 
23*f005ef32Sjsg #include "kfd_debug.h"
24*f005ef32Sjsg #include "kfd_device_queue_manager.h"
25*f005ef32Sjsg #include "kfd_topology.h"
26*f005ef32Sjsg #include <linux/file.h>
27*f005ef32Sjsg #include <uapi/linux/kfd_ioctl.h>
28*f005ef32Sjsg 
29*f005ef32Sjsg #define MAX_WATCH_ADDRESSES	4
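/*
 * Note on the define above: MAX_WATCH_ADDRESSES is the number of hardware
 * address-watch slots per device.  kfd_dbg_get_dev_watch_id() below hands
 * them out as individual bits of the per-device kfd->alloc_watch_ids mask
 * (mirrored in the owning pdd->alloc_watch_ids), so at most four watchpoints
 * can be armed on a device at any one time.
 */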
30*f005ef32Sjsg 
31*f005ef32Sjsg int kfd_dbg_ev_query_debug_event(struct kfd_process *process,
32*f005ef32Sjsg 		      unsigned int *queue_id,
33*f005ef32Sjsg 		      unsigned int *gpu_id,
34*f005ef32Sjsg 		      uint64_t exception_clear_mask,
35*f005ef32Sjsg 		      uint64_t *event_status)
36*f005ef32Sjsg {
37*f005ef32Sjsg 	struct process_queue_manager *pqm;
38*f005ef32Sjsg 	struct process_queue_node *pqn;
39*f005ef32Sjsg 	int i;
40*f005ef32Sjsg 
41*f005ef32Sjsg 	if (!(process && process->debug_trap_enabled))
42*f005ef32Sjsg 		return -ENODATA;
43*f005ef32Sjsg 
44*f005ef32Sjsg 	mutex_lock(&process->event_mutex);
45*f005ef32Sjsg 	*event_status = 0;
46*f005ef32Sjsg 	*queue_id = 0;
47*f005ef32Sjsg 	*gpu_id = 0;
48*f005ef32Sjsg 
49*f005ef32Sjsg 	/* find and report queue events */
50*f005ef32Sjsg 	pqm = &process->pqm;
51*f005ef32Sjsg 	list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
52*f005ef32Sjsg 		uint64_t tmp = process->exception_enable_mask;
53*f005ef32Sjsg 
54*f005ef32Sjsg 		if (!pqn->q)
55*f005ef32Sjsg 			continue;
56*f005ef32Sjsg 
57*f005ef32Sjsg 		tmp &= pqn->q->properties.exception_status;
58*f005ef32Sjsg 
59*f005ef32Sjsg 		if (!tmp)
60*f005ef32Sjsg 			continue;
61*f005ef32Sjsg 
62*f005ef32Sjsg 		*event_status = pqn->q->properties.exception_status;
63*f005ef32Sjsg 		*queue_id = pqn->q->properties.queue_id;
64*f005ef32Sjsg 		*gpu_id = pqn->q->device->id;
65*f005ef32Sjsg 		pqn->q->properties.exception_status &= ~exception_clear_mask;
66*f005ef32Sjsg 		goto out;
67*f005ef32Sjsg 	}
68*f005ef32Sjsg 
69*f005ef32Sjsg 	/* find and report device events */
70*f005ef32Sjsg 	for (i = 0; i < process->n_pdds; i++) {
71*f005ef32Sjsg 		struct kfd_process_device *pdd = process->pdds[i];
72*f005ef32Sjsg 		uint64_t tmp = process->exception_enable_mask
73*f005ef32Sjsg 						& pdd->exception_status;
74*f005ef32Sjsg 
75*f005ef32Sjsg 		if (!tmp)
76*f005ef32Sjsg 			continue;
77*f005ef32Sjsg 
78*f005ef32Sjsg 		*event_status = pdd->exception_status;
79*f005ef32Sjsg 		*gpu_id = pdd->dev->id;
80*f005ef32Sjsg 		pdd->exception_status &= ~exception_clear_mask;
81*f005ef32Sjsg 		goto out;
82*f005ef32Sjsg 	}
83*f005ef32Sjsg 
84*f005ef32Sjsg 	/* report process events */
85*f005ef32Sjsg 	if (process->exception_enable_mask & process->exception_status) {
86*f005ef32Sjsg 		*event_status = process->exception_status;
87*f005ef32Sjsg 		process->exception_status &= ~exception_clear_mask;
88*f005ef32Sjsg 	}
89*f005ef32Sjsg 
90*f005ef32Sjsg out:
91*f005ef32Sjsg 	mutex_unlock(&process->event_mutex);
92*f005ef32Sjsg 	return *event_status ? 0 : -EAGAIN;
93*f005ef32Sjsg }
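/*
 * Summary of the query above: pending exceptions are reported one source at a
 * time, scanning queues first, then devices, then the process itself.  A
 * source is selected only if it has a pending exception the debugger
 * subscribed to via exception_enable_mask; its full exception_status is then
 * reported and the bits in exception_clear_mask are cleared.  Returns 0 when
 * an event was found, -EAGAIN otherwise.
 */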
94*f005ef32Sjsg 
95*f005ef32Sjsg void debug_event_write_work_handler(struct work_struct *work)
96*f005ef32Sjsg {
97*f005ef32Sjsg 	struct kfd_process *process;
98*f005ef32Sjsg 
99*f005ef32Sjsg 	static const char write_data = '.';
100*f005ef32Sjsg 	loff_t pos = 0;
101*f005ef32Sjsg 
102*f005ef32Sjsg 	process = container_of(work,
103*f005ef32Sjsg 			struct kfd_process,
104*f005ef32Sjsg 			debug_event_workarea);
105*f005ef32Sjsg 
106*f005ef32Sjsg 	kernel_write(process->dbg_ev_file, &write_data, 1, &pos);
107*f005ef32Sjsg }
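/*
 * This work item writes the same single-byte wakeup that kfd_dbg_ev_raise()
 * writes inline.  kfd_set_dbg_ev_from_interrupt() raises events with
 * use_worker = true so the kernel_write() to the debugger's event file runs
 * from this worker rather than directly in the interrupt-handling path.
 */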
108*f005ef32Sjsg 
109*f005ef32Sjsg /* Update process/device/queue exception status; write to the event file
110*f005ef32Sjsg  * descriptor only if the raised exception is enabled in exception_enable_mask.
111*f005ef32Sjsg  */
112*f005ef32Sjsg bool kfd_dbg_ev_raise(uint64_t event_mask,
113*f005ef32Sjsg 			struct kfd_process *process, struct kfd_node *dev,
114*f005ef32Sjsg 			unsigned int source_id, bool use_worker,
115*f005ef32Sjsg 			void *exception_data, size_t exception_data_size)
116*f005ef32Sjsg {
117*f005ef32Sjsg 	struct process_queue_manager *pqm;
118*f005ef32Sjsg 	struct process_queue_node *pqn;
119*f005ef32Sjsg 	int i;
120*f005ef32Sjsg 	static const char write_data = '.';
121*f005ef32Sjsg 	loff_t pos = 0;
122*f005ef32Sjsg 	bool is_subscribed = true;
123*f005ef32Sjsg 
124*f005ef32Sjsg 	if (!(process && process->debug_trap_enabled))
125*f005ef32Sjsg 		return false;
126*f005ef32Sjsg 
127*f005ef32Sjsg 	mutex_lock(&process->event_mutex);
128*f005ef32Sjsg 
129*f005ef32Sjsg 	if (event_mask & KFD_EC_MASK_DEVICE) {
130*f005ef32Sjsg 		for (i = 0; i < process->n_pdds; i++) {
131*f005ef32Sjsg 			struct kfd_process_device *pdd = process->pdds[i];
132*f005ef32Sjsg 
133*f005ef32Sjsg 			if (pdd->dev != dev)
134*f005ef32Sjsg 				continue;
135*f005ef32Sjsg 
136*f005ef32Sjsg 			pdd->exception_status |= event_mask & KFD_EC_MASK_DEVICE;
137*f005ef32Sjsg 
138*f005ef32Sjsg 			if (event_mask & KFD_EC_MASK(EC_DEVICE_MEMORY_VIOLATION)) {
139*f005ef32Sjsg 				if (!pdd->vm_fault_exc_data) {
140*f005ef32Sjsg 					pdd->vm_fault_exc_data = kmemdup(
141*f005ef32Sjsg 							exception_data,
142*f005ef32Sjsg 							exception_data_size,
143*f005ef32Sjsg 							GFP_KERNEL);
144*f005ef32Sjsg 					if (!pdd->vm_fault_exc_data)
145*f005ef32Sjsg 						pr_debug("Failed to allocate exception data memory");
146*f005ef32Sjsg 				} else {
147*f005ef32Sjsg 					pr_debug("Debugger exception data not saved\n");
148*f005ef32Sjsg 					print_hex_dump_bytes("exception data: ",
149*f005ef32Sjsg 							DUMP_PREFIX_OFFSET,
150*f005ef32Sjsg 							exception_data,
151*f005ef32Sjsg 							exception_data_size);
152*f005ef32Sjsg 				}
153*f005ef32Sjsg 			}
154*f005ef32Sjsg 			break;
155*f005ef32Sjsg 		}
156*f005ef32Sjsg 	} else if (event_mask & KFD_EC_MASK_PROCESS) {
157*f005ef32Sjsg 		process->exception_status |= event_mask & KFD_EC_MASK_PROCESS;
158*f005ef32Sjsg 	} else {
159*f005ef32Sjsg 		pqm = &process->pqm;
160*f005ef32Sjsg 		list_for_each_entry(pqn, &pqm->queues,
161*f005ef32Sjsg 				process_queue_list) {
162*f005ef32Sjsg 			int target_id;
163*f005ef32Sjsg 
164*f005ef32Sjsg 			if (!pqn->q)
165*f005ef32Sjsg 				continue;
166*f005ef32Sjsg 
167*f005ef32Sjsg 			target_id = event_mask & KFD_EC_MASK(EC_QUEUE_NEW) ?
168*f005ef32Sjsg 					pqn->q->properties.queue_id :
169*f005ef32Sjsg 							pqn->q->doorbell_id;
170*f005ef32Sjsg 
171*f005ef32Sjsg 			if (pqn->q->device != dev || target_id != source_id)
172*f005ef32Sjsg 				continue;
173*f005ef32Sjsg 
174*f005ef32Sjsg 			pqn->q->properties.exception_status |= event_mask;
175*f005ef32Sjsg 			break;
176*f005ef32Sjsg 		}
177*f005ef32Sjsg 	}
178*f005ef32Sjsg 
179*f005ef32Sjsg 	if (process->exception_enable_mask & event_mask) {
180*f005ef32Sjsg 		if (use_worker)
181*f005ef32Sjsg 			schedule_work(&process->debug_event_workarea);
182*f005ef32Sjsg 		else
183*f005ef32Sjsg 			kernel_write(process->dbg_ev_file,
184*f005ef32Sjsg 					&write_data,
185*f005ef32Sjsg 					1,
186*f005ef32Sjsg 					&pos);
187*f005ef32Sjsg 	} else {
188*f005ef32Sjsg 		is_subscribed = false;
189*f005ef32Sjsg 	}
190*f005ef32Sjsg 
191*f005ef32Sjsg 	mutex_unlock(&process->event_mutex);
192*f005ef32Sjsg 
193*f005ef32Sjsg 	return is_subscribed;
194*f005ef32Sjsg }
195*f005ef32Sjsg 
196*f005ef32Sjsg /* Set the pending event queue entry from the ring entry */
197*f005ef32Sjsg bool kfd_set_dbg_ev_from_interrupt(struct kfd_node *dev,
198*f005ef32Sjsg 				   unsigned int pasid,
199*f005ef32Sjsg 				   uint32_t doorbell_id,
200*f005ef32Sjsg 				   uint64_t trap_mask,
201*f005ef32Sjsg 				   void *exception_data,
202*f005ef32Sjsg 				   size_t exception_data_size)
203*f005ef32Sjsg {
204*f005ef32Sjsg 	struct kfd_process *p;
205*f005ef32Sjsg 	bool signaled_to_debugger_or_runtime = false;
206*f005ef32Sjsg 
207*f005ef32Sjsg 	p = kfd_lookup_process_by_pasid(pasid);
208*f005ef32Sjsg 
209*f005ef32Sjsg 	if (!p)
210*f005ef32Sjsg 		return false;
211*f005ef32Sjsg 
212*f005ef32Sjsg 	if (!kfd_dbg_ev_raise(trap_mask, p, dev, doorbell_id, true,
213*f005ef32Sjsg 			      exception_data, exception_data_size)) {
214*f005ef32Sjsg 		struct process_queue_manager *pqm;
215*f005ef32Sjsg 		struct process_queue_node *pqn;
216*f005ef32Sjsg 
217*f005ef32Sjsg 		if (!!(trap_mask & KFD_EC_MASK_QUEUE) &&
218*f005ef32Sjsg 		       p->runtime_info.runtime_state == DEBUG_RUNTIME_STATE_ENABLED) {
219*f005ef32Sjsg 			mutex_lock(&p->mutex);
220*f005ef32Sjsg 
221*f005ef32Sjsg 			pqm = &p->pqm;
222*f005ef32Sjsg 			list_for_each_entry(pqn, &pqm->queues,
223*f005ef32Sjsg 							process_queue_list) {
224*f005ef32Sjsg 
225*f005ef32Sjsg 				if (!(pqn->q && pqn->q->device == dev &&
226*f005ef32Sjsg 				      pqn->q->doorbell_id == doorbell_id))
227*f005ef32Sjsg 					continue;
228*f005ef32Sjsg 
229*f005ef32Sjsg 				kfd_send_exception_to_runtime(p, pqn->q->properties.queue_id,
230*f005ef32Sjsg 							      trap_mask);
231*f005ef32Sjsg 
232*f005ef32Sjsg 				signaled_to_debugger_or_runtime = true;
233*f005ef32Sjsg 
234*f005ef32Sjsg 				break;
235*f005ef32Sjsg 			}
236*f005ef32Sjsg 
237*f005ef32Sjsg 			mutex_unlock(&p->mutex);
238*f005ef32Sjsg 		} else if (trap_mask & KFD_EC_MASK(EC_DEVICE_MEMORY_VIOLATION)) {
239*f005ef32Sjsg 			kfd_dqm_evict_pasid(dev->dqm, p->pasid);
240*f005ef32Sjsg 			kfd_signal_vm_fault_event(dev, p->pasid, NULL,
241*f005ef32Sjsg 							exception_data);
242*f005ef32Sjsg 
243*f005ef32Sjsg 			signaled_to_debugger_or_runtime = true;
244*f005ef32Sjsg 		}
245*f005ef32Sjsg 	} else {
246*f005ef32Sjsg 		signaled_to_debugger_or_runtime = true;
247*f005ef32Sjsg 	}
248*f005ef32Sjsg 
249*f005ef32Sjsg 	kfd_unref_process(p);
250*f005ef32Sjsg 
251*f005ef32Sjsg 	return signaled_to_debugger_or_runtime;
252*f005ef32Sjsg }
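/*
 * Dispatch order above: if kfd_dbg_ev_raise() reports that the debugger
 * subscribed to the trap, nothing more is needed.  Otherwise a queue
 * exception on a runtime-enabled process is forwarded to the runtime for the
 * matching queue, and an unsubscribed memory violation evicts the offending
 * pasid and signals a VM fault event.  The return value tells the caller
 * whether anyone (debugger or runtime) was notified.
 */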
253*f005ef32Sjsg 
254*f005ef32Sjsg int kfd_dbg_send_exception_to_runtime(struct kfd_process *p,
255*f005ef32Sjsg 					unsigned int dev_id,
256*f005ef32Sjsg 					unsigned int queue_id,
257*f005ef32Sjsg 					uint64_t error_reason)
258*f005ef32Sjsg {
259*f005ef32Sjsg 	if (error_reason & KFD_EC_MASK(EC_DEVICE_MEMORY_VIOLATION)) {
260*f005ef32Sjsg 		struct kfd_process_device *pdd = NULL;
261*f005ef32Sjsg 		struct kfd_hsa_memory_exception_data *data;
262*f005ef32Sjsg 		int i;
263*f005ef32Sjsg 
264*f005ef32Sjsg 		for (i = 0; i < p->n_pdds; i++) {
265*f005ef32Sjsg 			if (p->pdds[i]->dev->id == dev_id) {
266*f005ef32Sjsg 				pdd = p->pdds[i];
267*f005ef32Sjsg 				break;
268*f005ef32Sjsg 			}
269*f005ef32Sjsg 		}
270*f005ef32Sjsg 
271*f005ef32Sjsg 		if (!pdd)
272*f005ef32Sjsg 			return -ENODEV;
273*f005ef32Sjsg 
274*f005ef32Sjsg 		data = (struct kfd_hsa_memory_exception_data *)
275*f005ef32Sjsg 						pdd->vm_fault_exc_data;
276*f005ef32Sjsg 
277*f005ef32Sjsg 		kfd_dqm_evict_pasid(pdd->dev->dqm, p->pasid);
278*f005ef32Sjsg 		kfd_signal_vm_fault_event(pdd->dev, p->pasid, NULL, data);
279*f005ef32Sjsg 		error_reason &= ~KFD_EC_MASK(EC_DEVICE_MEMORY_VIOLATION);
280*f005ef32Sjsg 	}
281*f005ef32Sjsg 
282*f005ef32Sjsg 	if (error_reason & (KFD_EC_MASK(EC_PROCESS_RUNTIME))) {
283*f005ef32Sjsg 		/*
284*f005ef32Sjsg 		 * Blocking on runtime_enable_sema should only happen after the
285*f005ef32Sjsg 		 * debugger has received the runtime enable notice; release it here.
286*f005ef32Sjsg 		 */
287*f005ef32Sjsg 		up(&p->runtime_enable_sema);
288*f005ef32Sjsg 		error_reason &= ~KFD_EC_MASK(EC_PROCESS_RUNTIME);
289*f005ef32Sjsg 	}
290*f005ef32Sjsg 
291*f005ef32Sjsg 	if (error_reason)
292*f005ef32Sjsg 		return kfd_send_exception_to_runtime(p, queue_id, error_reason);
293*f005ef32Sjsg 
294*f005ef32Sjsg 	return 0;
295*f005ef32Sjsg }
296*f005ef32Sjsg 
297*f005ef32Sjsg static int kfd_dbg_set_queue_workaround(struct queue *q, bool enable)
298*f005ef32Sjsg {
299*f005ef32Sjsg 	struct mqd_update_info minfo = {0};
300*f005ef32Sjsg 	int err;
301*f005ef32Sjsg 
302*f005ef32Sjsg 	if (!q)
303*f005ef32Sjsg 		return 0;
304*f005ef32Sjsg 
305*f005ef32Sjsg 	if (!kfd_dbg_has_cwsr_workaround(q->device))
306*f005ef32Sjsg 		return 0;
307*f005ef32Sjsg 
308*f005ef32Sjsg 	if (enable && q->properties.is_user_cu_masked)
309*f005ef32Sjsg 		return -EBUSY;
310*f005ef32Sjsg 
311*f005ef32Sjsg 	minfo.update_flag = enable ? UPDATE_FLAG_DBG_WA_ENABLE : UPDATE_FLAG_DBG_WA_DISABLE;
312*f005ef32Sjsg 
313*f005ef32Sjsg 	q->properties.is_dbg_wa = enable;
314*f005ef32Sjsg 	err = q->device->dqm->ops.update_queue(q->device->dqm, q, &minfo);
315*f005ef32Sjsg 	if (err)
316*f005ef32Sjsg 		q->properties.is_dbg_wa = false;
317*f005ef32Sjsg 
318*f005ef32Sjsg 	return err;
319*f005ef32Sjsg }
320*f005ef32Sjsg 
321*f005ef32Sjsg static int kfd_dbg_set_workaround(struct kfd_process *target, bool enable)
322*f005ef32Sjsg {
323*f005ef32Sjsg 	struct process_queue_manager *pqm = &target->pqm;
324*f005ef32Sjsg 	struct process_queue_node *pqn;
325*f005ef32Sjsg 	int r = 0;
326*f005ef32Sjsg 
327*f005ef32Sjsg 	list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
328*f005ef32Sjsg 		r = kfd_dbg_set_queue_workaround(pqn->q, enable);
329*f005ef32Sjsg 		if (enable && r)
330*f005ef32Sjsg 			goto unwind;
331*f005ef32Sjsg 	}
332*f005ef32Sjsg 
333*f005ef32Sjsg 	return 0;
334*f005ef32Sjsg 
335*f005ef32Sjsg unwind:
336*f005ef32Sjsg 	list_for_each_entry(pqn, &pqm->queues, process_queue_list)
337*f005ef32Sjsg 		kfd_dbg_set_queue_workaround(pqn->q, false);
338*f005ef32Sjsg 
339*f005ef32Sjsg 	if (enable)
340*f005ef32Sjsg 		target->runtime_info.runtime_state = r == -EBUSY ?
341*f005ef32Sjsg 				DEBUG_RUNTIME_STATE_ENABLED_BUSY :
342*f005ef32Sjsg 				DEBUG_RUNTIME_STATE_ENABLED_ERROR;
343*f005ef32Sjsg 
344*f005ef32Sjsg 	return r;
345*f005ef32Sjsg }
346*f005ef32Sjsg 
347*f005ef32Sjsg int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd, bool sq_trap_en)
348*f005ef32Sjsg {
349*f005ef32Sjsg 	uint32_t spi_dbg_cntl = pdd->spi_dbg_override | pdd->spi_dbg_launch_mode;
350*f005ef32Sjsg 	uint32_t flags = pdd->process->dbg_flags;
351*f005ef32Sjsg 
352*f005ef32Sjsg 	if (!kfd_dbg_is_per_vmid_supported(pdd->dev))
353*f005ef32Sjsg 		return 0;
354*f005ef32Sjsg 
355*f005ef32Sjsg 	return amdgpu_mes_set_shader_debugger(pdd->dev->adev, pdd->proc_ctx_gpu_addr, spi_dbg_cntl,
356*f005ef32Sjsg 						pdd->watch_points, flags, sq_trap_en);
357*f005ef32Sjsg }
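/*
 * kfd_dbg_set_mes_debug_mode() is a no-op on devices without per-VMID debug
 * support; otherwise it hands the accumulated SPI debug override/launch mode,
 * the per-device watch points, the process debug flags and the SQ trap enable
 * bit to the MES firmware via amdgpu_mes_set_shader_debugger().
 */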
358*f005ef32Sjsg 
359*f005ef32Sjsg #define KFD_DEBUGGER_INVALID_WATCH_POINT_ID -1
360*f005ef32Sjsg static int kfd_dbg_get_dev_watch_id(struct kfd_process_device *pdd, int *watch_id)
361*f005ef32Sjsg {
362*f005ef32Sjsg 	int i;
363*f005ef32Sjsg 
364*f005ef32Sjsg 	*watch_id = KFD_DEBUGGER_INVALID_WATCH_POINT_ID;
365*f005ef32Sjsg 
366*f005ef32Sjsg 	spin_lock(&pdd->dev->kfd->watch_points_lock);
367*f005ef32Sjsg 
368*f005ef32Sjsg 	for (i = 0; i < MAX_WATCH_ADDRESSES; i++) {
369*f005ef32Sjsg 		/* device watchpoint in use so skip */
370*f005ef32Sjsg 		if ((pdd->dev->kfd->alloc_watch_ids >> i) & 0x1)
371*f005ef32Sjsg 			continue;
372*f005ef32Sjsg 
373*f005ef32Sjsg 		pdd->alloc_watch_ids |= 0x1 << i;
374*f005ef32Sjsg 		pdd->dev->kfd->alloc_watch_ids |= 0x1 << i;
375*f005ef32Sjsg 		*watch_id = i;
376*f005ef32Sjsg 		spin_unlock(&pdd->dev->kfd->watch_points_lock);
377*f005ef32Sjsg 		return 0;
378*f005ef32Sjsg 	}
379*f005ef32Sjsg 
380*f005ef32Sjsg 	spin_unlock(&pdd->dev->kfd->watch_points_lock);
381*f005ef32Sjsg 
382*f005ef32Sjsg 	return -ENOMEM;
383*f005ef32Sjsg }
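/*
 * Worked example (illustrative): if the device's alloc_watch_ids currently
 * reads 0b0101 (slots 0 and 2 in use), the loop above skips i = 0, takes
 * i = 1, sets bit 1 in both pdd->alloc_watch_ids and kfd->alloc_watch_ids,
 * and returns *watch_id = 1.  With all MAX_WATCH_ADDRESSES bits set the
 * function returns -ENOMEM, which kfd_dbg_trap_set_dev_address_watch()
 * propagates to its caller.
 */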
384*f005ef32Sjsg 
385*f005ef32Sjsg static void kfd_dbg_clear_dev_watch_id(struct kfd_process_device *pdd, int watch_id)
386*f005ef32Sjsg {
387*f005ef32Sjsg 	spin_lock(&pdd->dev->kfd->watch_points_lock);
388*f005ef32Sjsg 
389*f005ef32Sjsg 	/* process owns device watch point so safe to clear */
390*f005ef32Sjsg 	if ((pdd->alloc_watch_ids >> watch_id) & 0x1) {
391*f005ef32Sjsg 		pdd->alloc_watch_ids &= ~(0x1 << watch_id);
392*f005ef32Sjsg 		pdd->dev->kfd->alloc_watch_ids &= ~(0x1 << watch_id);
393*f005ef32Sjsg 	}
394*f005ef32Sjsg 
395*f005ef32Sjsg 	spin_unlock(&pdd->dev->kfd->watch_points_lock);
396*f005ef32Sjsg }
397*f005ef32Sjsg 
398*f005ef32Sjsg static bool kfd_dbg_owns_dev_watch_id(struct kfd_process_device *pdd, int watch_id)
399*f005ef32Sjsg {
400*f005ef32Sjsg 	bool owns_watch_id = false;
401*f005ef32Sjsg 
402*f005ef32Sjsg 	spin_lock(&pdd->dev->kfd->watch_points_lock);
403*f005ef32Sjsg 	owns_watch_id = watch_id < MAX_WATCH_ADDRESSES &&
404*f005ef32Sjsg 			((pdd->alloc_watch_ids >> watch_id) & 0x1);
405*f005ef32Sjsg 
406*f005ef32Sjsg 	spin_unlock(&pdd->dev->kfd->watch_points_lock);
407*f005ef32Sjsg 
408*f005ef32Sjsg 	return owns_watch_id;
409*f005ef32Sjsg }
410*f005ef32Sjsg 
411*f005ef32Sjsg int kfd_dbg_trap_clear_dev_address_watch(struct kfd_process_device *pdd,
412*f005ef32Sjsg 					uint32_t watch_id)
413*f005ef32Sjsg {
414*f005ef32Sjsg 	int r;
415*f005ef32Sjsg 
416*f005ef32Sjsg 	if (!kfd_dbg_owns_dev_watch_id(pdd, watch_id))
417*f005ef32Sjsg 		return -EINVAL;
418*f005ef32Sjsg 
419*f005ef32Sjsg 	if (!pdd->dev->kfd->shared_resources.enable_mes) {
420*f005ef32Sjsg 		r = debug_lock_and_unmap(pdd->dev->dqm);
421*f005ef32Sjsg 		if (r)
422*f005ef32Sjsg 			return r;
423*f005ef32Sjsg 	}
424*f005ef32Sjsg 
425*f005ef32Sjsg 	amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
426*f005ef32Sjsg 	pdd->watch_points[watch_id] = pdd->dev->kfd2kgd->clear_address_watch(
427*f005ef32Sjsg 							pdd->dev->adev,
428*f005ef32Sjsg 							watch_id);
429*f005ef32Sjsg 	amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
430*f005ef32Sjsg 
431*f005ef32Sjsg 	if (!pdd->dev->kfd->shared_resources.enable_mes)
432*f005ef32Sjsg 		r = debug_map_and_unlock(pdd->dev->dqm);
433*f005ef32Sjsg 	else
434*f005ef32Sjsg 		r = kfd_dbg_set_mes_debug_mode(pdd, true);
435*f005ef32Sjsg 
436*f005ef32Sjsg 	kfd_dbg_clear_dev_watch_id(pdd, watch_id);
437*f005ef32Sjsg 
438*f005ef32Sjsg 	return r;
439*f005ef32Sjsg }
440*f005ef32Sjsg 
441*f005ef32Sjsg int kfd_dbg_trap_set_dev_address_watch(struct kfd_process_device *pdd,
442*f005ef32Sjsg 					uint64_t watch_address,
443*f005ef32Sjsg 					uint32_t watch_address_mask,
444*f005ef32Sjsg 					uint32_t *watch_id,
445*f005ef32Sjsg 					uint32_t watch_mode)
446*f005ef32Sjsg {
447*f005ef32Sjsg 	int xcc_id, r = kfd_dbg_get_dev_watch_id(pdd, watch_id);
448*f005ef32Sjsg 	uint32_t xcc_mask = pdd->dev->xcc_mask;
449*f005ef32Sjsg 
450*f005ef32Sjsg 	if (r)
451*f005ef32Sjsg 		return r;
452*f005ef32Sjsg 
453*f005ef32Sjsg 	if (!pdd->dev->kfd->shared_resources.enable_mes) {
454*f005ef32Sjsg 		r = debug_lock_and_unmap(pdd->dev->dqm);
455*f005ef32Sjsg 		if (r) {
456*f005ef32Sjsg 			kfd_dbg_clear_dev_watch_id(pdd, *watch_id);
457*f005ef32Sjsg 			return r;
458*f005ef32Sjsg 		}
459*f005ef32Sjsg 	}
460*f005ef32Sjsg 
461*f005ef32Sjsg 	amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
462*f005ef32Sjsg 	for_each_inst(xcc_id, xcc_mask)
463*f005ef32Sjsg 		pdd->watch_points[*watch_id] = pdd->dev->kfd2kgd->set_address_watch(
464*f005ef32Sjsg 				pdd->dev->adev,
465*f005ef32Sjsg 				watch_address,
466*f005ef32Sjsg 				watch_address_mask,
467*f005ef32Sjsg 				*watch_id,
468*f005ef32Sjsg 				watch_mode,
469*f005ef32Sjsg 				pdd->dev->vm_info.last_vmid_kfd,
470*f005ef32Sjsg 				xcc_id);
471*f005ef32Sjsg 	amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
472*f005ef32Sjsg 
473*f005ef32Sjsg 	if (!pdd->dev->kfd->shared_resources.enable_mes)
474*f005ef32Sjsg 		r = debug_map_and_unlock(pdd->dev->dqm);
475*f005ef32Sjsg 	else
476*f005ef32Sjsg 		r = kfd_dbg_set_mes_debug_mode(pdd, true);
477*f005ef32Sjsg 
478*f005ef32Sjsg 	/* HWS is broken, so there is no point in a HW rollback, but release the watchpoint anyway */
479*f005ef32Sjsg 	if (r)
480*f005ef32Sjsg 		kfd_dbg_clear_dev_watch_id(pdd, *watch_id);
481*f005ef32Sjsg 
482*f005ef32Sjsg 	return 0;
483*f005ef32Sjsg }
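/*
 * Note on the set path above: on the non-MES (HWS) path the queues are
 * unmapped (debug_lock_and_unmap) before the watch registers are programmed
 * with GFXOFF held off, then remapped (debug_map_and_unlock); on the MES path
 * the shader debugger state is refreshed instead.  If the remap fails, the
 * watch id is released again but the function still returns 0, matching the
 * "HWS is broken" comment above.
 */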
484*f005ef32Sjsg 
485*f005ef32Sjsg static void kfd_dbg_clear_process_address_watch(struct kfd_process *target)
486*f005ef32Sjsg {
487*f005ef32Sjsg 	int i, j;
488*f005ef32Sjsg 
489*f005ef32Sjsg 	for (i = 0; i < target->n_pdds; i++)
490*f005ef32Sjsg 		for (j = 0; j < MAX_WATCH_ADDRESSES; j++)
491*f005ef32Sjsg 			kfd_dbg_trap_clear_dev_address_watch(target->pdds[i], j);
492*f005ef32Sjsg }
493*f005ef32Sjsg 
494*f005ef32Sjsg int kfd_dbg_trap_set_flags(struct kfd_process *target, uint32_t *flags)
495*f005ef32Sjsg {
496*f005ef32Sjsg 	uint32_t prev_flags = target->dbg_flags;
497*f005ef32Sjsg 	int i, r = 0, rewind_count = 0;
498*f005ef32Sjsg 
499*f005ef32Sjsg 	for (i = 0; i < target->n_pdds; i++) {
500*f005ef32Sjsg 		if (!kfd_dbg_is_per_vmid_supported(target->pdds[i]->dev) &&
501*f005ef32Sjsg 			(*flags & KFD_DBG_TRAP_FLAG_SINGLE_MEM_OP)) {
502*f005ef32Sjsg 			*flags = prev_flags;
503*f005ef32Sjsg 			return -EACCES;
504*f005ef32Sjsg 		}
505*f005ef32Sjsg 	}
506*f005ef32Sjsg 
507*f005ef32Sjsg 	target->dbg_flags = *flags & KFD_DBG_TRAP_FLAG_SINGLE_MEM_OP;
508*f005ef32Sjsg 	*flags = prev_flags;
509*f005ef32Sjsg 	for (i = 0; i < target->n_pdds; i++) {
510*f005ef32Sjsg 		struct kfd_process_device *pdd = target->pdds[i];
511*f005ef32Sjsg 
512*f005ef32Sjsg 		if (!kfd_dbg_is_per_vmid_supported(pdd->dev))
513*f005ef32Sjsg 			continue;
514*f005ef32Sjsg 
515*f005ef32Sjsg 		if (!pdd->dev->kfd->shared_resources.enable_mes)
516*f005ef32Sjsg 			r = debug_refresh_runlist(pdd->dev->dqm);
517*f005ef32Sjsg 		else
518*f005ef32Sjsg 			r = kfd_dbg_set_mes_debug_mode(pdd, true);
519*f005ef32Sjsg 
520*f005ef32Sjsg 		if (r) {
521*f005ef32Sjsg 			target->dbg_flags = prev_flags;
522*f005ef32Sjsg 			break;
523*f005ef32Sjsg 		}
524*f005ef32Sjsg 
525*f005ef32Sjsg 		rewind_count++;
526*f005ef32Sjsg 	}
527*f005ef32Sjsg 
528*f005ef32Sjsg 	/* Rewind flags */
529*f005ef32Sjsg 	if (r) {
530*f005ef32Sjsg 		target->dbg_flags = prev_flags;
531*f005ef32Sjsg 
532*f005ef32Sjsg 		for (i = 0; i < rewind_count; i++) {
533*f005ef32Sjsg 			struct kfd_process_device *pdd = target->pdds[i];
534*f005ef32Sjsg 
535*f005ef32Sjsg 			if (!kfd_dbg_is_per_vmid_supported(pdd->dev))
536*f005ef32Sjsg 				continue;
537*f005ef32Sjsg 
538*f005ef32Sjsg 			if (!pdd->dev->kfd->shared_resources.enable_mes)
539*f005ef32Sjsg 				debug_refresh_runlist(pdd->dev->dqm);
540*f005ef32Sjsg 			else
541*f005ef32Sjsg 				kfd_dbg_set_mes_debug_mode(pdd, true);
542*f005ef32Sjsg 		}
543*f005ef32Sjsg 	}
544*f005ef32Sjsg 
545*f005ef32Sjsg 	return r;
546*f005ef32Sjsg }
547*f005ef32Sjsg 
548*f005ef32Sjsg /* kfd_dbg_trap_deactivate:
549*f005ef32Sjsg  *	target: target process
550*f005ef32Sjsg  *	unwind: If this is unwinding a failed kfd_dbg_trap_enable()
551*f005ef32Sjsg  *	unwind_count:
552*f005ef32Sjsg  *		If unwind == true, how far down the pdd list we need
553*f005ef32Sjsg  *				to unwind
554*f005ef32Sjsg  *		else: ignored
555*f005ef32Sjsg  */
556*f005ef32Sjsg void kfd_dbg_trap_deactivate(struct kfd_process *target, bool unwind, int unwind_count)
557*f005ef32Sjsg {
558*f005ef32Sjsg 	int i;
559*f005ef32Sjsg 
560*f005ef32Sjsg 	if (!unwind) {
561*f005ef32Sjsg 		uint32_t flags = 0;
562*f005ef32Sjsg 		int resume_count = resume_queues(target, 0, NULL);
563*f005ef32Sjsg 
564*f005ef32Sjsg 		if (resume_count)
565*f005ef32Sjsg 			pr_debug("Resumed %d queues\n", resume_count);
566*f005ef32Sjsg 
567*f005ef32Sjsg 		cancel_work_sync(&target->debug_event_workarea);
568*f005ef32Sjsg 		kfd_dbg_clear_process_address_watch(target);
569*f005ef32Sjsg 		kfd_dbg_trap_set_wave_launch_mode(target, 0);
570*f005ef32Sjsg 
571*f005ef32Sjsg 		kfd_dbg_trap_set_flags(target, &flags);
572*f005ef32Sjsg 	}
573*f005ef32Sjsg 
574*f005ef32Sjsg 	for (i = 0; i < target->n_pdds; i++) {
575*f005ef32Sjsg 		struct kfd_process_device *pdd = target->pdds[i];
576*f005ef32Sjsg 
577*f005ef32Sjsg 		/* If this is an unwind, and we have unwound the required
578*f005ef32Sjsg 		 * enable calls on the pdd list, we need to stop now
579*f005ef32Sjsg 		 * otherwise we may mess up another debugger session.
580*f005ef32Sjsg 		 */
581*f005ef32Sjsg 		if (unwind && i == unwind_count)
582*f005ef32Sjsg 			break;
583*f005ef32Sjsg 
584*f005ef32Sjsg 		kfd_process_set_trap_debug_flag(&pdd->qpd, false);
585*f005ef32Sjsg 
586*f005ef32Sjsg 		/* GFX off is already disabled by debug activate if not RLC restore supported. */
587*f005ef32Sjsg 		if (kfd_dbg_is_rlc_restore_supported(pdd->dev))
588*f005ef32Sjsg 			amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
589*f005ef32Sjsg 		pdd->spi_dbg_override =
590*f005ef32Sjsg 				pdd->dev->kfd2kgd->disable_debug_trap(
591*f005ef32Sjsg 				pdd->dev->adev,
592*f005ef32Sjsg 				target->runtime_info.ttmp_setup,
593*f005ef32Sjsg 				pdd->dev->vm_info.last_vmid_kfd);
594*f005ef32Sjsg 		amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
595*f005ef32Sjsg 
596*f005ef32Sjsg 		if (!kfd_dbg_is_per_vmid_supported(pdd->dev) &&
597*f005ef32Sjsg 				release_debug_trap_vmid(pdd->dev->dqm, &pdd->qpd))
598*f005ef32Sjsg 			pr_err("Failed to release debug vmid on [%i]\n", pdd->dev->id);
599*f005ef32Sjsg 
600*f005ef32Sjsg 		if (!pdd->dev->kfd->shared_resources.enable_mes)
601*f005ef32Sjsg 			debug_refresh_runlist(pdd->dev->dqm);
602*f005ef32Sjsg 		else
603*f005ef32Sjsg 			kfd_dbg_set_mes_debug_mode(pdd, !kfd_dbg_has_cwsr_workaround(pdd->dev));
604*f005ef32Sjsg 	}
605*f005ef32Sjsg 
606*f005ef32Sjsg 	kfd_dbg_set_workaround(target, false);
607*f005ef32Sjsg }
608*f005ef32Sjsg 
609*f005ef32Sjsg static void kfd_dbg_clean_exception_status(struct kfd_process *target)
610*f005ef32Sjsg {
611*f005ef32Sjsg 	struct process_queue_manager *pqm;
612*f005ef32Sjsg 	struct process_queue_node *pqn;
613*f005ef32Sjsg 	int i;
614*f005ef32Sjsg 
615*f005ef32Sjsg 	for (i = 0; i < target->n_pdds; i++) {
616*f005ef32Sjsg 		struct kfd_process_device *pdd = target->pdds[i];
617*f005ef32Sjsg 
618*f005ef32Sjsg 		kfd_process_drain_interrupts(pdd);
619*f005ef32Sjsg 
620*f005ef32Sjsg 		pdd->exception_status = 0;
621*f005ef32Sjsg 	}
622*f005ef32Sjsg 
623*f005ef32Sjsg 	pqm = &target->pqm;
624*f005ef32Sjsg 	list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
625*f005ef32Sjsg 		if (!pqn->q)
626*f005ef32Sjsg 			continue;
627*f005ef32Sjsg 
628*f005ef32Sjsg 		pqn->q->properties.exception_status = 0;
629*f005ef32Sjsg 	}
630*f005ef32Sjsg 
631*f005ef32Sjsg 	target->exception_status = 0;
632*f005ef32Sjsg }
633*f005ef32Sjsg 
634*f005ef32Sjsg int kfd_dbg_trap_disable(struct kfd_process *target)
635*f005ef32Sjsg {
636*f005ef32Sjsg 	if (!target->debug_trap_enabled)
637*f005ef32Sjsg 		return 0;
638*f005ef32Sjsg 
639*f005ef32Sjsg 	/*
640*f005ef32Sjsg 	 * If runtime is enabled, deactivate now; otherwise defer deactivation to
641*f005ef32Sjsg 	 * runtime and reset an attached running target's state to enabled for re-attach.
642*f005ef32Sjsg 	 */
643*f005ef32Sjsg 	if (target->runtime_info.runtime_state == DEBUG_RUNTIME_STATE_ENABLED)
644*f005ef32Sjsg 		kfd_dbg_trap_deactivate(target, false, 0);
645*f005ef32Sjsg 	else if (target->runtime_info.runtime_state != DEBUG_RUNTIME_STATE_DISABLED)
646*f005ef32Sjsg 		target->runtime_info.runtime_state = DEBUG_RUNTIME_STATE_ENABLED;
647*f005ef32Sjsg 
648*f005ef32Sjsg 	fput(target->dbg_ev_file);
649*f005ef32Sjsg 	target->dbg_ev_file = NULL;
650*f005ef32Sjsg 
651*f005ef32Sjsg 	if (target->debugger_process) {
652*f005ef32Sjsg 		atomic_dec(&target->debugger_process->debugged_process_count);
653*f005ef32Sjsg 		target->debugger_process = NULL;
654*f005ef32Sjsg 	}
655*f005ef32Sjsg 
656*f005ef32Sjsg 	target->debug_trap_enabled = false;
657*f005ef32Sjsg 	kfd_dbg_clean_exception_status(target);
658*f005ef32Sjsg 	kfd_unref_process(target);
659*f005ef32Sjsg 
660*f005ef32Sjsg 	return 0;
661*f005ef32Sjsg }
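/*
 * Reference counting: kfd_dbg_trap_enable() takes an extra reference on the
 * target process (kref_get) for the lifetime of the debug session;
 * kfd_dbg_trap_disable() drops it again via kfd_unref_process() after the
 * event file has been released and the exception state cleaned.
 */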
662*f005ef32Sjsg 
663*f005ef32Sjsg int kfd_dbg_trap_activate(struct kfd_process *target)
664*f005ef32Sjsg {
665*f005ef32Sjsg 	int i, r = 0;
666*f005ef32Sjsg 
667*f005ef32Sjsg 	r = kfd_dbg_set_workaround(target, true);
668*f005ef32Sjsg 	if (r)
669*f005ef32Sjsg 		return r;
670*f005ef32Sjsg 
671*f005ef32Sjsg 	for (i = 0; i < target->n_pdds; i++) {
672*f005ef32Sjsg 		struct kfd_process_device *pdd = target->pdds[i];
673*f005ef32Sjsg 
674*f005ef32Sjsg 		if (!kfd_dbg_is_per_vmid_supported(pdd->dev)) {
675*f005ef32Sjsg 			r = reserve_debug_trap_vmid(pdd->dev->dqm, &pdd->qpd);
676*f005ef32Sjsg 
677*f005ef32Sjsg 			if (r) {
678*f005ef32Sjsg 				target->runtime_info.runtime_state = (r == -EBUSY) ?
679*f005ef32Sjsg 							DEBUG_RUNTIME_STATE_ENABLED_BUSY :
680*f005ef32Sjsg 							DEBUG_RUNTIME_STATE_ENABLED_ERROR;
681*f005ef32Sjsg 
682*f005ef32Sjsg 				goto unwind_err;
683*f005ef32Sjsg 			}
684*f005ef32Sjsg 		}
685*f005ef32Sjsg 
686*f005ef32Sjsg 		/* Disable GFX OFF to prevent garbage read/writes to debug registers.
687*f005ef32Sjsg 		 * If RLC restore of debug registers is not supported and runtime enable
688*f005ef32Sjsg 		 * hasn't done so already on ttmp setup request, restore the trap config registers.
689*f005ef32Sjsg 		 *
690*f005ef32Sjsg 		 * If RLC restore of debug registers is not supported, keep gfx off disabled for
691*f005ef32Sjsg 		 * the debug session.
692*f005ef32Sjsg 		 */
693*f005ef32Sjsg 		amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
694*f005ef32Sjsg 		if (!(kfd_dbg_is_rlc_restore_supported(pdd->dev) ||
695*f005ef32Sjsg 						target->runtime_info.ttmp_setup))
696*f005ef32Sjsg 			pdd->dev->kfd2kgd->enable_debug_trap(pdd->dev->adev, true,
697*f005ef32Sjsg 								pdd->dev->vm_info.last_vmid_kfd);
698*f005ef32Sjsg 
699*f005ef32Sjsg 		pdd->spi_dbg_override = pdd->dev->kfd2kgd->enable_debug_trap(
700*f005ef32Sjsg 					pdd->dev->adev,
701*f005ef32Sjsg 					false,
702*f005ef32Sjsg 					pdd->dev->vm_info.last_vmid_kfd);
703*f005ef32Sjsg 
704*f005ef32Sjsg 		if (kfd_dbg_is_rlc_restore_supported(pdd->dev))
705*f005ef32Sjsg 			amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
706*f005ef32Sjsg 
707*f005ef32Sjsg 		/*
708*f005ef32Sjsg 		 * Setting the debug flag in the trap handler requires that the TMA has been
709*f005ef32Sjsg 		 * allocated, which occurs during CWSR initialization.
710*f005ef32Sjsg 		 * In the event that CWSR has not been initialized at this point, setting the
711*f005ef32Sjsg 		 * flag will be called again during CWSR initialization if the target process
712*f005ef32Sjsg 		 * is still debug enabled.
713*f005ef32Sjsg 		 */
714*f005ef32Sjsg 		kfd_process_set_trap_debug_flag(&pdd->qpd, true);
715*f005ef32Sjsg 
716*f005ef32Sjsg 		if (!pdd->dev->kfd->shared_resources.enable_mes)
717*f005ef32Sjsg 			r = debug_refresh_runlist(pdd->dev->dqm);
718*f005ef32Sjsg 		else
719*f005ef32Sjsg 			r = kfd_dbg_set_mes_debug_mode(pdd, true);
720*f005ef32Sjsg 
721*f005ef32Sjsg 		if (r) {
722*f005ef32Sjsg 			target->runtime_info.runtime_state =
723*f005ef32Sjsg 					DEBUG_RUNTIME_STATE_ENABLED_ERROR;
724*f005ef32Sjsg 			goto unwind_err;
725*f005ef32Sjsg 		}
726*f005ef32Sjsg 	}
727*f005ef32Sjsg 
728*f005ef32Sjsg 	return 0;
729*f005ef32Sjsg 
730*f005ef32Sjsg unwind_err:
731*f005ef32Sjsg 	/* Enabling debug failed, we need to disable on
732*f005ef32Sjsg 	 * all GPUs so the enable is all or nothing.
733*f005ef32Sjsg 	 */
734*f005ef32Sjsg 	kfd_dbg_trap_deactivate(target, true, i);
735*f005ef32Sjsg 	return r;
736*f005ef32Sjsg }
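/*
 * Activation above is all-or-nothing across the process's GPUs: on the first
 * per-device failure the runtime state is marked busy/error and
 * kfd_dbg_trap_deactivate(target, true, i) unwinds only the devices that were
 * already enabled (the unwind_count argument).
 */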
737*f005ef32Sjsg 
738*f005ef32Sjsg int kfd_dbg_trap_enable(struct kfd_process *target, uint32_t fd,
739*f005ef32Sjsg 			void __user *runtime_info, uint32_t *runtime_size)
740*f005ef32Sjsg {
741*f005ef32Sjsg 	struct file *f;
742*f005ef32Sjsg 	uint32_t copy_size;
743*f005ef32Sjsg 	int i, r = 0;
744*f005ef32Sjsg 
745*f005ef32Sjsg 	if (target->debug_trap_enabled)
746*f005ef32Sjsg 		return -EALREADY;
747*f005ef32Sjsg 
748*f005ef32Sjsg 	/* Enable pre-checks */
749*f005ef32Sjsg 	for (i = 0; i < target->n_pdds; i++) {
750*f005ef32Sjsg 		struct kfd_process_device *pdd = target->pdds[i];
751*f005ef32Sjsg 
752*f005ef32Sjsg 		if (!KFD_IS_SOC15(pdd->dev))
753*f005ef32Sjsg 			return -ENODEV;
754*f005ef32Sjsg 
755*f005ef32Sjsg 		if (pdd->qpd.num_gws && (!kfd_dbg_has_gws_support(pdd->dev) ||
756*f005ef32Sjsg 					 kfd_dbg_has_cwsr_workaround(pdd->dev)))
757*f005ef32Sjsg 			return -EBUSY;
758*f005ef32Sjsg 	}
759*f005ef32Sjsg 
760*f005ef32Sjsg 	copy_size = min((size_t)(*runtime_size), sizeof(target->runtime_info));
761*f005ef32Sjsg 
762*f005ef32Sjsg 	f = fget(fd);
763*f005ef32Sjsg 	if (!f) {
764*f005ef32Sjsg 		pr_err("Failed to get file for (%i)\n", fd);
765*f005ef32Sjsg 		return -EBADF;
766*f005ef32Sjsg 	}
767*f005ef32Sjsg 
768*f005ef32Sjsg 	target->dbg_ev_file = f;
769*f005ef32Sjsg 
770*f005ef32Sjsg 	/* defer activation to runtime if not runtime enabled */
771*f005ef32Sjsg 	if (target->runtime_info.runtime_state == DEBUG_RUNTIME_STATE_ENABLED)
772*f005ef32Sjsg 		kfd_dbg_trap_activate(target);
773*f005ef32Sjsg 
774*f005ef32Sjsg 	/* We already hold the process reference but hold another one for the
775*f005ef32Sjsg 	 * debug session.
776*f005ef32Sjsg 	 */
777*f005ef32Sjsg 	kref_get(&target->ref);
778*f005ef32Sjsg 	target->debug_trap_enabled = true;
779*f005ef32Sjsg 
780*f005ef32Sjsg 	if (target->debugger_process)
781*f005ef32Sjsg 		atomic_inc(&target->debugger_process->debugged_process_count);
782*f005ef32Sjsg 
783*f005ef32Sjsg 	if (copy_to_user(runtime_info, (void *)&target->runtime_info, copy_size)) {
784*f005ef32Sjsg 		kfd_dbg_trap_deactivate(target, false, 0);
785*f005ef32Sjsg 		r = -EFAULT;
786*f005ef32Sjsg 	}
787*f005ef32Sjsg 
788*f005ef32Sjsg 	*runtime_size = sizeof(target->runtime_info);
789*f005ef32Sjsg 
790*f005ef32Sjsg 	return r;
791*f005ef32Sjsg }
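/*
 * Usage note for the enable path: at most *runtime_size bytes of the target's
 * runtime_info are copied to user space, but *runtime_size is always set to
 * the full structure size on return, so callers can detect truncation.
 * Activation happens immediately only if the target's runtime is already
 * DEBUG_RUNTIME_STATE_ENABLED; otherwise it is deferred to runtime enable
 * (see kfd_dbg_trap_activate()).
 */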
792*f005ef32Sjsg 
793*f005ef32Sjsg static int kfd_dbg_validate_trap_override_request(struct kfd_process *p,
794*f005ef32Sjsg 						uint32_t trap_override,
795*f005ef32Sjsg 						uint32_t trap_mask_request,
796*f005ef32Sjsg 						uint32_t *trap_mask_supported)
797*f005ef32Sjsg {
798*f005ef32Sjsg 	int i = 0;
799*f005ef32Sjsg 
800*f005ef32Sjsg 	*trap_mask_supported = 0xffffffff;
801*f005ef32Sjsg 
802*f005ef32Sjsg 	for (i = 0; i < p->n_pdds; i++) {
803*f005ef32Sjsg 		struct kfd_process_device *pdd = p->pdds[i];
804*f005ef32Sjsg 		int err = pdd->dev->kfd2kgd->validate_trap_override_request(
805*f005ef32Sjsg 								pdd->dev->adev,
806*f005ef32Sjsg 								trap_override,
807*f005ef32Sjsg 								trap_mask_supported);
808*f005ef32Sjsg 
809*f005ef32Sjsg 		if (err)
810*f005ef32Sjsg 			return err;
811*f005ef32Sjsg 	}
812*f005ef32Sjsg 
813*f005ef32Sjsg 	if (trap_mask_request & ~*trap_mask_supported)
814*f005ef32Sjsg 		return -EACCES;
815*f005ef32Sjsg 
816*f005ef32Sjsg 	return 0;
817*f005ef32Sjsg }
818*f005ef32Sjsg 
819*f005ef32Sjsg int kfd_dbg_trap_set_wave_launch_override(struct kfd_process *target,
820*f005ef32Sjsg 					uint32_t trap_override,
821*f005ef32Sjsg 					uint32_t trap_mask_bits,
822*f005ef32Sjsg 					uint32_t trap_mask_request,
823*f005ef32Sjsg 					uint32_t *trap_mask_prev,
824*f005ef32Sjsg 					uint32_t *trap_mask_supported)
825*f005ef32Sjsg {
826*f005ef32Sjsg 	int r = 0, i;
827*f005ef32Sjsg 
828*f005ef32Sjsg 	r = kfd_dbg_validate_trap_override_request(target,
829*f005ef32Sjsg 						trap_override,
830*f005ef32Sjsg 						trap_mask_request,
831*f005ef32Sjsg 						trap_mask_supported);
832*f005ef32Sjsg 
833*f005ef32Sjsg 	if (r)
834*f005ef32Sjsg 		return r;
835*f005ef32Sjsg 
836*f005ef32Sjsg 	for (i = 0; i < target->n_pdds; i++) {
837*f005ef32Sjsg 		struct kfd_process_device *pdd = target->pdds[i];
838*f005ef32Sjsg 
839*f005ef32Sjsg 		amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
840*f005ef32Sjsg 		pdd->spi_dbg_override = pdd->dev->kfd2kgd->set_wave_launch_trap_override(
841*f005ef32Sjsg 				pdd->dev->adev,
842*f005ef32Sjsg 				pdd->dev->vm_info.last_vmid_kfd,
843*f005ef32Sjsg 				trap_override,
844*f005ef32Sjsg 				trap_mask_bits,
845*f005ef32Sjsg 				trap_mask_request,
846*f005ef32Sjsg 				trap_mask_prev,
847*f005ef32Sjsg 				pdd->spi_dbg_override);
848*f005ef32Sjsg 		amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
849*f005ef32Sjsg 
850*f005ef32Sjsg 		if (!pdd->dev->kfd->shared_resources.enable_mes)
851*f005ef32Sjsg 			r = debug_refresh_runlist(pdd->dev->dqm);
852*f005ef32Sjsg 		else
853*f005ef32Sjsg 			r = kfd_dbg_set_mes_debug_mode(pdd, true);
854*f005ef32Sjsg 
855*f005ef32Sjsg 		if (r)
856*f005ef32Sjsg 			break;
857*f005ef32Sjsg 	}
858*f005ef32Sjsg 
859*f005ef32Sjsg 	return r;
860*f005ef32Sjsg }
861*f005ef32Sjsg 
862*f005ef32Sjsg int kfd_dbg_trap_set_wave_launch_mode(struct kfd_process *target,
863*f005ef32Sjsg 					uint8_t wave_launch_mode)
864*f005ef32Sjsg {
865*f005ef32Sjsg 	int r = 0, i;
866*f005ef32Sjsg 
867*f005ef32Sjsg 	if (wave_launch_mode != KFD_DBG_TRAP_WAVE_LAUNCH_MODE_NORMAL &&
868*f005ef32Sjsg 			wave_launch_mode != KFD_DBG_TRAP_WAVE_LAUNCH_MODE_HALT &&
869*f005ef32Sjsg 			wave_launch_mode != KFD_DBG_TRAP_WAVE_LAUNCH_MODE_DEBUG)
870*f005ef32Sjsg 		return -EINVAL;
871*f005ef32Sjsg 
872*f005ef32Sjsg 	for (i = 0; i < target->n_pdds; i++) {
873*f005ef32Sjsg 		struct kfd_process_device *pdd = target->pdds[i];
874*f005ef32Sjsg 
875*f005ef32Sjsg 		amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
876*f005ef32Sjsg 		pdd->spi_dbg_launch_mode = pdd->dev->kfd2kgd->set_wave_launch_mode(
877*f005ef32Sjsg 				pdd->dev->adev,
878*f005ef32Sjsg 				wave_launch_mode,
879*f005ef32Sjsg 				pdd->dev->vm_info.last_vmid_kfd);
880*f005ef32Sjsg 		amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
881*f005ef32Sjsg 
882*f005ef32Sjsg 		if (!pdd->dev->kfd->shared_resources.enable_mes)
883*f005ef32Sjsg 			r = debug_refresh_runlist(pdd->dev->dqm);
884*f005ef32Sjsg 		else
885*f005ef32Sjsg 			r = kfd_dbg_set_mes_debug_mode(pdd, true);
886*f005ef32Sjsg 
887*f005ef32Sjsg 		if (r)
888*f005ef32Sjsg 			break;
889*f005ef32Sjsg 	}
890*f005ef32Sjsg 
891*f005ef32Sjsg 	return r;
892*f005ef32Sjsg }
893*f005ef32Sjsg 
894*f005ef32Sjsg int kfd_dbg_trap_query_exception_info(struct kfd_process *target,
895*f005ef32Sjsg 		uint32_t source_id,
896*f005ef32Sjsg 		uint32_t exception_code,
897*f005ef32Sjsg 		bool clear_exception,
898*f005ef32Sjsg 		void __user *info,
899*f005ef32Sjsg 		uint32_t *info_size)
900*f005ef32Sjsg {
901*f005ef32Sjsg 	bool found = false;
902*f005ef32Sjsg 	int r = 0;
903*f005ef32Sjsg 	uint32_t copy_size, actual_info_size = 0;
904*f005ef32Sjsg 	uint64_t *exception_status_ptr = NULL;
905*f005ef32Sjsg 
906*f005ef32Sjsg 	if (!target)
907*f005ef32Sjsg 		return -EINVAL;
908*f005ef32Sjsg 
909*f005ef32Sjsg 	if (!info || !info_size)
910*f005ef32Sjsg 		return -EINVAL;
911*f005ef32Sjsg 
912*f005ef32Sjsg 	mutex_lock(&target->event_mutex);
913*f005ef32Sjsg 
914*f005ef32Sjsg 	if (KFD_DBG_EC_TYPE_IS_QUEUE(exception_code)) {
915*f005ef32Sjsg 		/* Per queue exceptions */
916*f005ef32Sjsg 		struct queue *queue = NULL;
917*f005ef32Sjsg 		int i;
918*f005ef32Sjsg 
919*f005ef32Sjsg 		for (i = 0; i < target->n_pdds; i++) {
920*f005ef32Sjsg 			struct kfd_process_device *pdd = target->pdds[i];
921*f005ef32Sjsg 			struct qcm_process_device *qpd = &pdd->qpd;
922*f005ef32Sjsg 
923*f005ef32Sjsg 			list_for_each_entry(queue, &qpd->queues_list, list) {
924*f005ef32Sjsg 				if (!found && queue->properties.queue_id == source_id) {
925*f005ef32Sjsg 					found = true;
926*f005ef32Sjsg 					break;
927*f005ef32Sjsg 				}
928*f005ef32Sjsg 			}
929*f005ef32Sjsg 			if (found)
930*f005ef32Sjsg 				break;
931*f005ef32Sjsg 		}
932*f005ef32Sjsg 
933*f005ef32Sjsg 		if (!found) {
934*f005ef32Sjsg 			r = -EINVAL;
935*f005ef32Sjsg 			goto out;
936*f005ef32Sjsg 		}
937*f005ef32Sjsg 
938*f005ef32Sjsg 		if (!(queue->properties.exception_status & KFD_EC_MASK(exception_code))) {
939*f005ef32Sjsg 			r = -ENODATA;
940*f005ef32Sjsg 			goto out;
941*f005ef32Sjsg 		}
942*f005ef32Sjsg 		exception_status_ptr = &queue->properties.exception_status;
943*f005ef32Sjsg 	} else if (KFD_DBG_EC_TYPE_IS_DEVICE(exception_code)) {
944*f005ef32Sjsg 		/* Per device exceptions */
945*f005ef32Sjsg 		struct kfd_process_device *pdd = NULL;
946*f005ef32Sjsg 		int i;
947*f005ef32Sjsg 
948*f005ef32Sjsg 		for (i = 0; i < target->n_pdds; i++) {
949*f005ef32Sjsg 			pdd = target->pdds[i];
950*f005ef32Sjsg 			if (pdd->dev->id == source_id) {
951*f005ef32Sjsg 				found = true;
952*f005ef32Sjsg 				break;
953*f005ef32Sjsg 			}
954*f005ef32Sjsg 		}
955*f005ef32Sjsg 
956*f005ef32Sjsg 		if (!found) {
957*f005ef32Sjsg 			r = -EINVAL;
958*f005ef32Sjsg 			goto out;
959*f005ef32Sjsg 		}
960*f005ef32Sjsg 
961*f005ef32Sjsg 		if (!(pdd->exception_status & KFD_EC_MASK(exception_code))) {
962*f005ef32Sjsg 			r = -ENODATA;
963*f005ef32Sjsg 			goto out;
964*f005ef32Sjsg 		}
965*f005ef32Sjsg 
966*f005ef32Sjsg 		if (exception_code == EC_DEVICE_MEMORY_VIOLATION) {
967*f005ef32Sjsg 			copy_size = min((size_t)(*info_size), pdd->vm_fault_exc_data_size);
968*f005ef32Sjsg 
969*f005ef32Sjsg 			if (copy_to_user(info, pdd->vm_fault_exc_data, copy_size)) {
970*f005ef32Sjsg 				r = -EFAULT;
971*f005ef32Sjsg 				goto out;
972*f005ef32Sjsg 			}
973*f005ef32Sjsg 			actual_info_size = pdd->vm_fault_exc_data_size;
974*f005ef32Sjsg 			if (clear_exception) {
975*f005ef32Sjsg 				kfree(pdd->vm_fault_exc_data);
976*f005ef32Sjsg 				pdd->vm_fault_exc_data = NULL;
977*f005ef32Sjsg 				pdd->vm_fault_exc_data_size = 0;
978*f005ef32Sjsg 			}
979*f005ef32Sjsg 		}
980*f005ef32Sjsg 		exception_status_ptr = &pdd->exception_status;
981*f005ef32Sjsg 	} else if (KFD_DBG_EC_TYPE_IS_PROCESS(exception_code)) {
982*f005ef32Sjsg 		/* Per process exceptions */
983*f005ef32Sjsg 		if (!(target->exception_status & KFD_EC_MASK(exception_code))) {
984*f005ef32Sjsg 			r = -ENODATA;
985*f005ef32Sjsg 			goto out;
986*f005ef32Sjsg 		}
987*f005ef32Sjsg 
988*f005ef32Sjsg 		if (exception_code == EC_PROCESS_RUNTIME) {
989*f005ef32Sjsg 			copy_size = min((size_t)(*info_size), sizeof(target->runtime_info));
990*f005ef32Sjsg 
991*f005ef32Sjsg 			if (copy_to_user(info, (void *)&target->runtime_info, copy_size)) {
992*f005ef32Sjsg 				r = -EFAULT;
993*f005ef32Sjsg 				goto out;
994*f005ef32Sjsg 			}
995*f005ef32Sjsg 
996*f005ef32Sjsg 			actual_info_size = sizeof(target->runtime_info);
997*f005ef32Sjsg 		}
998*f005ef32Sjsg 
999*f005ef32Sjsg 		exception_status_ptr = &target->exception_status;
1000*f005ef32Sjsg 	} else {
1001*f005ef32Sjsg 		pr_debug("Bad exception type [%i]\n", exception_code);
1002*f005ef32Sjsg 		r = -EINVAL;
1003*f005ef32Sjsg 		goto out;
1004*f005ef32Sjsg 	}
1005*f005ef32Sjsg 
1006*f005ef32Sjsg 	*info_size = actual_info_size;
1007*f005ef32Sjsg 	if (clear_exception)
1008*f005ef32Sjsg 		*exception_status_ptr &= ~KFD_EC_MASK(exception_code);
1009*f005ef32Sjsg out:
1010*f005ef32Sjsg 	mutex_unlock(&target->event_mutex);
1011*f005ef32Sjsg 	return r;
1012*f005ef32Sjsg }
1013*f005ef32Sjsg 
1014*f005ef32Sjsg int kfd_dbg_trap_device_snapshot(struct kfd_process *target,
1015*f005ef32Sjsg 		uint64_t exception_clear_mask,
1016*f005ef32Sjsg 		void __user *user_info,
1017*f005ef32Sjsg 		uint32_t *number_of_device_infos,
1018*f005ef32Sjsg 		uint32_t *entry_size)
1019*f005ef32Sjsg {
1020*f005ef32Sjsg 	struct kfd_dbg_device_info_entry device_info;
1021*f005ef32Sjsg 	uint32_t tmp_entry_size = *entry_size, tmp_num_devices;
1022*f005ef32Sjsg 	int i, r = 0;
1023*f005ef32Sjsg 
1024*f005ef32Sjsg 	if (!(target && user_info && number_of_device_infos && entry_size))
1025*f005ef32Sjsg 		return -EINVAL;
1026*f005ef32Sjsg 
1027*f005ef32Sjsg 	tmp_num_devices = min_t(size_t, *number_of_device_infos, target->n_pdds);
1028*f005ef32Sjsg 	*number_of_device_infos = target->n_pdds;
1029*f005ef32Sjsg 	*entry_size = min_t(size_t, *entry_size, sizeof(device_info));
1030*f005ef32Sjsg 
1031*f005ef32Sjsg 	if (!tmp_num_devices)
1032*f005ef32Sjsg 		return 0;
1033*f005ef32Sjsg 
1034*f005ef32Sjsg 	memset(&device_info, 0, sizeof(device_info));
1035*f005ef32Sjsg 
1036*f005ef32Sjsg 	mutex_lock(&target->event_mutex);
1037*f005ef32Sjsg 
1038*f005ef32Sjsg 	/* Run over all pdds of the process */
1039*f005ef32Sjsg 	for (i = 0; i < tmp_num_devices; i++) {
1040*f005ef32Sjsg 		struct kfd_process_device *pdd = target->pdds[i];
1041*f005ef32Sjsg 		struct kfd_topology_device *topo_dev = kfd_topology_device_by_id(pdd->dev->id);
1042*f005ef32Sjsg 
1043*f005ef32Sjsg 		device_info.gpu_id = pdd->dev->id;
1044*f005ef32Sjsg 		device_info.exception_status = pdd->exception_status;
1045*f005ef32Sjsg 		device_info.lds_base = pdd->lds_base;
1046*f005ef32Sjsg 		device_info.lds_limit = pdd->lds_limit;
1047*f005ef32Sjsg 		device_info.scratch_base = pdd->scratch_base;
1048*f005ef32Sjsg 		device_info.scratch_limit = pdd->scratch_limit;
1049*f005ef32Sjsg 		device_info.gpuvm_base = pdd->gpuvm_base;
1050*f005ef32Sjsg 		device_info.gpuvm_limit = pdd->gpuvm_limit;
1051*f005ef32Sjsg 		device_info.location_id = topo_dev->node_props.location_id;
1052*f005ef32Sjsg 		device_info.vendor_id = topo_dev->node_props.vendor_id;
1053*f005ef32Sjsg 		device_info.device_id = topo_dev->node_props.device_id;
1054*f005ef32Sjsg 		device_info.revision_id = pdd->dev->adev->pdev->revision;
1055*f005ef32Sjsg 		device_info.subsystem_vendor_id = pdd->dev->adev->pdev->subsystem_vendor;
1056*f005ef32Sjsg 		device_info.subsystem_device_id = pdd->dev->adev->pdev->subsystem_device;
1057*f005ef32Sjsg 		device_info.fw_version = pdd->dev->kfd->mec_fw_version;
1058*f005ef32Sjsg 		device_info.gfx_target_version =
1059*f005ef32Sjsg 			topo_dev->node_props.gfx_target_version;
1060*f005ef32Sjsg 		device_info.simd_count = topo_dev->node_props.simd_count;
1061*f005ef32Sjsg 		device_info.max_waves_per_simd =
1062*f005ef32Sjsg 			topo_dev->node_props.max_waves_per_simd;
1063*f005ef32Sjsg 		device_info.array_count = topo_dev->node_props.array_count;
1064*f005ef32Sjsg 		device_info.simd_arrays_per_engine =
1065*f005ef32Sjsg 			topo_dev->node_props.simd_arrays_per_engine;
1066*f005ef32Sjsg 		device_info.num_xcc = NUM_XCC(pdd->dev->xcc_mask);
1067*f005ef32Sjsg 		device_info.capability = topo_dev->node_props.capability;
1068*f005ef32Sjsg 		device_info.debug_prop = topo_dev->node_props.debug_prop;
1069*f005ef32Sjsg 
1070*f005ef32Sjsg 		if (exception_clear_mask)
1071*f005ef32Sjsg 			pdd->exception_status &= ~exception_clear_mask;
1072*f005ef32Sjsg 
1073*f005ef32Sjsg 		if (copy_to_user(user_info, &device_info, *entry_size)) {
1074*f005ef32Sjsg 			r = -EFAULT;
1075*f005ef32Sjsg 			break;
1076*f005ef32Sjsg 		}
1077*f005ef32Sjsg 
1078*f005ef32Sjsg 		user_info += tmp_entry_size;
1079*f005ef32Sjsg 	}
1080*f005ef32Sjsg 
1081*f005ef32Sjsg 	mutex_unlock(&target->event_mutex);
1082*f005ef32Sjsg 
1083*f005ef32Sjsg 	return r;
1084*f005ef32Sjsg }
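/*
 * Snapshot sizing above: the number of entries written is
 * min(*number_of_device_infos, n_pdds) and each entry copies
 * min(*entry_size, sizeof(device_info)) bytes, while the destination pointer
 * advances by the caller's original entry size (tmp_entry_size).  Both
 * *number_of_device_infos and *entry_size are updated so user space can size
 * a retry or detect a smaller/larger kernel structure.
 */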
1085*f005ef32Sjsg 
1086*f005ef32Sjsg void kfd_dbg_set_enabled_debug_exception_mask(struct kfd_process *target,
1087*f005ef32Sjsg 					uint64_t exception_set_mask)
1088*f005ef32Sjsg {
1089*f005ef32Sjsg 	uint64_t found_mask = 0;
1090*f005ef32Sjsg 	struct process_queue_manager *pqm;
1091*f005ef32Sjsg 	struct process_queue_node *pqn;
1092*f005ef32Sjsg 	static const char write_data = '.';
1093*f005ef32Sjsg 	loff_t pos = 0;
1094*f005ef32Sjsg 	int i;
1095*f005ef32Sjsg 
1096*f005ef32Sjsg 	mutex_lock(&target->event_mutex);
1097*f005ef32Sjsg 
1098*f005ef32Sjsg 	found_mask |= target->exception_status;
1099*f005ef32Sjsg 
1100*f005ef32Sjsg 	pqm = &target->pqm;
1101*f005ef32Sjsg 	list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
1102*f005ef32Sjsg 		if (!pqn->q)
1103*f005ef32Sjsg 			continue;
1104*f005ef32Sjsg 
1105*f005ef32Sjsg 		found_mask |= pqn->q->properties.exception_status;
1106*f005ef32Sjsg 	}
1107*f005ef32Sjsg 
1108*f005ef32Sjsg 	for (i = 0; i < target->n_pdds; i++) {
1109*f005ef32Sjsg 		struct kfd_process_device *pdd = target->pdds[i];
1110*f005ef32Sjsg 
1111*f005ef32Sjsg 		found_mask |= pdd->exception_status;
1112*f005ef32Sjsg 	}
1113*f005ef32Sjsg 
1114*f005ef32Sjsg 	if (exception_set_mask & found_mask)
1115*f005ef32Sjsg 		kernel_write(target->dbg_ev_file, &write_data, 1, &pos);
1116*f005ef32Sjsg 
1117*f005ef32Sjsg 	target->exception_enable_mask = exception_set_mask;
1118*f005ef32Sjsg 
1119*f005ef32Sjsg 	mutex_unlock(&target->event_mutex);
1120*f005ef32Sjsg }
1121