xref: /freebsd/sys/dev/mlx5/mlx5_ib/mlx5_ib_devx.c (revision 271171e0)
1 /*-
2  * Copyright (c) 2018-2020, Mellanox Technologies. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23  * SUCH DAMAGE.
24  *
25  * $FreeBSD$
26  */
27 
28 #include "opt_rss.h"
29 #include "opt_ratelimit.h"
30 
31 #include <rdma/ib_user_verbs.h>
32 #include <rdma/ib_verbs.h>
33 #include <rdma/uverbs_types.h>
34 #include <rdma/uverbs_ioctl.h>
35 #include <rdma/mlx5_user_ioctl_cmds.h>
36 #include <rdma/mlx5_user_ioctl_verbs.h>
37 #include <rdma/ib_umem.h>
38 #include <rdma/uverbs_std_types.h>
39 #include <dev/mlx5/driver.h>
40 #include <dev/mlx5/fs.h>
41 #include <dev/mlx5/mlx5_ib/mlx5_ib.h>
42 
43 #include <sys/priv.h>
44 
45 #include <linux/xarray.h>
46 #include <linux/rculist.h>
47 #include <linux/srcu.h>
48 #include <linux/file.h>
49 #include <linux/poll.h>
50 #include <linux/wait.h>
51 
52 #define UVERBS_MODULE_NAME mlx5_ib
53 #include <rdma/uverbs_named_ioctl.h>
54 
55 static void dispatch_event_fd(struct list_head *fd_list, const void *data);
56 
57 enum devx_obj_flags {
58 	DEVX_OBJ_FLAGS_DCT = 1 << 1,
59 	DEVX_OBJ_FLAGS_CQ = 1 << 2,
60 };
61 
62 struct devx_async_data {
63 	struct mlx5_ib_dev *mdev;
64 	struct list_head list;
65 	struct devx_async_cmd_event_file *ev_file;
66 	struct mlx5_async_work cb_work;
67 	u16 cmd_out_len;
68 	/* must be last field in this structure */
69 	struct mlx5_ib_uapi_devx_async_cmd_hdr hdr;
70 };
71 
72 struct devx_async_event_data {
73 	struct list_head list; /* headed in ev_file->event_list */
74 	struct mlx5_ib_uapi_devx_async_event_hdr hdr;
75 };
76 
77 /* first level XA value data structure */
78 struct devx_event {
79 	struct xarray object_ids; /* second XA level, Key = object id */
80 	struct list_head unaffiliated_list;
81 };
82 
83 /* second level XA value data structure */
84 struct devx_obj_event {
85 	struct rcu_head rcu;
86 	struct list_head obj_sub_list;
87 };
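/*
 * Dispatch uses two xarray levels: dev->devx_event_table.event_xa is keyed
 * by the event number (xa_key_level1) and holds a struct devx_event; for
 * object-affiliated events, devx_event->object_ids is keyed by the object id
 * (xa_key_level2) and holds a struct devx_obj_event whose obj_sub_list
 * carries the subscriptions. Unaffiliated subscriptions are linked on
 * devx_event->unaffiliated_list instead.
 */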
88 
89 struct devx_event_subscription {
90 	struct list_head file_list; /* headed in ev_file->
91 				     * subscribed_events_list
92 				     */
93 	struct list_head xa_list; /* headed in devx_event->unaffiliated_list or
94 				   * devx_obj_event->obj_sub_list
95 				   */
96 	struct list_head obj_list; /* headed in devx_object */
97 	struct list_head event_list; /* headed in ev_file->event_list or in
98 				      * temp list via subscription
99 				      */
100 
101 	u8 is_cleaned:1;
102 	u32 xa_key_level1;
103 	u32 xa_key_level2;
104 	struct rcu_head	rcu;
105 	u64 cookie;
106 	struct devx_async_event_file *ev_file;
107 	struct fd eventfd;
108 };
109 
110 struct devx_async_event_file {
111 	struct ib_uobject uobj;
112 	/* Head of events that are subscribed to this FD */
113 	struct list_head subscribed_events_list;
114 	spinlock_t lock;
115 	wait_queue_head_t poll_wait;
116 	struct list_head event_list;
117 	struct mlx5_ib_dev *dev;
118 	u8 omit_data:1;
119 	u8 is_overflow_err:1;
120 	u8 is_destroyed:1;
121 };
122 
123 #define MLX5_MAX_DESTROY_INBOX_SIZE_DW MLX5_ST_SZ_DW(delete_fte_in)
124 struct devx_obj {
125 	struct mlx5_ib_dev	*ib_dev;
126 	u64			obj_id;
127 	u32			dinlen; /* destroy inbox length */
128 	u32			dinbox[MLX5_MAX_DESTROY_INBOX_SIZE_DW];
129 	u32			flags;
130 	union {
131 		struct mlx5_ib_devx_mr	devx_mr;
132 		struct mlx5_core_dct	core_dct;
133 		struct mlx5_core_cq	core_cq;
134 		u32			flow_counter_bulk_size;
135 	};
136 	struct list_head event_sub; /* holds devx_event_subscription entries */
137 };
138 
139 struct devx_umem {
140 	struct mlx5_core_dev		*mdev;
141 	struct ib_umem			*umem;
142 	u32				page_offset;
143 	int				page_shift;
144 	int				ncont;
145 	u32				dinlen;
146 	u32				dinbox[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)];
147 };
148 
149 struct devx_umem_reg_cmd {
150 	void				*in;
151 	u32				inlen;
152 	u32				out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
153 };
154 
155 static struct mlx5_ib_ucontext *
156 devx_ufile2uctx(const struct uverbs_attr_bundle *attrs)
157 {
158 	return to_mucontext(ib_uverbs_get_ucontext(attrs));
159 }
160 
161 int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user)
162 {
163 	u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {0};
164 	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0};
165 	void *uctx;
166 	int err;
167 	u16 uid;
168 	u32 cap = 0;
169 
170 	/* 0 means not supported */
171 	if (!MLX5_CAP_GEN(dev->mdev, log_max_uctx))
172 		return -EINVAL;
173 
174 	uctx = MLX5_ADDR_OF(create_uctx_in, in, uctx);
175 	if (is_user && priv_check(curthread, PRIV_NET_RAW) == 0 &&
176 	    (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RAW_TX))
177 		cap |= MLX5_UCTX_CAP_RAW_TX;
178 	if (is_user && priv_check(curthread, PRIV_DRIVER) == 0 &&
179 	    (MLX5_CAP_GEN(dev->mdev, uctx_cap) &
180 	     MLX5_UCTX_CAP_INTERNAL_DEV_RES))
181 		cap |= MLX5_UCTX_CAP_INTERNAL_DEV_RES;
182 
183 	MLX5_SET(create_uctx_in, in, opcode, MLX5_CMD_OP_CREATE_UCTX);
184 	MLX5_SET(uctx, uctx, cap, cap);
185 
186 	err = mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
187 	if (err)
188 		return err;
189 
190 	uid = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
191 	return uid;
192 }
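/*
 * The return value is the allocated DEVX uid on success or a negative errno
 * on failure. A minimal caller sketch (assuming the uid is kept in the user
 * context, as the ucontext allocation path is expected to do):
 *
 *	uid = mlx5_ib_devx_create(dev, true);
 *	if (uid > 0)
 *		context->devx_uid = uid;
 */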
193 
194 void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid)
195 {
196 	u32 in[MLX5_ST_SZ_DW(destroy_uctx_in)] = {0};
197 	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0};
198 
199 	MLX5_SET(destroy_uctx_in, in, opcode, MLX5_CMD_OP_DESTROY_UCTX);
200 	MLX5_SET(destroy_uctx_in, in, uid, uid);
201 
202 	mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
203 }
204 
205 bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type)
206 {
207 	struct devx_obj *devx_obj = obj;
208 	u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode);
209 
210 	switch (opcode) {
211 	case MLX5_CMD_OP_DESTROY_TIR:
212 		*dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR;
213 		*dest_id = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox,
214 				    obj_id);
215 		return true;
216 
217 	case MLX5_CMD_OP_DESTROY_FLOW_TABLE:
218 		*dest_type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
219 		*dest_id = MLX5_GET(destroy_flow_table_in, devx_obj->dinbox,
220 				    table_id);
221 		return true;
222 	default:
223 		return false;
224 	}
225 }
226 
227 bool mlx5_ib_devx_is_flow_counter(void *obj, u32 offset, u32 *counter_id)
228 {
229 	struct devx_obj *devx_obj = obj;
230 	u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode);
231 
232 	if (opcode == MLX5_CMD_OP_DEALLOC_FLOW_COUNTER) {
233 
234 		if (offset && offset >= devx_obj->flow_counter_bulk_size)
235 			return false;
236 
237 		*counter_id = MLX5_GET(dealloc_flow_counter_in,
238 				       devx_obj->dinbox,
239 				       flow_counter_id);
240 		*counter_id += offset;
241 		return true;
242 	}
243 
244 	return false;
245 }
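/*
 * For example (a sketch, assuming a counter object created with
 * flow_counter_bulk = 2, see the OBJ_CREATE handler below): the bulk covers
 * 256 counters, so offsets 0..255 resolve to counter ids base..base+255,
 * while offset 256 or above is rejected.
 */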
246 
247 static bool is_legacy_unaffiliated_event_num(u16 event_num)
248 {
249 	switch (event_num) {
250 	case MLX5_EVENT_TYPE_PORT_CHANGE:
251 		return true;
252 	default:
253 		return false;
254 	}
255 }
256 
257 static bool is_legacy_obj_event_num(u16 event_num)
258 {
259 	switch (event_num) {
260 	case MLX5_EVENT_TYPE_PATH_MIG:
261 	case MLX5_EVENT_TYPE_COMM_EST:
262 	case MLX5_EVENT_TYPE_SQ_DRAINED:
263 	case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
264 	case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
265 	case MLX5_EVENT_TYPE_CQ_ERROR:
266 	case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
267 	case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
268 	case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
269 	case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
270 	case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
271 	case MLX5_EVENT_TYPE_DCT_DRAINED:
272 	case MLX5_EVENT_TYPE_COMP:
273 	case MLX5_EVENT_TYPE_DCT_KEY_VIOLATION:
274 	case MLX5_EVENT_TYPE_XRQ_ERROR:
275 		return true;
276 	default:
277 		return false;
278 	}
279 }
280 
281 static u16 get_legacy_obj_type(u16 opcode)
282 {
283 	switch (opcode) {
284 	case MLX5_CMD_OP_CREATE_RQ:
285 		return MLX5_EVENT_QUEUE_TYPE_RQ;
286 	case MLX5_CMD_OP_CREATE_QP:
287 		return MLX5_EVENT_QUEUE_TYPE_QP;
288 	case MLX5_CMD_OP_CREATE_SQ:
289 		return MLX5_EVENT_QUEUE_TYPE_SQ;
290 	case MLX5_CMD_OP_CREATE_DCT:
291 		return MLX5_EVENT_QUEUE_TYPE_DCT;
292 	default:
293 		return 0;
294 	}
295 }
296 
297 static u16 get_dec_obj_type(struct devx_obj *obj, u16 event_num)
298 {
299 	u16 opcode;
300 
301 	opcode = (obj->obj_id >> 32) & 0xffff;
302 
303 	if (is_legacy_obj_event_num(event_num))
304 		return get_legacy_obj_type(opcode);
305 
306 	switch (opcode) {
307 	case MLX5_CMD_OP_CREATE_GENERAL_OBJ:
308 		return (obj->obj_id >> 48);
309 	case MLX5_CMD_OP_CREATE_RQ:
310 		return MLX5_OBJ_TYPE_RQ;
311 	case MLX5_CMD_OP_CREATE_QP:
312 		return MLX5_OBJ_TYPE_QP;
313 	case MLX5_CMD_OP_CREATE_SQ:
314 		return MLX5_OBJ_TYPE_SQ;
315 	case MLX5_CMD_OP_CREATE_DCT:
316 		return MLX5_OBJ_TYPE_DCT;
317 	case MLX5_CMD_OP_CREATE_TIR:
318 		return MLX5_OBJ_TYPE_TIR;
319 	case MLX5_CMD_OP_CREATE_TIS:
320 		return MLX5_OBJ_TYPE_TIS;
321 	case MLX5_CMD_OP_CREATE_PSV:
322 		return MLX5_OBJ_TYPE_PSV;
323 	case MLX5_OBJ_TYPE_MKEY:
324 		return MLX5_OBJ_TYPE_MKEY;
325 	case MLX5_CMD_OP_CREATE_RMP:
326 		return MLX5_OBJ_TYPE_RMP;
327 	case MLX5_CMD_OP_CREATE_XRC_SRQ:
328 		return MLX5_OBJ_TYPE_XRC_SRQ;
329 	case MLX5_CMD_OP_CREATE_XRQ:
330 		return MLX5_OBJ_TYPE_XRQ;
331 	case MLX5_CMD_OP_CREATE_RQT:
332 		return MLX5_OBJ_TYPE_RQT;
333 	case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
334 		return MLX5_OBJ_TYPE_FLOW_COUNTER;
335 	case MLX5_CMD_OP_CREATE_CQ:
336 		return MLX5_OBJ_TYPE_CQ;
337 	default:
338 		return 0;
339 	}
340 }
341 
342 static u16 get_event_obj_type(unsigned long event_type, struct mlx5_eqe *eqe)
343 {
344 	switch (event_type) {
345 	case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
346 	case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
347 	case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
348 	case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
349 	case MLX5_EVENT_TYPE_PATH_MIG:
350 	case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
351 	case MLX5_EVENT_TYPE_COMM_EST:
352 	case MLX5_EVENT_TYPE_SQ_DRAINED:
353 	case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
354 	case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
355 		return eqe->data.qp_srq.type;
356 	case MLX5_EVENT_TYPE_CQ_ERROR:
357 	case MLX5_EVENT_TYPE_XRQ_ERROR:
358 		return 0;
359 	case MLX5_EVENT_TYPE_DCT_DRAINED:
360 	case MLX5_EVENT_TYPE_DCT_KEY_VIOLATION:
361 		return MLX5_EVENT_QUEUE_TYPE_DCT;
362 	default:
363 		return MLX5_GET(affiliated_event_header, &eqe->data, obj_type);
364 	}
365 }
366 
367 static u32 get_dec_obj_id(u64 obj_id)
368 {
369 	return (obj_id & 0xffffffff);
370 }
371 
372 /*
373  * As the obj_id in the firmware is not globally unique, the object type
374  * must be considered when checking for a valid object id.
375  * For that, the opcode of the creator command is encoded as part of the obj_id.
376  */
377 static u64 get_enc_obj_id(u32 opcode, u32 obj_id)
378 {
379 	return ((u64)opcode << 32) | obj_id;
380 }
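/*
 * Resulting layout of the encoded id (see also devx_get_obj_id() and the
 * OBJ_CREATE handler below):
 *
 *   bits [31:0]  - firmware object id (qpn, cqn, tirn, ...)
 *   bits [47:32] - opcode of the creating command
 *   bits [63:48] - obj_type, for general objects only (opcode | obj_type << 16)
 *
 * For example, a CQ with cqn 0x12 is encoded as
 * ((u64)MLX5_CMD_OP_CREATE_CQ << 32) | 0x12.
 */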
381 
382 static u64 devx_get_obj_id(const void *in)
383 {
384 	u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
385 	u64 obj_id;
386 
387 	switch (opcode) {
388 	case MLX5_CMD_OP_MODIFY_GENERAL_OBJ:
389 	case MLX5_CMD_OP_QUERY_GENERAL_OBJ:
390 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_GENERAL_OBJ |
391 					MLX5_GET(general_obj_in_cmd_hdr, in,
392 						 obj_type) << 16,
393 					MLX5_GET(general_obj_in_cmd_hdr, in,
394 						 obj_id));
395 		break;
396 	case MLX5_CMD_OP_QUERY_MKEY:
397 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_MKEY,
398 					MLX5_GET(query_mkey_in, in,
399 						 mkey_index));
400 		break;
401 	case MLX5_CMD_OP_QUERY_CQ:
402 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_CQ,
403 					MLX5_GET(query_cq_in, in, cqn));
404 		break;
405 	case MLX5_CMD_OP_MODIFY_CQ:
406 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_CQ,
407 					MLX5_GET(modify_cq_in, in, cqn));
408 		break;
409 	case MLX5_CMD_OP_QUERY_SQ:
410 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SQ,
411 					MLX5_GET(query_sq_in, in, sqn));
412 		break;
413 	case MLX5_CMD_OP_MODIFY_SQ:
414 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SQ,
415 					MLX5_GET(modify_sq_in, in, sqn));
416 		break;
417 	case MLX5_CMD_OP_QUERY_RQ:
418 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ,
419 					MLX5_GET(query_rq_in, in, rqn));
420 		break;
421 	case MLX5_CMD_OP_MODIFY_RQ:
422 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ,
423 					MLX5_GET(modify_rq_in, in, rqn));
424 		break;
425 	case MLX5_CMD_OP_QUERY_RMP:
426 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RMP,
427 					MLX5_GET(query_rmp_in, in, rmpn));
428 		break;
429 	case MLX5_CMD_OP_MODIFY_RMP:
430 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RMP,
431 					MLX5_GET(modify_rmp_in, in, rmpn));
432 		break;
433 	case MLX5_CMD_OP_QUERY_RQT:
434 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQT,
435 					MLX5_GET(query_rqt_in, in, rqtn));
436 		break;
437 	case MLX5_CMD_OP_MODIFY_RQT:
438 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQT,
439 					MLX5_GET(modify_rqt_in, in, rqtn));
440 		break;
441 	case MLX5_CMD_OP_QUERY_TIR:
442 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_TIR,
443 					MLX5_GET(query_tir_in, in, tirn));
444 		break;
445 	case MLX5_CMD_OP_MODIFY_TIR:
446 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_TIR,
447 					MLX5_GET(modify_tir_in, in, tirn));
448 		break;
449 	case MLX5_CMD_OP_QUERY_TIS:
450 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_TIS,
451 					MLX5_GET(query_tis_in, in, tisn));
452 		break;
453 	case MLX5_CMD_OP_MODIFY_TIS:
454 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_TIS,
455 					MLX5_GET(modify_tis_in, in, tisn));
456 		break;
457 	case MLX5_CMD_OP_QUERY_FLOW_TABLE:
458 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_FLOW_TABLE,
459 					MLX5_GET(query_flow_table_in, in,
460 						 table_id));
461 		break;
462 	case MLX5_CMD_OP_MODIFY_FLOW_TABLE:
463 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_FLOW_TABLE,
464 					MLX5_GET(modify_flow_table_in, in,
465 						 table_id));
466 		break;
467 	case MLX5_CMD_OP_QUERY_FLOW_GROUP:
468 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_FLOW_GROUP,
469 					MLX5_GET(query_flow_group_in, in,
470 						 group_id));
471 		break;
472 	case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY:
473 		obj_id = get_enc_obj_id(MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY,
474 					MLX5_GET(query_fte_in, in,
475 						 flow_index));
476 		break;
477 	case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
478 		obj_id = get_enc_obj_id(MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY,
479 					MLX5_GET(set_fte_in, in, flow_index));
480 		break;
481 	case MLX5_CMD_OP_QUERY_Q_COUNTER:
482 		obj_id = get_enc_obj_id(MLX5_CMD_OP_ALLOC_Q_COUNTER,
483 					MLX5_GET(query_q_counter_in, in,
484 						 counter_set_id));
485 		break;
486 	case MLX5_CMD_OP_QUERY_FLOW_COUNTER:
487 		obj_id = get_enc_obj_id(MLX5_CMD_OP_ALLOC_FLOW_COUNTER,
488 					MLX5_GET(query_flow_counter_in, in,
489 						 flow_counter_id));
490 		break;
491 	case MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT:
492 		obj_id = get_enc_obj_id(MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT,
493 					MLX5_GET(general_obj_in_cmd_hdr, in,
494 						 obj_id));
495 		break;
496 	case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT:
497 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT,
498 					MLX5_GET(query_scheduling_element_in,
499 						 in, scheduling_element_id));
500 		break;
501 	case MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT:
502 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT,
503 					MLX5_GET(modify_scheduling_element_in,
504 						 in, scheduling_element_id));
505 		break;
506 	case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
507 		obj_id = get_enc_obj_id(MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT,
508 					MLX5_GET(add_vxlan_udp_dport_in, in,
509 						 vxlan_udp_port));
510 		break;
511 	case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY:
512 		obj_id = get_enc_obj_id(MLX5_CMD_OP_SET_L2_TABLE_ENTRY,
513 					MLX5_GET(query_l2_table_entry_in, in,
514 						 table_index));
515 		break;
516 	case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
517 		obj_id = get_enc_obj_id(MLX5_CMD_OP_SET_L2_TABLE_ENTRY,
518 					MLX5_GET(set_l2_table_entry_in, in,
519 						 table_index));
520 		break;
521 	case MLX5_CMD_OP_QUERY_QP:
522 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
523 					MLX5_GET(query_qp_in, in, qpn));
524 		break;
525 	case MLX5_CMD_OP_RST2INIT_QP:
526 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
527 					MLX5_GET(rst2init_qp_in, in, qpn));
528 		break;
529 	case MLX5_CMD_OP_INIT2RTR_QP:
530 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
531 					MLX5_GET(init2rtr_qp_in, in, qpn));
532 		break;
533 	case MLX5_CMD_OP_RTR2RTS_QP:
534 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
535 					MLX5_GET(rtr2rts_qp_in, in, qpn));
536 		break;
537 	case MLX5_CMD_OP_RTS2RTS_QP:
538 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
539 					MLX5_GET(rts2rts_qp_in, in, qpn));
540 		break;
541 	case MLX5_CMD_OP_SQERR2RTS_QP:
542 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
543 					MLX5_GET(sqerr2rts_qp_in, in, qpn));
544 		break;
545 	case MLX5_CMD_OP_2ERR_QP:
546 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
547 					MLX5_GET(qp_2err_in, in, qpn));
548 		break;
549 	case MLX5_CMD_OP_2RST_QP:
550 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
551 					MLX5_GET(qp_2rst_in, in, qpn));
552 		break;
553 	case MLX5_CMD_OP_QUERY_DCT:
554 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_DCT,
555 					MLX5_GET(query_dct_in, in, dctn));
556 		break;
557 	case MLX5_CMD_OP_QUERY_XRQ:
558 	case MLX5_CMD_OP_QUERY_XRQ_DC_PARAMS_ENTRY:
559 	case MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS:
560 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRQ,
561 					MLX5_GET(query_xrq_in, in, xrqn));
562 		break;
563 	case MLX5_CMD_OP_QUERY_XRC_SRQ:
564 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRC_SRQ,
565 					MLX5_GET(query_xrc_srq_in, in,
566 						 xrc_srqn));
567 		break;
568 	case MLX5_CMD_OP_ARM_XRC_SRQ:
569 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRC_SRQ,
570 					MLX5_GET(arm_xrc_srq_in, in, xrc_srqn));
571 		break;
572 	case MLX5_CMD_OP_QUERY_SRQ:
573 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SRQ,
574 					MLX5_GET(query_srq_in, in, srqn));
575 		break;
576 	case MLX5_CMD_OP_ARM_RQ:
577 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ,
578 					MLX5_GET(arm_rq_in, in, srq_number));
579 		break;
580 	case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION:
581 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_DCT,
582 					MLX5_GET(drain_dct_in, in, dctn));
583 		break;
584 	case MLX5_CMD_OP_ARM_XRQ:
585 	case MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY:
586 	case MLX5_CMD_OP_RELEASE_XRQ_ERROR:
587 	case MLX5_CMD_OP_MODIFY_XRQ:
588 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRQ,
589 					MLX5_GET(arm_xrq_in, in, xrqn));
590 		break;
591 	case MLX5_CMD_OP_QUERY_PACKET_REFORMAT_CONTEXT:
592 		obj_id = get_enc_obj_id
593 				(MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT,
594 				 MLX5_GET(query_packet_reformat_context_in,
595 					  in, packet_reformat_id));
596 		break;
597 	default:
598 		obj_id = 0;
599 	}
600 
601 	return obj_id;
602 }
603 
604 static bool devx_is_valid_obj_id(struct uverbs_attr_bundle *attrs,
605 				 struct ib_uobject *uobj, const void *in)
606 {
607 	struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata);
608 	u64 obj_id = devx_get_obj_id(in);
609 
610 	if (!obj_id)
611 		return false;
612 
613 	switch (uobj_get_object_id(uobj)) {
614 	case UVERBS_OBJECT_CQ:
615 		return get_enc_obj_id(MLX5_CMD_OP_CREATE_CQ,
616 				      to_mcq(uobj->object)->mcq.cqn) ==
617 				      obj_id;
618 
619 	case UVERBS_OBJECT_SRQ:
620 	{
621 		struct mlx5_core_srq *srq = &(to_msrq(uobj->object)->msrq);
622 		u16 opcode;
623 
624 		switch (srq->common.res) {
625 		case MLX5_RES_XSRQ:
626 			opcode = MLX5_CMD_OP_CREATE_XRC_SRQ;
627 			break;
628 		case MLX5_RES_XRQ:
629 			opcode = MLX5_CMD_OP_CREATE_XRQ;
630 			break;
631 		default:
632 			if (!dev->mdev->issi)
633 				opcode = MLX5_CMD_OP_CREATE_SRQ;
634 			else
635 				opcode = MLX5_CMD_OP_CREATE_RMP;
636 		}
637 
638 		return get_enc_obj_id(opcode,
639 				      to_msrq(uobj->object)->msrq.srqn) ==
640 				      obj_id;
641 	}
642 
643 	case UVERBS_OBJECT_QP:
644 	{
645 		struct mlx5_ib_qp *qp = to_mqp(uobj->object);
646 		enum ib_qp_type	qp_type = qp->ibqp.qp_type;
647 
648 		if (qp_type == IB_QPT_RAW_PACKET ||
649 		    (qp->flags & MLX5_IB_QP_UNDERLAY)) {
650 			struct mlx5_ib_raw_packet_qp *raw_packet_qp =
651 							 &qp->raw_packet_qp;
652 			struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
653 			struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
654 
655 			return (get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ,
656 					       rq->base.mqp.qpn) == obj_id ||
657 				get_enc_obj_id(MLX5_CMD_OP_CREATE_SQ,
658 					       sq->base.mqp.qpn) == obj_id ||
659 				get_enc_obj_id(MLX5_CMD_OP_CREATE_TIR,
660 					       rq->tirn) == obj_id ||
661 				get_enc_obj_id(MLX5_CMD_OP_CREATE_TIS,
662 					       sq->tisn) == obj_id);
663 		}
664 
665 		if (qp_type == MLX5_IB_QPT_DCT)
666 			return get_enc_obj_id(MLX5_CMD_OP_CREATE_DCT,
667 					      qp->dct.mdct.dctn) == obj_id;
668 
669 		return get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
670 				      qp->ibqp.qp_num) == obj_id;
671 	}
672 
673 	case UVERBS_OBJECT_WQ:
674 		return get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ,
675 				      to_mrwq(uobj->object)->core_qp.qpn) ==
676 				      obj_id;
677 
678 	case UVERBS_OBJECT_RWQ_IND_TBL:
679 		return get_enc_obj_id(MLX5_CMD_OP_CREATE_RQT,
680 				      to_mrwq_ind_table(uobj->object)->rqtn) ==
681 				      obj_id;
682 
683 	case MLX5_IB_OBJECT_DEVX_OBJ:
684 		return ((struct devx_obj *)uobj->object)->obj_id == obj_id;
685 
686 	default:
687 		return false;
688 	}
689 }
690 
691 static void devx_set_umem_valid(const void *in)
692 {
693 	u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
694 
695 	switch (opcode) {
696 	case MLX5_CMD_OP_CREATE_MKEY:
697 		MLX5_SET(create_mkey_in, in, mkey_umem_valid, 1);
698 		break;
699 	case MLX5_CMD_OP_CREATE_CQ:
700 	{
701 		void *cqc;
702 
703 		MLX5_SET(create_cq_in, in, cq_umem_valid, 1);
704 		cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
705 		MLX5_SET(cqc, cqc, dbr_umem_valid, 1);
706 		break;
707 	}
708 	case MLX5_CMD_OP_CREATE_QP:
709 	{
710 		void *qpc;
711 
712 		qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
713 		MLX5_SET(qpc, qpc, dbr_umem_valid, 1);
714 		MLX5_SET(create_qp_in, in, wq_umem_valid, 1);
715 		break;
716 	}
717 
718 	case MLX5_CMD_OP_CREATE_RQ:
719 	{
720 		void *rqc, *wq;
721 
722 		rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
723 		wq  = MLX5_ADDR_OF(rqc, rqc, wq);
724 		MLX5_SET(wq, wq, dbr_umem_valid, 1);
725 		MLX5_SET(wq, wq, wq_umem_valid, 1);
726 		break;
727 	}
728 
729 	case MLX5_CMD_OP_CREATE_SQ:
730 	{
731 		void *sqc, *wq;
732 
733 		sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
734 		wq = MLX5_ADDR_OF(sqc, sqc, wq);
735 		MLX5_SET(wq, wq, dbr_umem_valid, 1);
736 		MLX5_SET(wq, wq, wq_umem_valid, 1);
737 		break;
738 	}
739 
740 	case MLX5_CMD_OP_MODIFY_CQ:
741 		MLX5_SET(modify_cq_in, in, cq_umem_valid, 1);
742 		break;
743 
744 	case MLX5_CMD_OP_CREATE_RMP:
745 	{
746 		void *rmpc, *wq;
747 
748 		rmpc = MLX5_ADDR_OF(create_rmp_in, in, ctx);
749 		wq = MLX5_ADDR_OF(rmpc, rmpc, wq);
750 		MLX5_SET(wq, wq, dbr_umem_valid, 1);
751 		MLX5_SET(wq, wq, wq_umem_valid, 1);
752 		break;
753 	}
754 
755 	case MLX5_CMD_OP_CREATE_XRQ:
756 	{
757 		void *xrqc, *wq;
758 
759 		xrqc = MLX5_ADDR_OF(create_xrq_in, in, xrq_context);
760 		wq = MLX5_ADDR_OF(xrqc, xrqc, wq);
761 		MLX5_SET(wq, wq, dbr_umem_valid, 1);
762 		MLX5_SET(wq, wq, wq_umem_valid, 1);
763 		break;
764 	}
765 
766 	case MLX5_CMD_OP_CREATE_XRC_SRQ:
767 	{
768 		void *xrc_srqc;
769 
770 		MLX5_SET(create_xrc_srq_in, in, xrc_srq_umem_valid, 1);
771 		xrc_srqc = MLX5_ADDR_OF(create_xrc_srq_in, in,
772 					xrc_srq_context_entry);
773 		MLX5_SET(xrc_srqc, xrc_srqc, dbr_umem_valid, 1);
774 		break;
775 	}
776 
777 	default:
778 		return;
779 	}
780 }
781 
782 static bool devx_is_obj_create_cmd(const void *in, u16 *opcode)
783 {
784 	*opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
785 
786 	switch (*opcode) {
787 	case MLX5_CMD_OP_CREATE_GENERAL_OBJ:
788 	case MLX5_CMD_OP_CREATE_MKEY:
789 	case MLX5_CMD_OP_CREATE_CQ:
790 	case MLX5_CMD_OP_ALLOC_PD:
791 	case MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN:
792 	case MLX5_CMD_OP_CREATE_RMP:
793 	case MLX5_CMD_OP_CREATE_SQ:
794 	case MLX5_CMD_OP_CREATE_RQ:
795 	case MLX5_CMD_OP_CREATE_RQT:
796 	case MLX5_CMD_OP_CREATE_TIR:
797 	case MLX5_CMD_OP_CREATE_TIS:
798 	case MLX5_CMD_OP_ALLOC_Q_COUNTER:
799 	case MLX5_CMD_OP_CREATE_FLOW_TABLE:
800 	case MLX5_CMD_OP_CREATE_FLOW_GROUP:
801 	case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
802 	case MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT:
803 	case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT:
804 	case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT:
805 	case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
806 	case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
807 	case MLX5_CMD_OP_CREATE_QP:
808 	case MLX5_CMD_OP_CREATE_SRQ:
809 	case MLX5_CMD_OP_CREATE_XRC_SRQ:
810 	case MLX5_CMD_OP_CREATE_DCT:
811 	case MLX5_CMD_OP_CREATE_XRQ:
812 	case MLX5_CMD_OP_ATTACH_TO_MCG:
813 	case MLX5_CMD_OP_ALLOC_XRCD:
814 		return true;
815 	case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
816 	{
817 		u16 op_mod = MLX5_GET(set_fte_in, in, op_mod);
818 		if (op_mod == 0)
819 			return true;
820 		return false;
821 	}
822 	case MLX5_CMD_OP_CREATE_PSV:
823 	{
824 		u8 num_psv = MLX5_GET(create_psv_in, in, num_psv);
825 
826 		if (num_psv == 1)
827 			return true;
828 		return false;
829 	}
830 	default:
831 		return false;
832 	}
833 }
834 
835 static bool devx_is_obj_modify_cmd(const void *in)
836 {
837 	u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
838 
839 	switch (opcode) {
840 	case MLX5_CMD_OP_MODIFY_GENERAL_OBJ:
841 	case MLX5_CMD_OP_MODIFY_CQ:
842 	case MLX5_CMD_OP_MODIFY_RMP:
843 	case MLX5_CMD_OP_MODIFY_SQ:
844 	case MLX5_CMD_OP_MODIFY_RQ:
845 	case MLX5_CMD_OP_MODIFY_RQT:
846 	case MLX5_CMD_OP_MODIFY_TIR:
847 	case MLX5_CMD_OP_MODIFY_TIS:
848 	case MLX5_CMD_OP_MODIFY_FLOW_TABLE:
849 	case MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT:
850 	case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
851 	case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
852 	case MLX5_CMD_OP_RST2INIT_QP:
853 	case MLX5_CMD_OP_INIT2RTR_QP:
854 	case MLX5_CMD_OP_RTR2RTS_QP:
855 	case MLX5_CMD_OP_RTS2RTS_QP:
856 	case MLX5_CMD_OP_SQERR2RTS_QP:
857 	case MLX5_CMD_OP_2ERR_QP:
858 	case MLX5_CMD_OP_2RST_QP:
859 	case MLX5_CMD_OP_ARM_XRC_SRQ:
860 	case MLX5_CMD_OP_ARM_RQ:
861 	case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION:
862 	case MLX5_CMD_OP_ARM_XRQ:
863 	case MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY:
864 	case MLX5_CMD_OP_RELEASE_XRQ_ERROR:
865 	case MLX5_CMD_OP_MODIFY_XRQ:
866 		return true;
867 	case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
868 	{
869 		u16 op_mod = MLX5_GET(set_fte_in, in, op_mod);
870 
871 		if (op_mod == 1)
872 			return true;
873 		return false;
874 	}
875 	default:
876 		return false;
877 	}
878 }
879 
880 static bool devx_is_obj_query_cmd(const void *in)
881 {
882 	u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
883 
884 	switch (opcode) {
885 	case MLX5_CMD_OP_QUERY_GENERAL_OBJ:
886 	case MLX5_CMD_OP_QUERY_MKEY:
887 	case MLX5_CMD_OP_QUERY_CQ:
888 	case MLX5_CMD_OP_QUERY_RMP:
889 	case MLX5_CMD_OP_QUERY_SQ:
890 	case MLX5_CMD_OP_QUERY_RQ:
891 	case MLX5_CMD_OP_QUERY_RQT:
892 	case MLX5_CMD_OP_QUERY_TIR:
893 	case MLX5_CMD_OP_QUERY_TIS:
894 	case MLX5_CMD_OP_QUERY_Q_COUNTER:
895 	case MLX5_CMD_OP_QUERY_FLOW_TABLE:
896 	case MLX5_CMD_OP_QUERY_FLOW_GROUP:
897 	case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY:
898 	case MLX5_CMD_OP_QUERY_FLOW_COUNTER:
899 	case MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT:
900 	case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT:
901 	case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY:
902 	case MLX5_CMD_OP_QUERY_QP:
903 	case MLX5_CMD_OP_QUERY_SRQ:
904 	case MLX5_CMD_OP_QUERY_XRC_SRQ:
905 	case MLX5_CMD_OP_QUERY_DCT:
906 	case MLX5_CMD_OP_QUERY_XRQ:
907 	case MLX5_CMD_OP_QUERY_XRQ_DC_PARAMS_ENTRY:
908 	case MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS:
909 	case MLX5_CMD_OP_QUERY_PACKET_REFORMAT_CONTEXT:
910 		return true;
911 	default:
912 		return false;
913 	}
914 }
915 
916 static bool devx_is_whitelist_cmd(void *in)
917 {
918 	u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
919 
920 	switch (opcode) {
921 	case MLX5_CMD_OP_QUERY_HCA_CAP:
922 	case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT:
923 	case MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT:
924 		return true;
925 	default:
926 		return false;
927 	}
928 }
929 
930 static int devx_get_uid(struct mlx5_ib_ucontext *c, void *cmd_in)
931 {
932 	if (devx_is_whitelist_cmd(cmd_in)) {
933 		if (c->devx_uid)
934 			return c->devx_uid;
935 
936 		return -EOPNOTSUPP;
937 	}
938 
939 	if (!c->devx_uid)
940 		return -EINVAL;
941 
942 	return c->devx_uid;
943 }
944 
945 static bool devx_is_general_cmd(void *in, struct mlx5_ib_dev *dev)
946 {
947 	u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
948 
949 	/* Pass all commands for vhca_tunnel as general; tracking is done in FW */
950 	if ((MLX5_CAP_GEN_64(dev->mdev, vhca_tunnel_commands) &&
951 	     MLX5_GET(general_obj_in_cmd_hdr, in, vhca_tunnel_id)) ||
952 	    (opcode >= MLX5_CMD_OP_GENERAL_START &&
953 	     opcode < MLX5_CMD_OP_GENERAL_END))
954 		return true;
955 
956 	switch (opcode) {
957 	case MLX5_CMD_OP_QUERY_HCA_CAP:
958 	case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT:
959 	case MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT:
960 	case MLX5_CMD_OP_QUERY_VPORT_STATE:
961 	case MLX5_CMD_OP_QUERY_ADAPTER:
962 	case MLX5_CMD_OP_QUERY_ISSI:
963 	case MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT:
964 	case MLX5_CMD_OP_QUERY_ROCE_ADDRESS:
965 	case MLX5_CMD_OP_QUERY_VNIC_ENV:
966 	case MLX5_CMD_OP_QUERY_VPORT_COUNTER:
967 	case MLX5_CMD_OP_GET_DROPPED_PACKET_LOG:
968 	case MLX5_CMD_OP_NOP:
969 	case MLX5_CMD_OP_QUERY_CONG_STATUS:
970 	case MLX5_CMD_OP_QUERY_CONG_PARAMS:
971 	case MLX5_CMD_OP_QUERY_CONG_STATISTICS:
972 	case MLX5_CMD_OP_QUERY_LAG:
973 		return true;
974 	default:
975 		return false;
976 	}
977 }
978 
979 static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)(
980 	struct uverbs_attr_bundle *attrs)
981 {
982 	struct mlx5_ib_ucontext *c;
983 	struct mlx5_ib_dev *dev;
984 	int user_vector;
985 	int dev_eqn;
986 	unsigned int irqn;
987 	int err;
988 
989 	if (uverbs_copy_from(&user_vector, attrs,
990 			     MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC))
991 		return -EFAULT;
992 
993 	c = devx_ufile2uctx(attrs);
994 	if (IS_ERR(c))
995 		return PTR_ERR(c);
996 	dev = to_mdev(c->ibucontext.device);
997 
998 	err = mlx5_vector2eqn(dev->mdev, user_vector, &dev_eqn, &irqn);
999 	if (err < 0)
1000 		return err;
1001 
1002 	if (uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN,
1003 			   &dev_eqn, sizeof(dev_eqn)))
1004 		return -EFAULT;
1005 
1006 	return 0;
1007 }
1008 
1009 /*
1010  * Security note:
1011  * The hardware protection mechanism works like this: each device object that
1012  * is subject to UAR doorbells (QP/SQ/CQ) gets a UAR ID (called uar_page in
1013  * the device specification manual) upon its creation. Then, upon a doorbell,
1014  * hardware fetches the context of the object for which the doorbell was rung
1015  * and validates that the UAR through which the doorbell was rung matches the
1016  * UAR ID of the object.
1017  * If they do not match, the doorbell is silently ignored by the hardware. Of
1018  * course, the user cannot ring a doorbell on a UAR that was not mapped to it.
1019  * Now in devx, as the devx kernel code does not manipulate the QP/SQ/CQ
1020  * command mailboxes (except tagging them with UID), we expose the UAR ID to
1021  * the user, so it can embed it in these objects in the expected specification
1022  * format. So the only thing the user can do is hurt itself by creating a
1023  * QP/SQ/CQ with a UAR ID other than its own; in that case other users may
1024  * ring a doorbell on its objects.
1025  * The consequence is that another user can schedule a QP/SQ of the buggy
1026  * user for execution (just insert it into the hardware schedule queue or arm
1027  * its CQ for event generation); no further harm is expected.
1028  */
1029 static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_UAR)(
1030 	struct uverbs_attr_bundle *attrs)
1031 {
1032 	struct mlx5_ib_ucontext *c;
1033 	struct mlx5_ib_dev *dev;
1034 	u32 user_idx;
1035 	s32 dev_idx;
1036 
1037 	c = devx_ufile2uctx(attrs);
1038 	if (IS_ERR(c))
1039 		return PTR_ERR(c);
1040 	dev = to_mdev(c->ibucontext.device);
1041 
1042 	if (uverbs_copy_from(&user_idx, attrs,
1043 			     MLX5_IB_ATTR_DEVX_QUERY_UAR_USER_IDX))
1044 		return -EFAULT;
1045 
1046 	dev_idx = bfregn_to_uar_index(dev, &c->bfregi, user_idx, true);
1047 	if (dev_idx < 0)
1048 		return dev_idx;
1049 
1050 	if (uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_QUERY_UAR_DEV_IDX,
1051 			   &dev_idx, sizeof(dev_idx)))
1052 		return -EFAULT;
1053 
1054 	return 0;
1055 }
1056 
1057 static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)(
1058 	struct uverbs_attr_bundle *attrs)
1059 {
1060 	struct mlx5_ib_ucontext *c;
1061 	struct mlx5_ib_dev *dev;
1062 	void *cmd_in = uverbs_attr_get_alloced_ptr(
1063 		attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_IN);
1064 	int cmd_out_len = uverbs_attr_get_len(attrs,
1065 					MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT);
1066 	void *cmd_out;
1067 	int err;
1068 	int uid;
1069 
1070 	c = devx_ufile2uctx(attrs);
1071 	if (IS_ERR(c))
1072 		return PTR_ERR(c);
1073 	dev = to_mdev(c->ibucontext.device);
1074 
1075 	uid = devx_get_uid(c, cmd_in);
1076 	if (uid < 0)
1077 		return uid;
1078 
1079 	/* Only whitelisted general HCA commands are allowed for this method. */
1080 	if (!devx_is_general_cmd(cmd_in, dev))
1081 		return -EINVAL;
1082 
1083 	cmd_out = uverbs_zalloc(attrs, cmd_out_len);
1084 	if (IS_ERR(cmd_out))
1085 		return PTR_ERR(cmd_out);
1086 
1087 	MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
1088 	err = mlx5_cmd_exec(dev->mdev, cmd_in,
1089 			    uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_IN),
1090 			    cmd_out, cmd_out_len);
1091 	if (err)
1092 		return err;
1093 
1094 	return uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT, cmd_out,
1095 			      cmd_out_len);
1096 }
1097 
1098 static void devx_obj_build_destroy_cmd(void *in, void *out, void *din,
1099 				       u32 *dinlen,
1100 				       u32 *obj_id)
1101 {
1102 	u16 obj_type = MLX5_GET(general_obj_in_cmd_hdr, in, obj_type);
1103 	u16 uid = MLX5_GET(general_obj_in_cmd_hdr, in, uid);
1104 
1105 	*obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
1106 	*dinlen = MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr);
1107 
1108 	MLX5_SET(general_obj_in_cmd_hdr, din, obj_id, *obj_id);
1109 	MLX5_SET(general_obj_in_cmd_hdr, din, uid, uid);
1110 
1111 	switch (MLX5_GET(general_obj_in_cmd_hdr, in, opcode)) {
1112 	case MLX5_CMD_OP_CREATE_GENERAL_OBJ:
1113 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJ);
1114 		MLX5_SET(general_obj_in_cmd_hdr, din, obj_type, obj_type);
1115 		break;
1116 
1117 	case MLX5_CMD_OP_CREATE_UMEM:
1118 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
1119 			 MLX5_CMD_OP_DESTROY_UMEM);
1120 		break;
1121 	case MLX5_CMD_OP_CREATE_MKEY:
1122 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_MKEY);
1123 		break;
1124 	case MLX5_CMD_OP_CREATE_CQ:
1125 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_CQ);
1126 		break;
1127 	case MLX5_CMD_OP_ALLOC_PD:
1128 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DEALLOC_PD);
1129 		break;
1130 	case MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN:
1131 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
1132 			 MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN);
1133 		break;
1134 	case MLX5_CMD_OP_CREATE_RMP:
1135 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RMP);
1136 		break;
1137 	case MLX5_CMD_OP_CREATE_SQ:
1138 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_SQ);
1139 		break;
1140 	case MLX5_CMD_OP_CREATE_RQ:
1141 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RQ);
1142 		break;
1143 	case MLX5_CMD_OP_CREATE_RQT:
1144 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RQT);
1145 		break;
1146 	case MLX5_CMD_OP_CREATE_TIR:
1147 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_TIR);
1148 		break;
1149 	case MLX5_CMD_OP_CREATE_TIS:
1150 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_TIS);
1151 		break;
1152 	case MLX5_CMD_OP_ALLOC_Q_COUNTER:
1153 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
1154 			 MLX5_CMD_OP_DEALLOC_Q_COUNTER);
1155 		break;
1156 	case MLX5_CMD_OP_CREATE_FLOW_TABLE:
1157 		*dinlen = MLX5_ST_SZ_BYTES(destroy_flow_table_in);
1158 		*obj_id = MLX5_GET(create_flow_table_out, out, table_id);
1159 		MLX5_SET(destroy_flow_table_in, din, other_vport,
1160 			 MLX5_GET(create_flow_table_in,  in, other_vport));
1161 		MLX5_SET(destroy_flow_table_in, din, vport_number,
1162 			 MLX5_GET(create_flow_table_in,  in, vport_number));
1163 		MLX5_SET(destroy_flow_table_in, din, table_type,
1164 			 MLX5_GET(create_flow_table_in,  in, table_type));
1165 		MLX5_SET(destroy_flow_table_in, din, table_id, *obj_id);
1166 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
1167 			 MLX5_CMD_OP_DESTROY_FLOW_TABLE);
1168 		break;
1169 	case MLX5_CMD_OP_CREATE_FLOW_GROUP:
1170 		*dinlen = MLX5_ST_SZ_BYTES(destroy_flow_group_in);
1171 		*obj_id = MLX5_GET(create_flow_group_out, out, group_id);
1172 		MLX5_SET(destroy_flow_group_in, din, other_vport,
1173 			 MLX5_GET(create_flow_group_in, in, other_vport));
1174 		MLX5_SET(destroy_flow_group_in, din, vport_number,
1175 			 MLX5_GET(create_flow_group_in, in, vport_number));
1176 		MLX5_SET(destroy_flow_group_in, din, table_type,
1177 			 MLX5_GET(create_flow_group_in, in, table_type));
1178 		MLX5_SET(destroy_flow_group_in, din, table_id,
1179 			 MLX5_GET(create_flow_group_in, in, table_id));
1180 		MLX5_SET(destroy_flow_group_in, din, group_id, *obj_id);
1181 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
1182 			 MLX5_CMD_OP_DESTROY_FLOW_GROUP);
1183 		break;
1184 	case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
1185 		*dinlen = MLX5_ST_SZ_BYTES(delete_fte_in);
1186 		*obj_id = MLX5_GET(set_fte_in, in, flow_index);
1187 		MLX5_SET(delete_fte_in, din, other_vport,
1188 			 MLX5_GET(set_fte_in,  in, other_vport));
1189 		MLX5_SET(delete_fte_in, din, vport_number,
1190 			 MLX5_GET(set_fte_in, in, vport_number));
1191 		MLX5_SET(delete_fte_in, din, table_type,
1192 			 MLX5_GET(set_fte_in, in, table_type));
1193 		MLX5_SET(delete_fte_in, din, table_id,
1194 			 MLX5_GET(set_fte_in, in, table_id));
1195 		MLX5_SET(delete_fte_in, din, flow_index, *obj_id);
1196 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
1197 			 MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY);
1198 		break;
1199 	case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
1200 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
1201 			 MLX5_CMD_OP_DEALLOC_FLOW_COUNTER);
1202 		break;
1203 	case MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT:
1204 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
1205 			 MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT);
1206 		break;
1207 	case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT:
1208 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
1209 			 MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT);
1210 		break;
1211 	case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT:
1212 		*dinlen = MLX5_ST_SZ_BYTES(destroy_scheduling_element_in);
1213 		*obj_id = MLX5_GET(create_scheduling_element_out, out,
1214 				   scheduling_element_id);
1215 		MLX5_SET(destroy_scheduling_element_in, din,
1216 			 scheduling_hierarchy,
1217 			 MLX5_GET(create_scheduling_element_in, in,
1218 				  scheduling_hierarchy));
1219 		MLX5_SET(destroy_scheduling_element_in, din,
1220 			 scheduling_element_id, *obj_id);
1221 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
1222 			 MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT);
1223 		break;
1224 	case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
1225 		*dinlen = MLX5_ST_SZ_BYTES(delete_vxlan_udp_dport_in);
1226 		*obj_id = MLX5_GET(add_vxlan_udp_dport_in, in, vxlan_udp_port);
1227 		MLX5_SET(delete_vxlan_udp_dport_in, din, vxlan_udp_port, *obj_id);
1228 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
1229 			 MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT);
1230 		break;
1231 	case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
1232 		*dinlen = MLX5_ST_SZ_BYTES(delete_l2_table_entry_in);
1233 		*obj_id = MLX5_GET(set_l2_table_entry_in, in, table_index);
1234 		MLX5_SET(delete_l2_table_entry_in, din, table_index, *obj_id);
1235 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
1236 			 MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY);
1237 		break;
1238 	case MLX5_CMD_OP_CREATE_QP:
1239 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_QP);
1240 		break;
1241 	case MLX5_CMD_OP_CREATE_SRQ:
1242 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_SRQ);
1243 		break;
1244 	case MLX5_CMD_OP_CREATE_XRC_SRQ:
1245 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
1246 			 MLX5_CMD_OP_DESTROY_XRC_SRQ);
1247 		break;
1248 	case MLX5_CMD_OP_CREATE_DCT:
1249 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_DCT);
1250 		break;
1251 	case MLX5_CMD_OP_CREATE_XRQ:
1252 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_XRQ);
1253 		break;
1254 	case MLX5_CMD_OP_ATTACH_TO_MCG:
1255 		*dinlen = MLX5_ST_SZ_BYTES(detach_from_mcg_in);
1256 		MLX5_SET(detach_from_mcg_in, din, qpn,
1257 			 MLX5_GET(attach_to_mcg_in, in, qpn));
1258 		memcpy(MLX5_ADDR_OF(detach_from_mcg_in, din, multicast_gid),
1259 		       MLX5_ADDR_OF(attach_to_mcg_in, in, multicast_gid),
1260 		       MLX5_FLD_SZ_BYTES(attach_to_mcg_in, multicast_gid));
1261 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DETACH_FROM_MCG);
1262 		break;
1263 	case MLX5_CMD_OP_ALLOC_XRCD:
1264 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DEALLOC_XRCD);
1265 		break;
1266 	case MLX5_CMD_OP_CREATE_PSV:
1267 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
1268 			 MLX5_CMD_OP_DESTROY_PSV);
1269 		MLX5_SET(destroy_psv_in, din, psvn,
1270 			 MLX5_GET(create_psv_out, out, psv0_index));
1271 		break;
1272 	default:
1273 		/* The entry must match one of the devx_is_obj_create_cmd() cases */
1274 		WARN_ON(true);
1275 		break;
1276 	}
1277 }
1278 
1279 static int devx_handle_mkey_create(struct mlx5_ib_dev *dev,
1280 				   struct devx_obj *obj,
1281 				   void *in, int in_len)
1282 {
1283 	int min_len = MLX5_BYTE_OFF(create_mkey_in, memory_key_mkey_entry) +
1284 			MLX5_FLD_SZ_BYTES(create_mkey_in,
1285 			memory_key_mkey_entry);
1286 	void *mkc;
1287 	u8 access_mode;
1288 
1289 	if (in_len < min_len)
1290 		return -EINVAL;
1291 
1292 	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
1293 
1294 	access_mode = MLX5_GET(mkc, mkc, access_mode);
1295 	access_mode |= MLX5_GET(mkc, mkc, access_mode_4_2) << 2;
1296 
1297 	if (access_mode == MLX5_ACCESS_MODE_KLM ||
1298 		access_mode == MLX5_ACCESS_MODE_KSM) {
1299 		return 0;
1300 	}
1301 
1302 	MLX5_SET(create_mkey_in, in, mkey_umem_valid, 1);
1303 	return 0;
1304 }
1305 
1306 static void devx_cleanup_subscription(struct mlx5_ib_dev *dev,
1307 				      struct devx_event_subscription *sub)
1308 {
1309 	struct devx_event *event;
1310 	struct devx_obj_event *xa_val_level2;
1311 
1312 	if (sub->is_cleaned)
1313 		return;
1314 
1315 	sub->is_cleaned = 1;
1316 	list_del_rcu(&sub->xa_list);
1317 
1318 	if (list_empty(&sub->obj_list))
1319 		return;
1320 
1321 	list_del_rcu(&sub->obj_list);
1322 	/* drop the level 2 entry if its obj_sub_list is now empty */
1323 	event = xa_load(&dev->devx_event_table.event_xa,
1324 			sub->xa_key_level1);
1325 	WARN_ON(!event);
1326 
1327 	xa_val_level2 = xa_load(&event->object_ids, sub->xa_key_level2);
1328 	if (list_empty(&xa_val_level2->obj_sub_list)) {
1329 		xa_erase(&event->object_ids,
1330 			 sub->xa_key_level2);
1331 		kfree_rcu(xa_val_level2, rcu);
1332 	}
1333 }
1334 
1335 static int devx_obj_cleanup(struct ib_uobject *uobject,
1336 			    enum rdma_remove_reason why,
1337 			    struct uverbs_attr_bundle *attrs)
1338 {
1339 	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
1340 	struct mlx5_devx_event_table *devx_event_table;
1341 	struct devx_obj *obj = uobject->object;
1342 	struct devx_event_subscription *sub_entry, *tmp;
1343 	struct mlx5_ib_dev *dev;
1344 	int ret;
1345 
1346 	dev = mlx5_udata_to_mdev(&attrs->driver_udata);
1347 	if (obj->flags & DEVX_OBJ_FLAGS_DCT)
1348 		ret = mlx5_core_destroy_dct(obj->ib_dev->mdev, &obj->core_dct);
1349 	else if (obj->flags & DEVX_OBJ_FLAGS_CQ)
1350 		ret = mlx5_core_destroy_cq(obj->ib_dev->mdev, &obj->core_cq);
1351 	else
1352 		ret = mlx5_cmd_exec(obj->ib_dev->mdev, obj->dinbox,
1353 				    obj->dinlen, out, sizeof(out));
1354 	if (ib_is_destroy_retryable(ret, why, uobject))
1355 		return ret;
1356 
1357 	devx_event_table = &dev->devx_event_table;
1358 
1359 	mutex_lock(&devx_event_table->event_xa_lock);
1360 	list_for_each_entry_safe(sub_entry, tmp, &obj->event_sub, obj_list)
1361 		devx_cleanup_subscription(dev, sub_entry);
1362 	mutex_unlock(&devx_event_table->event_xa_lock);
1363 
1364 	kfree(obj);
1365 	return ret;
1366 }
1367 
1368 static void devx_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
1369 {
1370 	struct devx_obj *obj = container_of(mcq, struct devx_obj, core_cq);
1371 	struct mlx5_devx_event_table *table;
1372 	struct devx_event *event;
1373 	struct devx_obj_event *obj_event;
1374 	u32 obj_id = mcq->cqn;
1375 
1376 	table = &obj->ib_dev->devx_event_table;
1377 	rcu_read_lock();
1378 	event = xa_load(&table->event_xa, MLX5_EVENT_TYPE_COMP);
1379 	if (!event)
1380 		goto out;
1381 
1382 	obj_event = xa_load(&event->object_ids, obj_id);
1383 	if (!obj_event)
1384 		goto out;
1385 
1386 	dispatch_event_fd(&obj_event->obj_sub_list, eqe);
1387 out:
1388 	rcu_read_unlock();
1389 }
1390 
1391 static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
1392 	struct uverbs_attr_bundle *attrs)
1393 {
1394 	void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN);
1395 	int cmd_out_len =  uverbs_attr_get_len(attrs,
1396 					MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT);
1397 	int cmd_in_len = uverbs_attr_get_len(attrs,
1398 					MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN);
1399 	void *cmd_out;
1400 	struct ib_uobject *uobj = uverbs_attr_get_uobject(
1401 		attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE);
1402 	struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
1403 		&attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
1404 	struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device);
1405 	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
1406 	struct devx_obj *obj;
1407 	u16 obj_type = 0;
1408 	int err;
1409 	int uid;
1410 	u32 obj_id;
1411 	u16 opcode;
1412 
1413 	if (MLX5_GET(general_obj_in_cmd_hdr, cmd_in, vhca_tunnel_id))
1414 		return -EINVAL;
1415 
1416 	uid = devx_get_uid(c, cmd_in);
1417 	if (uid < 0)
1418 		return uid;
1419 
1420 	if (!devx_is_obj_create_cmd(cmd_in, &opcode))
1421 		return -EINVAL;
1422 
1423 	cmd_out = uverbs_zalloc(attrs, cmd_out_len);
1424 	if (IS_ERR(cmd_out))
1425 		return PTR_ERR(cmd_out);
1426 
1427 	obj = kzalloc(sizeof(struct devx_obj), GFP_KERNEL);
1428 	if (!obj)
1429 		return -ENOMEM;
1430 
1431 	MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
1432 	if (opcode == MLX5_CMD_OP_CREATE_MKEY) {
1433 		err = devx_handle_mkey_create(dev, obj, cmd_in, cmd_in_len);
1434 		if (err)
1435 			goto obj_free;
1436 	} else {
1437 		devx_set_umem_valid(cmd_in);
1438 	}
1439 
1440 	if (opcode == MLX5_CMD_OP_CREATE_DCT) {
1441 		obj->flags |= DEVX_OBJ_FLAGS_DCT;
1442 		err = mlx5_core_create_dct(dev->mdev, &obj->core_dct,
1443 					   cmd_in, cmd_in_len,
1444 					   cmd_out, cmd_out_len);
1445 	} else if (opcode == MLX5_CMD_OP_CREATE_CQ) {
1446 		obj->flags |= DEVX_OBJ_FLAGS_CQ;
1447 		obj->core_cq.comp = devx_cq_comp;
1448 		err = mlx5_core_create_cq(dev->mdev, &obj->core_cq,
1449 					  cmd_in, cmd_in_len, cmd_out,
1450 					  cmd_out_len);
1451 	} else {
1452 		err = mlx5_cmd_exec(dev->mdev, cmd_in,
1453 				    cmd_in_len,
1454 				    cmd_out, cmd_out_len);
1455 	}
1456 
1457 	if (err)
1458 		goto obj_free;
1459 
1460 	if (opcode == MLX5_CMD_OP_ALLOC_FLOW_COUNTER) {
1461 		u8 bulk = MLX5_GET(alloc_flow_counter_in,
1462 				   cmd_in,
1463 				   flow_counter_bulk);
1464 		obj->flow_counter_bulk_size = 128UL * bulk;
1465 	}
1466 
1467 	uobj->object = obj;
1468 	INIT_LIST_HEAD(&obj->event_sub);
1469 	obj->ib_dev = dev;
1470 	devx_obj_build_destroy_cmd(cmd_in, cmd_out, obj->dinbox, &obj->dinlen,
1471 				   &obj_id);
1472 	WARN_ON(obj->dinlen > MLX5_MAX_DESTROY_INBOX_SIZE_DW * sizeof(u32));
1473 
1474 	err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, cmd_out, cmd_out_len);
1475 	if (err)
1476 		goto obj_destroy;
1477 
1478 	if (opcode == MLX5_CMD_OP_CREATE_GENERAL_OBJ)
1479 		obj_type = MLX5_GET(general_obj_in_cmd_hdr, cmd_in, obj_type);
1480 	obj->obj_id = get_enc_obj_id(opcode | obj_type << 16, obj_id);
1481 
1482 	return 0;
1483 
1484 obj_destroy:
1485 	if (obj->flags & DEVX_OBJ_FLAGS_DCT)
1486 		mlx5_core_destroy_dct(obj->ib_dev->mdev, &obj->core_dct);
1487 	else if (obj->flags & DEVX_OBJ_FLAGS_CQ)
1488 		mlx5_core_destroy_cq(obj->ib_dev->mdev, &obj->core_cq);
1489 	else
1490 		mlx5_cmd_exec(obj->ib_dev->mdev, obj->dinbox, obj->dinlen, out,
1491 			      sizeof(out));
1492 obj_free:
1493 	kfree(obj);
1494 	return err;
1495 }
1496 
1497 static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)(
1498 	struct uverbs_attr_bundle *attrs)
1499 {
1500 	void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN);
1501 	int cmd_out_len = uverbs_attr_get_len(attrs,
1502 					MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT);
1503 	struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
1504 							  MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE);
1505 	struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
1506 		&attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
1507 	struct mlx5_ib_dev *mdev = to_mdev(c->ibucontext.device);
1508 	void *cmd_out;
1509 	int err;
1510 	int uid;
1511 
1512 	if (MLX5_GET(general_obj_in_cmd_hdr, cmd_in, vhca_tunnel_id))
1513 		return -EINVAL;
1514 
1515 	uid = devx_get_uid(c, cmd_in);
1516 	if (uid < 0)
1517 		return uid;
1518 
1519 	if (!devx_is_obj_modify_cmd(cmd_in))
1520 		return -EINVAL;
1521 
1522 	if (!devx_is_valid_obj_id(attrs, uobj, cmd_in))
1523 		return -EINVAL;
1524 
1525 	cmd_out = uverbs_zalloc(attrs, cmd_out_len);
1526 	if (IS_ERR(cmd_out))
1527 		return PTR_ERR(cmd_out);
1528 
1529 	MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
1530 	devx_set_umem_valid(cmd_in);
1531 
1532 	err = mlx5_cmd_exec(mdev->mdev, cmd_in,
1533 			    uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN),
1534 			    cmd_out, cmd_out_len);
1535 	if (err)
1536 		return err;
1537 
1538 	return uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT,
1539 			      cmd_out, cmd_out_len);
1540 }
1541 
1542 static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)(
1543 	struct uverbs_attr_bundle *attrs)
1544 {
1545 	void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN);
1546 	int cmd_out_len = uverbs_attr_get_len(attrs,
1547 					      MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT);
1548 	struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
1549 							  MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE);
1550 	struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
1551 		&attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
1552 	void *cmd_out;
1553 	int err;
1554 	int uid;
1555 	struct mlx5_ib_dev *mdev = to_mdev(c->ibucontext.device);
1556 
1557 	if (MLX5_GET(general_obj_in_cmd_hdr, cmd_in, vhca_tunnel_id))
1558 		return -EINVAL;
1559 
1560 	uid = devx_get_uid(c, cmd_in);
1561 	if (uid < 0)
1562 		return uid;
1563 
1564 	if (!devx_is_obj_query_cmd(cmd_in))
1565 		return -EINVAL;
1566 
1567 	if (!devx_is_valid_obj_id(attrs, uobj, cmd_in))
1568 		return -EINVAL;
1569 
1570 	cmd_out = uverbs_zalloc(attrs, cmd_out_len);
1571 	if (IS_ERR(cmd_out))
1572 		return PTR_ERR(cmd_out);
1573 
1574 	MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
1575 	err = mlx5_cmd_exec(mdev->mdev, cmd_in,
1576 			    uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN),
1577 			    cmd_out, cmd_out_len);
1578 	if (err)
1579 		return err;
1580 
1581 	return uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT,
1582 			      cmd_out, cmd_out_len);
1583 }
1584 
1585 struct devx_async_event_queue {
1586 	spinlock_t		lock;
1587 	wait_queue_head_t	poll_wait;
1588 	struct list_head	event_list;
1589 	atomic_t		bytes_in_use;
1590 	u8			is_destroyed:1;
1591 };
1592 
1593 struct devx_async_cmd_event_file {
1594 	struct ib_uobject		uobj;
1595 	struct devx_async_event_queue	ev_queue;
1596 	struct mlx5_async_ctx		async_ctx;
1597 };
1598 
1599 static void devx_init_event_queue(struct devx_async_event_queue *ev_queue)
1600 {
1601 	spin_lock_init(&ev_queue->lock);
1602 	INIT_LIST_HEAD(&ev_queue->event_list);
1603 	init_waitqueue_head(&ev_queue->poll_wait);
1604 	atomic_set(&ev_queue->bytes_in_use, 0);
1605 	ev_queue->is_destroyed = 0;
1606 }
1607 
1608 static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC)(
1609 	struct uverbs_attr_bundle *attrs)
1610 {
1611 	struct devx_async_cmd_event_file *ev_file;
1612 
1613 	struct ib_uobject *uobj = uverbs_attr_get_uobject(
1614 		attrs, MLX5_IB_ATTR_DEVX_ASYNC_CMD_FD_ALLOC_HANDLE);
1615 	struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata);
1616 
1617 	ev_file = container_of(uobj, struct devx_async_cmd_event_file,
1618 			       uobj);
1619 	devx_init_event_queue(&ev_file->ev_queue);
1620 	mlx5_cmd_init_async_ctx(mdev->mdev, &ev_file->async_ctx);
1621 	return 0;
1622 }
1623 
1624 static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_ASYNC_EVENT_FD_ALLOC)(
1625 	struct uverbs_attr_bundle *attrs)
1626 {
1627 	struct ib_uobject *uobj = uverbs_attr_get_uobject(
1628 		attrs, MLX5_IB_ATTR_DEVX_ASYNC_EVENT_FD_ALLOC_HANDLE);
1629 	struct devx_async_event_file *ev_file;
1630 	struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
1631 		&attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
1632 	struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device);
1633 	u32 flags;
1634 	int err;
1635 
1636 	err = uverbs_get_flags32(&flags, attrs,
1637 		MLX5_IB_ATTR_DEVX_ASYNC_EVENT_FD_ALLOC_FLAGS,
1638 		MLX5_IB_UAPI_DEVX_CR_EV_CH_FLAGS_OMIT_DATA);
1639 
1640 	if (err)
1641 		return err;
1642 
1643 	ev_file = container_of(uobj, struct devx_async_event_file,
1644 			       uobj);
1645 	spin_lock_init(&ev_file->lock);
1646 	INIT_LIST_HEAD(&ev_file->event_list);
1647 	init_waitqueue_head(&ev_file->poll_wait);
1648 	if (flags & MLX5_IB_UAPI_DEVX_CR_EV_CH_FLAGS_OMIT_DATA)
1649 		ev_file->omit_data = 1;
1650 	INIT_LIST_HEAD(&ev_file->subscribed_events_list);
1651 	ev_file->dev = dev;
1652 	get_device(&dev->ib_dev.dev);
1653 	return 0;
1654 }
1655 
1656 static void devx_query_callback(int status, struct mlx5_async_work *context)
1657 {
1658 	struct devx_async_data *async_data =
1659 		container_of(context, struct devx_async_data, cb_work);
1660 	struct devx_async_cmd_event_file *ev_file = async_data->ev_file;
1661 	struct devx_async_event_queue *ev_queue = &ev_file->ev_queue;
1662 	unsigned long flags;
1663 
1664 	/*
1665 	 * Note that if the struct devx_async_cmd_event_file uobj begins to be
1666 	 * destroyed it will block at mlx5_cmd_cleanup_async_ctx() until this
1667 	 * routine returns, ensuring that it always remains valid here.
1668 	 */
1669 	spin_lock_irqsave(&ev_queue->lock, flags);
1670 	list_add_tail(&async_data->list, &ev_queue->event_list);
1671 	spin_unlock_irqrestore(&ev_queue->lock, flags);
1672 
1673 	wake_up_interruptible(&ev_queue->poll_wait);
1674 }
1675 
1676 #define MAX_ASYNC_BYTES_IN_USE (1024 * 1024) /* 1MB */
1677 
1678 static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY)(
1679 	struct uverbs_attr_bundle *attrs)
1680 {
1681 	void *cmd_in = uverbs_attr_get_alloced_ptr(attrs,
1682 				MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_CMD_IN);
1683 	struct ib_uobject *uobj = uverbs_attr_get_uobject(
1684 				attrs,
1685 				MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_HANDLE);
1686 	u16 cmd_out_len;
1687 	struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
1688 		&attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
1689 	struct ib_uobject *fd_uobj;
1690 	int err;
1691 	int uid;
1692 	struct mlx5_ib_dev *mdev = to_mdev(c->ibucontext.device);
1693 	struct devx_async_cmd_event_file *ev_file;
1694 	struct devx_async_data *async_data;
1695 
1696 	if (MLX5_GET(general_obj_in_cmd_hdr, cmd_in, vhca_tunnel_id))
1697 		return -EINVAL;
1698 
1699 	uid = devx_get_uid(c, cmd_in);
1700 	if (uid < 0)
1701 		return uid;
1702 
1703 	if (!devx_is_obj_query_cmd(cmd_in))
1704 		return -EINVAL;
1705 
1706 	err = uverbs_get_const(&cmd_out_len, attrs,
1707 			       MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_OUT_LEN);
1708 	if (err)
1709 		return err;
1710 
1711 	if (!devx_is_valid_obj_id(attrs, uobj, cmd_in))
1712 		return -EINVAL;
1713 
1714 	fd_uobj = uverbs_attr_get_uobject(attrs,
1715 				MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_FD);
1716 	if (IS_ERR(fd_uobj))
1717 		return PTR_ERR(fd_uobj);
1718 
1719 	ev_file = container_of(fd_uobj, struct devx_async_cmd_event_file,
1720 			       uobj);
1721 
1722 	if (atomic_add_return(cmd_out_len, &ev_file->ev_queue.bytes_in_use) >
1723 			MAX_ASYNC_BYTES_IN_USE) {
1724 		atomic_sub(cmd_out_len, &ev_file->ev_queue.bytes_in_use);
1725 		return -EAGAIN;
1726 	}
1727 
1728 	async_data = kvzalloc(struct_size(async_data, hdr.out_data,
1729 					  cmd_out_len), GFP_KERNEL);
1730 	if (!async_data) {
1731 		err = -ENOMEM;
1732 		goto sub_bytes;
1733 	}
1734 
1735 	err = uverbs_copy_from(&async_data->hdr.wr_id, attrs,
1736 			       MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_WR_ID);
1737 	if (err)
1738 		goto free_async;
1739 
1740 	async_data->cmd_out_len = cmd_out_len;
1741 	async_data->mdev = mdev;
1742 	async_data->ev_file = ev_file;
1743 
1744 	MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
1745 	err = mlx5_cmd_exec_cb(&ev_file->async_ctx, cmd_in,
1746 		    uverbs_attr_get_len(attrs,
1747 				MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_CMD_IN),
1748 		    async_data->hdr.out_data,
1749 		    async_data->cmd_out_len,
1750 		    devx_query_callback, &async_data->cb_work);
1751 
1752 	if (err)
1753 		goto free_async;
1754 
1755 	return 0;
1756 
1757 free_async:
1758 	kvfree(async_data);
1759 sub_bytes:
1760 	atomic_sub(cmd_out_len, &ev_file->ev_queue.bytes_in_use);
1761 	return err;
1762 }
1763 
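/*
 * The subscription table is a two-level xarray: the first level is keyed by
 * (event number | obj_type << 16) and the second level, used only for
 * affiliated events, is keyed by the object id.  Level-1 entries are kept
 * around for future subscribers; level-2 entries are freed (after an RCU
 * grace period) once their subscriber list becomes empty.
 */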
1764 static void
1765 subscribe_event_xa_dealloc(struct mlx5_devx_event_table *devx_event_table,
1766 			   u32 key_level1,
1767 			   bool is_level2,
1768 			   u32 key_level2)
1769 {
1770 	struct devx_event *event;
1771 	struct devx_obj_event *xa_val_level2;
1772 
1773 	/* Level 1 is valid for future use, no need to free */
1774 	if (!is_level2)
1775 		return;
1776 
1777 	event = xa_load(&devx_event_table->event_xa, key_level1);
1778 	WARN_ON(!event);
1779 
1780 	xa_val_level2 = xa_load(&event->object_ids,
1781 				key_level2);
1782 	if (list_empty(&xa_val_level2->obj_sub_list)) {
1783 		xa_erase(&event->object_ids,
1784 			 key_level2);
1785 		kfree_rcu(xa_val_level2, rcu);
1786 	}
1787 }
1788 
1789 static int
1790 subscribe_event_xa_alloc(struct mlx5_devx_event_table *devx_event_table,
1791 			 u32 key_level1,
1792 			 bool is_level2,
1793 			 u32 key_level2)
1794 {
1795 	struct devx_obj_event *obj_event;
1796 	struct devx_event *event;
1797 	int err;
1798 
1799 	event = xa_load(&devx_event_table->event_xa, key_level1);
1800 	if (!event) {
1801 		event = kzalloc(sizeof(*event), GFP_KERNEL);
1802 		if (!event)
1803 			return -ENOMEM;
1804 
1805 		INIT_LIST_HEAD(&event->unaffiliated_list);
1806 		xa_init_flags(&event->object_ids, 0);
1807 
1808 		err = xa_insert(&devx_event_table->event_xa,
1809 				key_level1,
1810 				event,
1811 				GFP_KERNEL);
1812 		if (err) {
1813 			kfree(event);
1814 			return err;
1815 		}
1816 	}
1817 
1818 	if (!is_level2)
1819 		return 0;
1820 
1821 	obj_event = xa_load(&event->object_ids, key_level2);
1822 	if (!obj_event) {
1823 		obj_event = kzalloc(sizeof(*obj_event), GFP_KERNEL);
1824 		if (!obj_event)
1825 			/* Level1 is valid for future use, no need to free */
1826 			return -ENOMEM;
1827 
		err = xa_insert(&event->object_ids,
				key_level2,
				obj_event,
				GFP_KERNEL);
		if (err) {
			/* Don't leak the level-2 entry if the insert failed */
			kfree(obj_event);
			return err;
		}
1834 		INIT_LIST_HEAD(&obj_event->obj_sub_list);
1835 	}
1836 
1837 	return 0;
1838 }
1839 
1840 static bool is_valid_events_legacy(int num_events, u16 *event_type_num_list,
1841 				   struct devx_obj *obj)
1842 {
1843 	int i;
1844 
1845 	for (i = 0; i < num_events; i++) {
1846 		if (obj) {
1847 			if (!is_legacy_obj_event_num(event_type_num_list[i]))
1848 				return false;
1849 		} else if (!is_legacy_unaffiliated_event_num(
1850 				event_type_num_list[i])) {
1851 			return false;
1852 		}
1853 	}
1854 
1855 	return true;
1856 }
1857 
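/*
 * When the device exposes event_cap, every requested event number is checked
 * against the firmware-provided user_affiliated_events /
 * user_unaffiliated_events bitmasks (event number 0, CQ completion, is always
 * accepted for object subscriptions); otherwise the legacy hard-coded lists
 * are used.
 */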
1858 #define MAX_SUPP_EVENT_NUM 255
1859 static bool is_valid_events(struct mlx5_core_dev *dev,
1860 			    int num_events, u16 *event_type_num_list,
1861 			    struct devx_obj *obj)
1862 {
1863 	__be64 *aff_events;
1864 	__be64 *unaff_events;
1865 	int mask_entry;
1866 	int mask_bit;
1867 	int i;
1868 
1869 	if (MLX5_CAP_GEN(dev, event_cap)) {
1870 		aff_events = (__be64 *)MLX5_CAP_DEV_EVENT(dev,
1871 						user_affiliated_events);
1872 		unaff_events = (__be64 *)MLX5_CAP_DEV_EVENT(dev,
1873 						  user_unaffiliated_events);
1874 	} else {
1875 		return is_valid_events_legacy(num_events, event_type_num_list,
1876 					      obj);
1877 	}
1878 
1879 	for (i = 0; i < num_events; i++) {
1880 		if (event_type_num_list[i] > MAX_SUPP_EVENT_NUM)
1881 			return false;
1882 
1883 		mask_entry = event_type_num_list[i] / 64;
1884 		mask_bit = event_type_num_list[i] % 64;
1885 
1886 		if (obj) {
1887 			/* CQ completion */
1888 			if (event_type_num_list[i] == 0)
1889 				continue;
1890 
1891 			if (!(be64_to_cpu(aff_events[mask_entry]) &
1892 					(1ull << mask_bit)))
1893 				return false;
1894 
1895 			continue;
1896 		}
1897 
1898 		if (!(be64_to_cpu(unaff_events[mask_entry]) &
1899 				(1ull << mask_bit)))
1900 			return false;
1901 	}
1902 
1903 	return true;
1904 }
1905 
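/*
 * Subscribe an event channel FD to one or more device events.  An optional
 * DEVX object handle turns this into an affiliated subscription keyed by the
 * object id; an optional eventfd (FD_NUM) redirects the notification and is
 * mutually exclusive with a cookie.  All allocations and XA slot insertions
 * are done first, under event_xa_lock, so a failure can be unwound before any
 * subscription is published on the RCU lists.
 */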
1906 #define MAX_NUM_EVENTS 16
1907 static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT)(
1908 	struct uverbs_attr_bundle *attrs)
1909 {
1910 	struct ib_uobject *devx_uobj = uverbs_attr_get_uobject(
1911 				attrs,
1912 				MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_OBJ_HANDLE);
1913 	struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
1914 		&attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
1915 	struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device);
1916 	struct ib_uobject *fd_uobj;
1917 	struct devx_obj *obj = NULL;
1918 	struct devx_async_event_file *ev_file;
1919 	struct mlx5_devx_event_table *devx_event_table = &dev->devx_event_table;
1920 	u16 *event_type_num_list;
1921 	struct devx_event_subscription *event_sub, *tmp_sub;
1922 	struct list_head sub_list;
1923 	int redirect_fd;
1924 	bool use_eventfd = false;
1925 	int num_events;
1926 	u16 obj_type = 0;
1927 	u64 cookie = 0;
1928 	u32 obj_id = 0;
1929 	int err;
1930 	int i;
1931 
1932 	if (!c->devx_uid)
1933 		return -EINVAL;
1934 
1935 	if (!IS_ERR(devx_uobj)) {
1936 		obj = (struct devx_obj *)devx_uobj->object;
1937 		if (obj)
1938 			obj_id = get_dec_obj_id(obj->obj_id);
1939 	}
1940 
1941 	fd_uobj = uverbs_attr_get_uobject(attrs,
1942 				MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_HANDLE);
1943 	if (IS_ERR(fd_uobj))
1944 		return PTR_ERR(fd_uobj);
1945 
1946 	ev_file = container_of(fd_uobj, struct devx_async_event_file,
1947 			       uobj);
1948 
1949 	if (uverbs_attr_is_valid(attrs,
1950 				 MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_NUM)) {
1951 		err = uverbs_copy_from(&redirect_fd, attrs,
1952 			       MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_NUM);
1953 		if (err)
1954 			return err;
1955 
1956 		use_eventfd = true;
1957 	}
1958 
1959 	if (uverbs_attr_is_valid(attrs,
1960 				 MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_COOKIE)) {
1961 		if (use_eventfd)
1962 			return -EINVAL;
1963 
1964 		err = uverbs_copy_from(&cookie, attrs,
1965 				MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_COOKIE);
1966 		if (err)
1967 			return err;
1968 	}
1969 
1970 	num_events = uverbs_attr_ptr_get_array_size(
1971 		attrs, MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_TYPE_NUM_LIST,
1972 		sizeof(u16));
1973 
1974 	if (num_events < 0)
1975 		return num_events;
1976 
1977 	if (num_events > MAX_NUM_EVENTS)
1978 		return -EINVAL;
1979 
1980 	event_type_num_list = uverbs_attr_get_alloced_ptr(attrs,
1981 			MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_TYPE_NUM_LIST);
1982 
1983 	if (!is_valid_events(dev->mdev, num_events, event_type_num_list, obj))
1984 		return -EINVAL;
1985 
1986 	INIT_LIST_HEAD(&sub_list);
1987 
1988 	/* Protect against concurrent subscriptions to the same XA entries so
1989 	 * that both can succeed.
1990 	 */
1991 	mutex_lock(&devx_event_table->event_xa_lock);
1992 	for (i = 0; i < num_events; i++) {
1993 		u32 key_level1;
1994 
1995 		if (obj)
1996 			obj_type = get_dec_obj_type(obj,
1997 						    event_type_num_list[i]);
1998 		key_level1 = event_type_num_list[i] | obj_type << 16;
1999 
2000 		err = subscribe_event_xa_alloc(devx_event_table,
2001 					       key_level1,
2002 					       obj,
2003 					       obj_id);
2004 		if (err)
2005 			goto err;
2006 
		event_sub = kzalloc(sizeof(*event_sub), GFP_KERNEL);
		if (!event_sub) {
			err = -ENOMEM;
			goto err;
		}
2010 
2011 		list_add_tail(&event_sub->event_list, &sub_list);
2012 		uverbs_uobject_get(&ev_file->uobj);
2013 		if (use_eventfd) {
2014 			event_sub->eventfd =
2015 				fdget(redirect_fd);
2016 
2017 			if (event_sub->eventfd.file == NULL) {
2018 				err = -EBADF;
2019 				goto err;
2020 			}
2021 		}
2022 
2023 		event_sub->cookie = cookie;
2024 		event_sub->ev_file = ev_file;
2025 		/* May be needed upon cleanup of the devx object/subscription */
2026 		event_sub->xa_key_level1 = key_level1;
2027 		event_sub->xa_key_level2 = obj_id;
2028 		INIT_LIST_HEAD(&event_sub->obj_list);
2029 	}
2030 
2031 	/* Once all the allocations and XA data insertions are done we can go
2032 	 * ahead and add all the subscriptions to the relevant lists without
2033 	 * concern of a failure.
2034 	 */
2035 	list_for_each_entry_safe(event_sub, tmp_sub, &sub_list, event_list) {
2036 		struct devx_event *event;
2037 		struct devx_obj_event *obj_event;
2038 
2039 		list_del_init(&event_sub->event_list);
2040 
2041 		spin_lock_irq(&ev_file->lock);
2042 		list_add_tail_rcu(&event_sub->file_list,
2043 				  &ev_file->subscribed_events_list);
2044 		spin_unlock_irq(&ev_file->lock);
2045 
2046 		event = xa_load(&devx_event_table->event_xa,
2047 				event_sub->xa_key_level1);
2048 		WARN_ON(!event);
2049 
2050 		if (!obj) {
2051 			list_add_tail_rcu(&event_sub->xa_list,
2052 					  &event->unaffiliated_list);
2053 			continue;
2054 		}
2055 
2056 		obj_event = xa_load(&event->object_ids, obj_id);
2057 		WARN_ON(!obj_event);
2058 		list_add_tail_rcu(&event_sub->xa_list,
2059 				  &obj_event->obj_sub_list);
2060 		list_add_tail_rcu(&event_sub->obj_list,
2061 				  &obj->event_sub);
2062 	}
2063 
2064 	mutex_unlock(&devx_event_table->event_xa_lock);
2065 	return 0;
2066 
2067 err:
2068 	list_for_each_entry_safe(event_sub, tmp_sub, &sub_list, event_list) {
2069 		list_del(&event_sub->event_list);
2070 
2071 		subscribe_event_xa_dealloc(devx_event_table,
2072 					   event_sub->xa_key_level1,
2073 					   obj,
2074 					   obj_id);
2075 
2076 		if (event_sub->eventfd.file)
2077 			fdput(event_sub->eventfd);
2078 		uverbs_uobject_put(&event_sub->ev_file->uobj);
2079 		kfree(event_sub);
2080 	}
2081 
2082 	mutex_unlock(&devx_event_table->event_xa_lock);
2083 	return err;
2084 }
2085 
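/*
 * Copy the address/length/access attributes, validate the access flags, pin
 * the user buffer with ib_umem_get() and let mlx5_ib_cont_pages() pick the
 * largest usable page shift; ncont and page_offset then describe the buffer
 * in units of that page size for the CREATE_UMEM command.
 */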
2086 static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext,
2087 			 struct uverbs_attr_bundle *attrs,
2088 			 struct devx_umem *obj)
2089 {
2090 	u64 addr;
2091 	size_t size;
2092 	u32 access;
2093 	int npages;
2094 	int err;
2095 	u32 page_mask;
2096 
2097 	if (uverbs_copy_from(&addr, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR) ||
2098 	    uverbs_copy_from(&size, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_LEN))
2099 		return -EFAULT;
2100 
2101 	err = uverbs_get_flags32(&access, attrs,
2102 				 MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS,
2103 				 IB_ACCESS_LOCAL_WRITE |
2104 				 IB_ACCESS_REMOTE_WRITE |
2105 				 IB_ACCESS_REMOTE_READ);
2106 	if (err)
2107 		return err;
2108 
2109 	err = ib_check_mr_access(access);
2110 	if (err)
2111 		return err;
2112 
2113 	obj->umem = ib_umem_get(ucontext, addr, size, access, 0);
2114 	if (IS_ERR(obj->umem))
2115 		return PTR_ERR(obj->umem);
2116 
2117 	mlx5_ib_cont_pages(obj->umem, obj->umem->address,
2118 			   MLX5_MKEY_PAGE_SHIFT_MASK, &npages,
2119 			   &obj->page_shift, &obj->ncont, NULL);
2120 
2121 	if (!npages) {
2122 		ib_umem_release(obj->umem);
2123 		return -EINVAL;
2124 	}
2125 
2126 	page_mask = (1 << obj->page_shift) - 1;
2127 	obj->page_offset = obj->umem->address & page_mask;
2128 
2129 	return 0;
2130 }
2131 
2132 static int devx_umem_reg_cmd_alloc(struct uverbs_attr_bundle *attrs,
2133 				   struct devx_umem *obj,
2134 				   struct devx_umem_reg_cmd *cmd)
2135 {
2136 	cmd->inlen = MLX5_ST_SZ_BYTES(create_umem_in) +
2137 		    (MLX5_ST_SZ_BYTES(mtt) * obj->ncont);
2138 	cmd->in = uverbs_zalloc(attrs, cmd->inlen);
2139 	return PTR_ERR_OR_ZERO(cmd->in);
2140 }
2141 
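/*
 * Fill the CREATE_UMEM command: number of MTT entries, page size expressed
 * relative to the 4KB adapter page shift, the offset of the start address
 * within the first page, and the MTT array itself with read (and, when the
 * umem is writable, write) permissions.
 */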
2142 static void devx_umem_reg_cmd_build(struct mlx5_ib_dev *dev,
2143 				    struct devx_umem *obj,
2144 				    struct devx_umem_reg_cmd *cmd)
2145 {
2146 	void *umem;
2147 	__be64 *mtt;
2148 
2149 	umem = MLX5_ADDR_OF(create_umem_in, cmd->in, umem);
2150 	mtt = (__be64 *)MLX5_ADDR_OF(umem, umem, mtt);
2151 
2152 	MLX5_SET(create_umem_in, cmd->in, opcode, MLX5_CMD_OP_CREATE_UMEM);
2153 	MLX5_SET64(umem, umem, num_of_mtt, obj->ncont);
2154 	MLX5_SET(umem, umem, log_page_size, obj->page_shift -
2155 					    MLX5_ADAPTER_PAGE_SHIFT);
2156 	MLX5_SET(umem, umem, page_offset, obj->page_offset);
2157 	mlx5_ib_populate_pas(dev, obj->umem, obj->page_shift, mtt,
2158 			     (obj->umem->writable ? MLX5_IB_MTT_WRITE : 0) |
2159 			     MLX5_IB_MTT_READ);
2160 }
2161 
2162 static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)(
2163 	struct uverbs_attr_bundle *attrs)
2164 {
2165 	struct devx_umem_reg_cmd cmd;
2166 	struct devx_umem *obj;
2167 	struct ib_uobject *uobj = uverbs_attr_get_uobject(
2168 		attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE);
2169 	u32 obj_id;
2170 	struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
2171 		&attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
2172 	struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device);
2173 	int err;
2174 
2175 	if (!c->devx_uid)
2176 		return -EINVAL;
2177 
2178 	obj = kzalloc(sizeof(struct devx_umem), GFP_KERNEL);
2179 	if (!obj)
2180 		return -ENOMEM;
2181 
2182 	err = devx_umem_get(dev, &c->ibucontext, attrs, obj);
2183 	if (err)
2184 		goto err_obj_free;
2185 
2186 	err = devx_umem_reg_cmd_alloc(attrs, obj, &cmd);
2187 	if (err)
2188 		goto err_umem_release;
2189 
2190 	devx_umem_reg_cmd_build(dev, obj, &cmd);
2191 
2192 	MLX5_SET(create_umem_in, cmd.in, uid, c->devx_uid);
2193 	err = mlx5_cmd_exec(dev->mdev, cmd.in, cmd.inlen, cmd.out,
2194 			    sizeof(cmd.out));
2195 	if (err)
2196 		goto err_umem_release;
2197 
2198 	obj->mdev = dev->mdev;
2199 	uobj->object = obj;
2200 	devx_obj_build_destroy_cmd(cmd.in, cmd.out, obj->dinbox, &obj->dinlen, &obj_id);
2201 	err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID, &obj_id, sizeof(obj_id));
2202 	if (err)
2203 		goto err_umem_destroy;
2204 
2205 	return 0;
2206 
2207 err_umem_destroy:
2208 	mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, cmd.out, sizeof(cmd.out));
2209 err_umem_release:
2210 	ib_umem_release(obj->umem);
2211 err_obj_free:
2212 	kfree(obj);
2213 	return err;
2214 }
2215 
2216 static int devx_umem_cleanup(struct ib_uobject *uobject,
2217 			     enum rdma_remove_reason why,
2218 			     struct uverbs_attr_bundle *attrs)
2219 {
2220 	struct devx_umem *obj = uobject->object;
2221 	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
2222 	int err;
2223 
2224 	err = mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out));
2225 	if (ib_is_destroy_retryable(err, why, uobject))
2226 		return err;
2227 
2228 	ib_umem_release(obj->umem);
2229 	kfree(obj);
2230 	return 0;
2231 }
2232 
2233 static bool is_unaffiliated_event(struct mlx5_core_dev *dev,
2234 				  unsigned long event_type)
2235 {
2236 	__be64 *unaff_events;
2237 	int mask_entry;
2238 	int mask_bit;
2239 
2240 	if (!MLX5_CAP_GEN(dev, event_cap))
2241 		return is_legacy_unaffiliated_event_num(event_type);
2242 
2243 	unaff_events = (__be64 *)MLX5_CAP_DEV_EVENT(dev,
2244 					  user_unaffiliated_events);
2245 	WARN_ON(event_type > MAX_SUPP_EVENT_NUM);
2246 
2247 	mask_entry = event_type / 64;
2248 	mask_bit = event_type % 64;
2249 
2250 	if (!(be64_to_cpu(unaff_events[mask_entry]) & (1ull << mask_bit)))
2251 		return false;
2252 
2253 	return true;
2254 }
2255 
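/*
 * Extract the object number from the EQE payload according to the event
 * type (QP/SRQ, XRQ, DCT and CQ events carry it in dedicated fields; all
 * other affiliated events use the generic affiliated_event_header).
 */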
2256 static u32 devx_get_obj_id_from_event(unsigned long event_type, void *data)
2257 {
2258 	struct mlx5_eqe *eqe = data;
2259 	u32 obj_id = 0;
2260 
2261 	switch (event_type) {
2262 	case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
2263 	case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
2264 	case MLX5_EVENT_TYPE_PATH_MIG:
2265 	case MLX5_EVENT_TYPE_COMM_EST:
2266 	case MLX5_EVENT_TYPE_SQ_DRAINED:
2267 	case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
2268 	case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
2269 	case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
2270 	case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
2271 	case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
2272 		obj_id = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
2273 		break;
2274 	case MLX5_EVENT_TYPE_XRQ_ERROR:
2275 		obj_id = be32_to_cpu(eqe->data.xrq_err.type_xrqn) & 0xffffff;
2276 		break;
2277 	case MLX5_EVENT_TYPE_DCT_DRAINED:
2278 	case MLX5_EVENT_TYPE_DCT_KEY_VIOLATION:
2279 		obj_id = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff;
2280 		break;
2281 	case MLX5_EVENT_TYPE_CQ_ERROR:
2282 		obj_id = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
2283 		break;
2284 	default:
2285 		obj_id = MLX5_GET(affiliated_event_header, &eqe->data, obj_id);
2286 		break;
2287 	}
2288 
2289 	return obj_id;
2290 }
2291 
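/*
 * Queue a single event on an event channel FD.  For OMIT_DATA channels only
 * the subscription cookie is reported: the subscription itself is linked on
 * the file's event list, which also coalesces duplicates while one is still
 * pending.  Otherwise a copy of the cookie plus the raw EQE is allocated
 * with GFP_ATOMIC (this runs from the event dispatch path); if that
 * allocation fails, is_overflow_err makes the next read return -EOVERFLOW.
 */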
2292 static int deliver_event(struct devx_event_subscription *event_sub,
2293 			 const void *data)
2294 {
2295 	struct devx_async_event_file *ev_file;
2296 	struct devx_async_event_data *event_data;
2297 	unsigned long flags;
2298 
2299 	ev_file = event_sub->ev_file;
2300 
2301 	if (ev_file->omit_data) {
2302 		spin_lock_irqsave(&ev_file->lock, flags);
2303 		if (!list_empty(&event_sub->event_list) ||
2304 		    ev_file->is_destroyed) {
2305 			spin_unlock_irqrestore(&ev_file->lock, flags);
2306 			return 0;
2307 		}
2308 
2309 		list_add_tail(&event_sub->event_list, &ev_file->event_list);
2310 		spin_unlock_irqrestore(&ev_file->lock, flags);
2311 		wake_up_interruptible(&ev_file->poll_wait);
2312 		return 0;
2313 	}
2314 
2315 	event_data = kzalloc(sizeof(*event_data) + sizeof(struct mlx5_eqe),
2316 			     GFP_ATOMIC);
2317 	if (!event_data) {
2318 		spin_lock_irqsave(&ev_file->lock, flags);
2319 		ev_file->is_overflow_err = 1;
2320 		spin_unlock_irqrestore(&ev_file->lock, flags);
2321 		return -ENOMEM;
2322 	}
2323 
2324 	event_data->hdr.cookie = event_sub->cookie;
2325 	memcpy(event_data->hdr.out_data, data, sizeof(struct mlx5_eqe));
2326 
2327 	spin_lock_irqsave(&ev_file->lock, flags);
2328 	if (!ev_file->is_destroyed)
2329 		list_add_tail(&event_data->list, &ev_file->event_list);
2330 	else
2331 		kfree(event_data);
2332 	spin_unlock_irqrestore(&ev_file->lock, flags);
2333 	wake_up_interruptible(&ev_file->poll_wait);
2334 
2335 	return 0;
2336 }
2337 
2338 static void dispatch_event_fd(struct list_head *fd_list,
2339 			      const void *data)
2340 {
2341 	struct devx_event_subscription *item;
2342 
2343 	list_for_each_entry_rcu(item, fd_list, xa_list) {
2344 		if (item->eventfd.file != NULL)
2345 			linux_poll_wakeup(item->eventfd.file);
2346 		else
2347 			deliver_event(item, data);
2348 	}
2349 }
2350 
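/*
 * EQE dispatch hook installed as mdev->priv.eq_table.cb.  Frequent
 * kernel-only events are filtered out up front; otherwise the level-1 entry
 * is looked up under RCU and the event is fanned out either to the
 * unaffiliated subscriber list or, for affiliated events, to the per-object
 * list.  Returns false only when no matching subscription exists.
 */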
2351 static bool mlx5_devx_event_notifier(struct mlx5_core_dev *mdev,
2352 				     uint8_t event_type, void *data)
2353 {
2354 	struct mlx5_ib_dev *dev;
2355 	struct mlx5_devx_event_table *table;
2356 	struct devx_event *event;
2357 	struct devx_obj_event *obj_event;
2358 	u16 obj_type = 0;
2359 	bool is_unaffiliated;
2360 	u32 obj_id;
2361 
2362 	/* Explicitly filter out kernel events which may occur frequently */
2363 	if (event_type == MLX5_EVENT_TYPE_CMD ||
2364 	    event_type == MLX5_EVENT_TYPE_PAGE_REQUEST)
2365 		return true;
2366 
2367 	dev = mdev->priv.eq_table.dev;
2368 	table = &dev->devx_event_table;
2369 	is_unaffiliated = is_unaffiliated_event(dev->mdev, event_type);
2370 
2371 	if (!is_unaffiliated)
2372 		obj_type = get_event_obj_type(event_type, data);
2373 
2374 	rcu_read_lock();
2375 	event = xa_load(&table->event_xa, event_type | (obj_type << 16));
2376 	if (!event) {
2377 		rcu_read_unlock();
2378 		return false;
2379 	}
2380 
2381 	if (is_unaffiliated) {
2382 		dispatch_event_fd(&event->unaffiliated_list, data);
2383 		rcu_read_unlock();
2384 		return true;
2385 	}
2386 
2387 	obj_id = devx_get_obj_id_from_event(event_type, data);
2388 	obj_event = xa_load(&event->object_ids, obj_id);
2389 	if (!obj_event) {
2390 		rcu_read_unlock();
2391 		return false;
2392 	}
2393 
2394 	dispatch_event_fd(&obj_event->obj_sub_list, data);
2395 
2396 	rcu_read_unlock();
2397 	return true;
2398 }
2399 
2400 void mlx5_ib_devx_init_event_table(struct mlx5_ib_dev *dev)
2401 {
2402 	struct mlx5_devx_event_table *table = &dev->devx_event_table;
2403 
2404 	xa_init_flags(&table->event_xa, 0);
2405 	mutex_init(&table->event_xa_lock);
2406 	dev->mdev->priv.eq_table.dev = dev;
2407 	dev->mdev->priv.eq_table.cb = mlx5_devx_event_notifier;
2408 }
2409 
2410 void mlx5_ib_devx_cleanup_event_table(struct mlx5_ib_dev *dev)
2411 {
2412 	struct mlx5_devx_event_table *table = &dev->devx_event_table;
2413 	struct devx_event_subscription *sub, *tmp;
2414 	struct devx_event *event;
2415 	void *entry;
2416 	unsigned long id;
2417 
2418 	dev->mdev->priv.eq_table.cb = NULL;
2419 	dev->mdev->priv.eq_table.dev = NULL;
2420 	mutex_lock(&dev->devx_event_table.event_xa_lock);
2421 	xa_for_each(&table->event_xa, id, entry) {
2422 		event = entry;
2423 		list_for_each_entry_safe(sub, tmp, &event->unaffiliated_list,
2424 					 xa_list)
2425 			devx_cleanup_subscription(dev, sub);
2426 		kfree(entry);
2427 	}
2428 	mutex_unlock(&dev->devx_event_table.event_xa_lock);
2429 	xa_destroy(&table->event_xa);
2430 }
2431 
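/*
 * read(2) on an async command FD.  Blocks (unless O_NONBLOCK) until a
 * completed query is available or the FD is torn down (-EIO).  Each record
 * is the wr_id header followed by cmd_out_len bytes of firmware output and
 * must fit entirely in the user buffer (-ENOSPC otherwise); the bytes_in_use
 * budget reserved at submission time is released here.
 */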
2432 static ssize_t devx_async_cmd_event_read(struct file *filp, char __user *buf,
2433 					 size_t count, loff_t *pos)
2434 {
2435 	struct devx_async_cmd_event_file *comp_ev_file = filp->private_data;
2436 	struct devx_async_event_queue *ev_queue = &comp_ev_file->ev_queue;
2437 	struct devx_async_data *event;
2438 	int ret = 0;
2439 	size_t eventsz;
2440 
2441 	spin_lock_irq(&ev_queue->lock);
2442 
2443 	while (list_empty(&ev_queue->event_list)) {
2444 		spin_unlock_irq(&ev_queue->lock);
2445 
2446 		if (filp->f_flags & O_NONBLOCK)
2447 			return -EAGAIN;
2448 
2449 		if (wait_event_interruptible(
2450 			    ev_queue->poll_wait,
2451 			    (!list_empty(&ev_queue->event_list) ||
2452 			     ev_queue->is_destroyed))) {
2453 			return -ERESTARTSYS;
2454 		}
2455 
2456 		spin_lock_irq(&ev_queue->lock);
2457 		if (ev_queue->is_destroyed) {
2458 			spin_unlock_irq(&ev_queue->lock);
2459 			return -EIO;
2460 		}
2461 	}
2462 
2463 	event = list_entry(ev_queue->event_list.next,
2464 			   struct devx_async_data, list);
2465 	eventsz = event->cmd_out_len +
2466 			sizeof(struct mlx5_ib_uapi_devx_async_cmd_hdr);
2467 
2468 	if (eventsz > count) {
2469 		spin_unlock_irq(&ev_queue->lock);
2470 		return -ENOSPC;
2471 	}
2472 
2473 	list_del(ev_queue->event_list.next);
2474 	spin_unlock_irq(&ev_queue->lock);
2475 
2476 	if (copy_to_user(buf, &event->hdr, eventsz))
2477 		ret = -EFAULT;
2478 	else
2479 		ret = eventsz;
2480 
2481 	atomic_sub(event->cmd_out_len, &ev_queue->bytes_in_use);
2482 	kvfree(event);
2483 	return ret;
2484 }
2485 
2486 static __poll_t devx_async_cmd_event_poll(struct file *filp,
2487 					  struct poll_table_struct *wait)
2488 {
2489 	struct devx_async_cmd_event_file *comp_ev_file = filp->private_data;
2490 	struct devx_async_event_queue *ev_queue = &comp_ev_file->ev_queue;
2491 	__poll_t pollflags = 0;
2492 
2493 	poll_wait(filp, &ev_queue->poll_wait, wait);
2494 
2495 	spin_lock_irq(&ev_queue->lock);
2496 	if (ev_queue->is_destroyed)
2497 		pollflags = POLLIN | POLLRDNORM | POLLHUP;
2498 	else if (!list_empty(&ev_queue->event_list))
2499 		pollflags = POLLIN | POLLRDNORM;
2500 	spin_unlock_irq(&ev_queue->lock);
2501 
2502 	return pollflags;
2503 }
2504 
2505 static const struct file_operations devx_async_cmd_event_fops = {
2506 	.owner	 = THIS_MODULE,
2507 	.read	 = devx_async_cmd_event_read,
2508 	.poll    = devx_async_cmd_event_poll,
2509 	.release = uverbs_uobject_fd_release,
2510 	.llseek	 = no_llseek,
2511 };
2512 
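/*
 * read(2) on an event channel FD.  A previously recorded allocation failure
 * is reported once as -EOVERFLOW.  OMIT_DATA channels return just the 64-bit
 * cookie; regular channels return the header (cookie) followed by the raw
 * EQE.  The user buffer must hold a complete record (-EINVAL otherwise).
 */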
2513 static ssize_t devx_async_event_read(struct file *filp, char __user *buf,
2514 				     size_t count, loff_t *pos)
2515 {
2516 	struct devx_async_event_file *ev_file = filp->private_data;
2517 	struct devx_event_subscription *event_sub;
2518 	struct devx_async_event_data *uninitialized_var(event);
2519 	int ret = 0;
2520 	size_t eventsz;
2521 	bool omit_data;
2522 	void *event_data;
2523 
2524 	omit_data = ev_file->omit_data;
2525 
2526 	spin_lock_irq(&ev_file->lock);
2527 
2528 	if (ev_file->is_overflow_err) {
2529 		ev_file->is_overflow_err = 0;
2530 		spin_unlock_irq(&ev_file->lock);
2531 		return -EOVERFLOW;
2532 	}
2533 
2534 
2535 	while (list_empty(&ev_file->event_list)) {
2536 		spin_unlock_irq(&ev_file->lock);
2537 
2538 		if (filp->f_flags & O_NONBLOCK)
2539 			return -EAGAIN;
2540 
2541 		if (wait_event_interruptible(ev_file->poll_wait,
2542 			    (!list_empty(&ev_file->event_list) ||
2543 			     ev_file->is_destroyed))) {
2544 			return -ERESTARTSYS;
2545 		}
2546 
2547 		spin_lock_irq(&ev_file->lock);
2548 		if (ev_file->is_destroyed) {
2549 			spin_unlock_irq(&ev_file->lock);
2550 			return -EIO;
2551 		}
2552 	}
2553 
2554 	if (omit_data) {
2555 		event_sub = list_first_entry(&ev_file->event_list,
2556 					struct devx_event_subscription,
2557 					event_list);
2558 		eventsz = sizeof(event_sub->cookie);
2559 		event_data = &event_sub->cookie;
2560 	} else {
2561 		event = list_first_entry(&ev_file->event_list,
2562 				      struct devx_async_event_data, list);
2563 		eventsz = sizeof(struct mlx5_eqe) +
2564 			sizeof(struct mlx5_ib_uapi_devx_async_event_hdr);
2565 		event_data = &event->hdr;
2566 	}
2567 
2568 	if (eventsz > count) {
2569 		spin_unlock_irq(&ev_file->lock);
2570 		return -EINVAL;
2571 	}
2572 
2573 	if (omit_data)
2574 		list_del_init(&event_sub->event_list);
2575 	else
2576 		list_del(&event->list);
2577 
2578 	spin_unlock_irq(&ev_file->lock);
2579 
2580 	if (copy_to_user(buf, event_data, eventsz))
2581 		/* This points to an application issue, not a kernel concern */
2582 		ret = -EFAULT;
2583 	else
2584 		ret = eventsz;
2585 
2586 	if (!omit_data)
2587 		kfree(event);
2588 	return ret;
2589 }
2590 
2591 static __poll_t devx_async_event_poll(struct file *filp,
2592 				      struct poll_table_struct *wait)
2593 {
2594 	struct devx_async_event_file *ev_file = filp->private_data;
2595 	__poll_t pollflags = 0;
2596 
2597 	poll_wait(filp, &ev_file->poll_wait, wait);
2598 
2599 	spin_lock_irq(&ev_file->lock);
2600 	if (ev_file->is_destroyed)
2601 		pollflags = POLLIN | POLLRDNORM | POLLHUP;
2602 	else if (!list_empty(&ev_file->event_list))
2603 		pollflags = POLLIN | POLLRDNORM;
2604 	spin_unlock_irq(&ev_file->lock);
2605 
2606 	return pollflags;
2607 }
2608 
2609 static void devx_free_subscription(struct rcu_head *rcu)
2610 {
2611 	struct devx_event_subscription *event_sub =
2612 		container_of(rcu, struct devx_event_subscription, rcu);
2613 
2614 	if (event_sub->eventfd.file)
2615 		fdput(event_sub->eventfd);
2616 	uverbs_uobject_put(&event_sub->ev_file->uobj);
2617 	kfree(event_sub);
2618 }
2619 
2620 static const struct file_operations devx_async_event_fops = {
2621 	.owner	 = THIS_MODULE,
2622 	.read	 = devx_async_event_read,
2623 	.poll    = devx_async_event_poll,
2624 	.release = uverbs_uobject_fd_release,
2625 	.llseek	 = no_llseek,
2626 };
2627 
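/*
 * FD teardown for the async command channel: mark the queue destroyed and
 * wake pollers so blocked readers can exit, wait for all in-flight commands
 * in mlx5_cmd_cleanup_async_ctx() (their callbacks may still queue entries),
 * then free whatever results remain on the list.
 */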
2628 static int devx_async_cmd_event_destroy_uobj(struct ib_uobject *uobj,
2629 					     enum rdma_remove_reason why)
2630 {
2631 	struct devx_async_cmd_event_file *comp_ev_file =
2632 		container_of(uobj, struct devx_async_cmd_event_file,
2633 			     uobj);
2634 	struct devx_async_event_queue *ev_queue = &comp_ev_file->ev_queue;
2635 	struct devx_async_data *entry, *tmp;
2636 
2637 	spin_lock_irq(&ev_queue->lock);
2638 	ev_queue->is_destroyed = 1;
2639 	spin_unlock_irq(&ev_queue->lock);
2640 	wake_up_interruptible(&ev_queue->poll_wait);
2641 
2642 	mlx5_cmd_cleanup_async_ctx(&comp_ev_file->async_ctx);
2643 
2644 	spin_lock_irq(&comp_ev_file->ev_queue.lock);
2645 	list_for_each_entry_safe(entry, tmp,
2646 				 &comp_ev_file->ev_queue.event_list, list) {
2647 		list_del(&entry->list);
2648 		kvfree(entry);
2649 	}
2650 	spin_unlock_irq(&comp_ev_file->ev_queue.lock);
2651 	return 0;
2652 }
2653 
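/*
 * FD teardown for the event channel: mark the file destroyed, drop any
 * pending events, then, under event_xa_lock, detach every subscription that
 * belongs to this FD and free it after an RCU grace period since the
 * dispatch path may still be walking the lists.  Finally drop the device
 * reference taken at allocation time.
 */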
2654 static int devx_async_event_destroy_uobj(struct ib_uobject *uobj,
2655 					 enum rdma_remove_reason why)
2656 {
2657 	struct devx_async_event_file *ev_file =
2658 		container_of(uobj, struct devx_async_event_file,
2659 			     uobj);
2660 	struct devx_event_subscription *event_sub, *event_sub_tmp;
2661 	struct mlx5_ib_dev *dev = ev_file->dev;
2662 
2663 	spin_lock_irq(&ev_file->lock);
2664 	ev_file->is_destroyed = 1;
2665 
2666 	/* free the pending event allocations */
2667 	if (ev_file->omit_data) {
2668 		struct devx_event_subscription *event_sub, *tmp;
2669 
2670 		list_for_each_entry_safe(event_sub, tmp, &ev_file->event_list,
2671 					 event_list)
2672 			list_del_init(&event_sub->event_list);
2673 
2674 	} else {
2675 		struct devx_async_event_data *entry, *tmp;
2676 
2677 		list_for_each_entry_safe(entry, tmp, &ev_file->event_list,
2678 					 list) {
2679 			list_del(&entry->list);
2680 			kfree(entry);
2681 		}
2682 	}
2683 
2684 	spin_unlock_irq(&ev_file->lock);
2685 	wake_up_interruptible(&ev_file->poll_wait);
2686 
2687 	mutex_lock(&dev->devx_event_table.event_xa_lock);
2688 	/* delete the subscriptions which are related to this FD */
2689 	list_for_each_entry_safe(event_sub, event_sub_tmp,
2690 				 &ev_file->subscribed_events_list, file_list) {
2691 		devx_cleanup_subscription(dev, event_sub);
2692 		list_del_rcu(&event_sub->file_list);
2693 		/* subscription may not be used by the read API any more */
2694 		call_rcu(&event_sub->rcu, devx_free_subscription);
2695 	}
2696 	mutex_unlock(&dev->devx_event_table.event_xa_lock);
2697 
2698 	put_device(&dev->ib_dev.dev);
2699 	return 0;
2700 }
2701 
2702 DECLARE_UVERBS_NAMED_METHOD(
2703 	MLX5_IB_METHOD_DEVX_UMEM_REG,
2704 	UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE,
2705 			MLX5_IB_OBJECT_DEVX_UMEM,
2706 			UVERBS_ACCESS_NEW,
2707 			UA_MANDATORY),
2708 	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR,
2709 			   UVERBS_ATTR_TYPE(u64),
2710 			   UA_MANDATORY),
2711 	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_LEN,
2712 			   UVERBS_ATTR_TYPE(u64),
2713 			   UA_MANDATORY),
2714 	UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS,
2715 			     enum ib_access_flags),
2716 	UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID,
2717 			    UVERBS_ATTR_TYPE(u32),
2718 			    UA_MANDATORY));
2719 
2720 DECLARE_UVERBS_NAMED_METHOD_DESTROY(
2721 	MLX5_IB_METHOD_DEVX_UMEM_DEREG,
2722 	UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_DEREG_HANDLE,
2723 			MLX5_IB_OBJECT_DEVX_UMEM,
2724 			UVERBS_ACCESS_DESTROY,
2725 			UA_MANDATORY));
2726 
2727 DECLARE_UVERBS_NAMED_METHOD(
2728 	MLX5_IB_METHOD_DEVX_QUERY_EQN,
2729 	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC,
2730 			   UVERBS_ATTR_TYPE(u32),
2731 			   UA_MANDATORY),
2732 	UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN,
2733 			    UVERBS_ATTR_TYPE(u32),
2734 			    UA_MANDATORY));
2735 
2736 DECLARE_UVERBS_NAMED_METHOD(
2737 	MLX5_IB_METHOD_DEVX_QUERY_UAR,
2738 	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_QUERY_UAR_USER_IDX,
2739 			   UVERBS_ATTR_TYPE(u32),
2740 			   UA_MANDATORY),
2741 	UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_QUERY_UAR_DEV_IDX,
2742 			    UVERBS_ATTR_TYPE(u32),
2743 			    UA_MANDATORY));
2744 
2745 DECLARE_UVERBS_NAMED_METHOD(
2746 	MLX5_IB_METHOD_DEVX_OTHER,
2747 	UVERBS_ATTR_PTR_IN(
2748 		MLX5_IB_ATTR_DEVX_OTHER_CMD_IN,
2749 		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
2750 		UA_MANDATORY,
2751 		UA_ALLOC_AND_COPY),
2752 	UVERBS_ATTR_PTR_OUT(
2753 		MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT,
2754 		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
2755 		UA_MANDATORY));
2756 
2757 DECLARE_UVERBS_NAMED_METHOD(
2758 	MLX5_IB_METHOD_DEVX_OBJ_CREATE,
2759 	UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE,
2760 			MLX5_IB_OBJECT_DEVX_OBJ,
2761 			UVERBS_ACCESS_NEW,
2762 			UA_MANDATORY),
2763 	UVERBS_ATTR_PTR_IN(
2764 		MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN,
2765 		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
2766 		UA_MANDATORY,
2767 		UA_ALLOC_AND_COPY),
2768 	UVERBS_ATTR_PTR_OUT(
2769 		MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT,
2770 		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
2771 		UA_MANDATORY));
2772 
2773 DECLARE_UVERBS_NAMED_METHOD_DESTROY(
2774 	MLX5_IB_METHOD_DEVX_OBJ_DESTROY,
2775 	UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_DESTROY_HANDLE,
2776 			MLX5_IB_OBJECT_DEVX_OBJ,
2777 			UVERBS_ACCESS_DESTROY,
2778 			UA_MANDATORY));
2779 
2780 DECLARE_UVERBS_NAMED_METHOD(
2781 	MLX5_IB_METHOD_DEVX_OBJ_MODIFY,
2782 	UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE,
2783 			UVERBS_IDR_ANY_OBJECT,
2784 			UVERBS_ACCESS_WRITE,
2785 			UA_MANDATORY),
2786 	UVERBS_ATTR_PTR_IN(
2787 		MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN,
2788 		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
2789 		UA_MANDATORY,
2790 		UA_ALLOC_AND_COPY),
2791 	UVERBS_ATTR_PTR_OUT(
2792 		MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT,
2793 		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
2794 		UA_MANDATORY));
2795 
2796 DECLARE_UVERBS_NAMED_METHOD(
2797 	MLX5_IB_METHOD_DEVX_OBJ_QUERY,
2798 	UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE,
2799 			UVERBS_IDR_ANY_OBJECT,
2800 			UVERBS_ACCESS_READ,
2801 			UA_MANDATORY),
2802 	UVERBS_ATTR_PTR_IN(
2803 		MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN,
2804 		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
2805 		UA_MANDATORY,
2806 		UA_ALLOC_AND_COPY),
2807 	UVERBS_ATTR_PTR_OUT(
2808 		MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT,
2809 		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
2810 		UA_MANDATORY));
2811 
2812 DECLARE_UVERBS_NAMED_METHOD(
2813 	MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY,
2814 	UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE,
2815 			UVERBS_IDR_ANY_OBJECT,
2816 			UVERBS_ACCESS_READ,
2817 			UA_MANDATORY),
2818 	UVERBS_ATTR_PTR_IN(
2819 		MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN,
2820 		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
2821 		UA_MANDATORY,
2822 		UA_ALLOC_AND_COPY),
2823 	UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_OUT_LEN,
2824 		u16, UA_MANDATORY),
2825 	UVERBS_ATTR_FD(MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_FD,
2826 		MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD,
2827 		UVERBS_ACCESS_READ,
2828 		UA_MANDATORY),
2829 	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_WR_ID,
2830 		UVERBS_ATTR_TYPE(u64),
2831 		UA_MANDATORY));
2832 
2833 DECLARE_UVERBS_NAMED_METHOD(
2834 	MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT,
2835 	UVERBS_ATTR_FD(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_HANDLE,
2836 		MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD,
2837 		UVERBS_ACCESS_READ,
2838 		UA_MANDATORY),
2839 	UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_OBJ_HANDLE,
2840 		MLX5_IB_OBJECT_DEVX_OBJ,
2841 		UVERBS_ACCESS_READ,
2842 		UA_OPTIONAL),
2843 	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_TYPE_NUM_LIST,
2844 		UVERBS_ATTR_MIN_SIZE(sizeof(u16)),
2845 		UA_MANDATORY,
2846 		UA_ALLOC_AND_COPY),
2847 	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_COOKIE,
2848 		UVERBS_ATTR_TYPE(u64),
2849 		UA_OPTIONAL),
2850 	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_NUM,
2851 		UVERBS_ATTR_TYPE(u32),
2852 		UA_OPTIONAL));
2853 
2854 DECLARE_UVERBS_GLOBAL_METHODS(MLX5_IB_OBJECT_DEVX,
2855 			      &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OTHER),
2856 			      &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_UAR),
2857 			      &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_EQN),
2858 			      &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT));
2859 
2860 DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ,
2861 			    UVERBS_TYPE_ALLOC_IDR(devx_obj_cleanup),
2862 			    &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_CREATE),
2863 			    &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_DESTROY),
2864 			    &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_MODIFY),
2865 			    &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_QUERY),
2866 			    &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY));
2867 
2868 DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM,
2869 			    UVERBS_TYPE_ALLOC_IDR(devx_umem_cleanup),
2870 			    &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_REG),
2871 			    &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_DEREG));
2872 
2873 
2874 DECLARE_UVERBS_NAMED_METHOD(
2875 	MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC,
2876 	UVERBS_ATTR_FD(MLX5_IB_ATTR_DEVX_ASYNC_CMD_FD_ALLOC_HANDLE,
2877 			MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD,
2878 			UVERBS_ACCESS_NEW,
2879 			UA_MANDATORY));
2880 
2881 DECLARE_UVERBS_NAMED_OBJECT(
2882 	MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD,
2883 	UVERBS_TYPE_ALLOC_FD(sizeof(struct devx_async_cmd_event_file),
2884 			     devx_async_cmd_event_destroy_uobj,
2885 			     &devx_async_cmd_event_fops, "[devx_async_cmd]",
2886 			     FMODE_READ),
2887 	&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC));
2888 
2889 DECLARE_UVERBS_NAMED_METHOD(
2890 	MLX5_IB_METHOD_DEVX_ASYNC_EVENT_FD_ALLOC,
2891 	UVERBS_ATTR_FD(MLX5_IB_ATTR_DEVX_ASYNC_EVENT_FD_ALLOC_HANDLE,
2892 			MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD,
2893 			UVERBS_ACCESS_NEW,
2894 			UA_MANDATORY),
2895 	UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_DEVX_ASYNC_EVENT_FD_ALLOC_FLAGS,
2896 			enum mlx5_ib_uapi_devx_create_event_channel_flags,
2897 			UA_MANDATORY));
2898 
2899 DECLARE_UVERBS_NAMED_OBJECT(
2900 	MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD,
2901 	UVERBS_TYPE_ALLOC_FD(sizeof(struct devx_async_event_file),
2902 			     devx_async_event_destroy_uobj,
2903 			     &devx_async_event_fops, "[devx_async_event]",
2904 			     FMODE_READ),
2905 	&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_ASYNC_EVENT_FD_ALLOC));
2906 
2907 static bool devx_is_supported(struct ib_device *device)
2908 {
2909 	struct mlx5_ib_dev *dev = to_mdev(device);
2910 
2911 	return MLX5_CAP_GEN(dev->mdev, log_max_uctx);
2912 }
2913 
2914 const struct uapi_definition mlx5_ib_devx_defs[] = {
2915 	UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
2916 		MLX5_IB_OBJECT_DEVX,
2917 		UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
2918 	UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
2919 		MLX5_IB_OBJECT_DEVX_OBJ,
2920 		UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
2921 	UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
2922 		MLX5_IB_OBJECT_DEVX_UMEM,
2923 		UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
2924 	UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
2925 		MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD,
2926 		UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
2927 	UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
2928 		MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD,
2929 		UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
2930 	{},
2931 };
2932