xref: /freebsd/sys/dev/mlx5/mlx5_core/mlx5_main.c (revision 780fb4a2)
/*-
 * Copyright (c) 2013-2017, Mellanox Technologies, Ltd.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#define	LINUXKPI_PARAM_PREFIX mlx5_

#include <linux/kmod.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/slab.h>
#include <linux/io-mapping.h>
#include <linux/interrupt.h>
#include <dev/mlx5/driver.h>
#include <dev/mlx5/cq.h>
#include <dev/mlx5/qp.h>
#include <dev/mlx5/srq.h>
#include <linux/delay.h>
#include <dev/mlx5/mlx5_ifc.h>
#include "mlx5_core.h"
#include "fs_core.h"

MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
MODULE_DESCRIPTION("Mellanox Connect-IB, ConnectX-4 core driver");
MODULE_LICENSE("Dual BSD/GPL");
#if (__FreeBSD_version >= 1100000)
MODULE_DEPEND(mlx5, linuxkpi, 1, 1, 1);
#endif
MODULE_VERSION(mlx5, 1);

int mlx5_core_debug_mask;
module_param_named(debug_mask, mlx5_core_debug_mask, int, 0644);
MODULE_PARM_DESC(debug_mask, "debug mask: 1 = dump cmd data, 2 = dump cmd exec time, 3 = both. Default=0");

#define MLX5_DEFAULT_PROF	2
static int prof_sel = MLX5_DEFAULT_PROF;
module_param_named(prof_sel, prof_sel, int, 0444);
MODULE_PARM_DESC(prof_sel, "profile selector. Valid range 0 - 3");

#define NUMA_NO_NODE       -1

static LIST_HEAD(intf_list);
static LIST_HEAD(dev_list);
static DEFINE_MUTEX(intf_mutex);

struct mlx5_device_context {
	struct list_head	list;
	struct mlx5_interface  *intf;
	void		       *context;
};

enum {
	MLX5_ATOMIC_REQ_MODE_BE = 0x0,
	MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS = 0x1,
};

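/*
 * Device profiles, selected at load time via the "prof_sel" module
 * parameter.  Profile 0 leaves the firmware defaults untouched,
 * profiles 1 and 3 only bound the QP table size (log_max_qp), and
 * profile 2, the default, additionally pre-sizes the MR cache
 * buckets.  Roughly, "size" is the number of cached MKeys to keep in
 * a bucket and "limit" its low watermark; the precise semantics live
 * in the MR cache code.
 */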
static struct mlx5_profile profiles[] = {
	[0] = {
		.mask           = 0,
	},
	[1] = {
		.mask		= MLX5_PROF_MASK_QP_SIZE,
		.log_max_qp	= 12,
	},
	[2] = {
		.mask		= MLX5_PROF_MASK_QP_SIZE |
				  MLX5_PROF_MASK_MR_CACHE,
		.log_max_qp	= 17,
		.mr_cache[0]	= {
			.size	= 500,
			.limit	= 250
		},
		.mr_cache[1]	= {
			.size	= 500,
			.limit	= 250
		},
		.mr_cache[2]	= {
			.size	= 500,
			.limit	= 250
		},
		.mr_cache[3]	= {
			.size	= 500,
			.limit	= 250
		},
		.mr_cache[4]	= {
			.size	= 500,
			.limit	= 250
		},
		.mr_cache[5]	= {
			.size	= 500,
			.limit	= 250
		},
		.mr_cache[6]	= {
			.size	= 500,
			.limit	= 250
		},
		.mr_cache[7]	= {
			.size	= 500,
			.limit	= 250
		},
		.mr_cache[8]	= {
			.size	= 500,
			.limit	= 250
		},
		.mr_cache[9]	= {
			.size	= 500,
			.limit	= 250
		},
		.mr_cache[10]	= {
			.size	= 500,
			.limit	= 250
		},
		.mr_cache[11]	= {
			.size	= 500,
			.limit	= 250
		},
		.mr_cache[12]	= {
			.size	= 64,
			.limit	= 32
		},
		.mr_cache[13]	= {
			.size	= 32,
			.limit	= 16
		},
		.mr_cache[14]	= {
			.size	= 16,
			.limit	= 8
		},
	},
	[3] = {
		.mask		= MLX5_PROF_MASK_QP_SIZE,
		.log_max_qp	= 17,
	},
};

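/*
 * DMA setup: prefer 64-bit masks for both the streaming and the
 * coherent (consistent) mappings, fall back to 32-bit if the platform
 * refuses, and cap a single DMA segment at 2GB.
 */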
static int set_dma_caps(struct pci_dev *pdev)
{
	int err;

	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
	if (err) {
		device_printf((&pdev->dev)->bsddev, "WARN: ""Warning: couldn't set 64-bit PCI DMA mask\n");
		err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
		if (err) {
			device_printf((&pdev->dev)->bsddev, "ERR: ""Can't set PCI DMA mask, aborting\n");
			return err;
		}
	}

	err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
	if (err) {
		device_printf((&pdev->dev)->bsddev, "WARN: ""Warning: couldn't set 64-bit consistent PCI DMA mask\n");
		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
		if (err) {
			device_printf((&pdev->dev)->bsddev, "ERR: ""Can't set consistent PCI DMA mask, aborting\n");
			return err;
		}
	}

	dma_set_max_seg_size(&pdev->dev, 2u * 1024 * 1024 * 1024);
	return err;
}

static int mlx5_pci_enable_device(struct mlx5_core_dev *dev)
{
	struct pci_dev *pdev = dev->pdev;
	int err = 0;

	mutex_lock(&dev->pci_status_mutex);
	if (dev->pci_status == MLX5_PCI_STATUS_DISABLED) {
		err = pci_enable_device(pdev);
		if (!err)
			dev->pci_status = MLX5_PCI_STATUS_ENABLED;
	}
	mutex_unlock(&dev->pci_status_mutex);

	return err;
}

static void mlx5_pci_disable_device(struct mlx5_core_dev *dev)
{
	struct pci_dev *pdev = dev->pdev;

	mutex_lock(&dev->pci_status_mutex);
	if (dev->pci_status == MLX5_PCI_STATUS_ENABLED) {
		pci_disable_device(pdev);
		dev->pci_status = MLX5_PCI_STATUS_DISABLED;
	}
	mutex_unlock(&dev->pci_status_mutex);
}

static int request_bar(struct pci_dev *pdev)
{
	int err = 0;

	if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
		device_printf((&pdev->dev)->bsddev, "ERR: ""Missing registers BAR, aborting\n");
		return -ENODEV;
	}

	err = pci_request_regions(pdev, DRIVER_NAME);
	if (err)
		device_printf((&pdev->dev)->bsddev, "ERR: ""Couldn't get PCI resources, aborting\n");

	return err;
}

static void release_bar(struct pci_dev *pdev)
{
	pci_release_regions(pdev);
}

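/*
 * MSI-X sizing: one completion vector per port per online CPU, plus
 * MLX5_EQ_VEC_COMP_BASE vectors reserved for the control/async EQs,
 * clamped to the firmware's 1 << log_max_eq limit.  At least one
 * completion vector beyond the base is required.  The GFP_KERNEL
 * allocations below carry no NULL checks; under FreeBSD's linuxkpi
 * they map to M_WAITOK and sleep rather than fail.
 */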
static int mlx5_enable_msix(struct mlx5_core_dev *dev)
{
	struct mlx5_priv *priv = &dev->priv;
	struct mlx5_eq_table *table = &priv->eq_table;
	int num_eqs = 1 << MLX5_CAP_GEN(dev, log_max_eq);
	int nvec;
	int i;

	nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() +
	       MLX5_EQ_VEC_COMP_BASE;
	nvec = min_t(int, nvec, num_eqs);
	if (nvec <= MLX5_EQ_VEC_COMP_BASE)
		return -ENOMEM;

	priv->msix_arr = kzalloc(nvec * sizeof(*priv->msix_arr), GFP_KERNEL);

	priv->irq_info = kzalloc(nvec * sizeof(*priv->irq_info), GFP_KERNEL);

	for (i = 0; i < nvec; i++)
		priv->msix_arr[i].entry = i;

	nvec = pci_enable_msix_range(dev->pdev, priv->msix_arr,
				     MLX5_EQ_VEC_COMP_BASE + 1, nvec);
	if (nvec < 0)
		return nvec;

	table->num_comp_vectors = nvec - MLX5_EQ_VEC_COMP_BASE;

	return 0;
}

static void mlx5_disable_msix(struct mlx5_core_dev *dev)
{
	struct mlx5_priv *priv = &dev->priv;

	pci_disable_msix(dev->pdev);
	kfree(priv->irq_info);
	kfree(priv->msix_arr);
}

struct mlx5_reg_host_endianess {
	u8	he;
	u8      rsvd[15];
};

#define CAP_MASK(pos, size) ((u64)((1 << (size)) - 1) << (pos))

enum {
	MLX5_CAP_BITS_RW_MASK = CAP_MASK(MLX5_CAP_OFF_CMDIF_CSUM, 2) |
				MLX5_DEV_CAP_FLAG_DCT |
				MLX5_DEV_CAP_FLAG_DRAIN_SIGERR,
};

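/*
 * The firmware encodes the pkey table size as log2(size / 128), so
 * the mapping below is equivalent to ilog2(size >> 7) for the
 * supported sizes.
 */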
static u16 to_fw_pkey_sz(u32 size)
{
	switch (size) {
	case 128:
		return 0;
	case 256:
		return 1;
	case 512:
		return 2;
	case 1024:
		return 3;
	case 2048:
		return 4;
	case 4096:
		return 5;
	default:
		printf("mlx5_core: WARN: ""invalid pkey table size %d\n", size);
		return 0;
	}
}

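/*
 * QUERY_HCA_CAP packs both arguments into op_mod: the capability
 * type goes in the upper bits and bit 0 selects between the maximum
 * and the currently enabled capability set (cap_mode).
 */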
static int mlx5_core_get_caps_mode(struct mlx5_core_dev *dev,
				   enum mlx5_cap_type cap_type,
				   enum mlx5_cap_mode cap_mode)
{
	u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)];
	int out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
	void *out, *hca_caps;
	u16 opmod = (cap_type << 1) | (cap_mode & 0x01);
	int err;

	memset(in, 0, sizeof(in));
	out = kzalloc(out_sz, GFP_KERNEL);

	MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
	MLX5_SET(query_hca_cap_in, in, op_mod, opmod);
	err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz);
	if (err) {
		mlx5_core_warn(dev,
			       "QUERY_HCA_CAP : type(%x) opmode(%x) Failed(%d)\n",
			       cap_type, cap_mode, err);
		goto query_ex;
	}

	hca_caps = MLX5_ADDR_OF(query_hca_cap_out, out, capability);

	switch (cap_mode) {
	case HCA_CAP_OPMOD_GET_MAX:
		memcpy(dev->hca_caps_max[cap_type], hca_caps,
		       MLX5_UN_SZ_BYTES(hca_cap_union));
		break;
	case HCA_CAP_OPMOD_GET_CUR:
		memcpy(dev->hca_caps_cur[cap_type], hca_caps,
		       MLX5_UN_SZ_BYTES(hca_cap_union));
		break;
	default:
		mlx5_core_warn(dev,
			       "Tried to query dev cap type(%x) with wrong opmode(%x)\n",
			       cap_type, cap_mode);
		err = -EINVAL;
		break;
	}
query_ex:
	kfree(out);
	return err;
}

int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type)
{
	int ret;

	ret = mlx5_core_get_caps_mode(dev, cap_type, HCA_CAP_OPMOD_GET_CUR);
	if (ret)
		return ret;

	return mlx5_core_get_caps_mode(dev, cap_type, HCA_CAP_OPMOD_GET_MAX);
}

static int set_caps(struct mlx5_core_dev *dev, void *in, int in_sz)
{
	u32 out[MLX5_ST_SZ_DW(set_hca_cap_out)] = {0};

	MLX5_SET(set_hca_cap_in, in, opcode, MLX5_CMD_OP_SET_HCA_CAP);

	return mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out));
}

static int handle_hca_cap(struct mlx5_core_dev *dev)
{
	void *set_ctx = NULL;
	struct mlx5_profile *prof = dev->profile;
	int err = -ENOMEM;
	int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
	void *set_hca_cap;

	set_ctx = kzalloc(set_sz, GFP_KERNEL);

	err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL);
	if (err)
		goto query_ex;

	set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx,
				   capability);
	memcpy(set_hca_cap, dev->hca_caps_cur[MLX5_CAP_GENERAL],
	       MLX5_ST_SZ_BYTES(cmd_hca_cap));

	mlx5_core_dbg(dev, "Current Pkey table size %d Setting new size %d\n",
		      mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(dev, pkey_table_size)),
		      128);
	/* we limit the size of the pkey table to 128 entries for now */
	MLX5_SET(cmd_hca_cap, set_hca_cap, pkey_table_size,
		 to_fw_pkey_sz(128));

	if (prof->mask & MLX5_PROF_MASK_QP_SIZE)
		MLX5_SET(cmd_hca_cap, set_hca_cap, log_max_qp,
			 prof->log_max_qp);

	/* disable cmdif checksum */
	MLX5_SET(cmd_hca_cap, set_hca_cap, cmdif_checksum, 0);

	/* enable drain sigerr */
	MLX5_SET(cmd_hca_cap, set_hca_cap, drain_sigerr, 1);

	MLX5_SET(cmd_hca_cap, set_hca_cap, log_uar_page_sz, PAGE_SHIFT - 12);

	err = set_caps(dev, set_ctx, set_sz);

query_ex:
	kfree(set_ctx);
	return err;
}

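/*
 * If the device has atomic capabilities and the firmware supports
 * host-endianness atomic requests, switch the requestor from the
 * default big-endian mode to host endianness; otherwise leave the
 * firmware default alone.
 */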
static int handle_hca_cap_atomic(struct mlx5_core_dev *dev)
{
	void *set_ctx;
	void *set_hca_cap;
	int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
	int req_endianness;
	int err;

	if (MLX5_CAP_GEN(dev, atomic)) {
		err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC);
		if (err)
			return err;
	} else {
		return 0;
	}

	req_endianness =
		MLX5_CAP_ATOMIC(dev,
				supported_atomic_req_8B_endianess_mode_1);

	if (req_endianness != MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS)
		return 0;

	set_ctx = kzalloc(set_sz, GFP_KERNEL);
	if (!set_ctx)
		return -ENOMEM;

	MLX5_SET(set_hca_cap_in, set_ctx, op_mod,
		 MLX5_SET_HCA_CAP_OP_MOD_ATOMIC << 1);
	set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability);

	/* Set requestor to host endianness */
	MLX5_SET(atomic_caps, set_hca_cap, atomic_req_8B_endianess_mode,
		 MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS);

	err = set_caps(dev, set_ctx, set_sz);

	kfree(set_ctx);
	return err;
}

static int set_hca_ctrl(struct mlx5_core_dev *dev)
{
	struct mlx5_reg_host_endianess he_in;
	struct mlx5_reg_host_endianess he_out;
	int err;

	if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH &&
	    !MLX5_CAP_GEN(dev, roce))
		return 0;

	memset(&he_in, 0, sizeof(he_in));
	he_in.he = MLX5_SET_HOST_ENDIANNESS;
	err = mlx5_core_access_reg(dev, &he_in, sizeof(he_in),
					&he_out, sizeof(he_out),
					MLX5_REG_HOST_ENDIANNESS, 0, 1);
	return err;
}

static int mlx5_core_enable_hca(struct mlx5_core_dev *dev)
{
	u32 out[MLX5_ST_SZ_DW(enable_hca_out)] = {0};
	u32 in[MLX5_ST_SZ_DW(enable_hca_in)] = {0};

	MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA);
	return mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
}

static int mlx5_core_disable_hca(struct mlx5_core_dev *dev)
{
	u32 out[MLX5_ST_SZ_DW(disable_hca_out)] = {0};
	u32 in[MLX5_ST_SZ_DW(disable_hca_in)] = {0};

	MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA);
	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}

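/*
 * ISSI (Interface Step Sequence ID) negotiation: query the bitmask
 * of interface versions the firmware supports and move up to ISSI 1
 * when offered, otherwise stay at ISSI 0.  Firmware old enough to
 * reject QUERY_ISSI with BAD_OP_ERR only speaks ISSI 0.
 */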
static int mlx5_core_set_issi(struct mlx5_core_dev *dev)
{
	u32 query_in[MLX5_ST_SZ_DW(query_issi_in)] = {0};
	u32 query_out[MLX5_ST_SZ_DW(query_issi_out)] = {0};
	u32 sup_issi;
	int err;

	MLX5_SET(query_issi_in, query_in, opcode, MLX5_CMD_OP_QUERY_ISSI);

	err = mlx5_cmd_exec(dev, query_in, sizeof(query_in), query_out, sizeof(query_out));
	if (err) {
		u32 syndrome;
		u8 status;

		mlx5_cmd_mbox_status(query_out, &status, &syndrome);
		if (status == MLX5_CMD_STAT_BAD_OP_ERR) {
			pr_debug("Only ISSI 0 is supported\n");
			return 0;
		}

		printf("mlx5_core: ERR: ""failed to query ISSI\n");
		return err;
	}

	sup_issi = MLX5_GET(query_issi_out, query_out, supported_issi_dw0);

	if (sup_issi & (1 << 1)) {
		u32 set_in[MLX5_ST_SZ_DW(set_issi_in)]	 = {0};
		u32 set_out[MLX5_ST_SZ_DW(set_issi_out)] = {0};

		MLX5_SET(set_issi_in, set_in, opcode, MLX5_CMD_OP_SET_ISSI);
		MLX5_SET(set_issi_in, set_in, current_issi, 1);

		err = mlx5_cmd_exec(dev, set_in, sizeof(set_in), set_out, sizeof(set_out));
		if (err) {
			printf("mlx5_core: ERR: ""failed to set ISSI=1 err(%d)\n", err);
			return err;
		}

		dev->issi = 1;

		return 0;
	} else if (sup_issi & (1 << 0)) {
		return 0;
	}

	return -ENOTSUPP;
}

int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, int *irqn)
{
	struct mlx5_eq_table *table = &dev->priv.eq_table;
	struct mlx5_eq *eq;
	int err = -ENOENT;

	spin_lock(&table->lock);
	list_for_each_entry(eq, &table->comp_eqs_list, list) {
		if (eq->index == vector) {
			*eqn = eq->eqn;
			*irqn = eq->irqn;
			err = 0;
			break;
		}
	}
	spin_unlock(&table->lock);

	return err;
}
EXPORT_SYMBOL(mlx5_vector2eqn);

int mlx5_rename_eq(struct mlx5_core_dev *dev, int eq_ix, char *name)
{
	struct mlx5_priv *priv = &dev->priv;
	struct mlx5_eq_table *table = &priv->eq_table;
	struct mlx5_eq *eq;
	int err = -ENOENT;

	spin_lock(&table->lock);
	list_for_each_entry(eq, &table->comp_eqs_list, list) {
		if (eq->index == eq_ix) {
			int irq_ix = eq_ix + MLX5_EQ_VEC_COMP_BASE;

			snprintf(priv->irq_info[irq_ix].name, MLX5_MAX_IRQ_NAME,
				 "%s-%d", name, eq_ix);

			err = 0;
			break;
		}
	}
	spin_unlock(&table->lock);

	return err;
}

static void free_comp_eqs(struct mlx5_core_dev *dev)
{
	struct mlx5_eq_table *table = &dev->priv.eq_table;
	struct mlx5_eq *eq, *n;

	spin_lock(&table->lock);
	list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
		list_del(&eq->list);
		spin_unlock(&table->lock);
		if (mlx5_destroy_unmap_eq(dev, eq))
			mlx5_core_warn(dev, "failed to destroy EQ 0x%x\n",
				       eq->eqn);
		kfree(eq);
		spin_lock(&table->lock);
	}
	spin_unlock(&table->lock);
}

static int alloc_comp_eqs(struct mlx5_core_dev *dev)
{
	struct mlx5_eq_table *table = &dev->priv.eq_table;
	char name[MLX5_MAX_IRQ_NAME];
	struct mlx5_eq *eq;
	int ncomp_vec;
	int nent;
	int err;
	int i;

	INIT_LIST_HEAD(&table->comp_eqs_list);
	ncomp_vec = table->num_comp_vectors;
	nent = MLX5_COMP_EQ_SIZE;
	for (i = 0; i < ncomp_vec; i++) {
		eq = kzalloc(sizeof(*eq), GFP_KERNEL);

		snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i);
		err = mlx5_create_map_eq(dev, eq,
					 i + MLX5_EQ_VEC_COMP_BASE, nent, 0,
					 name, &dev->priv.uuari.uars[0]);
		if (err) {
			kfree(eq);
			goto clean;
		}
		mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->eqn);
		eq->index = i;
		spin_lock(&table->lock);
		list_add_tail(&eq->list, &table->comp_eqs_list);
		spin_unlock(&table->lock);
	}

	return 0;

clean:
	free_comp_eqs(dev);
	return err;
}

static int map_bf_area(struct mlx5_core_dev *dev)
{
	resource_size_t bf_start = pci_resource_start(dev->pdev, 0);
	resource_size_t bf_len = pci_resource_len(dev->pdev, 0);

	dev->priv.bf_mapping = io_mapping_create_wc(bf_start, bf_len);

	return dev->priv.bf_mapping ? 0 : -ENOMEM;
}

static void unmap_bf_area(struct mlx5_core_dev *dev)
{
	if (dev->priv.bf_mapping)
		io_mapping_free(dev->priv.bf_mapping);
}

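/*
 * Bit 31 of the "initializing" word in the initialization segment
 * remains set while the firmware is still booting; wait_fw_init()
 * polls it until it clears or the timeout expires.
 */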
static inline int fw_initializing(struct mlx5_core_dev *dev)
{
	return ioread32be(&dev->iseg->initializing) >> 31;
}

static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili)
{
	u64 end = jiffies + msecs_to_jiffies(max_wait_mili);
	int err = 0;

	while (fw_initializing(dev)) {
		if (time_after(jiffies, end)) {
			err = -EBUSY;
			break;
		}
		msleep(FW_INIT_WAIT_MS);
	}

	return err;
}

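/*
 * Interface glue: each registered mlx5_interface (e.g. the Ethernet
 * or Infiniband ULP) is paired with each core device through a
 * mlx5_device_context that stores the cookie returned by intf->add(),
 * so later remove/event calls can find it.  dev_list and intf_list
 * are both walked under intf_mutex, making the registration order of
 * devices and interfaces irrelevant.
 */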
static void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
{
	struct mlx5_device_context *dev_ctx;
	struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);

	dev_ctx = kzalloc(sizeof(*dev_ctx), GFP_KERNEL);
	if (!dev_ctx)
		return;

	dev_ctx->intf    = intf;
	CURVNET_SET_QUIET(vnet0);
	dev_ctx->context = intf->add(dev);
	CURVNET_RESTORE();

	if (dev_ctx->context) {
		spin_lock_irq(&priv->ctx_lock);
		list_add_tail(&dev_ctx->list, &priv->ctx_list);
		spin_unlock_irq(&priv->ctx_lock);
	} else {
		kfree(dev_ctx);
	}
}

static void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
{
	struct mlx5_device_context *dev_ctx;
	struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);

	list_for_each_entry(dev_ctx, &priv->ctx_list, list)
		if (dev_ctx->intf == intf) {
			spin_lock_irq(&priv->ctx_lock);
			list_del(&dev_ctx->list);
			spin_unlock_irq(&priv->ctx_lock);

			intf->remove(dev, dev_ctx->context);
			kfree(dev_ctx);
			return;
		}
}

static int mlx5_register_device(struct mlx5_core_dev *dev)
{
	struct mlx5_priv *priv = &dev->priv;
	struct mlx5_interface *intf;

	mutex_lock(&intf_mutex);
	list_add_tail(&priv->dev_list, &dev_list);
	list_for_each_entry(intf, &intf_list, list)
		mlx5_add_device(intf, priv);
	mutex_unlock(&intf_mutex);

	return 0;
}

static void mlx5_unregister_device(struct mlx5_core_dev *dev)
{
	struct mlx5_priv *priv = &dev->priv;
	struct mlx5_interface *intf;

	mutex_lock(&intf_mutex);
	list_for_each_entry(intf, &intf_list, list)
		mlx5_remove_device(intf, priv);
	list_del(&priv->dev_list);
	mutex_unlock(&intf_mutex);
}

int mlx5_register_interface(struct mlx5_interface *intf)
{
	struct mlx5_priv *priv;

	if (!intf->add || !intf->remove)
		return -EINVAL;

	mutex_lock(&intf_mutex);
	list_add_tail(&intf->list, &intf_list);
	list_for_each_entry(priv, &dev_list, dev_list)
		mlx5_add_device(intf, priv);
	mutex_unlock(&intf_mutex);

	return 0;
}
EXPORT_SYMBOL(mlx5_register_interface);

void mlx5_unregister_interface(struct mlx5_interface *intf)
{
	struct mlx5_priv *priv;

	mutex_lock(&intf_mutex);
	list_for_each_entry(priv, &dev_list, dev_list)
		mlx5_remove_device(intf, priv);
	list_del(&intf->list);
	mutex_unlock(&intf_mutex);
}
EXPORT_SYMBOL(mlx5_unregister_interface);

void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol)
{
	struct mlx5_priv *priv = &mdev->priv;
	struct mlx5_device_context *dev_ctx;
	unsigned long flags;
	void *result = NULL;

	spin_lock_irqsave(&priv->ctx_lock, flags);

	list_for_each_entry(dev_ctx, &mdev->priv.ctx_list, list)
		if ((dev_ctx->intf->protocol == protocol) &&
		    dev_ctx->intf->get_dev) {
			result = dev_ctx->intf->get_dev(dev_ctx->context);
			break;
		}

	spin_unlock_irqrestore(&priv->ctx_lock, flags);

	return result;
}
EXPORT_SYMBOL(mlx5_get_protocol_dev);

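/*
 * PCI-level bring-up: enable the device, claim its BARs, become bus
 * master, set the DMA masks and map the initialization segment at the
 * start of BAR 0.
 */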
static int mlx5_pci_init(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
{
	struct pci_dev *pdev = dev->pdev;
	int err = 0;

	pci_set_drvdata(dev->pdev, dev);
	strncpy(priv->name, dev_name(&pdev->dev), MLX5_MAX_NAME_LEN);
	priv->name[MLX5_MAX_NAME_LEN - 1] = 0;

	mutex_init(&priv->pgdir_mutex);
	INIT_LIST_HEAD(&priv->pgdir_list);
	spin_lock_init(&priv->mkey_lock);

	priv->numa_node = NUMA_NO_NODE;

	err = mlx5_pci_enable_device(dev);
	if (err) {
		device_printf((&pdev->dev)->bsddev, "ERR: ""Cannot enable PCI device, aborting\n");
		goto err_dbg;
	}

	err = request_bar(pdev);
	if (err) {
		device_printf((&pdev->dev)->bsddev, "ERR: ""error requesting BARs, aborting\n");
		goto err_disable;
	}

	pci_set_master(pdev);

	err = set_dma_caps(pdev);
	if (err) {
		device_printf((&pdev->dev)->bsddev, "ERR: ""Failed setting DMA capabilities mask, aborting\n");
		goto err_clr_master;
	}

	dev->iseg_base = pci_resource_start(dev->pdev, 0);
	dev->iseg = ioremap(dev->iseg_base, sizeof(*dev->iseg));
	if (!dev->iseg) {
		err = -ENOMEM;
		device_printf((&pdev->dev)->bsddev, "ERR: ""Failed mapping initialization segment, aborting\n");
		goto err_clr_master;
	}

	if (mlx5_vsc_find_cap(dev))
		dev_err(&pdev->dev, "Unable to find vendor specific capabilities\n");

	return 0;

err_clr_master:
	pci_clear_master(dev->pdev);
	release_bar(dev->pdev);
err_disable:
	mlx5_pci_disable_device(dev);
err_dbg:
	return err;
}

static void mlx5_pci_close(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
{
	iounmap(dev->iseg);
	pci_clear_master(dev->pdev);
	release_bar(dev->pdev);
	mlx5_pci_disable_device(dev);
}

static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
{
	struct pci_dev *pdev = dev->pdev;
	int err;

	mlx5_vsec_init(dev);

	err = mlx5_query_hca_caps(dev);
	if (err) {
		dev_err(&pdev->dev, "query hca failed\n");
		goto out;
	}

	err = mlx5_query_board_id(dev);
	if (err) {
		dev_err(&pdev->dev, "query board id failed\n");
		goto out;
	}

	err = mlx5_eq_init(dev);
	if (err) {
		dev_err(&pdev->dev, "failed to initialize eq\n");
		goto out;
	}

	MLX5_INIT_DOORBELL_LOCK(&priv->cq_uar_lock);

	err = mlx5_init_cq_table(dev);
	if (err) {
		dev_err(&pdev->dev, "failed to initialize cq table\n");
		goto err_eq_cleanup;
	}

	mlx5_init_qp_table(dev);
	mlx5_init_srq_table(dev);
	mlx5_init_mr_table(dev);

#ifdef RATELIMIT
	err = mlx5_init_rl_table(dev);
	if (err) {
		dev_err(&pdev->dev, "Failed to init rate limiting\n");
		goto err_tables_cleanup;
	}
#endif
	return 0;

#ifdef RATELIMIT
err_tables_cleanup:
	mlx5_cleanup_mr_table(dev);
	mlx5_cleanup_srq_table(dev);
	mlx5_cleanup_qp_table(dev);
	mlx5_cleanup_cq_table(dev);
#endif

err_eq_cleanup:
	mlx5_eq_cleanup(dev);

out:
	return err;
}

static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
{
#ifdef RATELIMIT
	mlx5_cleanup_rl_table(dev);
#endif
	mlx5_cleanup_mr_table(dev);
	mlx5_cleanup_srq_table(dev);
	mlx5_cleanup_qp_table(dev);
	mlx5_cleanup_cq_table(dev);
	mlx5_eq_cleanup(dev);
}

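/*
 * Main bring-up path, used both at probe time (boot == true) and when
 * recovering from an error (boot == false, so the software tables
 * created by mlx5_init_once() are preserved).  Rough order: command
 * interface, firmware-init wait, ENABLE_HCA, ISSI negotiation, boot
 * pages, HCA control and capabilities, init pages, INIT_HCA, health
 * poller, software tables (boot only), MSI-X, UARs, EQs, blue flame
 * mapping, flow steering, and finally interface registration.  The
 * labels below unwind the same steps in reverse.
 */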
static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
			 bool boot)
{
	struct pci_dev *pdev = dev->pdev;
	int err;

	mutex_lock(&dev->intf_state_mutex);
	if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
		dev_warn(&dev->pdev->dev, "%s: interface is up, NOP\n",
			 __func__);
		goto out;
	}

	device_printf((&pdev->dev)->bsddev, "INFO: ""firmware version: %d.%d.%d\n", fw_rev_maj(dev), fw_rev_min(dev), fw_rev_sub(dev));

	/*
	 * On load, remove any previous indication of internal error:
	 * the device is coming up.
	 */
	dev->state = MLX5_DEVICE_STATE_UP;

	err = mlx5_cmd_init(dev);
	if (err) {
		device_printf((&pdev->dev)->bsddev, "ERR: ""Failed initializing command interface, aborting\n");
		goto out_err;
	}

	err = wait_fw_init(dev, FW_INIT_TIMEOUT_MILI);
	if (err) {
		device_printf((&dev->pdev->dev)->bsddev, "ERR: ""Firmware over %d MS in initializing state, aborting\n", FW_INIT_TIMEOUT_MILI);
		goto err_cmd_cleanup;
	}

	err = mlx5_core_enable_hca(dev);
	if (err) {
		device_printf((&pdev->dev)->bsddev, "ERR: ""enable hca failed\n");
		goto err_cmd_cleanup;
	}

	err = mlx5_core_set_issi(dev);
	if (err) {
		device_printf((&pdev->dev)->bsddev, "ERR: ""failed to set issi\n");
		goto err_disable_hca;
	}

	err = mlx5_pagealloc_start(dev);
	if (err) {
		device_printf((&pdev->dev)->bsddev, "ERR: ""mlx5_pagealloc_start failed\n");
		goto err_disable_hca;
	}

	err = mlx5_satisfy_startup_pages(dev, 1);
	if (err) {
		device_printf((&pdev->dev)->bsddev, "ERR: ""failed to allocate boot pages\n");
		goto err_pagealloc_stop;
	}

	err = set_hca_ctrl(dev);
	if (err) {
		device_printf((&pdev->dev)->bsddev, "ERR: ""set_hca_ctrl failed\n");
		goto reclaim_boot_pages;
	}

	err = handle_hca_cap(dev);
	if (err) {
		device_printf((&pdev->dev)->bsddev, "ERR: ""handle_hca_cap failed\n");
		goto reclaim_boot_pages;
	}

	err = handle_hca_cap_atomic(dev);
	if (err) {
		device_printf((&pdev->dev)->bsddev, "ERR: ""handle_hca_cap_atomic failed\n");
		goto reclaim_boot_pages;
	}

	err = mlx5_satisfy_startup_pages(dev, 0);
	if (err) {
		device_printf((&pdev->dev)->bsddev, "ERR: ""failed to allocate init pages\n");
		goto reclaim_boot_pages;
	}

	err = mlx5_cmd_init_hca(dev);
	if (err) {
		device_printf((&pdev->dev)->bsddev, "ERR: ""init hca failed\n");
		goto reclaim_boot_pages;
	}

	mlx5_start_health_poll(dev);

	if (boot) {
		err = mlx5_init_once(dev, priv);
		if (err) {
			dev_err(&pdev->dev, "sw objs init failed\n");
			goto err_stop_poll;
		}
	}

	err = mlx5_enable_msix(dev);
	if (err) {
		device_printf((&pdev->dev)->bsddev, "ERR: ""enable msix failed\n");
		goto err_cleanup_once;
	}

	err = mlx5_alloc_uuars(dev, &priv->uuari);
	if (err) {
		device_printf((&pdev->dev)->bsddev, "ERR: ""Failed allocating uar, aborting\n");
		goto err_disable_msix;
	}

	err = mlx5_start_eqs(dev);
	if (err) {
		device_printf((&pdev->dev)->bsddev, "ERR: ""Failed to start pages and async EQs\n");
		goto err_free_uar;
	}

	err = alloc_comp_eqs(dev);
	if (err) {
		device_printf((&pdev->dev)->bsddev, "ERR: ""Failed to alloc completion EQs\n");
		goto err_stop_eqs;
	}

	if (map_bf_area(dev))
		device_printf((&pdev->dev)->bsddev, "ERR: ""Failed to map blue flame area\n");

	err = mlx5_init_fs(dev);
	if (err) {
		mlx5_core_err(dev, "flow steering init %d\n", err);
		goto err_free_comp_eqs;
	}

	err = mlx5_register_device(dev);
	if (err) {
		dev_err(&pdev->dev, "mlx5_register_device failed %d\n", err);
		goto err_fs;
	}

	clear_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state);
	set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);

out:
	mutex_unlock(&dev->intf_state_mutex);
	return 0;

err_fs:
	mlx5_cleanup_fs(dev);

err_free_comp_eqs:
	free_comp_eqs(dev);
	unmap_bf_area(dev);

err_stop_eqs:
	mlx5_stop_eqs(dev);

err_free_uar:
	mlx5_free_uuars(dev, &priv->uuari);

err_disable_msix:
	mlx5_disable_msix(dev);

err_cleanup_once:
	if (boot)
		mlx5_cleanup_once(dev);

err_stop_poll:
	mlx5_stop_health_poll(dev);
	if (mlx5_cmd_teardown_hca(dev)) {
		device_printf((&dev->pdev->dev)->bsddev, "ERR: ""tear_down_hca failed, skip cleanup\n");
		goto out_err;
	}

reclaim_boot_pages:
	mlx5_reclaim_startup_pages(dev);

err_pagealloc_stop:
	mlx5_pagealloc_stop(dev);

err_disable_hca:
	mlx5_core_disable_hca(dev);

err_cmd_cleanup:
	mlx5_cmd_cleanup(dev);

out_err:
	dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
	mutex_unlock(&dev->intf_state_mutex);

	return err;
}

static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
			   bool cleanup)
{
	int err = 0;

	if (cleanup)
		mlx5_drain_health_recovery(dev);

	mutex_lock(&dev->intf_state_mutex);
	if (test_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state)) {
		dev_warn(&dev->pdev->dev, "%s: interface is down, NOP\n", __func__);
		if (cleanup)
			mlx5_cleanup_once(dev);
		goto out;
	}

	mlx5_unregister_device(dev);

	mlx5_cleanup_fs(dev);
	unmap_bf_area(dev);
	mlx5_wait_for_reclaim_vfs_pages(dev);
	free_comp_eqs(dev);
	mlx5_stop_eqs(dev);
	mlx5_free_uuars(dev, &priv->uuari);
	mlx5_disable_msix(dev);
	if (cleanup)
		mlx5_cleanup_once(dev);
	mlx5_stop_health_poll(dev);
	err = mlx5_cmd_teardown_hca(dev);
	if (err) {
		device_printf((&dev->pdev->dev)->bsddev, "ERR: ""tear_down_hca failed, skip cleanup\n");
		goto out;
	}
	mlx5_pagealloc_stop(dev);
	mlx5_reclaim_startup_pages(dev);
	mlx5_core_disable_hca(dev);
	mlx5_cmd_cleanup(dev);

out:
	clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
	set_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state);
	mutex_unlock(&dev->intf_state_mutex);
	return err;
}

void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
		     unsigned long param)
{
	struct mlx5_priv *priv = &dev->priv;
	struct mlx5_device_context *dev_ctx;
	unsigned long flags;

	spin_lock_irqsave(&priv->ctx_lock, flags);

	list_for_each_entry(dev_ctx, &priv->ctx_list, list)
		if (dev_ctx->intf->event)
			dev_ctx->intf->event(dev, dev_ctx->context, event, param);

	spin_unlock_irqrestore(&priv->ctx_lock, flags);
}

struct mlx5_core_event_handler {
	void (*event)(struct mlx5_core_dev *dev,
		      enum mlx5_dev_event event,
		      void *data);
};

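/*
 * PCI probe entry point: allocate the core device, validate the
 * requested profile and run the full bring-up.
 */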
static int init_one(struct pci_dev *pdev,
		    const struct pci_device_id *id)
{
	struct mlx5_core_dev *dev;
	struct mlx5_priv *priv;
	int err;

	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
	priv = &dev->priv;
	if (id)
		priv->pci_dev_data = id->driver_data;

	if (prof_sel < 0 || prof_sel >= ARRAY_SIZE(profiles)) {
		printf("mlx5_core: WARN: ""selected profile out of range, selecting default (%d)\n", MLX5_DEFAULT_PROF);
		prof_sel = MLX5_DEFAULT_PROF;
	}
	dev->profile = &profiles[prof_sel];
	dev->pdev = pdev;
	dev->event = mlx5_core_event;

	INIT_LIST_HEAD(&priv->ctx_list);
	spin_lock_init(&priv->ctx_lock);
	mutex_init(&dev->pci_status_mutex);
	mutex_init(&dev->intf_state_mutex);
	err = mlx5_pci_init(dev, priv);
	if (err) {
		device_printf((&pdev->dev)->bsddev, "ERR: ""mlx5_pci_init failed %d\n", err);
		goto clean_dev;
	}

	err = mlx5_health_init(dev);
	if (err) {
		device_printf((&pdev->dev)->bsddev, "ERR: ""mlx5_health_init failed %d\n", err);
		goto close_pci;
	}

	mlx5_pagealloc_init(dev);

	err = mlx5_load_one(dev, priv, true);
	if (err) {
		device_printf((&pdev->dev)->bsddev, "ERR: ""mlx5_load_one failed %d\n", err);
		goto clean_health;
	}

	mlx5_fwdump_prep(dev);

	pci_save_state(pdev->dev.bsddev);
	return 0;

clean_health:
	mlx5_pagealloc_cleanup(dev);
	mlx5_health_cleanup(dev);
close_pci:
	mlx5_pci_close(dev, priv);
clean_dev:
	kfree(dev);
	return err;
}

static void remove_one(struct pci_dev *pdev)
{
	struct mlx5_core_dev *dev  = pci_get_drvdata(pdev);
	struct mlx5_priv *priv = &dev->priv;

	if (mlx5_unload_one(dev, priv, true)) {
		dev_err(&dev->pdev->dev, "mlx5_unload_one failed\n");
		mlx5_health_cleanup(dev);
		return;
	}

	mlx5_fwdump_clean(dev);
	mlx5_pagealloc_cleanup(dev);
	mlx5_health_cleanup(dev);
	mlx5_pci_close(dev, priv);
	pci_set_drvdata(pdev, NULL);
	kfree(dev);
}

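/*
 * PCI error handlers: on a detected error the device is moved into
 * the internal-error state and unloaded without destroying the
 * software tables; slot reset re-enables it and restores config
 * space, and resume reloads it once it shows vital signs again.
 */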
static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev,
					      pci_channel_state_t state)
{
	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
	struct mlx5_priv *priv = &dev->priv;

	dev_info(&pdev->dev, "%s was called\n", __func__);
	mlx5_enter_error_state(dev, false);
	mlx5_unload_one(dev, priv, false);

	if (state) {
		mlx5_drain_health_wq(dev);
		mlx5_pci_disable_device(dev);
	}

	return state == pci_channel_io_perm_failure ?
		PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
}

static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev)
{
	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
	int err = 0;

	dev_info(&pdev->dev, "%s was called\n", __func__);

	err = mlx5_pci_enable_device(dev);
	if (err) {
		dev_err(&pdev->dev, "%s: mlx5_pci_enable_device failed with error code: %d\n"
			, __func__, err);
		return PCI_ERS_RESULT_DISCONNECT;
	}
	pci_set_master(pdev);
	pci_set_powerstate(pdev->dev.bsddev, PCI_POWERSTATE_D0);
	pci_restore_state(pdev->dev.bsddev);
	pci_save_state(pdev->dev.bsddev);

	return err ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
}

/* Wait for the device to show vital signs. For now we check
 * that we can read the device ID and that the health buffer
 * shows a non-zero value which is different from 0xffffffff.
 */
static void wait_vital(struct pci_dev *pdev)
{
	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
	struct mlx5_core_health *health = &dev->priv.health;
	const int niter = 100;
	u32 count;
	u16 did;
	int i;

	/* Wait for firmware to be ready after reset */
	msleep(1000);
	for (i = 0; i < niter; i++) {
		if (pci_read_config_word(pdev, 2, &did)) {
			dev_warn(&pdev->dev, "failed reading config word\n");
			break;
		}
		if (did == pdev->device) {
			dev_info(&pdev->dev, "device ID correctly read after %d iterations\n", i);
			break;
		}
		msleep(50);
	}
	if (i == niter)
		dev_warn(&pdev->dev, "%s-%d: could not read device ID\n", __func__, __LINE__);

	for (i = 0; i < niter; i++) {
		count = ioread32be(health->health_counter);
		if (count && count != 0xffffffff) {
			dev_info(&pdev->dev, "Counter value 0x%x after %d iterations\n", count, i);
			break;
		}
		msleep(50);
	}

	if (i == niter)
		dev_warn(&pdev->dev, "%s-%d: could not read health counter\n", __func__, __LINE__);
}

static void mlx5_pci_resume(struct pci_dev *pdev)
{
	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
	struct mlx5_priv *priv = &dev->priv;
	int err;

	dev_info(&pdev->dev, "%s was called\n", __func__);

	wait_vital(pdev);

	err = mlx5_load_one(dev, priv, false);
	if (err)
		dev_err(&pdev->dev, "%s: mlx5_load_one failed with error code: %d\n"
			, __func__, err);
	else
		dev_info(&pdev->dev, "%s: device recovered\n", __func__);
}

static const struct pci_error_handlers mlx5_err_handler = {
	.error_detected = mlx5_pci_err_detected,
	.slot_reset	= mlx5_pci_slot_reset,
	.resume		= mlx5_pci_resume
};

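/*
 * Fast unload path for shutdown: when the firmware supports forced
 * teardown and the device is not already in error, a single
 * FORCE_TEARDOWN_HCA command replaces the full unload sequence; the
 * device is then parked in the error state so no further commands
 * are issued.
 */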
static int mlx5_try_fast_unload(struct mlx5_core_dev *dev)
{
	int err;

	if (!MLX5_CAP_GEN(dev, force_teardown)) {
		mlx5_core_dbg(dev, "force teardown is not supported in the firmware\n");
		return -EOPNOTSUPP;
	}

	if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
		mlx5_core_dbg(dev, "Device in internal error state, giving up\n");
		return -EAGAIN;
	}

	err = mlx5_cmd_force_teardown_hca(dev);
	if (err) {
		mlx5_core_dbg(dev, "Firmware couldn't do fast unload error: %d\n", err);
		return err;
	}

	mlx5_enter_error_state(dev, true);

	return 0;
}

static void shutdown_one(struct pci_dev *pdev)
{
	struct mlx5_core_dev *dev  = pci_get_drvdata(pdev);
	struct mlx5_priv *priv = &dev->priv;
	int err;

	set_bit(MLX5_INTERFACE_STATE_SHUTDOWN, &dev->intf_state);
	err = mlx5_try_fast_unload(dev);
	if (err)
		mlx5_unload_one(dev, priv, false);
	mlx5_pci_disable_device(dev);
}

static const struct pci_device_id mlx5_core_pci_table[] = {
	{ PCI_VDEVICE(MELLANOX, 4113) }, /* Connect-IB */
	{ PCI_VDEVICE(MELLANOX, 4114) }, /* Connect-IB VF */
	{ PCI_VDEVICE(MELLANOX, 4115) }, /* ConnectX-4 */
	{ PCI_VDEVICE(MELLANOX, 4116) }, /* ConnectX-4 VF */
	{ PCI_VDEVICE(MELLANOX, 4117) }, /* ConnectX-4LX */
	{ PCI_VDEVICE(MELLANOX, 4118) }, /* ConnectX-4LX VF */
	{ PCI_VDEVICE(MELLANOX, 4119) }, /* ConnectX-5 */
	{ PCI_VDEVICE(MELLANOX, 4120) }, /* ConnectX-5 VF */
	{ PCI_VDEVICE(MELLANOX, 4121) },
	{ PCI_VDEVICE(MELLANOX, 4122) },
	{ PCI_VDEVICE(MELLANOX, 4123) },
	{ PCI_VDEVICE(MELLANOX, 4124) },
	{ PCI_VDEVICE(MELLANOX, 4125) },
	{ PCI_VDEVICE(MELLANOX, 4126) },
	{ PCI_VDEVICE(MELLANOX, 4127) },
	{ PCI_VDEVICE(MELLANOX, 4128) },
	{ PCI_VDEVICE(MELLANOX, 4129) },
	{ PCI_VDEVICE(MELLANOX, 4130) },
	{ PCI_VDEVICE(MELLANOX, 4131) },
	{ PCI_VDEVICE(MELLANOX, 4132) },
	{ PCI_VDEVICE(MELLANOX, 4133) },
	{ PCI_VDEVICE(MELLANOX, 4134) },
	{ PCI_VDEVICE(MELLANOX, 4135) },
	{ PCI_VDEVICE(MELLANOX, 4136) },
	{ PCI_VDEVICE(MELLANOX, 4137) },
	{ PCI_VDEVICE(MELLANOX, 4138) },
	{ PCI_VDEVICE(MELLANOX, 4139) },
	{ PCI_VDEVICE(MELLANOX, 4140) },
	{ PCI_VDEVICE(MELLANOX, 4141) },
	{ PCI_VDEVICE(MELLANOX, 4142) },
	{ PCI_VDEVICE(MELLANOX, 4143) },
	{ PCI_VDEVICE(MELLANOX, 4144) },
	{ 0, }
};

MODULE_DEVICE_TABLE(pci, mlx5_core_pci_table);

void mlx5_disable_device(struct mlx5_core_dev *dev)
{
	mlx5_pci_err_detected(dev->pdev, 0);
}

void mlx5_recover_device(struct mlx5_core_dev *dev)
{
	mlx5_pci_disable_device(dev);
	if (mlx5_pci_slot_reset(dev->pdev) == PCI_ERS_RESULT_RECOVERED)
		mlx5_pci_resume(dev->pdev);
}

struct pci_driver mlx5_core_driver = {
	.name           = DRIVER_NAME,
	.id_table       = mlx5_core_pci_table,
	.shutdown	= shutdown_one,
	.probe          = init_one,
	.remove         = remove_one,
	.err_handler	= &mlx5_err_handler
};

static int __init init(void)
{
	int err;

	err = pci_register_driver(&mlx5_core_driver);
	if (err)
		goto err_debug;

	err = mlx5_fwdump_init();
	if (err)
		goto err_fwdump;

	return 0;

err_fwdump:
	pci_unregister_driver(&mlx5_core_driver);

err_debug:
	return err;
}

static void __exit cleanup(void)
{
	mlx5_fwdump_fini();
	pci_unregister_driver(&mlx5_core_driver);
}

module_init(init);
module_exit(cleanup);