1 /*-
2 * Copyright (c) 2013-2021, Mellanox Technologies, Ltd. All rights reserved.
3 * Copyright (c) 2022 NVIDIA corporation & affiliates.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27 #include "opt_rss.h"
28 #include "opt_ratelimit.h"
29
30 #include <linux/kmod.h>
31 #include <linux/module.h>
32 #include <linux/errno.h>
33 #include <linux/pci.h>
34 #include <linux/dma-mapping.h>
35 #include <linux/slab.h>
36 #include <linux/io-mapping.h>
37 #include <linux/interrupt.h>
38 #include <linux/hardirq.h>
39 #include <dev/mlx5/driver.h>
40 #include <dev/mlx5/cq.h>
41 #include <dev/mlx5/qp.h>
42 #include <dev/mlx5/srq.h>
43 #include <dev/mlx5/mpfs.h>
44 #include <dev/mlx5/vport.h>
45 #include <linux/delay.h>
46 #include <dev/mlx5/mlx5_ifc.h>
47 #include <dev/mlx5/mlx5_fpga/core.h>
48 #include <dev/mlx5/mlx5_lib/mlx5.h>
49 #include <dev/mlx5/mlx5_core/mlx5_core.h>
50 #include <dev/mlx5/mlx5_core/eswitch.h>
51 #include <dev/mlx5/mlx5_core/fs_core.h>
52 #include <dev/mlx5/mlx5_core/diag_cnt.h>
53 #ifdef PCI_IOV
54 #include <sys/nv.h>
55 #include <dev/pci/pci_iov.h>
56 #include <sys/iov_schema.h>
57 #endif
58
59 static const char mlx5_version[] = "Mellanox Core driver "
60 DRIVER_VERSION " (" DRIVER_RELDATE ")";
61 MODULE_DESCRIPTION("Mellanox ConnectX-4 and onwards core driver");
62 MODULE_LICENSE("Dual BSD/GPL");
63 MODULE_DEPEND(mlx5, linuxkpi, 1, 1, 1);
64 MODULE_DEPEND(mlx5, mlxfw, 1, 1, 1);
65 MODULE_DEPEND(mlx5, firmware, 1, 1, 1);
66 MODULE_VERSION(mlx5, 1);
67
68 SYSCTL_NODE(_hw, OID_AUTO, mlx5, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
69 "mlx5 hardware controls");
70
71 int mlx5_core_debug_mask;
72 SYSCTL_INT(_hw_mlx5, OID_AUTO, debug_mask, CTLFLAG_RWTUN,
73 &mlx5_core_debug_mask, 0,
74 "debug mask: 1 = dump cmd data, 2 = dump cmd exec time, 3 = both. Default=0");
75
76 #define MLX5_DEFAULT_PROF 2
77 static int mlx5_prof_sel = MLX5_DEFAULT_PROF;
78 SYSCTL_INT(_hw_mlx5, OID_AUTO, prof_sel, CTLFLAG_RWTUN,
79 &mlx5_prof_sel, 0,
80 "profile selector. Valid range 0 - 2");
81
82 static int mlx5_fast_unload_enabled = 1;
83 SYSCTL_INT(_hw_mlx5, OID_AUTO, fast_unload_enabled, CTLFLAG_RWTUN,
84 &mlx5_fast_unload_enabled, 0,
85 "Set to enable fast unload. Clear to disable.");
86
87 static int mlx5_core_comp_eq_size = 1024;
88 SYSCTL_INT(_hw_mlx5, OID_AUTO, comp_eq_size, CTLFLAG_RDTUN | CTLFLAG_MPSAFE,
89 &mlx5_core_comp_eq_size, 0,
90 "Set default completion EQ size between 1024 and 16384 inclusivly. Value should be power of two.");
91
92 static LIST_HEAD(intf_list);
93 static LIST_HEAD(dev_list);
94 static DEFINE_MUTEX(intf_mutex);
95
96 struct mlx5_device_context {
97 struct list_head list;
98 struct mlx5_interface *intf;
99 void *context;
100 };
101
102 enum {
103 MLX5_ATOMIC_REQ_MODE_BE = 0x0,
104 MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS = 0x1,
105 };
106
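/*
 * Resource profiles selectable via hw.mlx5.prof_sel: [0] keeps firmware
 * defaults, [1] limits log_max_qp to 12, [2] (the default) limits
 * log_max_qp to 17 and pre-sizes the MR cache, and [3] limits log_max_qp
 * to 17 without touching the MR cache.
 */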
107 static struct mlx5_profile profiles[] = {
108 [0] = {
109 .mask = 0,
110 },
111 [1] = {
112 .mask = MLX5_PROF_MASK_QP_SIZE,
113 .log_max_qp = 12,
114 },
115 [2] = {
116 .mask = MLX5_PROF_MASK_QP_SIZE |
117 MLX5_PROF_MASK_MR_CACHE,
118 .log_max_qp = 17,
119 .mr_cache[0] = {
120 .size = 500,
121 .limit = 250
122 },
123 .mr_cache[1] = {
124 .size = 500,
125 .limit = 250
126 },
127 .mr_cache[2] = {
128 .size = 500,
129 .limit = 250
130 },
131 .mr_cache[3] = {
132 .size = 500,
133 .limit = 250
134 },
135 .mr_cache[4] = {
136 .size = 500,
137 .limit = 250
138 },
139 .mr_cache[5] = {
140 .size = 500,
141 .limit = 250
142 },
143 .mr_cache[6] = {
144 .size = 500,
145 .limit = 250
146 },
147 .mr_cache[7] = {
148 .size = 500,
149 .limit = 250
150 },
151 .mr_cache[8] = {
152 .size = 500,
153 .limit = 250
154 },
155 .mr_cache[9] = {
156 .size = 500,
157 .limit = 250
158 },
159 .mr_cache[10] = {
160 .size = 500,
161 .limit = 250
162 },
163 .mr_cache[11] = {
164 .size = 500,
165 .limit = 250
166 },
167 .mr_cache[12] = {
168 .size = 64,
169 .limit = 32
170 },
171 .mr_cache[13] = {
172 .size = 32,
173 .limit = 16
174 },
175 .mr_cache[14] = {
176 .size = 16,
177 .limit = 8
178 },
179 },
180 [3] = {
181 .mask = MLX5_PROF_MASK_QP_SIZE,
182 .log_max_qp = 17,
183 },
184 };
185
186 static int
187 mlx5_core_get_comp_eq_size(void)
188 {
189 int value = mlx5_core_comp_eq_size;
190
191 if (value < 1024)
192 value = 1024;
193 else if (value > 16384)
194 value = 16384;
195
196 /* round value down to a power of two by clearing low-order set bits */
197 while (value & (value - 1))
198 value &= (value - 1);
199 return (value);
200 }
201
202 static void mlx5_set_driver_version(struct mlx5_core_dev *dev)
203 {
204 const size_t driver_ver_sz =
205 MLX5_FLD_SZ_BYTES(set_driver_version_in, driver_version);
206 u8 in[MLX5_ST_SZ_BYTES(set_driver_version_in)] = {};
207 u8 out[MLX5_ST_SZ_BYTES(set_driver_version_out)] = {};
208 char *string;
209
210 if (!MLX5_CAP_GEN(dev, driver_version))
211 return;
212
213 string = MLX5_ADDR_OF(set_driver_version_in, in, driver_version);
214
215 snprintf(string, driver_ver_sz, "FreeBSD,mlx5_core,%u.%u.%u," DRIVER_VERSION,
216 __FreeBSD_version / 100000, (__FreeBSD_version / 1000) % 100,
217 __FreeBSD_version % 1000);
218
219 /* Send the command */
220 MLX5_SET(set_driver_version_in, in, opcode,
221 MLX5_CMD_OP_SET_DRIVER_VERSION);
222
223 mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
224 }
225
226 #ifdef PCI_IOV
227 static const char iov_mac_addr_name[] = "mac-addr";
228 static const char iov_node_guid_name[] = "node-guid";
229 static const char iov_port_guid_name[] = "port-guid";
230 #endif
231
232 static int set_dma_caps(struct pci_dev *pdev)
233 {
234 struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
235 int err;
236
237 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
238 if (err) {
239 mlx5_core_warn(dev, "couldn't set 64-bit PCI DMA mask\n");
240 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
241 if (err) {
242 mlx5_core_err(dev, "Can't set PCI DMA mask, aborting\n");
243 return err;
244 }
245 }
246
247 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
248 if (err) {
249 mlx5_core_warn(dev, "couldn't set 64-bit consistent PCI DMA mask\n");
250 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
251 if (err) {
252 mlx5_core_err(dev, "Can't set consistent PCI DMA mask, aborting\n");
253 return err;
254 }
255 }
256
257 dma_set_max_seg_size(&pdev->dev, 2u * 1024 * 1024 * 1024);
258 return err;
259 }
260
261 int mlx5_pci_read_power_status(struct mlx5_core_dev *dev,
262 u16 *p_power, u8 *p_status)
263 {
264 u32 in[MLX5_ST_SZ_DW(mpein_reg)] = {};
265 u32 out[MLX5_ST_SZ_DW(mpein_reg)] = {};
266 int err;
267
268 err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
269 MLX5_ACCESS_REG_SUMMARY_CTRL_ID_MPEIN, 0, 0);
270
271 *p_status = MLX5_GET(mpein_reg, out, pwr_status);
272 *p_power = MLX5_GET(mpein_reg, out, pci_power);
273 return err;
274 }
275
276 static int mlx5_pci_enable_device(struct mlx5_core_dev *dev)
277 {
278 struct pci_dev *pdev = dev->pdev;
279 int err = 0;
280
281 mutex_lock(&dev->pci_status_mutex);
282 if (dev->pci_status == MLX5_PCI_STATUS_DISABLED) {
283 err = pci_enable_device(pdev);
284 if (!err)
285 dev->pci_status = MLX5_PCI_STATUS_ENABLED;
286 }
287 mutex_unlock(&dev->pci_status_mutex);
288
289 return err;
290 }
291
292 static void mlx5_pci_disable_device(struct mlx5_core_dev *dev)
293 {
294 struct pci_dev *pdev = dev->pdev;
295
296 mutex_lock(&dev->pci_status_mutex);
297 if (dev->pci_status == MLX5_PCI_STATUS_ENABLED) {
298 pci_disable_device(pdev);
299 dev->pci_status = MLX5_PCI_STATUS_DISABLED;
300 }
301 mutex_unlock(&dev->pci_status_mutex);
302 }
303
304 static int request_bar(struct pci_dev *pdev)
305 {
306 struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
307 int err = 0;
308
309 if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
310 mlx5_core_err(dev, "Missing registers BAR, aborting\n");
311 return -ENODEV;
312 }
313
314 err = pci_request_regions(pdev, DRIVER_NAME);
315 if (err)
316 mlx5_core_err(dev, "Couldn't get PCI resources, aborting\n");
317
318 return err;
319 }
320
321 static void release_bar(struct pci_dev *pdev)
322 {
323 pci_release_regions(pdev);
324 }
325
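/*
 * Reserve one MSI-X vector per completion EQ on top of the async/control
 * vectors (MLX5_EQ_VEC_COMP_BASE).  The default budget is
 * num_ports * num_online_cpus(), overridable by the per-device msix_eqvec
 * sysctl, and is clamped to the device's log_max_eq and to the firmware
 * API limit of 256 vectors.
 */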
326 static int mlx5_enable_msix(struct mlx5_core_dev *dev)
327 {
328 struct mlx5_priv *priv = &dev->priv;
329 struct mlx5_eq_table *table = &priv->eq_table;
330 int num_eqs = 1 << MLX5_CAP_GEN(dev, log_max_eq);
331 int limit = dev->msix_eqvec;
332 int nvec = MLX5_EQ_VEC_COMP_BASE;
333 int i;
334
335 if (limit > 0)
336 nvec += limit;
337 else
338 nvec += MLX5_CAP_GEN(dev, num_ports) * num_online_cpus();
339
340 if (nvec > num_eqs)
341 nvec = num_eqs;
342 if (nvec > 256)
343 nvec = 256; /* limit of firmware API */
344 if (nvec <= MLX5_EQ_VEC_COMP_BASE)
345 return -ENOMEM;
346
347 priv->msix_arr = kzalloc(nvec * sizeof(*priv->msix_arr), GFP_KERNEL);
348
349 for (i = 0; i < nvec; i++)
350 priv->msix_arr[i].entry = i;
351
352 nvec = pci_enable_msix_range(dev->pdev, priv->msix_arr,
353 MLX5_EQ_VEC_COMP_BASE + 1, nvec);
354 if (nvec < 0)
355 return nvec;
356
357 table->num_comp_vectors = nvec - MLX5_EQ_VEC_COMP_BASE;
358 return 0;
359 }
360
361 static void mlx5_disable_msix(struct mlx5_core_dev *dev)
362 {
363 struct mlx5_priv *priv = &dev->priv;
364
365 pci_disable_msix(dev->pdev);
366 kfree(priv->msix_arr);
367 }
368
369 struct mlx5_reg_host_endianess {
370 u8 he;
371 u8 rsvd[15];
372 };
373
374
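/* Build a mask of `size' consecutive bits starting at bit `pos'. */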
375 #define CAP_MASK(pos, size) ((u64)((1 << (size)) - 1) << (pos))
376
377 enum {
378 MLX5_CAP_BITS_RW_MASK = CAP_MASK(MLX5_CAP_OFF_CMDIF_CSUM, 2) |
379 MLX5_DEV_CAP_FLAG_DCT |
380 MLX5_DEV_CAP_FLAG_DRAIN_SIGERR,
381 };
382
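/*
 * Translate a pkey table size in entries into the firmware encoding,
 * log2(size / 128); e.g. 128 -> 0 and 4096 -> 5.
 */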
383 static u16 to_fw_pkey_sz(struct mlx5_core_dev *dev, u32 size)
384 {
385 switch (size) {
386 case 128:
387 return 0;
388 case 256:
389 return 1;
390 case 512:
391 return 2;
392 case 1024:
393 return 3;
394 case 2048:
395 return 4;
396 case 4096:
397 return 5;
398 default:
399 mlx5_core_warn(dev, "invalid pkey table size %d\n", size);
400 return 0;
401 }
402 }
403
404 static int mlx5_core_get_caps_mode(struct mlx5_core_dev *dev,
405 enum mlx5_cap_type cap_type,
406 enum mlx5_cap_mode cap_mode)
407 {
408 u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)];
409 int out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
410 void *out, *hca_caps;
411 u16 opmod = (cap_type << 1) | (cap_mode & 0x01);
412 int err;
413
414 memset(in, 0, sizeof(in));
415 out = kzalloc(out_sz, GFP_KERNEL);
416
417 MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
418 MLX5_SET(query_hca_cap_in, in, op_mod, opmod);
419 err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz);
420 if (err) {
421 mlx5_core_warn(dev,
422 "QUERY_HCA_CAP : type(%x) opmode(%x) Failed(%d)\n",
423 cap_type, cap_mode, err);
424 goto query_ex;
425 }
426
427 hca_caps = MLX5_ADDR_OF(query_hca_cap_out, out, capability);
428
429 switch (cap_mode) {
430 case HCA_CAP_OPMOD_GET_MAX:
431 memcpy(dev->hca_caps_max[cap_type], hca_caps,
432 MLX5_UN_SZ_BYTES(hca_cap_union));
433 break;
434 case HCA_CAP_OPMOD_GET_CUR:
435 memcpy(dev->hca_caps_cur[cap_type], hca_caps,
436 MLX5_UN_SZ_BYTES(hca_cap_union));
437 break;
438 default:
439 mlx5_core_warn(dev,
440 "Tried to query dev cap type(%x) with wrong opmode(%x)\n",
441 cap_type, cap_mode);
442 err = -EINVAL;
443 break;
444 }
445 query_ex:
446 kfree(out);
447 return err;
448 }
449
450 int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type)
451 {
452 int ret;
453
454 ret = mlx5_core_get_caps_mode(dev, cap_type, HCA_CAP_OPMOD_GET_CUR);
455 if (ret)
456 return ret;
457
458 return mlx5_core_get_caps_mode(dev, cap_type, HCA_CAP_OPMOD_GET_MAX);
459 }
460
461 static int set_caps(struct mlx5_core_dev *dev, void *in, int in_sz)
462 {
463 u32 out[MLX5_ST_SZ_DW(set_hca_cap_out)] = {0};
464
465 MLX5_SET(set_hca_cap_in, in, opcode, MLX5_CMD_OP_SET_HCA_CAP);
466
467 return mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out));
468 }
469
470 static int handle_hca_cap(struct mlx5_core_dev *dev)
471 {
472 void *set_ctx = NULL;
473 struct mlx5_profile *prof = dev->profile;
474 int err = -ENOMEM;
475 int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
476 void *set_hca_cap;
477
478 set_ctx = kzalloc(set_sz, GFP_KERNEL);
479
480 err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL);
481 if (err)
482 goto query_ex;
483
484 set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx,
485 capability);
486 memcpy(set_hca_cap, dev->hca_caps_cur[MLX5_CAP_GENERAL],
487 MLX5_ST_SZ_BYTES(cmd_hca_cap));
488
489 mlx5_core_dbg(dev, "Current Pkey table size %d Setting new size %d\n",
490 mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(dev, pkey_table_size)),
491 128);
492 /* we limit the size of the pkey table to 128 entries for now */
493 MLX5_SET(cmd_hca_cap, set_hca_cap, pkey_table_size,
494 to_fw_pkey_sz(dev, 128));
495
496 if (prof->mask & MLX5_PROF_MASK_QP_SIZE)
497 MLX5_SET(cmd_hca_cap, set_hca_cap, log_max_qp,
498 prof->log_max_qp);
499
500 /* disable cmdif checksum */
501 MLX5_SET(cmd_hca_cap, set_hca_cap, cmdif_checksum, 0);
502
503 /* Enable 4K UAR only when HCA supports it and page size is bigger
504 * than 4K.
505 */
506 if (MLX5_CAP_GEN_MAX(dev, uar_4k) && PAGE_SIZE > 4096)
507 MLX5_SET(cmd_hca_cap, set_hca_cap, uar_4k, 1);
508
509 /* enable drain sigerr */
510 MLX5_SET(cmd_hca_cap, set_hca_cap, drain_sigerr, 1);
511
512 MLX5_SET(cmd_hca_cap, set_hca_cap, log_uar_page_sz, PAGE_SHIFT - 12);
513
514 err = set_caps(dev, set_ctx, set_sz);
515
516 query_ex:
517 kfree(set_ctx);
518 return err;
519 }
520
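/*
 * If the device reports support for host-endianness mode on 8-byte atomic
 * requestors, switch the requestor away from the big-endian default;
 * otherwise leave the atomic capabilities untouched.
 */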
521 static int handle_hca_cap_atomic(struct mlx5_core_dev *dev)
522 {
523 void *set_ctx;
524 void *set_hca_cap;
525 int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
526 int req_endianness;
527 int err;
528
529 if (MLX5_CAP_GEN(dev, atomic)) {
530 err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC);
531 if (err)
532 return err;
533 } else {
534 return 0;
535 }
536
537 req_endianness =
538 MLX5_CAP_ATOMIC(dev,
539 supported_atomic_req_8B_endianess_mode_1);
540
541 if (req_endianness != MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS)
542 return 0;
543
544 set_ctx = kzalloc(set_sz, GFP_KERNEL);
545 if (!set_ctx)
546 return -ENOMEM;
547
548 MLX5_SET(set_hca_cap_in, set_ctx, op_mod,
549 MLX5_SET_HCA_CAP_OP_MOD_ATOMIC << 1);
550 set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability);
551
552 /* Set requestor to host endianness */
553 MLX5_SET(atomic_caps, set_hca_cap, atomic_req_8B_endianess_mode,
554 MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS);
555
556 err = set_caps(dev, set_ctx, set_sz);
557
558 kfree(set_ctx);
559 return err;
560 }
561
562 static int handle_hca_cap_2(struct mlx5_core_dev *dev)
563 {
564 int err;
565
566 if (MLX5_CAP_GEN_MAX(dev, hca_cap_2)) {
567 err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL_2);
568 if (err)
569 return err;
570 }
571
572 return 0;
573 }
574
575 static int set_hca_ctrl(struct mlx5_core_dev *dev)
576 {
577 struct mlx5_reg_host_endianess he_in;
578 struct mlx5_reg_host_endianess he_out;
579 int err;
580
581 if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH &&
582 !MLX5_CAP_GEN(dev, roce))
583 return 0;
584
585 memset(&he_in, 0, sizeof(he_in));
586 he_in.he = MLX5_SET_HOST_ENDIANNESS;
587 err = mlx5_core_access_reg(dev, &he_in, sizeof(he_in),
588 &he_out, sizeof(he_out),
589 MLX5_REG_HOST_ENDIANNESS, 0, 1);
590 return err;
591 }
592
593 static int mlx5_core_set_hca_defaults(struct mlx5_core_dev *dev)
594 {
595 int ret = 0;
596
597 /* Disable local_lb by default */
598 if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH)
599 ret = mlx5_nic_vport_update_local_lb(dev, false);
600
601 return ret;
602 }
603
604 static int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id)
605 {
606 u32 out[MLX5_ST_SZ_DW(enable_hca_out)] = {0};
607 u32 in[MLX5_ST_SZ_DW(enable_hca_in)] = {0};
608
609 MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA);
610 MLX5_SET(enable_hca_in, in, function_id, func_id);
611 return mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
612 }
613
614 static int mlx5_core_disable_hca(struct mlx5_core_dev *dev)
615 {
616 u32 out[MLX5_ST_SZ_DW(disable_hca_out)] = {0};
617 u32 in[MLX5_ST_SZ_DW(disable_hca_in)] = {0};
618
619 MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA);
620 return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
621 }
622
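/*
 * Negotiate the ISSI (Interface Step Sequence ID) with firmware: query
 * the supported-ISSI mask, prefer ISSI 1 when offered, fall back to
 * ISSI 0, and treat a bad-opcode status as "only ISSI 0 supported".
 */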
623 static int mlx5_core_set_issi(struct mlx5_core_dev *dev)
624 {
625 u32 query_in[MLX5_ST_SZ_DW(query_issi_in)] = {0};
626 u32 query_out[MLX5_ST_SZ_DW(query_issi_out)] = {0};
627 u32 sup_issi;
628 int err;
629
630 MLX5_SET(query_issi_in, query_in, opcode, MLX5_CMD_OP_QUERY_ISSI);
631
632 err = mlx5_cmd_exec(dev, query_in, sizeof(query_in), query_out, sizeof(query_out));
633 if (err) {
634 u32 syndrome;
635 u8 status;
636
637 mlx5_cmd_mbox_status(query_out, &status, &syndrome);
638 if (status == MLX5_CMD_STAT_BAD_OP_ERR) {
639 mlx5_core_dbg(dev, "Only ISSI 0 is supported\n");
640 return 0;
641 }
642
643 mlx5_core_err(dev, "failed to query ISSI\n");
644 return err;
645 }
646
647 sup_issi = MLX5_GET(query_issi_out, query_out, supported_issi_dw0);
648
649 if (sup_issi & (1 << 1)) {
650 u32 set_in[MLX5_ST_SZ_DW(set_issi_in)] = {0};
651 u32 set_out[MLX5_ST_SZ_DW(set_issi_out)] = {0};
652
653 MLX5_SET(set_issi_in, set_in, opcode, MLX5_CMD_OP_SET_ISSI);
654 MLX5_SET(set_issi_in, set_in, current_issi, 1);
655
656 err = mlx5_cmd_exec(dev, set_in, sizeof(set_in), set_out, sizeof(set_out));
657 if (err) {
658 mlx5_core_err(dev, "failed to set ISSI=1 err(%d)\n", err);
659 return err;
660 }
661
662 dev->issi = 1;
663
664 return 0;
665 } else if (sup_issi & (1 << 0)) {
666 return 0;
667 }
668
669 return -ENOTSUPP;
670 }
671
672
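/* Translate a completion vector index into its EQ number and IRQ. */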
673 int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, int *irqn)
674 {
675 struct mlx5_eq_table *table = &dev->priv.eq_table;
676 struct mlx5_eq *eq;
677 int err = -ENOENT;
678
679 spin_lock(&table->lock);
680 list_for_each_entry(eq, &table->comp_eqs_list, list) {
681 if (eq->index == vector) {
682 *eqn = eq->eqn;
683 *irqn = eq->irqn;
684 err = 0;
685 break;
686 }
687 }
688 spin_unlock(&table->lock);
689
690 return err;
691 }
692 EXPORT_SYMBOL(mlx5_vector2eqn);
693
694 static void free_comp_eqs(struct mlx5_core_dev *dev)
695 {
696 struct mlx5_eq_table *table = &dev->priv.eq_table;
697 struct mlx5_eq *eq, *n;
698
699 spin_lock(&table->lock);
700 list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
701 list_del(&eq->list);
702 spin_unlock(&table->lock);
703 if (mlx5_destroy_unmap_eq(dev, eq))
704 mlx5_core_warn(dev, "failed to destroy EQ 0x%x\n",
705 eq->eqn);
706 kfree(eq);
707 spin_lock(&table->lock);
708 }
709 spin_unlock(&table->lock);
710 }
711
712 static int alloc_comp_eqs(struct mlx5_core_dev *dev)
713 {
714 struct mlx5_eq_table *table = &dev->priv.eq_table;
715 struct mlx5_eq *eq;
716 int ncomp_vec;
717 int nent;
718 int err;
719 int i;
720
721 INIT_LIST_HEAD(&table->comp_eqs_list);
722 ncomp_vec = table->num_comp_vectors;
723 nent = mlx5_core_get_comp_eq_size();
724 for (i = 0; i < ncomp_vec; i++) {
725 eq = kzalloc_node(sizeof(*eq), GFP_KERNEL, dev->priv.numa_node);
726
727 err = mlx5_create_map_eq(dev, eq,
728 i + MLX5_EQ_VEC_COMP_BASE, nent, 0);
729 if (err) {
730 kfree(eq);
731 goto clean;
732 }
733 mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->eqn);
734 eq->index = i;
735 spin_lock(&table->lock);
736 list_add_tail(&eq->list, &table->comp_eqs_list);
737 spin_unlock(&table->lock);
738 }
739
740 return 0;
741
742 clean:
743 free_comp_eqs(dev);
744 return err;
745 }
746
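/* The top bit of the init segment's "initializing" dword stays set while firmware is still booting. */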
747 static inline int fw_initializing(struct mlx5_core_dev *dev)
748 {
749 return ioread32be(&dev->iseg->initializing) >> 31;
750 }
751
752 static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili,
753 u32 warn_time_mili)
754 {
755 int warn = jiffies + msecs_to_jiffies(warn_time_mili);
756 int end = jiffies + msecs_to_jiffies(max_wait_mili);
757 int err = 0;
758
759 MPASS(max_wait_mili > warn_time_mili);
760
761 while (fw_initializing(dev) == 1) {
762 if (time_after(jiffies, end)) {
763 err = -EBUSY;
764 break;
765 }
766 if (warn_time_mili && time_after(jiffies, warn)) {
767 mlx5_core_warn(dev,
768 "Waiting for FW initialization, timeout abort in %u s\n",
769 (unsigned)(jiffies_to_msecs(end - warn) / 1000));
770 warn = jiffies + msecs_to_jiffies(warn_time_mili);
771 }
772 msleep(FW_INIT_WAIT_MS);
773 }
774
775 if (err != 0)
776 mlx5_core_dbg(dev, "Full initializing bit dword = 0x%x\n",
777 ioread32be(&dev->iseg->initializing));
778
779 return err;
780 }
781
782 static void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
783 {
784 struct mlx5_device_context *dev_ctx;
785 struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);
786
787 dev_ctx = kzalloc_node(sizeof(*dev_ctx), GFP_KERNEL, priv->numa_node);
788 if (!dev_ctx)
789 return;
790
791 dev_ctx->intf = intf;
792 CURVNET_SET_QUIET(vnet0);
793 dev_ctx->context = intf->add(dev);
794 CURVNET_RESTORE();
795
796 if (dev_ctx->context) {
797 spin_lock_irq(&priv->ctx_lock);
798 list_add_tail(&dev_ctx->list, &priv->ctx_list);
799 spin_unlock_irq(&priv->ctx_lock);
800 } else {
801 kfree(dev_ctx);
802 }
803 }
804
805 static void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
806 {
807 struct mlx5_device_context *dev_ctx;
808 struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);
809
810 list_for_each_entry(dev_ctx, &priv->ctx_list, list)
811 if (dev_ctx->intf == intf) {
812 spin_lock_irq(&priv->ctx_lock);
813 list_del(&dev_ctx->list);
814 spin_unlock_irq(&priv->ctx_lock);
815
816 intf->remove(dev, dev_ctx->context);
817 kfree(dev_ctx);
818 return;
819 }
820 }
821
822 int
823 mlx5_register_device(struct mlx5_core_dev *dev)
824 {
825 struct mlx5_priv *priv = &dev->priv;
826 struct mlx5_interface *intf;
827
828 mutex_lock(&intf_mutex);
829 list_add_tail(&priv->dev_list, &dev_list);
830 list_for_each_entry(intf, &intf_list, list)
831 mlx5_add_device(intf, priv);
832 mutex_unlock(&intf_mutex);
833
834 return 0;
835 }
836
837 void
838 mlx5_unregister_device(struct mlx5_core_dev *dev)
839 {
840 struct mlx5_priv *priv = &dev->priv;
841 struct mlx5_interface *intf;
842
843 mutex_lock(&intf_mutex);
844 list_for_each_entry(intf, &intf_list, list)
845 mlx5_remove_device(intf, priv);
846 list_del(&priv->dev_list);
847 mutex_unlock(&intf_mutex);
848 }
849
850 int mlx5_register_interface(struct mlx5_interface *intf)
851 {
852 struct mlx5_priv *priv;
853
854 if (!intf->add || !intf->remove)
855 return -EINVAL;
856
857 mutex_lock(&intf_mutex);
858 list_add_tail(&intf->list, &intf_list);
859 list_for_each_entry(priv, &dev_list, dev_list)
860 mlx5_add_device(intf, priv);
861 mutex_unlock(&intf_mutex);
862
863 return 0;
864 }
865 EXPORT_SYMBOL(mlx5_register_interface);
866
867 void mlx5_unregister_interface(struct mlx5_interface *intf)
868 {
869 struct mlx5_priv *priv;
870
871 mutex_lock(&intf_mutex);
872 list_for_each_entry(priv, &dev_list, dev_list)
873 mlx5_remove_device(intf, priv);
874 list_del(&intf->list);
875 mutex_unlock(&intf_mutex);
876 }
877 EXPORT_SYMBOL(mlx5_unregister_interface);
878
879 void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol)
880 {
881 struct mlx5_priv *priv = &mdev->priv;
882 struct mlx5_device_context *dev_ctx;
883 unsigned long flags;
884 void *result = NULL;
885
886 spin_lock_irqsave(&priv->ctx_lock, flags);
887
888 list_for_each_entry(dev_ctx, &mdev->priv.ctx_list, list)
889 if ((dev_ctx->intf->protocol == protocol) &&
890 dev_ctx->intf->get_dev) {
891 result = dev_ctx->intf->get_dev(dev_ctx->context);
892 break;
893 }
894
895 spin_unlock_irqrestore(&priv->ctx_lock, flags);
896
897 return result;
898 }
899 EXPORT_SYMBOL(mlx5_get_protocol_dev);
900
901 static int mlx5_auto_fw_update;
902 SYSCTL_INT(_hw_mlx5, OID_AUTO, auto_fw_update, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
903 &mlx5_auto_fw_update, 0,
904 "Allow automatic firmware update on driver start");
905 static int
906 mlx5_firmware_update(struct mlx5_core_dev *dev)
907 {
908 const struct firmware *fw;
909 int err;
910
911 TUNABLE_INT_FETCH("hw.mlx5.auto_fw_update", &mlx5_auto_fw_update);
912 if (!mlx5_auto_fw_update)
913 return (0);
914 fw = firmware_get("mlx5fw_mfa");
915 if (fw) {
916 err = mlx5_firmware_flash(dev, fw);
917 firmware_put(fw, FIRMWARE_UNLOAD);
918 } else {
919 return (-ENOENT);
920 }
921
922 return err;
923 }
924
925 static int mlx5_pci_init(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
926 {
927 struct pci_dev *pdev = dev->pdev;
928 int err;
929
931 pci_set_drvdata(dev->pdev, dev);
932 strncpy(priv->name, dev_name(&pdev->dev), MLX5_MAX_NAME_LEN);
933 priv->name[MLX5_MAX_NAME_LEN - 1] = 0;
934
935 mutex_init(&priv->pgdir_mutex);
936 INIT_LIST_HEAD(&priv->pgdir_list);
937 spin_lock_init(&priv->mkey_lock);
938
939 err = mlx5_pci_enable_device(dev);
940 if (err) {
941 mlx5_core_err(dev, "Cannot enable PCI device, aborting\n");
942 goto err_dbg;
943 }
944
945 err = request_bar(pdev);
946 if (err) {
947 mlx5_core_err(dev, "error requesting BARs, aborting\n");
948 goto err_disable;
949 }
950
951 pci_set_master(pdev);
952
953 err = set_dma_caps(pdev);
954 if (err) {
955 mlx5_core_err(dev, "Failed setting DMA capabilities mask, aborting\n");
956 goto err_clr_master;
957 }
958
959 dev->iseg_base = pci_resource_start(dev->pdev, 0);
960 dev->iseg = ioremap(dev->iseg_base, sizeof(*dev->iseg));
961 if (!dev->iseg) {
962 err = -ENOMEM;
963 mlx5_core_err(dev, "Failed mapping initialization segment, aborting\n");
964 goto err_clr_master;
965 }
966
967 return 0;
968
969 err_clr_master:
970 release_bar(dev->pdev);
971 err_disable:
972 mlx5_pci_disable_device(dev);
973 err_dbg:
974 return err;
975 }
976
977 static void mlx5_pci_close(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
978 {
979 #ifdef PCI_IOV
980 if (MLX5_CAP_GEN(dev, eswitch_flow_table))
981 pci_iov_detach(dev->pdev->dev.bsddev);
982 #endif
983 iounmap(dev->iseg);
984 release_bar(dev->pdev);
985 mlx5_pci_disable_device(dev);
986 }
987
988 static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
989 {
990 int err;
991
992 err = mlx5_vsc_find_cap(dev);
993 if (err)
994 mlx5_core_warn(dev, "Unable to find vendor specific capabilities\n");
995
996 err = mlx5_query_hca_caps(dev);
997 if (err) {
998 mlx5_core_err(dev, "query hca failed\n");
999 goto out;
1000 }
1001
1002 err = mlx5_query_board_id(dev);
1003 if (err) {
1004 mlx5_core_err(dev, "query board id failed\n");
1005 goto out;
1006 }
1007
1008 err = mlx5_eq_init(dev);
1009 if (err) {
1010 mlx5_core_err(dev, "failed to initialize eq\n");
1011 goto out;
1012 }
1013
1014 MLX5_INIT_DOORBELL_LOCK(&priv->cq_uar_lock);
1015
1016 err = mlx5_init_cq_table(dev);
1017 if (err) {
1018 mlx5_core_err(dev, "failed to initialize cq table\n");
1019 goto err_eq_cleanup;
1020 }
1021
1022 mlx5_init_qp_table(dev);
1023 mlx5_init_srq_table(dev);
1024 mlx5_init_mr_table(dev);
1025
1026 mlx5_init_reserved_gids(dev);
1027 mlx5_fpga_init(dev);
1028
1029 #ifdef RATELIMIT
1030 err = mlx5_init_rl_table(dev);
1031 if (err) {
1032 mlx5_core_err(dev, "Failed to init rate limiting\n");
1033 goto err_tables_cleanup;
1034 }
1035 #endif
1036 return 0;
1037
1038 #ifdef RATELIMIT
1039 err_tables_cleanup:
1040 mlx5_cleanup_mr_table(dev);
1041 mlx5_cleanup_srq_table(dev);
1042 mlx5_cleanup_qp_table(dev);
1043 mlx5_cleanup_cq_table(dev);
1044 #endif
1045
1046 err_eq_cleanup:
1047 mlx5_eq_cleanup(dev);
1048
1049 out:
1050 return err;
1051 }
1052
1053 static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
1054 {
1055 #ifdef RATELIMIT
1056 mlx5_cleanup_rl_table(dev);
1057 #endif
1058 mlx5_fpga_cleanup(dev);
1059 mlx5_cleanup_reserved_gids(dev);
1060 mlx5_cleanup_mr_table(dev);
1061 mlx5_cleanup_srq_table(dev);
1062 mlx5_cleanup_qp_table(dev);
1063 mlx5_cleanup_cq_table(dev);
1064 mlx5_eq_cleanup(dev);
1065 }
1066
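/*
 * Bring the device up: wait for firmware, initialize the command
 * interface, negotiate capabilities, allocate pages, EQs and flow
 * steering, and finally register with the protocol interfaces.  Failures
 * unwind in reverse order through the label ladder at the bottom.
 */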
1067 static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
1068 bool boot)
1069 {
1070 int err;
1071
1072 mutex_lock(&dev->intf_state_mutex);
1073 if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
1074 mlx5_core_warn(dev, "interface is up, NOP\n");
1075 goto out;
1076 }
1077
1078 mlx5_core_dbg(dev, "firmware version: %d.%d.%d\n",
1079 fw_rev_maj(dev), fw_rev_min(dev), fw_rev_sub(dev));
1080
1081 /*
1082 * On load removing any previous indication of internal error,
1083 * device is up
1084 */
1085 dev->state = MLX5_DEVICE_STATE_UP;
1086
1087 /* wait for firmware to accept initialization segments configurations
1088 */
1089 err = wait_fw_init(dev, FW_PRE_INIT_TIMEOUT_MILI,
1090 FW_INIT_WARN_MESSAGE_INTERVAL);
1091 if (err) {
1092 dev_err(&dev->pdev->dev,
1093 "Firmware over %d MS in pre-initializing state, aborting\n",
1094 FW_PRE_INIT_TIMEOUT_MILI);
1095 goto out_err;
1096 }
1097
1098 err = mlx5_cmd_init(dev);
1099 if (err) {
1100 mlx5_core_err(dev,
1101 "Failed initializing command interface, aborting\n");
1102 goto out_err;
1103 }
1104
1105 err = wait_fw_init(dev, FW_INIT_TIMEOUT_MILI, 0);
1106 if (err) {
1107 mlx5_core_err(dev,
1108 "Firmware over %d MS in initializing state, aborting\n",
1109 FW_INIT_TIMEOUT_MILI);
1110 goto err_cmd_cleanup;
1111 }
1112
1113 err = mlx5_core_enable_hca(dev, 0);
1114 if (err) {
1115 mlx5_core_err(dev, "enable hca failed\n");
1116 goto err_cmd_cleanup;
1117 }
1118
1119 err = mlx5_core_set_issi(dev);
1120 if (err) {
1121 mlx5_core_err(dev, "failed to set issi\n");
1122 goto err_disable_hca;
1123 }
1124
1125 err = mlx5_pagealloc_start(dev);
1126 if (err) {
1127 mlx5_core_err(dev, "mlx5_pagealloc_start failed\n");
1128 goto err_disable_hca;
1129 }
1130
1131 err = mlx5_satisfy_startup_pages(dev, 1);
1132 if (err) {
1133 mlx5_core_err(dev, "failed to allocate boot pages\n");
1134 goto err_pagealloc_stop;
1135 }
1136
1137 err = set_hca_ctrl(dev);
1138 if (err) {
1139 mlx5_core_err(dev, "set_hca_ctrl failed\n");
1140 goto reclaim_boot_pages;
1141 }
1142
1143 err = handle_hca_cap(dev);
1144 if (err) {
1145 mlx5_core_err(dev, "handle_hca_cap failed\n");
1146 goto reclaim_boot_pages;
1147 }
1148
1149 err = handle_hca_cap_atomic(dev);
1150 if (err) {
1151 mlx5_core_err(dev, "handle_hca_cap_atomic failed\n");
1152 goto reclaim_boot_pages;
1153 }
1154
1155 err = handle_hca_cap_2(dev);
1156 if (err) {
1157 mlx5_core_err(dev, "handle_hca_cap_2 failed\n");
1158 goto reclaim_boot_pages;
1159 }
1160
1161 err = mlx5_satisfy_startup_pages(dev, 0);
1162 if (err) {
1163 mlx5_core_err(dev, "failed to allocate init pages\n");
1164 goto reclaim_boot_pages;
1165 }
1166
1167 err = mlx5_cmd_init_hca(dev);
1168 if (err) {
1169 mlx5_core_err(dev, "init hca failed\n");
1170 goto reclaim_boot_pages;
1171 }
1172
1173 mlx5_set_driver_version(dev);
1174
1175 mlx5_start_health_poll(dev);
1176
1177 if (boot && (err = mlx5_init_once(dev, priv))) {
1178 mlx5_core_err(dev, "sw objs init failed\n");
1179 goto err_stop_poll;
1180 }
1181
1182 dev->priv.uar = mlx5_get_uars_page(dev);
1183 if (IS_ERR(dev->priv.uar)) {
1184 mlx5_core_err(dev, "Failed allocating uar, aborting\n");
1185 err = PTR_ERR(dev->priv.uar);
1186 goto err_cleanup_once;
1187 }
1188
1189 err = mlx5_enable_msix(dev);
1190 if (err) {
1191 mlx5_core_err(dev, "enable msix failed\n");
1192 goto err_cleanup_uar;
1193 }
1194
1195 err = mlx5_start_eqs(dev);
1196 if (err) {
1197 mlx5_core_err(dev, "Failed to start pages and async EQs\n");
1198 goto err_disable_msix;
1199 }
1200
1201 err = alloc_comp_eqs(dev);
1202 if (err) {
1203 mlx5_core_err(dev, "Failed to alloc completion EQs\n");
1204 goto err_stop_eqs;
1205 }
1206
1207 err = mlx5_init_fs(dev);
1208 if (err) {
1209 mlx5_core_err(dev, "flow steering init %d\n", err);
1210 goto err_free_comp_eqs;
1211 }
1212
1213 err = mlx5_core_set_hca_defaults(dev);
1214 if (err) {
1215 mlx5_core_err(dev, "Failed to set HCA defaults %d\n", err);
1216 goto err_free_comp_eqs;
1217 }
1218
1219 err = mlx5_mpfs_init(dev);
1220 if (err) {
1221 mlx5_core_err(dev, "mpfs init failed %d\n", err);
1222 goto err_fs;
1223 }
1224
1225 err = mlx5_fpga_device_start(dev);
1226 if (err) {
1227 mlx5_core_err(dev, "fpga device start failed %d\n", err);
1228 goto err_mpfs;
1229 }
1230
1231 err = mlx5_diag_cnt_init(dev);
1232 if (err) {
1233 mlx5_core_err(dev, "diag cnt init failed %d\n", err);
1234 goto err_fpga;
1235 }
1236
1237 err = mlx5_register_device(dev);
1238 if (err) {
1239 mlx5_core_err(dev, "mlx5_register_device failed %d\n", err);
1240 goto err_diag_cnt;
1241 }
1242
1243 set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
1244
1245 out:
1246 mutex_unlock(&dev->intf_state_mutex);
1247 return 0;
1248
1249 err_diag_cnt:
1250 mlx5_diag_cnt_cleanup(dev);
1251
1252 err_fpga:
1253 mlx5_fpga_device_stop(dev);
1254
1255 err_mpfs:
1256 mlx5_mpfs_destroy(dev);
1257
1258 err_fs:
1259 mlx5_cleanup_fs(dev);
1260
1261 err_free_comp_eqs:
1262 free_comp_eqs(dev);
1263
1264 err_stop_eqs:
1265 mlx5_stop_eqs(dev);
1266
1267 err_disable_msix:
1268 mlx5_disable_msix(dev);
1269
1270 err_cleanup_uar:
1271 mlx5_put_uars_page(dev, dev->priv.uar);
1272
1273 err_cleanup_once:
1274 if (boot)
1275 mlx5_cleanup_once(dev);
1276
1277 err_stop_poll:
1278 mlx5_stop_health_poll(dev, boot);
1279 if (mlx5_cmd_teardown_hca(dev)) {
1280 mlx5_core_err(dev, "tear_down_hca failed, skip cleanup\n");
1281 goto out_err;
1282 }
1283
1284 reclaim_boot_pages:
1285 mlx5_reclaim_startup_pages(dev);
1286
1287 err_pagealloc_stop:
1288 mlx5_pagealloc_stop(dev);
1289
1290 err_disable_hca:
1291 mlx5_core_disable_hca(dev);
1292
1293 err_cmd_cleanup:
1294 mlx5_cmd_cleanup(dev);
1295
1296 out_err:
1297 dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
1298 mutex_unlock(&dev->intf_state_mutex);
1299
1300 return err;
1301 }
1302
1303 static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
1304 bool cleanup)
1305 {
1306 int err = 0;
1307
1308 if (cleanup)
1309 mlx5_drain_health_recovery(dev);
1310
1311 mutex_lock(&dev->intf_state_mutex);
1312 if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
1313 mlx5_core_warn(dev, "%s: interface is down, NOP\n", __func__);
1314 if (cleanup)
1315 mlx5_cleanup_once(dev);
1316 goto out;
1317 }
1318
1319 mlx5_unregister_device(dev);
1320
1321 mlx5_eswitch_cleanup(dev->priv.eswitch);
1322 mlx5_diag_cnt_cleanup(dev);
1323 mlx5_fpga_device_stop(dev);
1324 mlx5_mpfs_destroy(dev);
1325 mlx5_cleanup_fs(dev);
1326 mlx5_wait_for_reclaim_vfs_pages(dev);
1327 free_comp_eqs(dev);
1328 mlx5_stop_eqs(dev);
1329 mlx5_disable_msix(dev);
1330 mlx5_put_uars_page(dev, dev->priv.uar);
1331 if (cleanup)
1332 mlx5_cleanup_once(dev);
1333 mlx5_stop_health_poll(dev, cleanup);
1334 err = mlx5_cmd_teardown_hca(dev);
1335 if (err) {
1336 mlx5_core_err(dev, "tear_down_hca failed, skip cleanup\n");
1337 goto out;
1338 }
1339 mlx5_pagealloc_stop(dev);
1340 mlx5_reclaim_startup_pages(dev);
1341 mlx5_core_disable_hca(dev);
1342 mlx5_cmd_cleanup(dev);
1343
1344 out:
1345 clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
1346 mutex_unlock(&dev->intf_state_mutex);
1347 return err;
1348 }
1349
1350 void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
1351 unsigned long param)
1352 {
1353 struct mlx5_priv *priv = &dev->priv;
1354 struct mlx5_device_context *dev_ctx;
1355 unsigned long flags;
1356
1357 spin_lock_irqsave(&priv->ctx_lock, flags);
1358
1359 list_for_each_entry(dev_ctx, &priv->ctx_list, list)
1360 if (dev_ctx->intf->event)
1361 dev_ctx->intf->event(dev, dev_ctx->context, event, param);
1362
1363 spin_unlock_irqrestore(&priv->ctx_lock, flags);
1364 }
1365
1366 struct mlx5_core_event_handler {
1367 void (*event)(struct mlx5_core_dev *dev,
1368 enum mlx5_dev_event event,
1369 void *data);
1370 };
1371
1372 #define MLX5_STATS_DESC(a, b, c, d, e, ...) d, e,
1373
1374 #define MLX5_PORT_MODULE_ERROR_STATS(m) \
1375 m(+1, u64, power_budget_exceeded, "power_budget", "Module Power Budget Exceeded") \
1376 m(+1, u64, long_range, "long_range", "Module Long Range for non MLNX cable/module") \
1377 m(+1, u64, bus_stuck, "bus_stuck", "Module Bus stuck (I2C or data shorted)") \
1378 m(+1, u64, no_eeprom, "no_eeprom", "No EEPROM/retry timeout") \
1379 m(+1, u64, enforce_part_number, "enforce_part_number", "Module Enforce part number list") \
1380 m(+1, u64, unknown_id, "unknown_id", "Module Unknown identifier") \
1381 m(+1, u64, high_temp, "high_temp", "Module High Temperature") \
1382 m(+1, u64, cable_shorted, "cable_shorted", "Module Cable is shorted") \
1383 m(+1, u64, pmd_type_not_enabled, "pmd_type_not_enabled", "PMD type is not enabled") \
1384 m(+1, u64, laster_tec_failure, "laster_tec_failure", "Laser TEC failure") \
1385 m(+1, u64, high_current, "high_current", "High current") \
1386 m(+1, u64, high_voltage, "high_voltage", "High voltage") \
1387 m(+1, u64, pcie_sys_power_slot_exceeded, "pcie_sys_power_slot_exceeded", "PCIe system power slot Exceeded") \
1388 m(+1, u64, high_power, "high_power", "High power") \
1389 m(+1, u64, module_state_machine_fault, "module_state_machine_fault", "Module State Machine fault")
1390
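/*
 * MLX5_STATS_DESC expands each entry above to its sysctl name and
 * description strings, so mlx5_pme_err_desc[] stores them as consecutive
 * pairs: index 2 * i is the name and 2 * i + 1 the description used when
 * creating the error counter sysctls in init_one().
 */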
1391 static const char *mlx5_pme_err_desc[] = {
1392 MLX5_PORT_MODULE_ERROR_STATS(MLX5_STATS_DESC)
1393 };
1394
1395 static int init_one(struct pci_dev *pdev,
1396 const struct pci_device_id *id)
1397 {
1398 struct mlx5_core_dev *dev;
1399 struct mlx5_priv *priv;
1400 device_t bsddev = pdev->dev.bsddev;
1401 #ifdef PCI_IOV
1402 nvlist_t *pf_schema, *vf_schema;
1403 int num_vfs, sriov_pos;
1404 #endif
1405 int i, err;
1406 int numa_node;
1407 struct sysctl_oid *pme_sysctl_node;
1408 struct sysctl_oid *pme_err_sysctl_node;
1409 struct sysctl_oid *cap_sysctl_node;
1410 struct sysctl_oid *current_cap_sysctl_node;
1411 struct sysctl_oid *max_cap_sysctl_node;
1412
1413 printk_once("mlx5: %s", mlx5_version);
1414
1415 numa_node = dev_to_node(&pdev->dev);
1416
1417 dev = kzalloc_node(sizeof(*dev), GFP_KERNEL, numa_node);
1418
1419 priv = &dev->priv;
1420 priv->numa_node = numa_node;
1421
1422 if (id)
1423 priv->pci_dev_data = id->driver_data;
1424
1425 if (mlx5_prof_sel < 0 || mlx5_prof_sel >= ARRAY_SIZE(profiles)) {
1426 device_printf(bsddev,
1427 "WARN: selected profile out of range, selecting default (%d)\n",
1428 MLX5_DEFAULT_PROF);
1429 mlx5_prof_sel = MLX5_DEFAULT_PROF;
1430 }
1431 dev->profile = &profiles[mlx5_prof_sel];
1432 dev->pdev = pdev;
1433 dev->event = mlx5_core_event;
1434
1435 /* Set desc */
1436 device_set_desc(bsddev, mlx5_version);
1437
1438 sysctl_ctx_init(&dev->sysctl_ctx);
1439 SYSCTL_ADD_INT(&dev->sysctl_ctx,
1440 SYSCTL_CHILDREN(device_get_sysctl_tree(bsddev)),
1441 OID_AUTO, "msix_eqvec", CTLFLAG_RDTUN, &dev->msix_eqvec, 0,
1442 "Maximum number of MSIX event queue vectors, if set");
1443 SYSCTL_ADD_INT(&dev->sysctl_ctx,
1444 SYSCTL_CHILDREN(device_get_sysctl_tree(bsddev)),
1445 OID_AUTO, "power_status", CTLFLAG_RD, &dev->pwr_status, 0,
1446 "0:Invalid 1:Sufficient 2:Insufficient");
1447 SYSCTL_ADD_INT(&dev->sysctl_ctx,
1448 SYSCTL_CHILDREN(device_get_sysctl_tree(bsddev)),
1449 OID_AUTO, "power_value", CTLFLAG_RD, &dev->pwr_value, 0,
1450 "Current power value in Watts");
1451
1452 pme_sysctl_node = SYSCTL_ADD_NODE(&dev->sysctl_ctx,
1453 SYSCTL_CHILDREN(device_get_sysctl_tree(bsddev)),
1454 OID_AUTO, "pme_stats", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
1455 "Port module event statistics");
1456 if (pme_sysctl_node == NULL) {
1457 err = -ENOMEM;
1458 goto clean_sysctl_ctx;
1459 }
1460 pme_err_sysctl_node = SYSCTL_ADD_NODE(&dev->sysctl_ctx,
1461 SYSCTL_CHILDREN(pme_sysctl_node),
1462 OID_AUTO, "errors", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
1463 "Port module event error statistics");
1464 if (pme_err_sysctl_node == NULL) {
1465 err = -ENOMEM;
1466 goto clean_sysctl_ctx;
1467 }
1468 SYSCTL_ADD_U64(&dev->sysctl_ctx,
1469 SYSCTL_CHILDREN(pme_sysctl_node), OID_AUTO,
1470 "module_plug", CTLFLAG_RD | CTLFLAG_MPSAFE,
1471 &dev->priv.pme_stats.status_counters[MLX5_MODULE_STATUS_PLUGGED_ENABLED],
1472 0, "Number of time module plugged");
1473 SYSCTL_ADD_U64(&dev->sysctl_ctx,
1474 SYSCTL_CHILDREN(pme_sysctl_node), OID_AUTO,
1475 "module_unplug", CTLFLAG_RD | CTLFLAG_MPSAFE,
1476 &dev->priv.pme_stats.status_counters[MLX5_MODULE_STATUS_UNPLUGGED],
1477 0, "Number of time module unplugged");
1478 for (i = 0 ; i < MLX5_MODULE_EVENT_ERROR_NUM; i++) {
1479 SYSCTL_ADD_U64(&dev->sysctl_ctx,
1480 SYSCTL_CHILDREN(pme_err_sysctl_node), OID_AUTO,
1481 mlx5_pme_err_desc[2 * i], CTLFLAG_RD | CTLFLAG_MPSAFE,
1482 &dev->priv.pme_stats.error_counters[i],
1483 0, mlx5_pme_err_desc[2 * i + 1]);
1484 }
1485
1486 cap_sysctl_node = SYSCTL_ADD_NODE(&dev->sysctl_ctx,
1487 SYSCTL_CHILDREN(device_get_sysctl_tree(bsddev)),
1488 OID_AUTO, "caps", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
1489 "hardware capabilities raw bitstrings");
1490 if (cap_sysctl_node == NULL) {
1491 err = -ENOMEM;
1492 goto clean_sysctl_ctx;
1493 }
1494 current_cap_sysctl_node = SYSCTL_ADD_NODE(&dev->sysctl_ctx,
1495 SYSCTL_CHILDREN(cap_sysctl_node),
1496 OID_AUTO, "current", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
1497 "");
1498 if (current_cap_sysctl_node == NULL) {
1499 err = -ENOMEM;
1500 goto clean_sysctl_ctx;
1501 }
1502 max_cap_sysctl_node = SYSCTL_ADD_NODE(&dev->sysctl_ctx,
1503 SYSCTL_CHILDREN(cap_sysctl_node),
1504 OID_AUTO, "max", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
1505 "");
1506 if (max_cap_sysctl_node == NULL) {
1507 err = -ENOMEM;
1508 goto clean_sysctl_ctx;
1509 }
1510 SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1511 SYSCTL_CHILDREN(current_cap_sysctl_node),
1512 OID_AUTO, "general", CTLFLAG_RD | CTLFLAG_MPSAFE,
1513 &dev->hca_caps_cur[MLX5_CAP_GENERAL],
1514 MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1515 SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1516 SYSCTL_CHILDREN(max_cap_sysctl_node),
1517 OID_AUTO, "general", CTLFLAG_RD | CTLFLAG_MPSAFE,
1518 &dev->hca_caps_max[MLX5_CAP_GENERAL],
1519 MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1520 SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1521 SYSCTL_CHILDREN(current_cap_sysctl_node),
1522 OID_AUTO, "ether", CTLFLAG_RD | CTLFLAG_MPSAFE,
1523 &dev->hca_caps_cur[MLX5_CAP_ETHERNET_OFFLOADS],
1524 MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1525 SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1526 SYSCTL_CHILDREN(max_cap_sysctl_node),
1527 OID_AUTO, "ether", CTLFLAG_RD | CTLFLAG_MPSAFE,
1528 &dev->hca_caps_max[MLX5_CAP_ETHERNET_OFFLOADS],
1529 MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1530 SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1531 SYSCTL_CHILDREN(current_cap_sysctl_node),
1532 OID_AUTO, "odp", CTLFLAG_RD | CTLFLAG_MPSAFE,
1533 &dev->hca_caps_cur[MLX5_CAP_ODP],
1534 MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1535 SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1536 SYSCTL_CHILDREN(max_cap_sysctl_node),
1537 OID_AUTO, "odp", CTLFLAG_RD | CTLFLAG_MPSAFE,
1538 &dev->hca_caps_max[MLX5_CAP_ODP],
1539 MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1540 SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1541 SYSCTL_CHILDREN(current_cap_sysctl_node),
1542 OID_AUTO, "atomic", CTLFLAG_RD | CTLFLAG_MPSAFE,
1543 &dev->hca_caps_cur[MLX5_CAP_ATOMIC],
1544 MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1545 SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1546 SYSCTL_CHILDREN(max_cap_sysctl_node),
1547 OID_AUTO, "atomic", CTLFLAG_RD | CTLFLAG_MPSAFE,
1548 &dev->hca_caps_max[MLX5_CAP_ATOMIC],
1549 MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1550 SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1551 SYSCTL_CHILDREN(current_cap_sysctl_node),
1552 OID_AUTO, "roce", CTLFLAG_RD | CTLFLAG_MPSAFE,
1553 &dev->hca_caps_cur[MLX5_CAP_ROCE],
1554 MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1555 SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1556 SYSCTL_CHILDREN(max_cap_sysctl_node),
1557 OID_AUTO, "roce", CTLFLAG_RD | CTLFLAG_MPSAFE,
1558 &dev->hca_caps_max[MLX5_CAP_ROCE],
1559 MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1560 SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1561 SYSCTL_CHILDREN(current_cap_sysctl_node),
1562 OID_AUTO, "ipoib", CTLFLAG_RD | CTLFLAG_MPSAFE,
1563 &dev->hca_caps_cur[MLX5_CAP_IPOIB_OFFLOADS],
1564 MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1565 SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1566 SYSCTL_CHILDREN(max_cap_sysctl_node),
1567 OID_AUTO, "ipoib", CTLFLAG_RD | CTLFLAG_MPSAFE,
1568 &dev->hca_caps_max[MLX5_CAP_IPOIB_OFFLOADS],
1569 MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1570 SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1571 SYSCTL_CHILDREN(current_cap_sysctl_node),
1572 OID_AUTO, "eoib", CTLFLAG_RD | CTLFLAG_MPSAFE,
1573 &dev->hca_caps_cur[MLX5_CAP_EOIB_OFFLOADS],
1574 MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1575 SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1576 SYSCTL_CHILDREN(max_cap_sysctl_node),
1577 OID_AUTO, "eoib", CTLFLAG_RD | CTLFLAG_MPSAFE,
1578 &dev->hca_caps_max[MLX5_CAP_EOIB_OFFLOADS],
1579 MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1580 SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1581 SYSCTL_CHILDREN(current_cap_sysctl_node),
1582 OID_AUTO, "flow_table", CTLFLAG_RD | CTLFLAG_MPSAFE,
1583 &dev->hca_caps_cur[MLX5_CAP_FLOW_TABLE],
1584 MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1585 SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1586 SYSCTL_CHILDREN(max_cap_sysctl_node),
1587 OID_AUTO, "flow_table", CTLFLAG_RD | CTLFLAG_MPSAFE,
1588 &dev->hca_caps_max[MLX5_CAP_FLOW_TABLE],
1589 MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1590 SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1591 SYSCTL_CHILDREN(current_cap_sysctl_node),
1592 OID_AUTO, "eswitch_flow_table", CTLFLAG_RD | CTLFLAG_MPSAFE,
1593 &dev->hca_caps_cur[MLX5_CAP_ESWITCH_FLOW_TABLE],
1594 MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1595 SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1596 SYSCTL_CHILDREN(max_cap_sysctl_node),
1597 OID_AUTO, "eswitch_flow_table", CTLFLAG_RD | CTLFLAG_MPSAFE,
1598 &dev->hca_caps_max[MLX5_CAP_ESWITCH_FLOW_TABLE],
1599 MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1600 SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1601 SYSCTL_CHILDREN(current_cap_sysctl_node),
1602 OID_AUTO, "eswitch", CTLFLAG_RD | CTLFLAG_MPSAFE,
1603 &dev->hca_caps_cur[MLX5_CAP_ESWITCH],
1604 MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1605 SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1606 SYSCTL_CHILDREN(max_cap_sysctl_node),
1607 OID_AUTO, "eswitch", CTLFLAG_RD | CTLFLAG_MPSAFE,
1608 &dev->hca_caps_max[MLX5_CAP_ESWITCH],
1609 MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1610 SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1611 SYSCTL_CHILDREN(current_cap_sysctl_node),
1612 OID_AUTO, "snapshot", CTLFLAG_RD | CTLFLAG_MPSAFE,
1613 &dev->hca_caps_cur[MLX5_CAP_SNAPSHOT],
1614 MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1615 SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1616 SYSCTL_CHILDREN(max_cap_sysctl_node),
1617 OID_AUTO, "snapshot", CTLFLAG_RD | CTLFLAG_MPSAFE,
1618 &dev->hca_caps_max[MLX5_CAP_SNAPSHOT],
1619 MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1620 SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1621 SYSCTL_CHILDREN(current_cap_sysctl_node),
1622 OID_AUTO, "vector_calc", CTLFLAG_RD | CTLFLAG_MPSAFE,
1623 &dev->hca_caps_cur[MLX5_CAP_VECTOR_CALC],
1624 MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1625 SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1626 SYSCTL_CHILDREN(max_cap_sysctl_node),
1627 OID_AUTO, "vector_calc", CTLFLAG_RD | CTLFLAG_MPSAFE,
1628 &dev->hca_caps_max[MLX5_CAP_VECTOR_CALC],
1629 MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1630 SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1631 SYSCTL_CHILDREN(current_cap_sysctl_node),
1632 OID_AUTO, "qos", CTLFLAG_RD | CTLFLAG_MPSAFE,
1633 &dev->hca_caps_cur[MLX5_CAP_QOS],
1634 MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1635 SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1636 SYSCTL_CHILDREN(max_cap_sysctl_node),
1637 OID_AUTO, "qos", CTLFLAG_RD | CTLFLAG_MPSAFE,
1638 &dev->hca_caps_max[MLX5_CAP_QOS],
1639 MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1640 SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1641 SYSCTL_CHILDREN(current_cap_sysctl_node),
1642 OID_AUTO, "debug", CTLFLAG_RD | CTLFLAG_MPSAFE,
1643 &dev->hca_caps_cur[MLX5_CAP_DEBUG],
1644 MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1645 SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1646 SYSCTL_CHILDREN(max_cap_sysctl_node),
1647 OID_AUTO, "debug", CTLFLAG_RD | CTLFLAG_MPSAFE,
1648 &dev->hca_caps_max[MLX5_CAP_DEBUG],
1649 MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1650 SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1651 SYSCTL_CHILDREN(cap_sysctl_node),
1652 OID_AUTO, "pcam", CTLFLAG_RD | CTLFLAG_MPSAFE,
1653 &dev->caps.pcam, sizeof(dev->caps.pcam), "IU", "");
1654 SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1655 SYSCTL_CHILDREN(cap_sysctl_node),
1656 OID_AUTO, "mcam", CTLFLAG_RD | CTLFLAG_MPSAFE,
1657 &dev->caps.mcam, sizeof(dev->caps.mcam), "IU", "");
1658 SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1659 SYSCTL_CHILDREN(cap_sysctl_node),
1660 OID_AUTO, "qcam", CTLFLAG_RD | CTLFLAG_MPSAFE,
1661 &dev->caps.qcam, sizeof(dev->caps.qcam), "IU", "");
1662 SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1663 SYSCTL_CHILDREN(cap_sysctl_node),
1664 OID_AUTO, "fpga", CTLFLAG_RD | CTLFLAG_MPSAFE,
1665 &dev->caps.fpga, sizeof(dev->caps.fpga), "IU", "");
1666
1667 INIT_LIST_HEAD(&priv->ctx_list);
1668 spin_lock_init(&priv->ctx_lock);
1669 mutex_init(&dev->pci_status_mutex);
1670 mutex_init(&dev->intf_state_mutex);
1671
1672 mutex_init(&priv->bfregs.reg_head.lock);
1673 mutex_init(&priv->bfregs.wc_head.lock);
1674 INIT_LIST_HEAD(&priv->bfregs.reg_head.list);
1675 INIT_LIST_HEAD(&priv->bfregs.wc_head.list);
1676
1677 mtx_init(&dev->dump_lock, "mlx5dmp", NULL, MTX_DEF | MTX_NEW);
1678 err = mlx5_pci_init(dev, priv);
1679 if (err) {
1680 mlx5_core_err(dev, "mlx5_pci_init failed %d\n", err);
1681 goto clean_dev;
1682 }
1683
1684 err = mlx5_health_init(dev);
1685 if (err) {
1686 mlx5_core_err(dev, "mlx5_health_init failed %d\n", err);
1687 goto close_pci;
1688 }
1689
1690 mlx5_pagealloc_init(dev);
1691
1692 err = mlx5_load_one(dev, priv, true);
1693 if (err) {
1694 mlx5_core_err(dev, "mlx5_load_one failed %d\n", err);
1695 goto clean_health;
1696 }
1697
1698 mlx5_fwdump_prep(dev);
1699
1700 mlx5_firmware_update(dev);
1701
1702 #ifdef PCI_IOV
1703 if (MLX5_CAP_GEN(dev, vport_group_manager)) {
1704 if (pci_find_extcap(bsddev, PCIZ_SRIOV, &sriov_pos) == 0) {
1705 num_vfs = pci_read_config(bsddev, sriov_pos +
1706 PCIR_SRIOV_TOTAL_VFS, 2);
1707 } else {
1708 mlx5_core_info(dev, "cannot find SR-IOV PCIe cap\n");
1709 num_vfs = 0;
1710 }
1711 err = mlx5_eswitch_init(dev, 1 + num_vfs);
1712 if (err == 0) {
1713 pf_schema = pci_iov_schema_alloc_node();
1714 vf_schema = pci_iov_schema_alloc_node();
1715 pci_iov_schema_add_unicast_mac(vf_schema,
1716 iov_mac_addr_name, 0, NULL);
1717 pci_iov_schema_add_uint64(vf_schema, iov_node_guid_name,
1718 0, 0);
1719 pci_iov_schema_add_uint64(vf_schema, iov_port_guid_name,
1720 0, 0);
1721 err = pci_iov_attach(bsddev, pf_schema, vf_schema);
1722 if (err == 0) {
1723 dev->iov_pf = true;
1724 } else {
1725 device_printf(bsddev,
1726 "Failed to initialize SR-IOV support, error %d\n",
1727 err);
1728 }
1729 } else {
1730 mlx5_core_err(dev, "eswitch init failed, error %d\n",
1731 err);
1732 }
1733 }
1734 #endif
1735
1736 pci_save_state(pdev);
1737 return 0;
1738
1739 clean_health:
1740 mlx5_pagealloc_cleanup(dev);
1741 mlx5_health_cleanup(dev);
1742 close_pci:
1743 mlx5_pci_close(dev, priv);
1744 clean_dev:
1745 mtx_destroy(&dev->dump_lock);
1746 clean_sysctl_ctx:
1747 sysctl_ctx_free(&dev->sysctl_ctx);
1748 kfree(dev);
1749 return err;
1750 }
1751
1752 static void remove_one(struct pci_dev *pdev)
1753 {
1754 struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
1755 struct mlx5_priv *priv = &dev->priv;
1756
1757 #ifdef PCI_IOV
1758 if (dev->iov_pf) {
1759 pci_iov_detach(pdev->dev.bsddev);
1760 mlx5_eswitch_disable_sriov(priv->eswitch);
1761 dev->iov_pf = false;
1762 }
1763 #endif
1764
1765 if (mlx5_unload_one(dev, priv, true)) {
1766 mlx5_core_err(dev, "mlx5_unload_one() failed, leaked %lld bytes\n",
1767 (long long)(dev->priv.fw_pages * MLX5_ADAPTER_PAGE_SIZE));
1768 }
1769
1770 mlx5_pagealloc_cleanup(dev);
1771 mlx5_health_cleanup(dev);
1772 mlx5_fwdump_clean(dev);
1773 mlx5_pci_close(dev, priv);
1774 mtx_destroy(&dev->dump_lock);
1775 pci_set_drvdata(pdev, NULL);
1776 sysctl_ctx_free(&dev->sysctl_ctx);
1777 kfree(dev);
1778 }
1779
1780 static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev,
1781 pci_channel_state_t state)
1782 {
1783 struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
1784 struct mlx5_priv *priv = &dev->priv;
1785
1786 mlx5_core_info(dev, "%s was called\n", __func__);
1787 mlx5_enter_error_state(dev, false);
1788 mlx5_unload_one(dev, priv, false);
1789
1790 if (state) {
1791 mlx5_drain_health_wq(dev);
1792 mlx5_pci_disable_device(dev);
1793 }
1794
1795 return state == pci_channel_io_perm_failure ?
1796 PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
1797 }
1798
1799 static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev)
1800 {
1801 struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
1802 int err = 0;
1803
1804 mlx5_core_info(dev,"%s was called\n", __func__);
1805
1806 err = mlx5_pci_enable_device(dev);
1807 if (err) {
1808 mlx5_core_err(dev, "mlx5_pci_enable_device failed with error code: %d\n"
1809 ,err);
1810 return PCI_ERS_RESULT_DISCONNECT;
1811 }
1812 pci_set_master(pdev);
1813 pci_set_powerstate(pdev->dev.bsddev, PCI_POWERSTATE_D0);
1814 pci_restore_state(pdev);
1815 pci_save_state(pdev);
1816
1817 return err ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
1818 }
1819
1820 /* Wait for the device to show vital signs. For now we check
1821 * that we can read the device ID and that the health buffer
1822 * shows a nonzero counter value different from 0xffffffff.
1823 */
1824 static void wait_vital(struct pci_dev *pdev)
1825 {
1826 struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
1827 struct mlx5_core_health *health = &dev->priv.health;
1828 const int niter = 100;
1829 u32 count;
1830 u16 did;
1831 int i;
1832
1833 /* Wait for firmware to be ready after reset */
1834 msleep(1000);
1835 for (i = 0; i < niter; i++) {
1836 if (pci_read_config_word(pdev, 2, &did)) { /* offset 2: PCI device ID */
1837 mlx5_core_warn(dev, "failed reading config word\n");
1838 break;
1839 }
1840 if (did == pdev->device) {
1841 mlx5_core_info(dev,
1842 "device ID correctly read after %d iterations\n", i);
1843 break;
1844 }
1845 msleep(50);
1846 }
1847 if (i == niter)
1848 mlx5_core_warn(dev, "could not read device ID\n");
1849
1850 for (i = 0; i < niter; i++) {
1851 count = ioread32be(health->health_counter);
1852 if (count && count != 0xffffffff) {
1853 mlx5_core_info(dev,
1854 "Counter value 0x%x after %d iterations\n", count, i);
1855 break;
1856 }
1857 msleep(50);
1858 }
1859
1860 if (i == niter)
1861 mlx5_core_warn(dev, "could not read the health counter\n");
1862 }
1863
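/* PCI error-handler "resume" callback: wait for the device to show
 * vital signs after the reset, then reload the driver state.
 */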
1864 static void mlx5_pci_resume(struct pci_dev *pdev)
1865 {
1866 struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
1867 struct mlx5_priv *priv = &dev->priv;
1868 int err;
1869
1870 mlx5_core_info(dev, "%s was called\n", __func__);
1871
1872 wait_vital(pdev);
1873
1874 err = mlx5_load_one(dev, priv, false);
1875 if (err)
1876 mlx5_core_err(dev,
1877 "mlx5_load_one failed with error code: %d\n", err);
1878 else
1879 mlx5_core_info(dev, "device recovered\n");
1880 }
1881
1882 static const struct pci_error_handlers mlx5_err_handler = {
1883 .error_detected = mlx5_pci_err_detected,
1884 .slot_reset = mlx5_pci_slot_reset,
1885 .resume = mlx5_pci_resume
1886 };
1887
1888 #ifdef PCI_IOV
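/* FreeBSD pci_iov "init" method: clamp the requested number of VFs to
 * the e-switch vport count and enable SR-IOV on the e-switch.
 */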
1889 static int
1890 mlx5_iov_init(device_t dev, uint16_t num_vfs, const nvlist_t *pf_config)
1891 {
1892 struct pci_dev *pdev;
1893 struct mlx5_core_dev *core_dev;
1894 struct mlx5_priv *priv;
1895 int err;
1896
1897 pdev = device_get_softc(dev);
1898 core_dev = pci_get_drvdata(pdev);
1899 priv = &core_dev->priv;
1900
1901 if (priv->eswitch == NULL)
1902 return (ENXIO);
1903 if (priv->eswitch->total_vports < num_vfs + 1)
1904 num_vfs = priv->eswitch->total_vports - 1;
1905 err = mlx5_eswitch_enable_sriov(priv->eswitch, num_vfs);
1906 return (-err);
1907 }
1908
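/* FreeBSD pci_iov "uninit" method: disable SR-IOV on the e-switch. */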
1909 static void
1910 mlx5_iov_uninit(device_t dev)
1911 {
1912 struct pci_dev *pdev;
1913 struct mlx5_core_dev *core_dev;
1914 struct mlx5_priv *priv;
1915
1916 pdev = device_get_softc(dev);
1917 core_dev = pci_get_drvdata(pdev);
1918 priv = &core_dev->priv;
1919
1920 mlx5_eswitch_disable_sriov(priv->eswitch);
1921 }
1922
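/* FreeBSD pci_iov "add_vf" method: apply the optional MAC address,
 * node GUID and port GUID from the VF configuration, let the vport
 * follow the physical port state, and enable the VF HCA.
 */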
1923 static int
1924 mlx5_iov_add_vf(device_t dev, uint16_t vfnum, const nvlist_t *vf_config)
1925 {
1926 struct pci_dev *pdev;
1927 struct mlx5_core_dev *core_dev;
1928 struct mlx5_priv *priv;
1929 const void *mac;
1930 size_t mac_size;
1931 uint64_t node_guid, port_guid;
1932 int error;
1933
1934 pdev = device_get_softc(dev);
1935 core_dev = pci_get_drvdata(pdev);
1936 priv = &core_dev->priv;
1937
1938 if (vfnum + 1 >= priv->eswitch->total_vports)
1939 return (ENXIO);
1940
1941 if (nvlist_exists_binary(vf_config, iov_mac_addr_name)) {
1942 mac = nvlist_get_binary(vf_config, iov_mac_addr_name,
1943 &mac_size);
1944 error = -mlx5_eswitch_set_vport_mac(priv->eswitch,
1945 vfnum + 1, __DECONST(u8 *, mac));
1946 if (error != 0) {
1947 mlx5_core_err(core_dev,
1948 "setting MAC for VF %d failed, error %d\n",
1949 vfnum + 1, error);
1950 }
1951 }
1952
1953 if (nvlist_exists_number(vf_config, iov_node_guid_name)) {
1954 node_guid = nvlist_get_number(vf_config, iov_node_guid_name);
1955 error = -mlx5_modify_nic_vport_node_guid(core_dev, vfnum + 1,
1956 node_guid);
1957 if (error != 0) {
1958 mlx5_core_err(core_dev,
1959 "modifying node GUID for VF %d failed, error %d\n",
1960 vfnum + 1, error);
1961 }
1962 }
1963
1964 if (nvlist_exists_number(vf_config, iov_port_guid_name)) {
1965 port_guid = nvlist_get_number(vf_config, iov_port_guid_name);
1966 error = -mlx5_modify_nic_vport_port_guid(core_dev, vfnum + 1,
1967 port_guid);
1968 if (error != 0) {
1969 mlx5_core_err(core_dev,
1970 "modifying port GUID for VF %d failed, error %d\n",
1971 vfnum + 1, error);
1972 }
1973 }
1974
1975 error = -mlx5_eswitch_set_vport_state(priv->eswitch, vfnum + 1,
1976 VPORT_STATE_FOLLOW);
1977 if (error != 0) {
1978 mlx5_core_err(core_dev,
1979 "upping vport for VF %d failed, error %d\n",
1980 vfnum + 1, error);
1981 }
1982 error = -mlx5_core_enable_hca(core_dev, vfnum + 1);
1983 if (error != 0) {
1984 mlx5_core_err(core_dev, "enabling VF %d failed, error %d\n",
1985 vfnum + 1, error);
1986 }
1987 return (error);
1988 }
1989 #endif
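/*
 * Illustrative only: with the PF/VF schemas registered in init_one(),
 * VFs could be created through iovctl(8).  The VF parameter names are
 * whatever iov_mac_addr_name, iov_node_guid_name and iov_port_guid_name
 * expand to elsewhere in this driver; "mac-addr", "node-guid" and
 * "port-guid" below are assumed, not confirmed, values.
 *
 *	PF {
 *		device = "mce0";
 *		num_vfs = 2;
 *	}
 *	VF-0 {
 *		mac-addr = "00:25:90:00:00:01";
 *	}
 */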
1990
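/* Try to let firmware tear down the HCA directly instead of doing the
 * ordered unload: prefer the "fast" teardown command and fall back to
 * the "force" variant.  On success the device is left in the error
 * state and 0 is returned; otherwise a negative errno tells the caller
 * to perform a full unload.
 */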
1991 static int mlx5_try_fast_unload(struct mlx5_core_dev *dev)
1992 {
1993 bool fast_teardown, force_teardown;
1994 int err;
1995
1996 if (!mlx5_fast_unload_enabled) {
1997 mlx5_core_dbg(dev, "fast unload is disabled by user\n");
1998 return -EOPNOTSUPP;
1999 }
2000
2001 fast_teardown = MLX5_CAP_GEN(dev, fast_teardown);
2002 force_teardown = MLX5_CAP_GEN(dev, force_teardown);
2003
2004 mlx5_core_dbg(dev, "force teardown firmware support=%d\n", force_teardown);
2005 mlx5_core_dbg(dev, "fast teardown firmware support=%d\n", fast_teardown);
2006
2007 if (!fast_teardown && !force_teardown)
2008 return -EOPNOTSUPP;
2009
2010 if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
2011 mlx5_core_dbg(dev, "Device in internal error state, giving up\n");
2012 return -EAGAIN;
2013 }
2014
2015 /* The panic teardown firmware command will stop PCI bus communication
2016 * with the HCA, so the health poll is no longer needed.
2017 */
2018 mlx5_drain_health_wq(dev);
2019 mlx5_stop_health_poll(dev, false);
2020
2021 err = mlx5_cmd_fast_teardown_hca(dev);
2022 if (!err)
2023 goto done;
2024
2025 err = mlx5_cmd_force_teardown_hca(dev);
2026 if (!err)
2027 goto done;
2028
2029 mlx5_core_dbg(dev, "Firmware couldn't do fast unload error: %d\n", err);
2030 mlx5_start_health_poll(dev);
2031 return err;
2032 done:
2033 mlx5_enter_error_state(dev, true);
2034 return 0;
2035 }
2036
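/* Flag interrupts as disabled and wait for all MSI-X handlers (the
 * control vectors plus every completion vector) to finish running.
 */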
2037 static void mlx5_shutdown_disable_interrupts(struct mlx5_core_dev *mdev)
2038 {
2039 int nvec = mdev->priv.eq_table.num_comp_vectors + MLX5_EQ_VEC_COMP_BASE;
2040 int x;
2041
2042 mdev->priv.disable_irqs = 1;
2043
2044 /* wait for all IRQ handlers to finish processing */
2045 for (x = 0; x != nvec; x++)
2046 synchronize_irq(mdev->priv.msix_arr[x].vector);
2047 }
2048
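/* Shutdown method: switch the command interface to polling, quiesce
 * interrupts and attempt the firmware fast unload, falling back to a
 * full unload before the PCI device is disabled.
 */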
2049 static void shutdown_one(struct pci_dev *pdev)
2050 {
2051 struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
2052 struct mlx5_priv *priv = &dev->priv;
2053 int err;
2054
2055 /* enter polling mode */
2056 mlx5_cmd_use_polling(dev);
2057
2058 set_bit(MLX5_INTERFACE_STATE_TEARDOWN, &dev->intf_state);
2059
2060 /* disable all interrupts */
2061 mlx5_shutdown_disable_interrupts(dev);
2062
2063 err = mlx5_try_fast_unload(dev);
2064 if (err)
2065 mlx5_unload_one(dev, priv, false);
2066 mlx5_pci_disable_device(dev);
2067 }
2068
2069 static const struct pci_device_id mlx5_core_pci_table[] = {
2070 { PCI_VDEVICE(MELLANOX, 4113) }, /* Connect-IB */
2071 { PCI_VDEVICE(MELLANOX, 4114) }, /* Connect-IB VF */
2072 { PCI_VDEVICE(MELLANOX, 4115) }, /* ConnectX-4 */
2073 { PCI_VDEVICE(MELLANOX, 4116) }, /* ConnectX-4 VF */
2074 { PCI_VDEVICE(MELLANOX, 4117) }, /* ConnectX-4LX */
2075 { PCI_VDEVICE(MELLANOX, 4118) }, /* ConnectX-4LX VF */
2076 { PCI_VDEVICE(MELLANOX, 4119) }, /* ConnectX-5, PCIe 3.0 */
2077 { PCI_VDEVICE(MELLANOX, 4120) }, /* ConnectX-5 VF */
2078 { PCI_VDEVICE(MELLANOX, 4121) }, /* ConnectX-5 Ex */
2079 { PCI_VDEVICE(MELLANOX, 4122) }, /* ConnectX-5 Ex VF */
2080 { PCI_VDEVICE(MELLANOX, 4123) }, /* ConnectX-6 */
2081 { PCI_VDEVICE(MELLANOX, 4124) }, /* ConnectX-6 VF */
2082 { PCI_VDEVICE(MELLANOX, 4125) }, /* ConnectX-6 Dx */
2083 { PCI_VDEVICE(MELLANOX, 4126) }, /* ConnectX Family mlx5Gen Virtual Function */
2084 { PCI_VDEVICE(MELLANOX, 4127) }, /* ConnectX-6 LX */
2085 { PCI_VDEVICE(MELLANOX, 4128) },
2086 { PCI_VDEVICE(MELLANOX, 4129) }, /* ConnectX-7 */
2087 { PCI_VDEVICE(MELLANOX, 4130) },
2088 { PCI_VDEVICE(MELLANOX, 4131) }, /* ConnectX-8 */
2089 { PCI_VDEVICE(MELLANOX, 4132) },
2090 { PCI_VDEVICE(MELLANOX, 4133) },
2091 { PCI_VDEVICE(MELLANOX, 4134) },
2092 { PCI_VDEVICE(MELLANOX, 4135) },
2093 { PCI_VDEVICE(MELLANOX, 4136) },
2094 { PCI_VDEVICE(MELLANOX, 4137) },
2095 { PCI_VDEVICE(MELLANOX, 4138) },
2096 { PCI_VDEVICE(MELLANOX, 4139) },
2097 { PCI_VDEVICE(MELLANOX, 4140) },
2098 { PCI_VDEVICE(MELLANOX, 4141) },
2099 { PCI_VDEVICE(MELLANOX, 4142) },
2100 { PCI_VDEVICE(MELLANOX, 4143) },
2101 { PCI_VDEVICE(MELLANOX, 4144) },
2102 { PCI_VDEVICE(MELLANOX, 0xa2d2) }, /* BlueField integrated ConnectX-5 network controller */
2103 { PCI_VDEVICE(MELLANOX, 0xa2d3) }, /* BlueField integrated ConnectX-5 network controller VF */
2104 { PCI_VDEVICE(MELLANOX, 0xa2d6) }, /* BlueField-2 integrated ConnectX-6 Dx network controller */
2105 { PCI_VDEVICE(MELLANOX, 0xa2dc) }, /* BlueField-3 integrated ConnectX-7 network controller */
2106 { PCI_VDEVICE(MELLANOX, 0xa2df) }, /* BlueField-4 integrated ConnectX-8 network controller */
2107 { }
2108 };
2109
2110 MODULE_DEVICE_TABLE(pci, mlx5_core_pci_table);
2111
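/* Force the error-detected path without a real PCI error: the zero
 * channel state means the device is unloaded but not PCI-disabled.
 */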
2112 void mlx5_disable_device(struct mlx5_core_dev *dev)
2113 {
2114 mlx5_pci_err_detected(dev->pdev, 0);
2115 }
2116
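/* Counterpart of mlx5_disable_device(): disable the PCI device, then
 * run the slot-reset and resume handlers to bring it back up.
 */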
2117 void mlx5_recover_device(struct mlx5_core_dev *dev)
2118 {
2119 mlx5_pci_disable_device(dev);
2120 if (mlx5_pci_slot_reset(dev->pdev) == PCI_ERS_RESULT_RECOVERED)
2121 mlx5_pci_resume(dev->pdev);
2122 }
2123
2124 struct pci_driver mlx5_core_driver = {
2125 .name = DRIVER_NAME,
2126 .id_table = mlx5_core_pci_table,
2127 .shutdown = shutdown_one,
2128 .probe = init_one,
2129 .remove = remove_one,
2130 .err_handler = &mlx5_err_handler,
2131 #ifdef PCI_IOV
2132 .bsd_iov_init = mlx5_iov_init,
2133 .bsd_iov_uninit = mlx5_iov_uninit,
2134 .bsd_iov_add_vf = mlx5_iov_add_vf,
2135 #endif
2136 };
2137
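/* Module load: register the PCI driver, then the control device; undo
 * the driver registration if the latter fails.
 */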
2138 static int __init init(void)
2139 {
2140 int err;
2141
2142 err = pci_register_driver(&mlx5_core_driver);
2143 if (err)
2144 goto err_debug;
2145
2146 err = mlx5_ctl_init();
2147 if (err)
2148 goto err_ctl;
2149
2150 return 0;
2151
2152 err_ctl:
2153 pci_unregister_driver(&mlx5_core_driver);
2154
2155 err_debug:
2156 return err;
2157 }
2158
2159 static void __exit cleanup(void)
2160 {
2161 mlx5_ctl_fini();
2162 pci_unregister_driver(&mlx5_core_driver);
2163 }
2164
2165 module_init_order(init, SI_ORDER_FIRST);
2166 module_exit_order(cleanup, SI_ORDER_FIRST);
2167