xref: /freebsd/sys/dev/mlx4/mlx4_core/mlx4_main.c (revision 97549c34)
1 /*
2  * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
3  * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
4  * Copyright (c) 2005, 2006, 2007, 2008, 2014 Mellanox Technologies. All rights reserved.
5  * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
6  *
7  * This software is available to you under a choice of one of two
8  * licenses.  You may choose to be licensed under the terms of the GNU
9  * General Public License (GPL) Version 2, available from the file
10  * COPYING in the main directory of this source tree, or the
11  * OpenIB.org BSD license below:
12  *
13  *     Redistribution and use in source and binary forms, with or
14  *     without modification, are permitted provided that the following
15  *     conditions are met:
16  *
17  *      - Redistributions of source code must retain the above
18  *        copyright notice, this list of conditions and the following
19  *        disclaimer.
20  *
21  *      - Redistributions in binary form must reproduce the above
22  *        copyright notice, this list of conditions and the following
23  *        disclaimer in the documentation and/or other materials
24  *        provided with the distribution.
25  *
26  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33  * SOFTWARE.
34  */
35 
36 #define	LINUXKPI_PARAM_PREFIX mlx4_
37 
38 #include <linux/kmod.h>
39 #include <linux/module.h>
40 #include <linux/errno.h>
41 #include <linux/pci.h>
42 #include <linux/dma-mapping.h>
43 #include <linux/slab.h>
44 #include <linux/io-mapping.h>
45 #include <linux/delay.h>
46 #include <linux/netdevice.h>
47 #include <linux/string.h>
48 #include <linux/fs.h>
49 
50 #include <dev/mlx4/device.h>
51 #include <dev/mlx4/doorbell.h>
52 
53 #include "mlx4.h"
54 #include "fw.h"
55 #include "icm.h"
56 #include <dev/mlx4/stats.h>
57 
58 /* Mellanox ConnectX HCA low-level driver */
59 
60 struct workqueue_struct *mlx4_wq;
61 
62 #ifdef CONFIG_MLX4_DEBUG
63 
64 int mlx4_debug_level = 0;
65 module_param_named(debug_level, mlx4_debug_level, int, 0644);
66 MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");
67 
68 #endif /* CONFIG_MLX4_DEBUG */
69 
70 #ifdef CONFIG_PCI_MSI
71 
72 static int msi_x = 1;
73 module_param(msi_x, int, 0444);
74 MODULE_PARM_DESC(msi_x, "0 - don't use MSI-X, 1 - use MSI-X, >1 - limit number of MSI-X irqs to msi_x (non-SRIOV only)");
75 
76 #else /* CONFIG_PCI_MSI */
77 
78 #define msi_x (0)
79 
80 #endif /* CONFIG_PCI_MSI */
81 
82 static int enable_sys_tune = 0;
83 module_param(enable_sys_tune, int, 0444);
84 MODULE_PARM_DESC(enable_sys_tune, "Tune the CPUs for better performance (default 0)");
85 
86 int mlx4_blck_lb = 1;
87 module_param_named(block_loopback, mlx4_blck_lb, int, 0644);
88 MODULE_PARM_DESC(block_loopback, "Block multicast loopback packets if > 0 "
89 				 "(default: 1)");
90 enum {
91 	DEFAULT_DOMAIN	= 0,
92 	BDF_STR_SIZE	= 8, /* bb:dd.f- */
93 	DBDF_STR_SIZE	= 13 /* mmmm:bb:dd.f- */
94 };
95 
96 enum {
97 	NUM_VFS,
98 	PROBE_VF,
99 	PORT_TYPE_ARRAY
100 };
101 
102 enum {
103 	VALID_DATA,
104 	INVALID_DATA,
105 	INVALID_STR
106 };
107 
108 struct param_data {
109 	int				id;
110 	struct mlx4_dbdf2val_lst	dbdf2val;
111 };
112 
113 static struct param_data num_vfs = {
114 	.id		= NUM_VFS,
115 	.dbdf2val = {
116 		.name		= "num_vfs param",
117 		.num_vals	= 1,
118 		.def_val	= {0},
119 		.range		= {0, MLX4_MAX_NUM_VF}
120 	}
121 };
122 module_param_string(num_vfs, num_vfs.dbdf2val.str,
123 		    sizeof(num_vfs.dbdf2val.str), 0444);
124 MODULE_PARM_DESC(num_vfs,
125 		 "Either a single value (e.g. '5') to define a uniform num_vfs value for all device functions\n"
126 		 "\t\tor a string to map device function numbers to their num_vfs values (e.g. '0000:04:00.0-5,002b:1c:0b.a-15').\n"
127 		 "\t\tHexadecimal digits for the device function (e.g. 002b:1c:0b.a) and decimal for the num_vfs value (e.g. 15).");
128 
129 static struct param_data probe_vf = {
130 	.id		= PROBE_VF,
131 	.dbdf2val = {
132 		.name		= "probe_vf param",
133 		.num_vals	= 1,
134 		.def_val	= {0},
135 		.range		= {0, MLX4_MAX_NUM_VF}
136 	}
137 };
138 module_param_string(probe_vf, probe_vf.dbdf2val.str,
139 		    sizeof(probe_vf.dbdf2val.str), 0444);
140 MODULE_PARM_DESC(probe_vf,
141 		 "Either a single value (e.g. '3') to define a uniform number of VFs for the PF driver to probe for all device functions\n"
142 		 "\t\tor a string to map device function numbers to their probe_vf values (e.g. '0000:04:00.0-3,002b:1c:0b.a-13').\n"
143 		 "\t\tHexadecimal digits for the device function (e.g. 002b:1c:0b.a) and decimal for the probe_vf value (e.g. 13).");
144 
145 int mlx4_log_num_mgm_entry_size = MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE;
146 
147 module_param_named(log_num_mgm_entry_size,
148 			mlx4_log_num_mgm_entry_size, int, 0444);
149 MODULE_PARM_DESC(log_num_mgm_entry_size, "log MGM size, which defines the"
150 					 " number of QPs per MCG, for example:"
151 					 " 10 gives 248. Range: 7 <="
152 					 " log_num_mgm_entry_size <= 12."
153 					 " To activate device-managed"
154 					 " flow steering when available, set to -1");
155 
156 static int high_rate_steer;
157 module_param(high_rate_steer, int, 0444);
158 MODULE_PARM_DESC(high_rate_steer, "Enable steering mode for higher packet rate"
159 				  " (default off)");
160 
161 static int fast_drop;
162 module_param_named(fast_drop, fast_drop, int, 0444);
163 MODULE_PARM_DESC(fast_drop,
164 		 "Enable fast packet drop when no receive WQEs are posted");
165 
166 int mlx4_enable_64b_cqe_eqe = 1;
167 module_param_named(enable_64b_cqe_eqe, mlx4_enable_64b_cqe_eqe, int, 0644);
168 MODULE_PARM_DESC(enable_64b_cqe_eqe,
169 		 "Enable 64 byte CQEs/EQEs when the FW supports this if non-zero (default: 1)");
170 
171 #define HCA_GLOBAL_CAP_MASK            0
172 
173 #define PF_CONTEXT_BEHAVIOUR_MASK	MLX4_FUNC_CAP_64B_EQE_CQE
174 
175 static char mlx4_version[] __devinitdata =
176 	DRV_NAME ": Mellanox ConnectX VPI driver v"
177 	DRV_VERSION " (" DRV_RELDATE ")\n";
178 
179 static int log_num_mac = 7;
180 module_param_named(log_num_mac, log_num_mac, int, 0444);
181 MODULE_PARM_DESC(log_num_mac, "Log2 max number of MACs per ETH port (1-7)");
182 
183 static int log_num_vlan;
184 module_param_named(log_num_vlan, log_num_vlan, int, 0444);
185 MODULE_PARM_DESC(log_num_vlan,
186 	"(Obsolete) Log2 max number of VLANs per ETH port (0-7)");
187 /* Log2 max number of VLANs per ETH port (0-7) */
188 #define MLX4_LOG_NUM_VLANS 7
189 
190 int log_mtts_per_seg = ilog2(1);
191 module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444);
192 MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment "
193 		 "(0-7) (default: 0)");
194 
195 static struct param_data port_type_array = {
196 	.id		= PORT_TYPE_ARRAY,
197 	.dbdf2val = {
198 		.name		= "port_type_array param",
199 		.num_vals	= 2,
200 		.def_val	= {MLX4_PORT_TYPE_ETH, MLX4_PORT_TYPE_ETH},
201 		.range		= {MLX4_PORT_TYPE_IB, MLX4_PORT_TYPE_NA}
202 	}
203 };
204 module_param_string(port_type_array, port_type_array.dbdf2val.str,
205 		    sizeof(port_type_array.dbdf2val.str), 0444);
206 MODULE_PARM_DESC(port_type_array,
207 		 "Either a pair of values (e.g. '1,2') to define a uniform port1/port2 type configuration for all device functions\n"
208 		 "\t\tor a string to map device function numbers to their pairs of port type values (e.g. '0000:04:00.0-1;2,002b:1c:0b.a-1;1').\n"
209 		 "\t\tValid port types: 1-ib, 2-eth, 3-auto, 4-N/A\n"
210 		 "\t\tIf only one port is available, use the N/A port type for port2 (e.g. '1,4').");
211 
212 
213 struct mlx4_port_config {
214 	struct list_head list;
215 	enum mlx4_port_type port_type[MLX4_MAX_PORTS + 1];
216 	struct pci_dev *pdev;
217 };
218 
219 #define MLX4_LOG_NUM_MTT 20
220 /* We limit this to 30 because of a bitmap issue which uses int and not uint.
221      See mlx4_buddy_init -> bitmap_zero, which takes an int.
222 */
223 #define MLX4_MAX_LOG_NUM_MTT 30
224 static struct mlx4_profile mod_param_profile = {
225 	.num_qp         = 19,
226 	.num_srq        = 16,
227 	.rdmarc_per_qp  = 4,
228 	.num_cq         = 16,
229 	.num_mcg        = 13,
230 	.num_mpt        = 19,
231 	.num_mtt_segs   = 0, /* max(20, 2*MTTs for host memory) */
232 };
233 
234 module_param_named(log_num_qp, mod_param_profile.num_qp, int, 0444);
235 MODULE_PARM_DESC(log_num_qp, "log maximum number of QPs per HCA (default: 19)");
236 
237 module_param_named(log_num_srq, mod_param_profile.num_srq, int, 0444);
238 MODULE_PARM_DESC(log_num_srq, "log maximum number of SRQs per HCA "
239 		 "(default: 16)");
240 
241 module_param_named(log_rdmarc_per_qp, mod_param_profile.rdmarc_per_qp, int,
242 		   0444);
243 MODULE_PARM_DESC(log_rdmarc_per_qp, "log number of RDMARC buffers per QP "
244 		 "(default: 4)");
245 
246 module_param_named(log_num_cq, mod_param_profile.num_cq, int, 0444);
247 MODULE_PARM_DESC(log_num_cq, "log maximum number of CQs per HCA (default: 16)");
248 
249 module_param_named(log_num_mcg, mod_param_profile.num_mcg, int, 0444);
250 MODULE_PARM_DESC(log_num_mcg, "log maximum number of multicast groups per HCA "
251 		 "(default: 13)");
252 
253 module_param_named(log_num_mpt, mod_param_profile.num_mpt, int, 0444);
254 MODULE_PARM_DESC(log_num_mpt,
255 		 "log maximum number of memory protection table entries per "
256 		 "HCA (default: 19)");
257 
258 module_param_named(log_num_mtt, mod_param_profile.num_mtt_segs, int, 0444);
259 MODULE_PARM_DESC(log_num_mtt,
260 		 "log maximum number of memory translation table segments per "
261 		 "HCA (default: max(20, 2*MTTs needed to register all of host memory), limited to 30)");
262 
263 enum {
264 	MLX4_IF_STATE_BASIC,
265 	MLX4_IF_STATE_EXTENDED
266 };
267 
268 static inline u64 dbdf_to_u64(int domain, int bus, int dev, int fn)
269 {
270 	return (domain << 20) | (bus << 12) | (dev << 4) | fn;
271 }
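/*
 * Illustrative example (editorial note, not in the original source): the
 * packing above places fn in bits 0-3, dev in bits 4-11, bus in bits 12-19
 * and the domain in bits 20 and up, so for the BDF "0000:04:00.0":
 *
 *	dbdf_to_u64(0x0000, 0x04, 0x00, 0x0) == 0x4000
 *
 * and for "002b:1c:0b.a":
 *
 *	dbdf_to_u64(0x002b, 0x1c, 0x0b, 0xa) == 0x2b1c0ba
 */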
272 
273 static inline void pr_bdf_err(const char *dbdf, const char *pname)
274 {
275 	pr_warn("mlx4_core: '%s' is not valid bdf in '%s'\n", dbdf, pname);
276 }
277 
278 static inline void pr_val_err(const char *dbdf, const char *pname,
279 			      const char *val)
280 {
281 	pr_warn("mlx4_core: value '%s' of bdf '%s' in '%s' is not valid\n"
282 		, val, dbdf, pname);
283 }
284 
285 static inline void pr_out_of_range_bdf(const char *dbdf, int val,
286 				       struct mlx4_dbdf2val_lst *dbdf2val)
287 {
288 	pr_warn("mlx4_core: value %d in bdf '%s' of '%s' is out of its valid range (%d,%d)\n"
289 		, val, dbdf, dbdf2val->name , dbdf2val->range.min,
290 		dbdf2val->range.max);
291 }
292 
293 static inline void pr_out_of_range(struct mlx4_dbdf2val_lst *dbdf2val)
294 {
295 	pr_warn("mlx4_core: value of '%s' is out of its valid range (%d,%d)\n"
296 		, dbdf2val->name , dbdf2val->range.min, dbdf2val->range.max);
297 }
298 
299 static inline int is_in_range(int val, struct mlx4_range *r)
300 {
301 	return (val >= r->min && val <= r->max);
302 }
303 
304 static int update_defaults(struct param_data *pdata)
305 {
306 	long int val[MLX4_MAX_BDF_VALS];
307 	int ret;
308 	char *t, *p = pdata->dbdf2val.str;
309 	char sval[32];
310 	int val_len;
311 
312 	if (!strlen(p) || strchr(p, ':') || strchr(p, '.') || strchr(p, ';'))
313 		return INVALID_STR;
314 
315 	switch (pdata->id) {
316 	case PORT_TYPE_ARRAY:
317 		t = strchr(p, ',');
318 		if (!t || t == p || (t - p) >= sizeof(sval))
319 			return INVALID_STR;
320 
321 		val_len = t - p;
322 		strncpy(sval, p, val_len);
323 		sval[val_len] = 0;
324 
325 		ret = kstrtol(sval, 0, &val[0]);
326 		if (ret == -EINVAL)
327 			return INVALID_STR;
328 		if (ret || !is_in_range(val[0], &pdata->dbdf2val.range)) {
329 			pr_out_of_range(&pdata->dbdf2val);
330 			return INVALID_DATA;
331 		}
332 
333 		ret = kstrtol(t + 1, 0, &val[1]);
334 		if (ret == -EINVAL)
335 			return INVALID_STR;
336 		if (ret || !is_in_range(val[1], &pdata->dbdf2val.range)) {
337 			pr_out_of_range(&pdata->dbdf2val);
338 			return INVALID_DATA;
339 		}
340 
341 		pdata->dbdf2val.tbl[0].val[0] = val[0];
342 		pdata->dbdf2val.tbl[0].val[1] = val[1];
343 		break;
344 
345 	case NUM_VFS:
346 	case PROBE_VF:
347 		ret = kstrtol(p, 0, &val[0]);
348 		if (ret == -EINVAL)
349 			return INVALID_STR;
350 		if (ret || !is_in_range(val[0], &pdata->dbdf2val.range)) {
351 			pr_out_of_range(&pdata->dbdf2val);
352 			return INVALID_DATA;
353 		}
354 		pdata->dbdf2val.tbl[0].val[0] = val[0];
355 		break;
356 	}
357 	pdata->dbdf2val.tbl[1].dbdf = MLX4_ENDOF_TBL;
358 
359 	return VALID_DATA;
360 }
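/*
 * Example (editorial sketch): for num_vfs the uniform string "5" sets
 * tbl[0].val[0] = 5, which then applies to every device function; for
 * port_type_array the string "1,2" sets tbl[0].val = {1, 2}, i.e. port1
 * IB and port2 ETH.  A string containing ':', '.' or ';' is rejected here
 * (INVALID_STR) so that it can instead be parsed as a per-device mapping
 * by mlx4_fill_dbdf2val_tbl() below.
 */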
361 
362 int mlx4_fill_dbdf2val_tbl(struct mlx4_dbdf2val_lst *dbdf2val_lst)
363 {
364 	int domain, bus, dev, fn;
365 	u64 dbdf;
366 	char *p, *t, *v;
367 	char tmp[32];
368 	char sbdf[32];
369 	char sep = ',';
370 	int j, k, str_size, i = 1;
371 	int prfx_size;
372 
373 	p = dbdf2val_lst->str;
374 
375 	for (j = 0; j < dbdf2val_lst->num_vals; j++)
376 		dbdf2val_lst->tbl[0].val[j] = dbdf2val_lst->def_val[j];
377 	dbdf2val_lst->tbl[1].dbdf = MLX4_ENDOF_TBL;
378 
379 	str_size = strlen(dbdf2val_lst->str);
380 
381 	if (str_size == 0)
382 		return 0;
383 
384 	while (strlen(p)) {
385 		prfx_size = BDF_STR_SIZE;
386 		sbdf[prfx_size] = 0;
387 		strncpy(sbdf, p, prfx_size);
388 		domain = DEFAULT_DOMAIN;
389 		if (sscanf(sbdf, "%02x:%02x.%x-", &bus, &dev, &fn) != 3) {
390 			prfx_size = DBDF_STR_SIZE;
391 			sbdf[prfx_size] = 0;
392 			strncpy(sbdf, p, prfx_size);
393 			if (sscanf(sbdf, "%04x:%02x:%02x.%x-", &domain, &bus,
394 				   &dev, &fn) != 4) {
395 				pr_bdf_err(sbdf, dbdf2val_lst->name);
396 				goto err;
397 			}
398 			sprintf(tmp, "%04x:%02x:%02x.%x-", domain, bus, dev,
399 				fn);
400 		} else {
401 			sprintf(tmp, "%02x:%02x.%x-", bus, dev, fn);
402 		}
403 
404 		if (strnicmp(sbdf, tmp, sizeof(tmp))) {
405 			pr_bdf_err(sbdf, dbdf2val_lst->name);
406 			goto err;
407 		}
408 
409 		dbdf = dbdf_to_u64(domain, bus, dev, fn);
410 
411 		for (j = 1; j < i; j++)
412 			if (dbdf2val_lst->tbl[j].dbdf == dbdf) {
413 				pr_warn("mlx4_core: in '%s', %s appears multiple times\n"
414 					, dbdf2val_lst->name, sbdf);
415 				goto err;
416 			}
417 
418 		if (i >= MLX4_DEVS_TBL_SIZE) {
419 			pr_warn("mlx4_core: Too many devices in '%s'\n"
420 				, dbdf2val_lst->name);
421 			goto err;
422 		}
423 
424 		p += prfx_size;
425 		t = strchr(p, sep);
426 		t = t ? t : p + strlen(p);
427 		if (p >= t) {
428 			pr_val_err(sbdf, dbdf2val_lst->name, "");
429 			goto err;
430 		}
431 
432 		for (k = 0; k < dbdf2val_lst->num_vals; k++) {
433 			char sval[32];
434 			long int val;
435 			int ret, val_len;
436 			char vsep = ';';
437 
438 			v = (k == dbdf2val_lst->num_vals - 1) ? t : strchr(p, vsep);
439 			if (!v || v > t || v == p || (v - p) >= sizeof(sval)) {
440 				pr_val_err(sbdf, dbdf2val_lst->name, p);
441 				goto err;
442 			}
443 			val_len = v - p;
444 			strncpy(sval, p, val_len);
445 			sval[val_len] = 0;
446 
447 			ret = kstrtol(sval, 0, &val);
448 			if (ret) {
449 				if (strchr(p, vsep))
450 					pr_warn("mlx4_core: too many vals in bdf '%s' of '%s'\n"
451 						, sbdf, dbdf2val_lst->name);
452 				else
453 					pr_val_err(sbdf, dbdf2val_lst->name,
454 						   sval);
455 				goto err;
456 			}
457 			if (!is_in_range(val, &dbdf2val_lst->range)) {
458 				pr_out_of_range_bdf(sbdf, val, dbdf2val_lst);
459 				goto err;
460 			}
461 
462 			dbdf2val_lst->tbl[i].val[k] = val;
463 			p = v;
464 			if (p[0] == vsep)
465 				p++;
466 		}
467 
468 		dbdf2val_lst->tbl[i].dbdf = dbdf;
469 		if (strlen(p)) {
470 			if (p[0] != sep) {
471 				pr_warn("mlx4_core: expected separator '%c' before '%s' in '%s'\n"
472 					, sep, p, dbdf2val_lst->name);
473 				goto err;
474 			}
475 			p++;
476 		}
477 		i++;
478 		if (i < MLX4_DEVS_TBL_SIZE)
479 			dbdf2val_lst->tbl[i].dbdf = MLX4_ENDOF_TBL;
480 	}
481 
482 	return 0;
483 
484 err:
485 	dbdf2val_lst->tbl[1].dbdf = MLX4_ENDOF_TBL;
486 	pr_warn("mlx4_core: The value of '%s' is incorrect. The value is discarded!\n"
487 		, dbdf2val_lst->name);
488 
489 	return -EINVAL;
490 }
491 EXPORT_SYMBOL(mlx4_fill_dbdf2val_tbl);
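/*
 * Worked example (editorial, hypothetical values): with num_vals == 2 and
 * the string "0000:04:00.0-1;2,0000:07:00.0-2;2", the loop above leaves
 * tbl[0] holding the defaults (used for any device not listed) and fills:
 *
 *	tbl[1].dbdf == 0x4000, tbl[1].val == {1, 2}	(port1 ib, port2 eth)
 *	tbl[2].dbdf == 0x7000, tbl[2].val == {2, 2}	(both ports eth)
 *	tbl[3].dbdf == MLX4_ENDOF_TBL
 */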
492 
493 int mlx4_get_val(struct mlx4_dbdf2val *tbl, struct pci_dev *pdev, int idx,
494 		 int *val)
495 {
496 	u64 dbdf;
497 	int i = 1;
498 
499 	*val = tbl[0].val[idx];
500 	if (!pdev)
501 		return -EINVAL;
502 
503 	dbdf = dbdf_to_u64(pci_get_domain(pdev->dev.bsddev), pci_get_bus(pdev->dev.bsddev),
504 			   PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
505 
506 	while ((i < MLX4_DEVS_TBL_SIZE) && (tbl[i].dbdf != MLX4_ENDOF_TBL)) {
507 		if (tbl[i].dbdf == dbdf) {
508 			*val = tbl[i].val[idx];
509 			return 0;
510 		}
511 		i++;
512 	}
513 
514 	return 0;
515 }
516 EXPORT_SYMBOL(mlx4_get_val);
517 
518 static void process_mod_param_profile(struct mlx4_profile *profile)
519 {
520 	vm_size_t hwphyssz;
521 	hwphyssz = 0;
522 	TUNABLE_ULONG_FETCH("hw.realmem", (u_long *) &hwphyssz);
523 
524 	profile->num_qp        = 1 << mod_param_profile.num_qp;
525 	profile->num_srq       = 1 << mod_param_profile.num_srq;
526 	profile->rdmarc_per_qp = 1 << mod_param_profile.rdmarc_per_qp;
527 	profile->num_cq	       = 1 << mod_param_profile.num_cq;
528 	profile->num_mcg       = 1 << mod_param_profile.num_mcg;
529 	profile->num_mpt       = 1 << mod_param_profile.num_mpt;
530 	/*
531 	 * We want to scale the number of MTTs with the size of the
532 	 * system memory, since it makes sense to register a lot of
533 	 * memory on a system with a lot of memory.  As a heuristic,
534 	 * make sure we have enough MTTs to register twice the system
535 	 * memory (with PAGE_SIZE entries).
536 	 *
537 	 * This number has to be a power of two and fit into 32 bits
538 	 * due to device limitations. We cap this at 2^30 as of bit map
539 	 * limitation to work with int instead of uint (mlx4_buddy_init -> bitmap_zero)
540 	 * That limits us to 4TB of memory registration per HCA with
541 	 * 4KB pages, which is probably OK for the next few months.
542 	 */
543 	if (mod_param_profile.num_mtt_segs)
544 		profile->num_mtt_segs = 1 << mod_param_profile.num_mtt_segs;
545 	else {
546 		profile->num_mtt_segs =
547 			roundup_pow_of_two(max_t(unsigned,
548 						1 << (MLX4_LOG_NUM_MTT - log_mtts_per_seg),
549 						min(1UL <<
550 						(MLX4_MAX_LOG_NUM_MTT -
551 						log_mtts_per_seg),
552 						(hwphyssz << 1)
553 						>> log_mtts_per_seg)));
554 		/* set the actual value, so it will be reflected to the user
555 		   via sysfs */
556 		mod_param_profile.num_mtt_segs = ilog2(profile->num_mtt_segs);
557 	}
558 }
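/*
 * Sizing sketch (editorial): with the default log_mtts_per_seg == 0 the
 * expression above reduces to
 *
 *	num_mtt_segs = roundup_pow_of_two(max(1 << 20,
 *			min(1 << 30, hw.realmem * 2)))
 *
 * i.e. at least 2^20 segments, scaled with twice the reported physical
 * memory and capped at 2^30 by the bitmap limitation noted above.
 */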
559 
560 int mlx4_check_port_params(struct mlx4_dev *dev,
561 			   enum mlx4_port_type *port_type)
562 {
563 	int i;
564 
565 	for (i = 0; i < dev->caps.num_ports - 1; i++) {
566 		if (port_type[i] != port_type[i + 1]) {
567 			if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP)) {
568 				mlx4_err(dev, "Only same port types are supported "
569 					 "on this HCA, aborting.\n");
570 				return -EINVAL;
571 			}
572 		}
573 	}
574 
575 	for (i = 0; i < dev->caps.num_ports; i++) {
576 		if (!(port_type[i] & dev->caps.supported_type[i+1])) {
577 			mlx4_err(dev, "Requested port type for port %d is not "
578 				      "supported on this HCA\n", i + 1);
579 			return -EINVAL;
580 		}
581 	}
582 	return 0;
583 }
584 
585 static void mlx4_set_port_mask(struct mlx4_dev *dev)
586 {
587 	int i;
588 
589 	for (i = 1; i <= dev->caps.num_ports; ++i)
590 		dev->caps.port_mask[i] = dev->caps.port_type[i];
591 }
592 
593 static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
594 {
595 	int err;
596 	int i;
597 
598 	err = mlx4_QUERY_DEV_CAP(dev, dev_cap);
599 	if (err) {
600 		mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
601 		return err;
602 	}
603 
604 	if (dev_cap->min_page_sz > PAGE_SIZE) {
605 		mlx4_err(dev, "HCA minimum page size of %d bigger than "
606 			 "kernel PAGE_SIZE of %d, aborting.\n",
607 			 dev_cap->min_page_sz, (int)PAGE_SIZE);
608 		return -ENODEV;
609 	}
610 	if (dev_cap->num_ports > MLX4_MAX_PORTS) {
611 		mlx4_err(dev, "HCA has %d ports, but we only support %d, "
612 			 "aborting.\n",
613 			 dev_cap->num_ports, MLX4_MAX_PORTS);
614 		return -ENODEV;
615 	}
616 
617 	if (dev_cap->uar_size > pci_resource_len(dev->pdev, 2)) {
618 		mlx4_err(dev, "HCA reported UAR size of 0x%x bigger than "
619 			 "PCI resource 2 size of 0x%llx, aborting.\n",
620 			 dev_cap->uar_size,
621 			 (unsigned long long) pci_resource_len(dev->pdev, 2));
622 		return -ENODEV;
623 	}
624 
625 	dev->caps.num_ports	     = dev_cap->num_ports;
626 	dev->phys_caps.num_phys_eqs  = MLX4_MAX_EQ_NUM;
627 	for (i = 1; i <= dev->caps.num_ports; ++i) {
628 		dev->caps.vl_cap[i]	    = dev_cap->max_vl[i];
629 		dev->caps.ib_mtu_cap[i]	    = dev_cap->ib_mtu[i];
630 		dev->phys_caps.gid_phys_table_len[i]  = dev_cap->max_gids[i];
631 		dev->phys_caps.pkey_phys_table_len[i] = dev_cap->max_pkeys[i];
632 		/* set gid and pkey table operating lengths by default
633 		 * to non-sriov values */
634 		dev->caps.gid_table_len[i]  = dev_cap->max_gids[i];
635 		dev->caps.pkey_table_len[i] = dev_cap->max_pkeys[i];
636 		dev->caps.port_width_cap[i] = dev_cap->max_port_width[i];
637 		dev->caps.eth_mtu_cap[i]    = dev_cap->eth_mtu[i];
638 		dev->caps.def_mac[i]        = dev_cap->def_mac[i];
639 		dev->caps.supported_type[i] = dev_cap->supported_port_types[i];
640 		dev->caps.suggested_type[i] = dev_cap->suggested_type[i];
641 		dev->caps.default_sense[i] = dev_cap->default_sense[i];
642 		dev->caps.trans_type[i]	    = dev_cap->trans_type[i];
643 		dev->caps.vendor_oui[i]     = dev_cap->vendor_oui[i];
644 		dev->caps.wavelength[i]     = dev_cap->wavelength[i];
645 		dev->caps.trans_code[i]     = dev_cap->trans_code[i];
646 	}
647 
648 	dev->caps.uar_page_size	     = PAGE_SIZE;
649 	dev->caps.num_uars	     = dev_cap->uar_size / PAGE_SIZE;
650 	dev->caps.local_ca_ack_delay = dev_cap->local_ca_ack_delay;
651 	dev->caps.bf_reg_size	     = dev_cap->bf_reg_size;
652 	dev->caps.bf_regs_per_page   = dev_cap->bf_regs_per_page;
653 	dev->caps.max_sq_sg	     = dev_cap->max_sq_sg;
654 	dev->caps.max_rq_sg	     = dev_cap->max_rq_sg;
655 	dev->caps.max_wqes	     = dev_cap->max_qp_sz;
656 	dev->caps.max_qp_init_rdma   = dev_cap->max_requester_per_qp;
657 	dev->caps.max_srq_wqes	     = dev_cap->max_srq_sz;
658 	dev->caps.max_srq_sge	     = dev_cap->max_rq_sg - 1;
659 	dev->caps.reserved_srqs	     = dev_cap->reserved_srqs;
660 	dev->caps.max_sq_desc_sz     = dev_cap->max_sq_desc_sz;
661 	dev->caps.max_rq_desc_sz     = dev_cap->max_rq_desc_sz;
662 	/*
663 	 * Subtract 1 from the limit because we need to allocate a
664 	 * spare CQE to enable resizing the CQ
665 	 */
666 	dev->caps.max_cqes	     = dev_cap->max_cq_sz - 1;
667 	dev->caps.reserved_cqs	     = dev_cap->reserved_cqs;
668 	dev->caps.reserved_eqs	     = dev_cap->reserved_eqs;
669 	dev->caps.reserved_mtts      = dev_cap->reserved_mtts;
670 	dev->caps.reserved_mrws	     = dev_cap->reserved_mrws;
671 
672 	/* The first 128 UARs are used for EQ doorbells */
673 	dev->caps.reserved_uars	     = max_t(int, 128, dev_cap->reserved_uars);
674 	dev->caps.reserved_pds	     = dev_cap->reserved_pds;
675 	dev->caps.reserved_xrcds     = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ?
676 					dev_cap->reserved_xrcds : 0;
677 	dev->caps.max_xrcds          = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ?
678 					dev_cap->max_xrcds : 0;
679 	dev->caps.mtt_entry_sz       = dev_cap->mtt_entry_sz;
680 
681 	dev->caps.max_msg_sz         = dev_cap->max_msg_sz;
682 	dev->caps.page_size_cap	     = ~(u32) (dev_cap->min_page_sz - 1);
683 	dev->caps.flags		     = dev_cap->flags;
684 	dev->caps.flags2	     = dev_cap->flags2;
685 	dev->caps.bmme_flags	     = dev_cap->bmme_flags;
686 	dev->caps.reserved_lkey	     = dev_cap->reserved_lkey;
687 	dev->caps.stat_rate_support  = dev_cap->stat_rate_support;
688 	dev->caps.cq_timestamp       = dev_cap->timestamp_support;
689 	dev->caps.max_gso_sz	     = dev_cap->max_gso_sz;
690 	dev->caps.max_rss_tbl_sz     = dev_cap->max_rss_tbl_sz;
691 
692 	/* Sense port always allowed on supported devices for ConnectX-1 and -2 */
693 	if (mlx4_priv(dev)->pci_dev_data & MLX4_PCI_DEV_FORCE_SENSE_PORT)
694 		dev->caps.flags |= MLX4_DEV_CAP_FLAG_SENSE_SUPPORT;
695 	/* Don't do sense port on multifunction devices (for now at least) */
696 	if (mlx4_is_mfunc(dev))
697 		dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_SENSE_SUPPORT;
698 
699 	dev->caps.log_num_macs  = log_num_mac;
700 	dev->caps.log_num_vlans = MLX4_LOG_NUM_VLANS;
701 
702 	dev->caps.fast_drop	= fast_drop ?
703 				  !!(dev->caps.flags & MLX4_DEV_CAP_FLAG_FAST_DROP) :
704 				  0;
705 
706 	for (i = 1; i <= dev->caps.num_ports; ++i) {
707 		dev->caps.port_type[i] = MLX4_PORT_TYPE_NONE;
708 		if (dev->caps.supported_type[i]) {
709 			/* if only ETH is supported - assign ETH */
710 			if (dev->caps.supported_type[i] == MLX4_PORT_TYPE_ETH)
711 				dev->caps.port_type[i] = MLX4_PORT_TYPE_ETH;
712 			/* if only IB is supported, assign IB */
713 			else if (dev->caps.supported_type[i] ==
714 				 MLX4_PORT_TYPE_IB)
715 				dev->caps.port_type[i] = MLX4_PORT_TYPE_IB;
716 			else {
717 				/*
718 				 * if IB and ETH are supported, we set the port
719 				 * type according to user selection of port type;
720 				 * if there is no user selection, take the FW hint
721 				 */
722 				int pta;
723 				mlx4_get_val(port_type_array.dbdf2val.tbl,
724 					     pci_physfn(dev->pdev), i - 1,
725 					     &pta);
726 				if (pta == MLX4_PORT_TYPE_NONE) {
727 					dev->caps.port_type[i] = dev->caps.suggested_type[i] ?
728 						MLX4_PORT_TYPE_ETH : MLX4_PORT_TYPE_IB;
729 				} else if (pta == MLX4_PORT_TYPE_NA) {
730 					mlx4_err(dev, "Port %d is a valid port. "
731 						 "It is not allowed to configure its type to N/A (%d)\n",
732 						 i, MLX4_PORT_TYPE_NA);
733 					return -EINVAL;
734 				} else {
735 					dev->caps.port_type[i] = pta;
736 				}
737 			}
738 		}
739 		/*
740 		 * Link sensing is allowed on the port if 3 conditions are true:
741 		 * 1. Both protocols are supported on the port.
742 		 * 2. Different types are supported on the port.
743 		 * 3. FW declared that it supports link sensing.
744 		 */
745 		mlx4_priv(dev)->sense.sense_allowed[i] =
746 			((dev->caps.supported_type[i] == MLX4_PORT_TYPE_AUTO) &&
747 			 (dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) &&
748 			 (dev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT));
749 
750 		/* Disabling auto sense for default Eth ports support */
751 		mlx4_priv(dev)->sense.sense_allowed[i] = 0;
752 
753 		/*
754 		 * If "default_sense" bit is set, we move the port to "AUTO" mode
755 		 * and perform sense_port FW command to try and set the correct
756 		 * port type from beginning
757 		 */
758 		if (mlx4_priv(dev)->sense.sense_allowed[i] && dev->caps.default_sense[i]) {
759 			enum mlx4_port_type sensed_port = MLX4_PORT_TYPE_NONE;
760 			dev->caps.possible_type[i] = MLX4_PORT_TYPE_AUTO;
761 			mlx4_SENSE_PORT(dev, i, &sensed_port);
762 			if (sensed_port != MLX4_PORT_TYPE_NONE)
763 				dev->caps.port_type[i] = sensed_port;
764 		} else {
765 			dev->caps.possible_type[i] = dev->caps.port_type[i];
766 		}
767 
768 		if (dev->caps.log_num_macs > dev_cap->log_max_macs[i]) {
769 			dev->caps.log_num_macs = dev_cap->log_max_macs[i];
770 			mlx4_warn(dev, "Requested number of MACs is too high "
771 				  "for port %d, reducing to %d.\n",
772 				  i, 1 << dev->caps.log_num_macs);
773 		}
774 		if (dev->caps.log_num_vlans > dev_cap->log_max_vlans[i]) {
775 			dev->caps.log_num_vlans = dev_cap->log_max_vlans[i];
776 			mlx4_warn(dev, "Requested number of VLANs is too high "
777 				  "for port %d, reducing to %d.\n",
778 				  i, 1 << dev->caps.log_num_vlans);
779 		}
780 	}
781 
782 	dev->caps.max_basic_counters = dev_cap->max_basic_counters;
783 	dev->caps.max_extended_counters = dev_cap->max_extended_counters;
784 	/* support extended counters if available */
785 	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS_EXT)
786 		dev->caps.max_counters = dev->caps.max_extended_counters;
787 	else
788 		dev->caps.max_counters = dev->caps.max_basic_counters;
789 
790 	dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] = dev_cap->reserved_qps;
791 	dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] =
792 		dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] =
793 		(1 << dev->caps.log_num_macs) *
794 		(1 << dev->caps.log_num_vlans) *
795 		dev->caps.num_ports;
796 	dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH] = MLX4_NUM_FEXCH;
797 
798 	dev->caps.reserved_qps = dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] +
799 		dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] +
800 		dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] +
801 		dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH];
802 
803 	dev->caps.sync_qp = dev_cap->sync_qp;
804 	if (dev->pdev->device == 0x1003)
805 		dev->caps.cq_flags |= MLX4_DEV_CAP_CQ_FLAG_IO;
806 
807 	dev->caps.sqp_demux = (mlx4_is_master(dev)) ? MLX4_MAX_NUM_SLAVES : 0;
808 
809 	if (!mlx4_enable_64b_cqe_eqe && !mlx4_is_slave(dev)) {
810 		if (dev_cap->flags &
811 		    (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) {
812 			mlx4_warn(dev, "64B EQEs/CQEs supported by the device but not enabled\n");
813 			dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_CQE;
814 			dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_EQE;
815 		}
816 	}
817 
818 	if ((dev->caps.flags &
819 	    (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) &&
820 	    mlx4_is_master(dev))
821 		dev->caps.function_caps |= MLX4_FUNC_CAP_64B_EQE_CQE;
822 
823 	if (!mlx4_is_slave(dev)) {
824 		for (i = 0; i < dev->caps.num_ports; ++i)
825 			dev->caps.def_counter_index[i] = i << 1;
826 	}
827 
828 	return 0;
829 }
830 /* The function checks if there are live VFs and returns how many. */
831 static int mlx4_how_many_lives_vf(struct mlx4_dev *dev)
832 {
833 	struct mlx4_priv *priv = mlx4_priv(dev);
834 	struct mlx4_slave_state *s_state;
835 	int i;
836 	int ret = 0;
837 
838 	for (i = 1 /* slave 0 is the PPF */; i < dev->num_slaves; ++i) {
839 		s_state = &priv->mfunc.master.slave_state[i];
840 		if (s_state->active && s_state->last_cmd !=
841 		    MLX4_COMM_CMD_RESET) {
842 			mlx4_warn(dev, "%s: slave: %d is still active\n",
843 				  __func__, i);
844 			ret++;
845 		}
846 	}
847 	return ret;
848 }
849 
850 int mlx4_get_parav_qkey(struct mlx4_dev *dev, u32 qpn, u32 *qkey)
851 {
852 	u32 qk = MLX4_RESERVED_QKEY_BASE;
853 
854 	if (qpn >= dev->phys_caps.base_tunnel_sqpn + 8 * MLX4_MFUNC_MAX ||
855 	    qpn < dev->phys_caps.base_proxy_sqpn)
856 		return -EINVAL;
857 
858 	if (qpn >= dev->phys_caps.base_tunnel_sqpn)
859 		/* tunnel qp */
860 		qk += qpn - dev->phys_caps.base_tunnel_sqpn;
861 	else
862 		qk += qpn - dev->phys_caps.base_proxy_sqpn;
863 	*qkey = qk;
864 	return 0;
865 }
866 EXPORT_SYMBOL(mlx4_get_parav_qkey);
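/*
 * Example (editorial): proxy and tunnel QPs each get a QKEY at a fixed
 * offset from MLX4_RESERVED_QKEY_BASE.  A tunnel QP three past the tunnel
 * base (qpn == base_tunnel_sqpn + 3) yields *qkey == base + 3, while any
 * QPN outside [base_proxy_sqpn, base_tunnel_sqpn + 8 * MLX4_MFUNC_MAX)
 * is rejected with -EINVAL.
 */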
867 
868 void mlx4_sync_pkey_table(struct mlx4_dev *dev, int slave, int port, int i, int val)
869 {
870 	struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);
871 
872 	if (!mlx4_is_master(dev))
873 		return;
874 
875 	priv->virt2phys_pkey[slave][port - 1][i] = val;
876 }
877 EXPORT_SYMBOL(mlx4_sync_pkey_table);
878 
879 void mlx4_put_slave_node_guid(struct mlx4_dev *dev, int slave, __be64 guid)
880 {
881 	struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);
882 
883 	if (!mlx4_is_master(dev))
884 		return;
885 
886 	priv->slave_node_guids[slave] = guid;
887 }
888 EXPORT_SYMBOL(mlx4_put_slave_node_guid);
889 
890 __be64 mlx4_get_slave_node_guid(struct mlx4_dev *dev, int slave)
891 {
892 	struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);
893 
894 	if (!mlx4_is_master(dev))
895 		return 0;
896 
897 	return priv->slave_node_guids[slave];
898 }
899 EXPORT_SYMBOL(mlx4_get_slave_node_guid);
900 
901 int mlx4_is_slave_active(struct mlx4_dev *dev, int slave)
902 {
903 	struct mlx4_priv *priv = mlx4_priv(dev);
904 	struct mlx4_slave_state *s_slave;
905 
906 	if (!mlx4_is_master(dev))
907 		return 0;
908 
909 	s_slave = &priv->mfunc.master.slave_state[slave];
910 	return !!s_slave->active;
911 }
912 EXPORT_SYMBOL(mlx4_is_slave_active);
913 
914 static void slave_adjust_steering_mode(struct mlx4_dev *dev,
915 				       struct mlx4_dev_cap *dev_cap,
916 				       struct mlx4_init_hca_param *hca_param)
917 {
918 	dev->caps.steering_mode = hca_param->steering_mode;
919 	if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED)
920 		dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry;
921 	else
922 		dev->caps.num_qp_per_mgm =
923 			4 * ((1 << hca_param->log_mc_entry_sz)/16 - 2);
924 
925 	mlx4_dbg(dev, "Steering mode is: %s\n",
926 		 mlx4_steering_mode_str(dev->caps.steering_mode));
927 }
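/*
 * Worked example (editorial): in B0 steering mode with
 * hca_param->log_mc_entry_sz == 10, the formula above gives
 *
 *	num_qp_per_mgm = 4 * ((1 << 10) / 16 - 2) = 4 * 62 = 248
 *
 * matching the "10 gives 248" note in the log_num_mgm_entry_size module
 * parameter description.
 */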
928 
929 static int mlx4_slave_cap(struct mlx4_dev *dev)
930 {
931 	int			   err;
932 	u32			   page_size;
933 	struct mlx4_dev_cap	   dev_cap;
934 	struct mlx4_func_cap	   func_cap;
935 	struct mlx4_init_hca_param hca_param;
936 	int			   i;
937 
938 	memset(&hca_param, 0, sizeof(hca_param));
939 	err = mlx4_QUERY_HCA(dev, &hca_param);
940 	if (err) {
941 		mlx4_err(dev, "QUERY_HCA command failed, aborting.\n");
942 		return err;
943 	}
944 
945 	/* fail if the HCA has an unknown capability */
946 	if ((hca_param.global_caps | HCA_GLOBAL_CAP_MASK) !=
947 	    HCA_GLOBAL_CAP_MASK) {
948 		mlx4_err(dev, "Unknown hca global capabilities\n");
949 		return -ENOSYS;
950 	}
951 
952 	mlx4_log_num_mgm_entry_size = hca_param.log_mc_entry_sz;
953 
954 	dev->caps.hca_core_clock = hca_param.hca_core_clock;
955 
956 	memset(&dev_cap, 0, sizeof(dev_cap));
957 	dev->caps.max_qp_dest_rdma = 1 << hca_param.log_rd_per_qp;
958 	err = mlx4_dev_cap(dev, &dev_cap);
959 	if (err) {
960 		mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
961 		return err;
962 	}
963 
964 	err = mlx4_QUERY_FW(dev);
965 	if (err)
966 		mlx4_err(dev, "QUERY_FW command failed: could not get FW version.\n");
967 
968 	if (!hca_param.mw_enable) {
969 		dev->caps.flags      &= ~MLX4_DEV_CAP_FLAG_MEM_WINDOW;
970 		dev->caps.bmme_flags &= ~MLX4_BMME_FLAG_TYPE_2_WIN;
971 	}
972 
973 	page_size = ~dev->caps.page_size_cap + 1;
974 	mlx4_warn(dev, "HCA minimum page size: %d\n", page_size);
975 	if (page_size > PAGE_SIZE) {
976 		mlx4_err(dev, "HCA minimum page size of %d bigger than "
977 			 "kernel PAGE_SIZE of %d, aborting.\n",
978 			 page_size, (int)PAGE_SIZE);
979 		return -ENODEV;
980 	}
981 
982 	/* slave gets uar page size from QUERY_HCA fw command */
983 	dev->caps.uar_page_size = 1 << (hca_param.uar_page_sz + 12);
984 
985 	/* TODO: relax this assumption */
986 	if (dev->caps.uar_page_size != PAGE_SIZE) {
987 		mlx4_err(dev, "UAR size:%d != kernel PAGE_SIZE of %d\n",
988 			 dev->caps.uar_page_size, (int)PAGE_SIZE);
989 		return -ENODEV;
990 	}
991 
992 	memset(&func_cap, 0, sizeof(func_cap));
993 	err = mlx4_QUERY_FUNC_CAP(dev, 0, &func_cap);
994 	if (err) {
995 		mlx4_err(dev, "QUERY_FUNC_CAP general command failed, aborting (%d).\n",
996 			  err);
997 		return err;
998 	}
999 
1000 	if ((func_cap.pf_context_behaviour | PF_CONTEXT_BEHAVIOUR_MASK) !=
1001 	    PF_CONTEXT_BEHAVIOUR_MASK) {
1002 		mlx4_err(dev, "Unknown pf context behaviour\n");
1003 		return -ENOSYS;
1004 	}
1005 
1006 	dev->caps.num_ports		= func_cap.num_ports;
1007 	dev->quotas.qp			= func_cap.qp_quota;
1008 	dev->quotas.srq			= func_cap.srq_quota;
1009 	dev->quotas.cq			= func_cap.cq_quota;
1010 	dev->quotas.mpt			= func_cap.mpt_quota;
1011 	dev->quotas.mtt			= func_cap.mtt_quota;
1012 	dev->caps.num_qps		= 1 << hca_param.log_num_qps;
1013 	dev->caps.num_srqs		= 1 << hca_param.log_num_srqs;
1014 	dev->caps.num_cqs		= 1 << hca_param.log_num_cqs;
1015 	dev->caps.num_mpts		= 1 << hca_param.log_mpt_sz;
1016 	dev->caps.num_eqs		= func_cap.max_eq;
1017 	dev->caps.reserved_eqs		= func_cap.reserved_eq;
1018 	dev->caps.num_pds               = MLX4_NUM_PDS;
1019 	dev->caps.num_mgms              = 0;
1020 	dev->caps.num_amgms             = 0;
1021 
1022 	if (dev->caps.num_ports > MLX4_MAX_PORTS) {
1023 		mlx4_err(dev, "HCA has %d ports, but we only support %d, "
1024 			 "aborting.\n", dev->caps.num_ports, MLX4_MAX_PORTS);
1025 		return -ENODEV;
1026 	}
1027 
1028 	dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
1029 	dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
1030 	dev->caps.qp1_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
1031 	dev->caps.qp1_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
1032 
1033 	if (!dev->caps.qp0_tunnel || !dev->caps.qp0_proxy ||
1034 	    !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy) {
1035 		err = -ENOMEM;
1036 		goto err_mem;
1037 	}
1038 
1039 	for (i = 1; i <= dev->caps.num_ports; ++i) {
1040 		err = mlx4_QUERY_FUNC_CAP(dev, (u32) i, &func_cap);
1041 		if (err) {
1042 			mlx4_err(dev, "QUERY_FUNC_CAP port command failed for"
1043 				 " port %d, aborting (%d).\n", i, err);
1044 			goto err_mem;
1045 		}
1046 		dev->caps.qp0_tunnel[i - 1] = func_cap.qp0_tunnel_qpn;
1047 		dev->caps.qp0_proxy[i - 1] = func_cap.qp0_proxy_qpn;
1048 		dev->caps.qp1_tunnel[i - 1] = func_cap.qp1_tunnel_qpn;
1049 		dev->caps.qp1_proxy[i - 1] = func_cap.qp1_proxy_qpn;
1050 		dev->caps.def_counter_index[i - 1] = func_cap.def_counter_index;
1051 
1052 		dev->caps.port_mask[i] = dev->caps.port_type[i];
1053 		err = mlx4_get_slave_pkey_gid_tbl_len(dev, i,
1054 						      &dev->caps.gid_table_len[i],
1055 						      &dev->caps.pkey_table_len[i]);
1056 		if (err)
1057 			goto err_mem;
1058 	}
1059 
1060 	if (dev->caps.uar_page_size * (dev->caps.num_uars -
1061 				       dev->caps.reserved_uars) >
1062 				       pci_resource_len(dev->pdev, 2)) {
1063 		mlx4_err(dev, "HCA reported UAR region size of 0x%x bigger than "
1064 			 "PCI resource 2 size of 0x%llx, aborting.\n",
1065 			 dev->caps.uar_page_size * dev->caps.num_uars,
1066 			 (unsigned long long) pci_resource_len(dev->pdev, 2));
1067 		err = -ENOMEM;
1068 		goto err_mem;
1069 	}
1070 
1071 	if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_EQE_ENABLED) {
1072 		dev->caps.eqe_size   = 64;
1073 		dev->caps.eqe_factor = 1;
1074 	} else {
1075 		dev->caps.eqe_size   = 32;
1076 		dev->caps.eqe_factor = 0;
1077 	}
1078 
1079 	if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_CQE_ENABLED) {
1080 		dev->caps.cqe_size   = 64;
1081 		dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_64B_CQE;
1082 	} else {
1083 		dev->caps.cqe_size   = 32;
1084 	}
1085 
1086 	dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
1087 	mlx4_warn(dev, "Timestamping is not supported in slave mode.\n");
1088 
1089 	slave_adjust_steering_mode(dev, &dev_cap, &hca_param);
1090 
1091 	return 0;
1092 
1093 err_mem:
1094 	kfree(dev->caps.qp0_tunnel);
1095 	kfree(dev->caps.qp0_proxy);
1096 	kfree(dev->caps.qp1_tunnel);
1097 	kfree(dev->caps.qp1_proxy);
1098 	dev->caps.qp0_tunnel = dev->caps.qp0_proxy =
1099 		dev->caps.qp1_tunnel = dev->caps.qp1_proxy = NULL;
1100 
1101 	return err;
1102 }
1103 
1104 static void mlx4_request_modules(struct mlx4_dev *dev)
1105 {
1106 	int port;
1107 	int has_ib_port = false;
1108 	int has_eth_port = false;
1109 #define EN_DRV_NAME	"mlx4_en"
1110 #define IB_DRV_NAME	"mlx4_ib"
1111 
1112 	for (port = 1; port <= dev->caps.num_ports; port++) {
1113 		if (dev->caps.port_type[port] == MLX4_PORT_TYPE_IB)
1114 			has_ib_port = true;
1115 		else if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH)
1116 			has_eth_port = true;
1117 	}
1118 
1119 	if (has_ib_port)
1120 		request_module_nowait(IB_DRV_NAME);
1121 	if (has_eth_port)
1122 		request_module_nowait(EN_DRV_NAME);
1123 }
1124 
1125 /*
1126  * Change the port configuration of the device.
1127  * Every user of this function must hold the port mutex.
1128  */
1129 int mlx4_change_port_types(struct mlx4_dev *dev,
1130 			   enum mlx4_port_type *port_types)
1131 {
1132 	int err = 0;
1133 	int change = 0;
1134 	int port;
1135 
1136 	for (port = 0; port <  dev->caps.num_ports; port++) {
1137 		/* Change the port type only if the new type is different
1138 		 * from the current, and not set to Auto */
1139 		if (port_types[port] != dev->caps.port_type[port + 1])
1140 			change = 1;
1141 	}
1142 	if (change) {
1143 		mlx4_unregister_device(dev);
1144 		for (port = 1; port <= dev->caps.num_ports; port++) {
1145 			mlx4_CLOSE_PORT(dev, port);
1146 			dev->caps.port_type[port] = port_types[port - 1];
1147 			err = mlx4_SET_PORT(dev, port, -1);
1148 			if (err) {
1149 				mlx4_err(dev, "Failed to set port %d, "
1150 					      "aborting\n", port);
1151 				goto out;
1152 			}
1153 		}
1154 		mlx4_set_port_mask(dev);
1155 		err = mlx4_register_device(dev);
1156 		if (err) {
1157 			mlx4_err(dev, "Failed to register device\n");
1158 			goto out;
1159 		}
1160 		mlx4_request_modules(dev);
1161 	}
1162 
1163 out:
1164 	return err;
1165 }
1166 
1167 static ssize_t show_port_type(struct device *dev,
1168 			      struct device_attribute *attr,
1169 			      char *buf)
1170 {
1171 	struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
1172 						   port_attr);
1173 	struct mlx4_dev *mdev = info->dev;
1174 	char type[8];
1175 
1176 	sprintf(type, "%s",
1177 		(mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_IB) ?
1178 		"ib" : "eth");
1179 	if (mdev->caps.possible_type[info->port] == MLX4_PORT_TYPE_AUTO)
1180 		sprintf(buf, "auto (%s)\n", type);
1181 	else
1182 		sprintf(buf, "%s\n", type);
1183 
1184 	return strlen(buf);
1185 }
1186 
1187 static ssize_t set_port_type(struct device *dev,
1188 			     struct device_attribute *attr,
1189 			     const char *buf, size_t count)
1190 {
1191 	struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
1192 						   port_attr);
1193 	struct mlx4_dev *mdev = info->dev;
1194 	struct mlx4_priv *priv = mlx4_priv(mdev);
1195 	enum mlx4_port_type types[MLX4_MAX_PORTS];
1196 	enum mlx4_port_type new_types[MLX4_MAX_PORTS];
1197 	int i;
1198 	int err = 0;
1199 
1200 	if (!strcmp(buf, "ib\n"))
1201 		info->tmp_type = MLX4_PORT_TYPE_IB;
1202 	else if (!strcmp(buf, "eth\n"))
1203 		info->tmp_type = MLX4_PORT_TYPE_ETH;
1204 	else if (!strcmp(buf, "auto\n"))
1205 		info->tmp_type = MLX4_PORT_TYPE_AUTO;
1206 	else {
1207 		mlx4_err(mdev, "%s is not a supported port type\n", buf);
1208 		return -EINVAL;
1209 	}
1210 
1211 	if ((info->tmp_type & mdev->caps.supported_type[info->port]) !=
1212 	    info->tmp_type) {
1213 		mlx4_err(mdev, "Requested port type for port %d is not supported on this HCA\n",
1214 			 info->port);
1215 		return -EINVAL;
1216 	}
1217 
1218 	mlx4_stop_sense(mdev);
1219 	mutex_lock(&priv->port_mutex);
1220 	/* Possible type is always the one that was delivered */
1221 	mdev->caps.possible_type[info->port] = info->tmp_type;
1222 
1223 	for (i = 0; i < mdev->caps.num_ports; i++) {
1224 		types[i] = priv->port[i+1].tmp_type ? priv->port[i+1].tmp_type :
1225 					mdev->caps.possible_type[i+1];
1226 		if (types[i] == MLX4_PORT_TYPE_AUTO)
1227 			types[i] = mdev->caps.port_type[i+1];
1228 	}
1229 
1230 	if (!(mdev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) &&
1231 	    !(mdev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT)) {
1232 		for (i = 1; i <= mdev->caps.num_ports; i++) {
1233 			if (mdev->caps.possible_type[i] == MLX4_PORT_TYPE_AUTO) {
1234 				mdev->caps.possible_type[i] = mdev->caps.port_type[i];
1235 				err = -EINVAL;
1236 			}
1237 		}
1238 	}
1239 	if (err) {
1240 		mlx4_err(mdev, "Auto sensing is not supported on this HCA. "
1241 			       "Set only 'eth' or 'ib' for both ports "
1242 			       "(should be the same)\n");
1243 		goto out;
1244 	}
1245 
1246 	mlx4_do_sense_ports(mdev, new_types, types);
1247 
1248 	err = mlx4_check_port_params(mdev, new_types);
1249 	if (err)
1250 		goto out;
1251 
1252 	/* We are about to apply the changes after the configuration
1253 	 * was verified, no need to remember the temporary types
1254 	 * any more */
1255 	for (i = 0; i < mdev->caps.num_ports; i++)
1256 		priv->port[i + 1].tmp_type = 0;
1257 
1258 	err = mlx4_change_port_types(mdev, new_types);
1259 
1260 out:
1261 	mlx4_start_sense(mdev);
1262 	mutex_unlock(&priv->port_mutex);
1263 	return err ? err : count;
1264 }
1265 
1266 enum ibta_mtu {
1267 	IB_MTU_256  = 1,
1268 	IB_MTU_512  = 2,
1269 	IB_MTU_1024 = 3,
1270 	IB_MTU_2048 = 4,
1271 	IB_MTU_4096 = 5
1272 };
1273 
1274 static inline int int_to_ibta_mtu(int mtu)
1275 {
1276 	switch (mtu) {
1277 	case 256:  return IB_MTU_256;
1278 	case 512:  return IB_MTU_512;
1279 	case 1024: return IB_MTU_1024;
1280 	case 2048: return IB_MTU_2048;
1281 	case 4096: return IB_MTU_4096;
1282 	default: return -1;
1283 	}
1284 }
1285 
1286 static inline int ibta_mtu_to_int(enum ibta_mtu mtu)
1287 {
1288 	switch (mtu) {
1289 	case IB_MTU_256:  return  256;
1290 	case IB_MTU_512:  return  512;
1291 	case IB_MTU_1024: return 1024;
1292 	case IB_MTU_2048: return 2048;
1293 	case IB_MTU_4096: return 4096;
1294 	default: return -1;
1295 	}
1296 }
1297 
1298 static ssize_t
1299 show_board(struct device *device, struct device_attribute *attr,
1300 			  char *buf)
1301 {
1302 	struct mlx4_hca_info *info = container_of(attr, struct mlx4_hca_info,
1303 						   board_attr);
1304 	struct mlx4_dev *mdev = info->dev;
1305 
1306 	return sprintf(buf, "%.*s\n", MLX4_BOARD_ID_LEN,
1307 		       mdev->board_id);
1308 }
1309 
1310 static ssize_t
1311 show_hca(struct device *device, struct device_attribute *attr,
1312 			char *buf)
1313 {
1314 	struct mlx4_hca_info *info = container_of(attr, struct mlx4_hca_info,
1315 						   hca_attr);
1316 	struct mlx4_dev *mdev = info->dev;
1317 
1318 	return sprintf(buf, "MT%d\n", mdev->pdev->device);
1319 }
1320 
1321 static ssize_t
1322 show_firmware_version(struct device *dev,
1323 				struct device_attribute *attr,
1324 				char *buf)
1325 {
1326 	struct mlx4_hca_info *info = container_of(attr, struct mlx4_hca_info,
1327 						   firmware_attr);
1328 	struct mlx4_dev *mdev = info->dev;
1329 
1330 	return sprintf(buf, "%d.%d.%d\n", (int)(mdev->caps.fw_ver >> 32),
1331 		       (int)(mdev->caps.fw_ver >> 16) & 0xffff,
1332 		       (int)mdev->caps.fw_ver & 0xffff);
1333 }
1334 
1335 static ssize_t show_port_ib_mtu(struct device *dev,
1336 			     struct device_attribute *attr,
1337 			     char *buf)
1338 {
1339 	struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
1340 						   port_mtu_attr);
1341 	struct mlx4_dev *mdev = info->dev;
1342 
1343 	/* When port type is eth, port mtu value isn't used. */
1344 	if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH)
1345 		return -EINVAL;
1346 
1347 	sprintf(buf, "%d\n",
1348 			ibta_mtu_to_int(mdev->caps.port_ib_mtu[info->port]));
1349 	return strlen(buf);
1350 }
1351 
1352 static ssize_t set_port_ib_mtu(struct device *dev,
1353 			     struct device_attribute *attr,
1354 			     const char *buf, size_t count)
1355 {
1356 	struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
1357 						   port_mtu_attr);
1358 	struct mlx4_dev *mdev = info->dev;
1359 	struct mlx4_priv *priv = mlx4_priv(mdev);
1360 	int err, port, mtu, ibta_mtu = -1;
1361 
1362 	if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH) {
1363 		mlx4_warn(mdev, "port level mtu is only used for IB ports\n");
1364 		return -EINVAL;
1365 	}
1366 
1367 	mtu = (int) simple_strtol(buf, NULL, 0);
1368 	ibta_mtu = int_to_ibta_mtu(mtu);
1369 
1370 	if (ibta_mtu < 0) {
1371 		mlx4_err(mdev, "%s is not a valid IBTA MTU\n", buf);
1372 		return -EINVAL;
1373 	}
1374 
1375 	mdev->caps.port_ib_mtu[info->port] = ibta_mtu;
1376 
1377 	mlx4_stop_sense(mdev);
1378 	mutex_lock(&priv->port_mutex);
1379 	mlx4_unregister_device(mdev);
1380 	for (port = 1; port <= mdev->caps.num_ports; port++) {
1381 		mlx4_CLOSE_PORT(mdev, port);
1382 		err = mlx4_SET_PORT(mdev, port, -1);
1383 		if (err) {
1384 			mlx4_err(mdev, "Failed to set port %d, "
1385 				      "aborting\n", port);
1386 			goto err_set_port;
1387 		}
1388 	}
1389 	err = mlx4_register_device(mdev);
1390 err_set_port:
1391 	mutex_unlock(&priv->port_mutex);
1392 	mlx4_start_sense(mdev);
1393 	return err ? err : count;
1394 }
1395 
1396 static int mlx4_load_fw(struct mlx4_dev *dev)
1397 {
1398 	struct mlx4_priv *priv = mlx4_priv(dev);
1399 	int err, unmap_flag = 0;
1400 
1401 	priv->fw.fw_icm = mlx4_alloc_icm(dev, priv->fw.fw_pages,
1402 					 GFP_HIGHUSER | __GFP_NOWARN, 0);
1403 	if (!priv->fw.fw_icm) {
1404 		mlx4_err(dev, "Couldn't allocate FW area, aborting.\n");
1405 		return -ENOMEM;
1406 	}
1407 
1408 	err = mlx4_MAP_FA(dev, priv->fw.fw_icm);
1409 	if (err) {
1410 		mlx4_err(dev, "MAP_FA command failed, aborting.\n");
1411 		goto err_free;
1412 	}
1413 
1414 	err = mlx4_RUN_FW(dev);
1415 	if (err) {
1416 		mlx4_err(dev, "RUN_FW command failed, aborting.\n");
1417 		goto err_unmap_fa;
1418 	}
1419 
1420 	return 0;
1421 
1422 err_unmap_fa:
1423 	unmap_flag = mlx4_UNMAP_FA(dev);
1424 	if (unmap_flag)
1425 		pr_warn("mlx4_core: mlx4_UNMAP_FA failed.\n");
1426 
1427 err_free:
1428 	if (!unmap_flag)
1429 		mlx4_free_icm(dev, priv->fw.fw_icm, 0);
1430 	return err;
1431 }
1432 
1433 static int mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base,
1434 				int cmpt_entry_sz)
1435 {
1436 	struct mlx4_priv *priv = mlx4_priv(dev);
1437 	int err;
1438 	int num_eqs;
1439 
1440 	err = mlx4_init_icm_table(dev, &priv->qp_table.cmpt_table,
1441 				  cmpt_base +
1442 				  ((u64) (MLX4_CMPT_TYPE_QP *
1443 					  cmpt_entry_sz) << MLX4_CMPT_SHIFT),
1444 				  cmpt_entry_sz, dev->caps.num_qps,
1445 				  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
1446 				  0, 0);
1447 	if (err)
1448 		goto err;
1449 
1450 	err = mlx4_init_icm_table(dev, &priv->srq_table.cmpt_table,
1451 				  cmpt_base +
1452 				  ((u64) (MLX4_CMPT_TYPE_SRQ *
1453 					  cmpt_entry_sz) << MLX4_CMPT_SHIFT),
1454 				  cmpt_entry_sz, dev->caps.num_srqs,
1455 				  dev->caps.reserved_srqs, 0, 0);
1456 	if (err)
1457 		goto err_qp;
1458 
1459 	err = mlx4_init_icm_table(dev, &priv->cq_table.cmpt_table,
1460 				  cmpt_base +
1461 				  ((u64) (MLX4_CMPT_TYPE_CQ *
1462 					  cmpt_entry_sz) << MLX4_CMPT_SHIFT),
1463 				  cmpt_entry_sz, dev->caps.num_cqs,
1464 				  dev->caps.reserved_cqs, 0, 0);
1465 	if (err)
1466 		goto err_srq;
1467 
1468 	num_eqs = (mlx4_is_master(dev)) ? dev->phys_caps.num_phys_eqs :
1469 		  dev->caps.num_eqs;
1470 	err = mlx4_init_icm_table(dev, &priv->eq_table.cmpt_table,
1471 				  cmpt_base +
1472 				  ((u64) (MLX4_CMPT_TYPE_EQ *
1473 					  cmpt_entry_sz) << MLX4_CMPT_SHIFT),
1474 				  cmpt_entry_sz, num_eqs, num_eqs, 0, 0);
1475 	if (err)
1476 		goto err_cq;
1477 
1478 	return 0;
1479 
1480 err_cq:
1481 	mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);
1482 
1483 err_srq:
1484 	mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);
1485 
1486 err_qp:
1487 	mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);
1488 
1489 err:
1490 	return err;
1491 }
1492 
1493 static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
1494 			 struct mlx4_init_hca_param *init_hca, u64 icm_size)
1495 {
1496 	struct mlx4_priv *priv = mlx4_priv(dev);
1497 	u64 aux_pages;
1498 	int num_eqs;
1499 	int err, unmap_flag = 0;
1500 
1501 	err = mlx4_SET_ICM_SIZE(dev, icm_size, &aux_pages);
1502 	if (err) {
1503 		mlx4_err(dev, "SET_ICM_SIZE command failed, aborting.\n");
1504 		return err;
1505 	}
1506 
1507 	mlx4_dbg(dev, "%lld KB of HCA context requires %lld KB aux memory.\n",
1508 		 (unsigned long long) icm_size >> 10,
1509 		 (unsigned long long) aux_pages << 2);
1510 
1511 	priv->fw.aux_icm = mlx4_alloc_icm(dev, aux_pages,
1512 					  GFP_HIGHUSER | __GFP_NOWARN, 0);
1513 	if (!priv->fw.aux_icm) {
1514 		mlx4_err(dev, "Couldn't allocate aux memory, aborting.\n");
1515 		return -ENOMEM;
1516 	}
1517 
1518 	err = mlx4_MAP_ICM_AUX(dev, priv->fw.aux_icm);
1519 	if (err) {
1520 		mlx4_err(dev, "MAP_ICM_AUX command failed, aborting.\n");
1521 		goto err_free_aux;
1522 	}
1523 
1524 	err = mlx4_init_cmpt_table(dev, init_hca->cmpt_base, dev_cap->cmpt_entry_sz);
1525 	if (err) {
1526 		mlx4_err(dev, "Failed to map cMPT context memory, aborting.\n");
1527 		goto err_unmap_aux;
1528 	}
1529 
1530 
1531 	num_eqs = (mlx4_is_master(dev)) ? dev->phys_caps.num_phys_eqs :
1532 		   dev->caps.num_eqs;
1533 	err = mlx4_init_icm_table(dev, &priv->eq_table.table,
1534 				  init_hca->eqc_base, dev_cap->eqc_entry_sz,
1535 				  num_eqs, num_eqs, 0, 0);
1536 	if (err) {
1537 		mlx4_err(dev, "Failed to map EQ context memory, aborting.\n");
1538 		goto err_unmap_cmpt;
1539 	}
1540 
1541 	/*
1542 	 * Reserved MTT entries must be aligned up to a cacheline
1543 	 * boundary, since the FW will write to them, while the driver
1544 	 * writes to all other MTT entries. (The variable
1545 	 * dev->caps.mtt_entry_sz below is really the MTT segment
1546 	 * size, not the raw entry size)
1547 	 */
1548 	dev->caps.reserved_mtts =
1549 		ALIGN(dev->caps.reserved_mtts * dev->caps.mtt_entry_sz,
1550 		      dma_get_cache_alignment()) / dev->caps.mtt_entry_sz;
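	/*
	 * Example (editorial, hypothetical numbers): with a 32-byte MTT
	 * segment and a 64-byte cache line, reserved_mtts == 3 becomes
	 *
	 *	ALIGN(3 * 32, 64) / 32 == 128 / 32 == 4
	 *
	 * so the FW-owned entries end exactly on a cache line boundary.
	 */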
1551 
1552 	err = mlx4_init_icm_table(dev, &priv->mr_table.mtt_table,
1553 				  init_hca->mtt_base,
1554 				  dev->caps.mtt_entry_sz,
1555 				  dev->caps.num_mtts,
1556 				  dev->caps.reserved_mtts, 1, 0);
1557 	if (err) {
1558 		mlx4_err(dev, "Failed to map MTT context memory, aborting.\n");
1559 		goto err_unmap_eq;
1560 	}
1561 
1562 	err = mlx4_init_icm_table(dev, &priv->mr_table.dmpt_table,
1563 				  init_hca->dmpt_base,
1564 				  dev_cap->dmpt_entry_sz,
1565 				  dev->caps.num_mpts,
1566 				  dev->caps.reserved_mrws, 1, 1);
1567 	if (err) {
1568 		mlx4_err(dev, "Failed to map dMPT context memory, aborting.\n");
1569 		goto err_unmap_mtt;
1570 	}
1571 
1572 	err = mlx4_init_icm_table(dev, &priv->qp_table.qp_table,
1573 				  init_hca->qpc_base,
1574 				  dev_cap->qpc_entry_sz,
1575 				  dev->caps.num_qps,
1576 				  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
1577 				  0, 0);
1578 	if (err) {
1579 		mlx4_err(dev, "Failed to map QP context memory, aborting.\n");
1580 		goto err_unmap_dmpt;
1581 	}
1582 
1583 	err = mlx4_init_icm_table(dev, &priv->qp_table.auxc_table,
1584 				  init_hca->auxc_base,
1585 				  dev_cap->aux_entry_sz,
1586 				  dev->caps.num_qps,
1587 				  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
1588 				  0, 0);
1589 	if (err) {
1590 		mlx4_err(dev, "Failed to map AUXC context memory, aborting.\n");
1591 		goto err_unmap_qp;
1592 	}
1593 
1594 	err = mlx4_init_icm_table(dev, &priv->qp_table.altc_table,
1595 				  init_hca->altc_base,
1596 				  dev_cap->altc_entry_sz,
1597 				  dev->caps.num_qps,
1598 				  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
1599 				  0, 0);
1600 	if (err) {
1601 		mlx4_err(dev, "Failed to map ALTC context memory, aborting.\n");
1602 		goto err_unmap_auxc;
1603 	}
1604 
1605 	err = mlx4_init_icm_table(dev, &priv->qp_table.rdmarc_table,
1606 				  init_hca->rdmarc_base,
1607 				  dev_cap->rdmarc_entry_sz << priv->qp_table.rdmarc_shift,
1608 				  dev->caps.num_qps,
1609 				  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
1610 				  0, 0);
1611 	if (err) {
1612 		mlx4_err(dev, "Failed to map RDMARC context memory, aborting\n");
1613 		goto err_unmap_altc;
1614 	}
1615 
1616 	err = mlx4_init_icm_table(dev, &priv->cq_table.table,
1617 				  init_hca->cqc_base,
1618 				  dev_cap->cqc_entry_sz,
1619 				  dev->caps.num_cqs,
1620 				  dev->caps.reserved_cqs, 0, 0);
1621 	if (err) {
1622 		mlx4_err(dev, "Failed to map CQ context memory, aborting.\n");
1623 		goto err_unmap_rdmarc;
1624 	}
1625 
1626 	err = mlx4_init_icm_table(dev, &priv->srq_table.table,
1627 				  init_hca->srqc_base,
1628 				  dev_cap->srq_entry_sz,
1629 				  dev->caps.num_srqs,
1630 				  dev->caps.reserved_srqs, 0, 0);
1631 	if (err) {
1632 		mlx4_err(dev, "Failed to map SRQ context memory, aborting.\n");
1633 		goto err_unmap_cq;
1634 	}
1635 
1636 	/*
1637 	 * For flow steering device managed mode it is required to use
1638 	 * mlx4_init_icm_table. For B0 steering mode it's not strictly
1639 	 * required, but for simplicity just map the whole multicast
1640 	 * group table now.  The table isn't very big and it's a lot
1641 	 * easier than trying to track ref counts.
1642 	 */
1643 	err = mlx4_init_icm_table(dev, &priv->mcg_table.table,
1644 				  init_hca->mc_base,
1645 				  mlx4_get_mgm_entry_size(dev),
1646 				  dev->caps.num_mgms + dev->caps.num_amgms,
1647 				  dev->caps.num_mgms + dev->caps.num_amgms,
1648 				  0, 0);
1649 	if (err) {
1650 		mlx4_err(dev, "Failed to map MCG context memory, aborting.\n");
1651 		goto err_unmap_srq;
1652 	}
1653 
1654 	return 0;
1655 
1656 err_unmap_srq:
1657 	mlx4_cleanup_icm_table(dev, &priv->srq_table.table);
1658 
1659 err_unmap_cq:
1660 	mlx4_cleanup_icm_table(dev, &priv->cq_table.table);
1661 
1662 err_unmap_rdmarc:
1663 	mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table);
1664 
1665 err_unmap_altc:
1666 	mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table);
1667 
1668 err_unmap_auxc:
1669 	mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table);
1670 
1671 err_unmap_qp:
1672 	mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table);
1673 
1674 err_unmap_dmpt:
1675 	mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table);
1676 
1677 err_unmap_mtt:
1678 	mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table);
1679 
1680 err_unmap_eq:
1681 	mlx4_cleanup_icm_table(dev, &priv->eq_table.table);
1682 
1683 err_unmap_cmpt:
1684 	mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table);
1685 	mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);
1686 	mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);
1687 	mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);
1688 
1689 err_unmap_aux:
1690 	unmap_flag = mlx4_UNMAP_ICM_AUX(dev);
1691 	if (unmap_flag)
1692 		pr_warn("mlx4_core: mlx4_UNMAP_ICM_AUX failed.\n");
1693 
1694 err_free_aux:
1695 	if (!unmap_flag)
1696 		mlx4_free_icm(dev, priv->fw.aux_icm, 0);
1697 
1698 	return err;
1699 }
1700 
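/*
 * Tear down every ICM table in the reverse order of mlx4_init_icm(),
 * then unmap the auxiliary ICM area and, only if the unmap succeeded,
 * free its backing memory.
 */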
1701 static void mlx4_free_icms(struct mlx4_dev *dev)
1702 {
1703 	struct mlx4_priv *priv = mlx4_priv(dev);
1704 
1705 	mlx4_cleanup_icm_table(dev, &priv->mcg_table.table);
1706 	mlx4_cleanup_icm_table(dev, &priv->srq_table.table);
1707 	mlx4_cleanup_icm_table(dev, &priv->cq_table.table);
1708 	mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table);
1709 	mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table);
1710 	mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table);
1711 	mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table);
1712 	mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table);
1713 	mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table);
1714 	mlx4_cleanup_icm_table(dev, &priv->eq_table.table);
1715 	mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table);
1716 	mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);
1717 	mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);
1718 	mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);
1719 
1720 	if (!mlx4_UNMAP_ICM_AUX(dev))
1721 		mlx4_free_icm(dev, priv->fw.aux_icm, 0);
1722 	else
1723 		pr_warn("mlx4_core: mlx4_UNMAP_ICM_AUX failed.\n");
1724 }
1725 
1726 static void mlx4_slave_exit(struct mlx4_dev *dev)
1727 {
1728 	struct mlx4_priv *priv = mlx4_priv(dev);
1729 
1730 	mutex_lock(&priv->cmd.slave_cmd_mutex);
1731 	if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_TIME))
1732 		mlx4_warn(dev, "Failed to close slave function.\n");
1733 	mutex_unlock(&priv->cmd.slave_cmd_mutex);
1734 }
1735 
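/*
 * Map the BlueFlame region -- the part of BAR 2 beyond the UAR pages --
 * with write-combining so BlueFlame doorbell writes can be posted
 * efficiently.  Returns -ENXIO when the device reports no BlueFlame
 * support (bf_reg_size == 0).
 */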
1736 static int map_bf_area(struct mlx4_dev *dev)
1737 {
1738 	struct mlx4_priv *priv = mlx4_priv(dev);
1739 	resource_size_t bf_start;
1740 	resource_size_t bf_len;
1741 	int err = 0;
1742 
1743 	if (!dev->caps.bf_reg_size)
1744 		return -ENXIO;
1745 
1746 	bf_start = pci_resource_start(dev->pdev, 2) +
1747 			(dev->caps.num_uars << PAGE_SHIFT);
1748 	bf_len = pci_resource_len(dev->pdev, 2) -
1749 			(dev->caps.num_uars << PAGE_SHIFT);
1750 	priv->bf_mapping = io_mapping_create_wc(bf_start, bf_len);
1751 	if (!priv->bf_mapping)
1752 		err = -ENOMEM;
1753 
1754 	return err;
1755 }
1756 
1757 static void unmap_bf_area(struct mlx4_dev *dev)
1758 {
1759 	if (mlx4_priv(dev)->bf_mapping)
1760 		io_mapping_free(mlx4_priv(dev)->bf_mapping);
1761 }
1762 
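/*
 * Read the free-running 64-bit internal clock from the mapped clock
 * page.  The high word is sampled before and after the low word, and
 * the read is retried (up to ten times) until both samples match, so
 * a low-word wraparound between the reads cannot go unnoticed.
 */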
1763 int mlx4_read_clock(struct mlx4_dev *dev)
1764 {
1765 	u32 clockhi, clocklo, clockhi1;
1766 	cycle_t cycles;
1767 	int i;
1768 	struct mlx4_priv *priv = mlx4_priv(dev);
1769 
1770 	if (!priv->clock_mapping)
1771 		return -ENOTSUPP;
1772 
1773 	for (i = 0; i < 10; i++) {
1774 		clockhi = swab32(readl(priv->clock_mapping));
1775 		clocklo = swab32(readl(priv->clock_mapping + 4));
1776 		clockhi1 = swab32(readl(priv->clock_mapping));
1777 		if (clockhi == clockhi1)
1778 			break;
1779 	}
1780 
1781 	cycles = (u64) clockhi << 32 | (u64) clocklo;
1782 
1783 	return cycles;
1784 }
1785 EXPORT_SYMBOL_GPL(mlx4_read_clock);
1786 
1787 
1788 static int map_internal_clock(struct mlx4_dev *dev)
1789 {
1790 	struct mlx4_priv *priv = mlx4_priv(dev);
1791 
1792 	priv->clock_mapping = ioremap(pci_resource_start(dev->pdev,
1793 				priv->fw.clock_bar) +
1794 				priv->fw.clock_offset, MLX4_CLOCK_SIZE);
1795 
1796 	if (!priv->clock_mapping)
1797 		return -ENOMEM;
1798 
1799 	return 0;
1800 }
1801 
1802 
1803 int mlx4_get_internal_clock_params(struct mlx4_dev *dev,
1804 				   struct mlx4_clock_params *params)
1805 {
1806 	struct mlx4_priv *priv = mlx4_priv(dev);
1807 
1808 	if (mlx4_is_slave(dev))
1809 		return -ENOTSUPP;
1810 	if (!params)
1811 		return -EINVAL;
1812 
1813 	params->bar = priv->fw.clock_bar;
1814 	params->offset = priv->fw.clock_offset;
1815 	params->size = MLX4_CLOCK_SIZE;
1816 
1817 	return 0;
1818 }
1819 EXPORT_SYMBOL_GPL(mlx4_get_internal_clock_params);
1820 
1821 static void unmap_internal_clock(struct mlx4_dev *dev)
1822 {
1823 	struct mlx4_priv *priv = mlx4_priv(dev);
1824 
1825 	if (priv->clock_mapping)
1826 		iounmap(priv->clock_mapping);
1827 }
1828 
1829 static void mlx4_close_hca(struct mlx4_dev *dev)
1830 {
1831 	unmap_internal_clock(dev);
1832 	unmap_bf_area(dev);
1833 	if (mlx4_is_slave(dev)) {
1834 		mlx4_slave_exit(dev);
1835 	} else {
1836 		mlx4_CLOSE_HCA(dev, 0);
1837 		mlx4_free_icms(dev);
1838 
1839 		if (!mlx4_UNMAP_FA(dev))
1840 			 mlx4_free_icm(dev, mlx4_priv(dev)->fw.fw_icm, 0);
1841 		else
1842 			pr_warn("mlx4_core: mlx4_UNMAP_FA failed.\n");
1843 	}
1844 }
1845 
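/*
 * Slave (VF) startup handshake over the comm channel: reset the slave
 * (retrying while an FLR is still in progress), check that the slave's
 * command-interface revision matches the master's, then hand the VHCR
 * DMA address to the master 16 bits at a time (VHCR0..VHCR2, then
 * VHCR_EN with the lowest bits).
 */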
1846 static int mlx4_init_slave(struct mlx4_dev *dev)
1847 {
1848 	struct mlx4_priv *priv = mlx4_priv(dev);
1849 	u64 dma = (u64) priv->mfunc.vhcr_dma;
1850 	int num_of_reset_retries = NUM_OF_RESET_RETRIES;
1851 	int ret_from_reset = 0;
1852 	u32 slave_read;
1853 	u32 cmd_channel_ver;
1854 
1855 	mutex_lock(&priv->cmd.slave_cmd_mutex);
1856 	priv->cmd.max_cmds = 1;
1857 	mlx4_warn(dev, "Sending reset\n");
1858 	ret_from_reset = mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0,
1859 				       MLX4_COMM_TIME);
1860 	/* If we are in the middle of an FLR, the slave will retry up to
1861 	 * NUM_OF_RESET_RETRIES times before giving up. */
1862 	if (ret_from_reset) {
1863 		if (MLX4_DELAY_RESET_SLAVE == ret_from_reset) {
1864 			msleep(SLEEP_TIME_IN_RESET);
1865 			while (ret_from_reset && num_of_reset_retries) {
1866 				mlx4_warn(dev, "slave is currently in the "
1867 					  "middle of FLR, retrying... "
1868 					  "(try num: %d)\n",
1869 					  (NUM_OF_RESET_RETRIES -
1870 					   num_of_reset_retries + 1));
1871 				ret_from_reset =
1872 					mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET,
1873 						      0, MLX4_COMM_TIME);
1874 				num_of_reset_retries = num_of_reset_retries - 1;
1875 			}
1876 		} else
1877 			goto err;
1878 	}
1879 
1880 	/* check the driver version - the slave I/F revision
1881 	 * must match the master's */
1882 	slave_read = swab32(readl(&priv->mfunc.comm->slave_read));
1883 	cmd_channel_ver = mlx4_comm_get_version();
1884 
1885 	if (MLX4_COMM_GET_IF_REV(cmd_channel_ver) !=
1886 		MLX4_COMM_GET_IF_REV(slave_read)) {
1887 		mlx4_err(dev, "slave driver version is not supported"
1888 			 " by the master\n");
1889 		goto err;
1890 	}
1891 
1892 	mlx4_warn(dev, "Sending vhcr0\n");
1893 	if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR0, dma >> 48,
1894 						    MLX4_COMM_TIME))
1895 		goto err;
1896 	if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR1, dma >> 32,
1897 						    MLX4_COMM_TIME))
1898 		goto err;
1899 	if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR2, dma >> 16,
1900 						    MLX4_COMM_TIME))
1901 		goto err;
1902 	if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_EN, dma, MLX4_COMM_TIME))
1903 		goto err;
1904 
1905 	mutex_unlock(&priv->cmd.slave_cmd_mutex);
1906 	return 0;
1907 
1908 err:
1909 	mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, 0);
1910 	mutex_unlock(&priv->cmd.slave_cmd_mutex);
1911 	return -EIO;
1912 }
1913 
1914 static void mlx4_parav_master_pf_caps(struct mlx4_dev *dev)
1915 {
1916 	int i;
1917 
1918 	for (i = 1; i <= dev->caps.num_ports; i++) {
1919 		if (dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH)
1920 			dev->caps.gid_table_len[i] =
1921 				mlx4_get_slave_num_gids(dev, 0);
1922 		else
1923 			dev->caps.gid_table_len[i] = 1;
1924 		dev->caps.pkey_table_len[i] =
1925 			dev->phys_caps.pkey_phys_table_len[i] - 1;
1926 	}
1927 }
1928 
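/*
 * Return the smallest log2 MGM entry size whose entry can hold
 * qp_per_entry QPs, or -1 if even MLX4_MAX_MGM_LOG_ENTRY_SIZE is too
 * small.
 */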
1929 static int choose_log_fs_mgm_entry_size(int qp_per_entry)
1930 {
1931 	int i;
1932 
1933 	for (i = MLX4_MIN_MGM_LOG_ENTRY_SIZE; i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE;
1934 	      i++) {
1935 		if (qp_per_entry <= 4 * ((1 << i) / 16 - 2))
1936 			break;
1937 	}
1938 
1939 	return (i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE) ? i : -1;
1940 }
1941 
1942 static void choose_steering_mode(struct mlx4_dev *dev,
1943 				 struct mlx4_dev_cap *dev_cap)
1944 {
1945 	int nvfs;
1946 
1947 	mlx4_get_val(num_vfs.dbdf2val.tbl, pci_physfn(dev->pdev), 0, &nvfs);
1948 	if (high_rate_steer && !mlx4_is_mfunc(dev)) {
1949 		dev->caps.flags &= ~(MLX4_DEV_CAP_FLAG_VEP_MC_STEER |
1950 				     MLX4_DEV_CAP_FLAG_VEP_UC_STEER);
1951 		dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_FS_EN;
1952 	}
1953 
1954 	if (mlx4_log_num_mgm_entry_size == -1 &&
1955 	    dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_FS_EN &&
1956 	    (!mlx4_is_mfunc(dev) ||
1957 	     (dev_cap->fs_max_num_qp_per_entry >= (nvfs + 1))) &&
1958 	    choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry) >=
1959 		MLX4_MIN_MGM_LOG_ENTRY_SIZE) {
1960 		dev->oper_log_mgm_entry_size =
1961 			choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry);
1962 		dev->caps.steering_mode = MLX4_STEERING_MODE_DEVICE_MANAGED;
1963 		dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry;
1964 	} else {
1965 		if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER &&
1966 		    dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER)
1967 			dev->caps.steering_mode = MLX4_STEERING_MODE_B0;
1968 		else {
1969 			dev->caps.steering_mode = MLX4_STEERING_MODE_A0;
1970 
1971 			if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER ||
1972 			    dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER)
1973 				mlx4_warn(dev, "Must have both UC_STEER and MC_STEER flags "
1974 					  "set to use B0 steering. Falling back to A0 steering mode.\n");
1975 		}
1976 		dev->oper_log_mgm_entry_size =
1977 			mlx4_log_num_mgm_entry_size > 0 ?
1978 			mlx4_log_num_mgm_entry_size :
1979 			MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE;
1980 		dev->caps.num_qp_per_mgm = mlx4_get_qp_per_mgm(dev);
1981 	}
1982 	mlx4_dbg(dev, "Steering mode is: %s, oper_log_mgm_entry_size = %d, "
1983 		 "log_num_mgm_entry_size = %d\n",
1984 		 mlx4_steering_mode_str(dev->caps.steering_mode),
1985 		 dev->oper_log_mgm_entry_size, mlx4_log_num_mgm_entry_size);
1986 }
1987 
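/*
 * Bring the HCA to operational state.  On the PF this means querying
 * and loading firmware, building an ICM profile from the device caps,
 * mapping the ICM and issuing INIT_HCA (plus mapping the internal
 * clock when timestamping is supported).  On a VF it means running the
 * slave handshake and taking the caps from the master.  Both paths
 * then map BlueFlame and query the adapter for board identification.
 */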
1988 static int mlx4_init_hca(struct mlx4_dev *dev)
1989 {
1990 	struct mlx4_priv	  *priv = mlx4_priv(dev);
1991 	struct mlx4_dev_cap	   *dev_cap = NULL;
1992 	struct mlx4_adapter	   adapter;
1993 	struct mlx4_mod_stat_cfg   mlx4_cfg;
1994 	struct mlx4_profile	   profile;
1995 	struct mlx4_init_hca_param init_hca;
1996 	u64 icm_size;
1997 	int err;
1998 
1999 	if (!mlx4_is_slave(dev)) {
2000 		err = mlx4_QUERY_FW(dev);
2001 		if (err) {
2002 			if (err == -EACCES)
2003 				mlx4_info(dev, "non-primary physical function, skipping.\n");
2004 			else
2005 				mlx4_err(dev, "QUERY_FW command failed, aborting.\n");
2006 			return err;
2007 		}
2008 
2009 		err = mlx4_load_fw(dev);
2010 		if (err) {
2011 			mlx4_err(dev, "Failed to start FW, aborting.\n");
2012 			return err;
2013 		}
2014 
2015 		mlx4_cfg.log_pg_sz_m = 1;
2016 		mlx4_cfg.log_pg_sz = 0;
2017 		err = mlx4_MOD_STAT_CFG(dev, &mlx4_cfg);
2018 		if (err)
2019 			mlx4_warn(dev, "Failed to override log_pg_sz parameter\n");
2020 
2021 		dev_cap = kzalloc(sizeof *dev_cap, GFP_KERNEL);
2022 		if (!dev_cap) {
2023 			mlx4_err(dev, "Failed to allocate memory for dev_cap\n");
2024 			err = -ENOMEM;
2025 			goto err_stop_fw;
2026 		}
2027 
2028 		err = mlx4_dev_cap(dev, dev_cap);
2029 		if (err) {
2030 			mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
2031 			goto err_stop_fw;
2032 		}
2033 
2034 		choose_steering_mode(dev, dev_cap);
2035 
2036 		if (mlx4_is_master(dev))
2037 			mlx4_parav_master_pf_caps(dev);
2038 
2039 		process_mod_param_profile(&profile);
2040 		if (dev->caps.steering_mode ==
2041 		    MLX4_STEERING_MODE_DEVICE_MANAGED)
2042 			profile.num_mcg = MLX4_FS_NUM_MCG;
2043 
2044 		icm_size = mlx4_make_profile(dev, &profile, dev_cap,
2045 					     &init_hca);
2046 		if ((long long) icm_size < 0) {
2047 			err = icm_size;
2048 			goto err_stop_fw;
2049 		}
2050 
2051 		dev->caps.max_fmr_maps = (1 << (32 - ilog2(dev->caps.num_mpts))) - 1;
2052 
2053 		init_hca.log_uar_sz = ilog2(dev->caps.num_uars);
2054 		init_hca.uar_page_sz = PAGE_SHIFT - 12;
2055 
2056 		err = mlx4_init_icm(dev, dev_cap, &init_hca, icm_size);
2057 		if (err)
2058 			goto err_stop_fw;
2059 
2060 		init_hca.mw_enable = 1;
2061 
2062 		err = mlx4_INIT_HCA(dev, &init_hca);
2063 		if (err) {
2064 			mlx4_err(dev, "INIT_HCA command failed, aborting.\n");
2065 			goto err_free_icm;
2066 		}
2067 
2068 		/*
2069 		 * Read the HCA core-clock frequency via the QUERY_HCA command
2070 		 */
2071 		if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) {
2072 			memset(&init_hca, 0, sizeof(init_hca));
2073 			err = mlx4_QUERY_HCA(dev, &init_hca);
2074 			if (err) {
2075 				mlx4_err(dev, "QUERY_HCA command failed, disabling timestamping.\n");
2076 				dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
2077 			} else {
2078 				dev->caps.hca_core_clock =
2079 					init_hca.hca_core_clock;
2080 			}
2081 
2082 			/* In case we got HCA frequency 0 - disable timestamping
2083 			 * to avoid dividing by zero
2084 			 */
2085 			if (!dev->caps.hca_core_clock) {
2086 				dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
2087 				mlx4_err(dev, "HCA frequency is 0. Timestamping is not supported.\n");
2088 			} else if (map_internal_clock(dev)) {
2089 				/* Map internal clock,
2090 				 * in case of failure disable timestamping
2091 				 */
2092 				dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
2093 				mlx4_err(dev, "Failed to map internal clock. Timestamping is not supported.\n");
2094 			}
2095 		}
2096 	} else {
2097 		err = mlx4_init_slave(dev);
2098 		if (err) {
2099 			mlx4_err(dev, "Failed to initialize slave\n");
2100 			return err;
2101 		}
2102 
2103 		err = mlx4_slave_cap(dev);
2104 		if (err) {
2105 			mlx4_err(dev, "Failed to obtain slave caps\n");
2106 			goto err_close;
2107 		}
2108 	}
2109 
2110 	if (map_bf_area(dev))
2111 		mlx4_dbg(dev, "Failed to map blue flame area\n");
2112 
2113 	/* Only the master sets the ports; all the rest get the port mask from it. */
2114 	if (!mlx4_is_slave(dev))
2115 		mlx4_set_port_mask(dev);
2116 
2117 	err = mlx4_QUERY_ADAPTER(dev, &adapter);
2118 	if (err) {
2119 		mlx4_err(dev, "QUERY_ADAPTER command failed, aborting.\n");
2120 		goto unmap_bf;
2121 	}
2122 
2123 	priv->eq_table.inta_pin = adapter.inta_pin;
2124 	memcpy(dev->board_id, adapter.board_id, sizeof dev->board_id);
2125 	memcpy(dev->vsd, adapter.vsd, sizeof(dev->vsd));
2126 	dev->vsd_vendor_id = adapter.vsd_vendor_id;
2127 
2128 	if (!mlx4_is_slave(dev))
2129 		kfree(dev_cap);
2130 
2131 	return 0;
2132 
2133 unmap_bf:
2134 	if (!mlx4_is_slave(dev))
2135 		unmap_internal_clock(dev);
2136 	unmap_bf_area(dev);
2137 
2138 	if (mlx4_is_slave(dev)) {
2139 		kfree(dev->caps.qp0_tunnel);
2140 		kfree(dev->caps.qp0_proxy);
2141 		kfree(dev->caps.qp1_tunnel);
2142 		kfree(dev->caps.qp1_proxy);
2143 	}
2144 
2145 err_close:
2146 	if (mlx4_is_slave(dev))
2147 		mlx4_slave_exit(dev);
2148 	else
2149 		mlx4_CLOSE_HCA(dev, 0);
2150 
2151 err_free_icm:
2152 	if (!mlx4_is_slave(dev))
2153 		mlx4_free_icms(dev);
2154 
2155 err_stop_fw:
2156 	if (!mlx4_is_slave(dev)) {
2157 		if (!mlx4_UNMAP_FA(dev))
2158 			mlx4_free_icm(dev, priv->fw.fw_icm, 0);
2159 		else
2160 			pr_warn("mlx4_core: mlx4_UNMAP_FA failed.\n");
2161 		kfree(dev_cap);
2162 	}
2163 	return err;
2164 }
2165 
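/*
 * Build the per-port counter bookkeeping.  A slave only records the
 * default counter indexes it was assigned.  The PF reserves two
 * counters per port for itself, gives every VF a default counter per
 * port (falling back to the sink counter once the range is exhausted)
 * and hands the remaining range to a bitmap allocator.
 */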
2166 static int mlx4_init_counters_table(struct mlx4_dev *dev)
2167 {
2168 	struct mlx4_priv *priv = mlx4_priv(dev);
2169 	int nent_pow2, port_indx, vf_index, num_counters;
2170 	int res, index = 0;
2171 	struct counter_index *new_counter_index;
2172 
2174 	if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
2175 		return -ENOENT;
2176 
2177 	if (!mlx4_is_slave(dev) &&
2178 	    dev->caps.max_counters == dev->caps.max_extended_counters) {
2179 		res = mlx4_cmd(dev, MLX4_IF_STATE_EXTENDED, 0, 0,
2180 			       MLX4_CMD_SET_IF_STAT,
2181 			       MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
2182 		if (res) {
2183 			mlx4_err(dev, "Failed to set extended counters (err=%d)\n", res);
2184 			return res;
2185 		}
2186 	}
2187 
2188 	mutex_init(&priv->counters_table.mutex);
2189 
2190 	if (mlx4_is_slave(dev)) {
2191 		for (port_indx = 0; port_indx < dev->caps.num_ports; port_indx++) {
2192 			INIT_LIST_HEAD(&priv->counters_table.global_port_list[port_indx]);
2193 			if (dev->caps.def_counter_index[port_indx] != 0xFF) {
2194 				new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL);
2195 				if (!new_counter_index)
2196 					return -ENOMEM;
2197 				new_counter_index->index = dev->caps.def_counter_index[port_indx];
2198 				list_add_tail(&new_counter_index->list, &priv->counters_table.global_port_list[port_indx]);
2199 			}
2200 		}
2201 		mlx4_dbg(dev, "%s: slave allocated %d counters for %d ports\n",
2202 			 __func__, dev->caps.num_ports, dev->caps.num_ports);
2203 		return 0;
2204 	}
2205 
2206 	nent_pow2 = roundup_pow_of_two(dev->caps.max_counters);
2207 
2208 	for (port_indx = 0; port_indx < dev->caps.num_ports; port_indx++) {
2209 		INIT_LIST_HEAD(&priv->counters_table.global_port_list[port_indx]);
2210 		/* Allocating 2 counters per port for the PF: */
2211 		/* the ETH default counters are 0,2; */
2212 		/* and the RoCE default counters are 1,3. */
2213 		for (num_counters = 0; num_counters < 2; num_counters++, index++) {
2214 			new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL);
2215 			if (!new_counter_index)
2216 				return -ENOMEM;
2217 			new_counter_index->index = index;
2218 			list_add_tail(&new_counter_index->list,
2219 				      &priv->counters_table.global_port_list[port_indx]);
2220 		}
2221 	}
2222 
2223 	if (mlx4_is_master(dev)) {
2224 		for (vf_index = 0; vf_index < dev->num_vfs; vf_index++) {
2225 			for (port_indx = 0; port_indx < dev->caps.num_ports; port_indx++) {
2226 				INIT_LIST_HEAD(&priv->counters_table.vf_list[vf_index][port_indx]);
2227 				new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL);
2228 				if (!new_counter_index)
2229 					return -ENOMEM;
2230 				if (index < nent_pow2 - 2) {
2231 					new_counter_index->index = index;
2232 					index++;
2233 				} else {
2234 					new_counter_index->index = MLX4_SINK_COUNTER_INDEX;
2235 				}
2236 
2237 				list_add_tail(&new_counter_index->list,
2238 					      &priv->counters_table.vf_list[vf_index][port_indx]);
2239 			}
2240 		}
2241 
2242 		res = mlx4_bitmap_init(&priv->counters_table.bitmap,
2243 				       nent_pow2, nent_pow2 - 1,
2244 				       index, 1);
2245 		mlx4_dbg(dev, "%s: master allocated %d counters for %d VFs\n",
2246 			 __func__, index, dev->num_vfs);
2247 	} else {
2248 		res = mlx4_bitmap_init(&priv->counters_table.bitmap,
2249 				nent_pow2, nent_pow2 - 1,
2250 				index, 1);
2251 		mlx4_dbg(dev, "%s: native allocated %d counters for %d ports\n",
2252 			 __func__, index, dev->caps.num_ports);
2253 	}
2254 
2255 	return res;
2257 }
2258 
2259 static void mlx4_cleanup_counters_table(struct mlx4_dev *dev)
2260 {
2261 	struct mlx4_priv *priv = mlx4_priv(dev);
2262 	int i, j;
2263 	struct counter_index *port, *tmp_port;
2264 	struct counter_index *vf, *tmp_vf;
2265 
2266 	mutex_lock(&priv->counters_table.mutex);
2267 
2268 	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS) {
2269 		for (i = 0; i < dev->caps.num_ports; i++) {
2270 			list_for_each_entry_safe(port, tmp_port,
2271 						 &priv->counters_table.global_port_list[i],
2272 						 list) {
2273 				list_del(&port->list);
2274 				kfree(port);
2275 			}
2276 		}
2277 		if (!mlx4_is_slave(dev)) {
2278 			for (i = 0; i < dev->num_vfs; i++) {
2279 				for (j = 0; j < dev->caps.num_ports; j++) {
2280 					list_for_each_entry_safe(vf, tmp_vf,
2281 								 &priv->counters_table.vf_list[i][j],
2282 								 list) {
2283 						/* clear the counter statistic */
2284 						if (__mlx4_clear_if_stat(dev, vf->index))
2285 							mlx4_dbg(dev, "%s: reset counter %d failed\n",
2286 								 __func__, vf->index);
2287 						list_del(&vf->list);
2288 						kfree(vf);
2289 					}
2290 				}
2291 			}
2292 			mlx4_bitmap_cleanup(&priv->counters_table.bitmap);
2293 		}
2294 	}
2295 	mutex_unlock(&priv->counters_table.mutex);
2296 }
2297 
2298 int __mlx4_slave_counters_free(struct mlx4_dev *dev, int slave)
2299 {
2300 	struct mlx4_priv *priv = mlx4_priv(dev);
2301 	int i, first;
2302 	struct counter_index *vf, *tmp_vf;
2303 
2304 	/* clean the VF's counters for the next usage */
2305 	if (slave > 0 && slave <= dev->num_vfs) {
2306 		mlx4_dbg(dev, "%s: free counters of slave(%d)\n"
2307 			 , __func__, slave);
2308 
2309 		mutex_lock(&priv->counters_table.mutex);
2310 		for (i = 0; i < dev->caps.num_ports; i++) {
2311 			first = 0;
2312 			list_for_each_entry_safe(vf, tmp_vf,
2313 						 &priv->counters_table.vf_list[slave - 1][i],
2314 						 list) {
2315 				/* clear the counter statistic */
2316 				if (__mlx4_clear_if_stat(dev, vf->index))
2317 					mlx4_dbg(dev, "%s: reset counter %d failed\n",
2318 						 __func__, vf->index);
2319 				if (first++ && vf->index != MLX4_SINK_COUNTER_INDEX) {
2320 					mlx4_dbg(dev, "%s: delete counter index %d for slave %d and port %d\n"
2321 						 , __func__, vf->index, slave, i + 1);
2322 					mlx4_bitmap_free(&priv->counters_table.bitmap, vf->index, MLX4_USE_RR);
2323 					list_del(&vf->list);
2324 					kfree(vf);
2325 				} else {
2326 					mlx4_dbg(dev, "%s: can't delete default counter index %d for slave %d and port %d\n"
2327 						 , __func__, vf->index, slave, i + 1);
2328 				}
2329 			}
2330 		}
2331 		mutex_unlock(&priv->counters_table.mutex);
2332 	}
2333 
2334 	return 0;
2335 }
2336 
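/*
 * Allocate a counter index for (slave, port) from the bitmap.  When
 * the bitmap is exhausted, the default counter already on the slave's
 * (or PF's) per-port list is returned instead, so the call only fails
 * on invalid arguments or memory exhaustion.
 */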
2337 int __mlx4_counter_alloc(struct mlx4_dev *dev, int slave, int port, u32 *idx)
2338 {
2339 	struct mlx4_priv *priv = mlx4_priv(dev);
2340 	struct counter_index *new_counter_index;
2341 
2342 	if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
2343 		return -ENOENT;
2344 
2345 	if ((slave > MLX4_MAX_NUM_VF) || (slave < 0) ||
2346 	    (port < 0) || (port > MLX4_MAX_PORTS)) {
2347 		mlx4_dbg(dev, "%s: invalid slave(%d) or port(%d) index\n",
2348 			 __func__, slave, port);
2349 		return -EINVAL;
2350 	}
2351 
2352 	/* Handle old guests that do not support allocation by port index. */
2353 	if (port == 0) {
2354 		*idx = MLX4_SINK_COUNTER_INDEX;
2355 		mlx4_dbg(dev, "%s: allocated default counter index %d for slave %d port %d\n"
2356 			 , __func__, *idx, slave, port);
2357 		return 0;
2358 	}
2359 
2360 	mutex_lock(&priv->counters_table.mutex);
2361 
2362 	*idx = mlx4_bitmap_alloc(&priv->counters_table.bitmap);
2363 	/* if no resources return the default counter of the slave and port */
2364 	if (*idx == -1) {
2365 		if (slave == 0) { /* PF/native: fall back to the port's default counter */
2366 			new_counter_index = list_entry(priv->counters_table.global_port_list[port - 1].next,
2367 						       struct counter_index,
2368 						       list);
2369 		} else {
2370 			new_counter_index = list_entry(priv->counters_table.vf_list[slave - 1][port - 1].next,
2371 						       struct counter_index,
2372 						       list);
2373 		}
2374 
2375 		*idx = new_counter_index->index;
2376 		mlx4_dbg(dev, "%s: allocated default counter index %d for slave %d port %d\n"
2377 			 , __func__, *idx, slave, port);
2378 		goto out;
2379 	}
2380 
2381 	if (slave == 0) { /* native or master */
2382 		new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL);
2383 		if (!new_counter_index)
2384 			goto no_mem;
2385 		new_counter_index->index = *idx;
2386 		list_add_tail(&new_counter_index->list, &priv->counters_table.global_port_list[port - 1]);
2387 	} else {
2388 		new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL);
2389 		if (!new_counter_index)
2390 			goto no_mem;
2391 		new_counter_index->index = *idx;
2392 		list_add_tail(&new_counter_index->list, &priv->counters_table.vf_list[slave - 1][port - 1]);
2393 	}
2394 
2395 	mlx4_dbg(dev, "%s: allocated counter index %d for slave %d port %d\n"
2396 		 , __func__, *idx, slave, port);
2397 out:
2398 	mutex_unlock(&priv->counters_table.mutex);
2399 	return 0;
2400 
2401 no_mem:
2402 	mlx4_bitmap_free(&priv->counters_table.bitmap, *idx, MLX4_USE_RR);
2403 	mutex_unlock(&priv->counters_table.mutex);
2404 	*idx = MLX4_SINK_COUNTER_INDEX;
2405 	mlx4_dbg(dev, "%s: counter allocation failed (err %d)\n"
2406 		 , __func__, -ENOMEM);
2407 	return -ENOMEM;
2408 }
2409 
2410 int mlx4_counter_alloc(struct mlx4_dev *dev, u8 port, u32 *idx)
2411 {
2412 	u64 out_param;
2413 	int err;
2414 	struct mlx4_priv *priv = mlx4_priv(dev);
2415 	struct counter_index *new_counter_index, *c_index;
2416 
2417 	if (mlx4_is_mfunc(dev)) {
2418 		err = mlx4_cmd_imm(dev, 0, &out_param,
2419 				   ((u32) port) << 8 | (u32) RES_COUNTER,
2420 				   RES_OP_RESERVE, MLX4_CMD_ALLOC_RES,
2421 				   MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
2422 		if (!err) {
2423 			*idx = get_param_l(&out_param);
2424 			if (*idx == MLX4_SINK_COUNTER_INDEX)
2425 				return -ENOSPC;
2426 
2427 			mutex_lock(&priv->counters_table.mutex);
2428 			c_index = list_entry(priv->counters_table.global_port_list[port - 1].next,
2429 					     struct counter_index,
2430 					     list);
2431 			mutex_unlock(&priv->counters_table.mutex);
2432 			if (c_index->index == *idx)
2433 				return -EEXIST;
2434 
2435 			if (mlx4_is_slave(dev)) {
2436 				new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL);
2437 				if (!new_counter_index) {
2438 					mlx4_counter_free(dev, port, *idx);
2439 					return -ENOMEM;
2440 				}
2441 				new_counter_index->index = *idx;
2442 				mutex_lock(&priv->counters_table.mutex);
2443 				list_add_tail(&new_counter_index->list, &priv->counters_table.global_port_list[port - 1]);
2444 				mutex_unlock(&priv->counters_table.mutex);
2445 				mlx4_dbg(dev, "%s: allocated counter index %d for port %d\n"
2446 					 , __func__, *idx, port);
2447 			}
2448 		}
2449 		return err;
2450 	}
2451 	return __mlx4_counter_alloc(dev, 0, port, idx);
2452 }
2453 EXPORT_SYMBOL_GPL(mlx4_counter_alloc);
2454 
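/*
 * Free a counter index obtained through the mlx4_counter_alloc() path.
 * The reserved defaults are kept: the PF's first two per-port list
 * entries and a VF's first entry are never returned to the bitmap.
 * A minimal caller sketch (error handling elided):
 *
 *	u32 idx;
 *
 *	if (!mlx4_counter_alloc(dev, port, &idx)) {
 *		...use idx, e.g. attach it to a QP context...
 *		mlx4_counter_free(dev, port, idx);
 *	}
 */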
2455 void __mlx4_counter_free(struct mlx4_dev *dev, int slave, int port, u32 idx)
2456 {
2457 	/* check whether this is a native (PF) or slave counter and delete accordingly */
2458 	struct mlx4_priv *priv = mlx4_priv(dev);
2459 	struct counter_index *pf, *tmp_pf;
2460 	struct counter_index *vf, *tmp_vf;
2461 	int first;
2462 
2464 	if (idx == MLX4_SINK_COUNTER_INDEX) {
2465 		mlx4_dbg(dev, "%s: attempt to delete default counter index %d for port %d\n"
2466 			 , __func__, idx, port);
2467 		return;
2468 	}
2469 
2470 	if ((slave > MLX4_MAX_NUM_VF) || (slave < 0) ||
2471 	    (port < 0) || (port > MLX4_MAX_PORTS)) {
2472 		mlx4_warn(dev, "%s: deletion failed due to invalid slave(%d) or port(%d) index\n"
2473 			 , __func__, slave, port);
2474 		return;
2475 	}
2476 
2477 	mutex_lock(&priv->counters_table.mutex);
2478 	if (slave == 0) {
2479 		first = 0;
2480 		list_for_each_entry_safe(pf, tmp_pf,
2481 					 &priv->counters_table.global_port_list[port - 1],
2482 					 list) {
2483 			/* the first 2 counters are reserved */
2484 			if (pf->index == idx) {
2485 				/* clear the counter statistic */
2486 				if (__mlx4_clear_if_stat(dev, pf->index))
2487 					mlx4_dbg(dev, "%s: reset counter %d failed\n",
2488 						 __func__, pf->index);
2489 				if (first > 1 && idx != MLX4_SINK_COUNTER_INDEX) {
2490 					list_del(&pf->list);
2491 					kfree(pf);
2492 					mlx4_dbg(dev, "%s: delete counter index %d for native device (%d) port %d\n"
2493 						 , __func__, idx, slave, port);
2494 					mlx4_bitmap_free(&priv->counters_table.bitmap, idx, MLX4_USE_RR);
2495 					goto out;
2496 				} else {
2497 					mlx4_dbg(dev, "%s: can't delete default counter index %d for native device (%d) port %d\n"
2498 						 , __func__, idx, slave, port);
2499 					goto out;
2500 				}
2501 			}
2502 			first++;
2503 		}
2504 		mlx4_dbg(dev, "%s: can't delete counter index %d for native device (%d) port %d\n"
2505 			 , __func__, idx, slave, port);
2506 	} else {
2507 		first = 0;
2508 		list_for_each_entry_safe(vf, tmp_vf,
2509 					 &priv->counters_table.vf_list[slave - 1][port - 1],
2510 					 list) {
2511 			/* the first element is reserved */
2512 			if (vf->index == idx) {
2513 				/* clear the counter statistic */
2514 				if (__mlx4_clear_if_stat(dev, vf->index))
2515 					mlx4_dbg(dev, "%s: reset counter %d failed\n",
2516 						 __func__, vf->index);
2517 				if (first) {
2518 					list_del(&vf->list);
2519 					kfree(vf);
2520 					mlx4_dbg(dev, "%s: delete counter index %d for slave %d port %d\n",
2521 						 __func__, idx, slave, port);
2522 					mlx4_bitmap_free(&priv->counters_table.bitmap, idx, MLX4_USE_RR);
2523 					goto out;
2524 				} else {
2525 					mlx4_dbg(dev, "%s: can't delete default slave (%d) counter index %d for port %d\n"
2526 						 , __func__, slave, idx, port);
2527 					goto out;
2528 				}
2529 			}
2530 			first++;
2531 		}
2532 		mlx4_dbg(dev, "%s: can't delete slave (%d) counter index %d for port %d\n"
2533 			 , __func__, slave, idx, port);
2534 	}
2535 
2536 out:
2537 	mutex_unlock(&priv->counters_table.mutex);
2538 }
2539 
2540 void mlx4_counter_free(struct mlx4_dev *dev, u8 port, u32 idx)
2541 {
2542 	u64 in_param = 0;
2543 	struct mlx4_priv *priv = mlx4_priv(dev);
2544 	struct counter_index *counter, *tmp_counter;
2545 	int first = 0;
2546 
2547 	if (mlx4_is_mfunc(dev)) {
2548 		set_param_l(&in_param, idx);
2549 		mlx4_cmd(dev, in_param,
2550 			 ((u32) port) << 8 | (u32) RES_COUNTER,
2551 			 RES_OP_RESERVE,
2552 			 MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A,
2553 			 MLX4_CMD_WRAPPED);
2554 
2555 		if (mlx4_is_slave(dev) && idx != MLX4_SINK_COUNTER_INDEX) {
2556 			mutex_lock(&priv->counters_table.mutex);
2557 			list_for_each_entry_safe(counter, tmp_counter,
2558 						 &priv->counters_table.global_port_list[port - 1],
2559 						 list) {
2560 				if (counter->index == idx && first++) {
2561 					list_del(&counter->list);
2562 					kfree(counter);
2563 					mlx4_dbg(dev, "%s: delete counter index %d for port %d\n"
2564 						 , __func__, idx, port);
2565 					mutex_unlock(&priv->counters_table.mutex);
2566 					return;
2567 				}
2568 			}
2569 			mutex_unlock(&priv->counters_table.mutex);
2570 		}
2571 
2572 		return;
2573 	}
2574 	__mlx4_counter_free(dev, 0, port, idx);
2575 }
2576 EXPORT_SYMBOL_GPL(mlx4_counter_free);
2577 
2578 int __mlx4_clear_if_stat(struct mlx4_dev *dev,
2579 			 u8 counter_index)
2580 {
2581 	struct mlx4_cmd_mailbox *if_stat_mailbox = NULL;
2582 	int err = 0;
2583 	u32 if_stat_in_mod = (counter_index & 0xff) | (1U << 31);
2584 
2585 	if (counter_index == MLX4_SINK_COUNTER_INDEX)
2586 		return -EINVAL;
2587 
2588 	if (mlx4_is_slave(dev))
2589 		return 0;
2590 
2591 	if_stat_mailbox = mlx4_alloc_cmd_mailbox(dev);
2592 	if (IS_ERR(if_stat_mailbox)) {
2593 		err = PTR_ERR(if_stat_mailbox);
2594 		return err;
2595 	}
2596 
2597 	err = mlx4_cmd_box(dev, 0, if_stat_mailbox->dma, if_stat_in_mod, 0,
2598 			   MLX4_CMD_QUERY_IF_STAT, MLX4_CMD_TIME_CLASS_C,
2599 			   MLX4_CMD_NATIVE);
2600 
2601 	mlx4_free_cmd_mailbox(dev, if_stat_mailbox);
2602 	return err;
2603 }
2604 
2605 u8 mlx4_get_default_counter_index(struct mlx4_dev *dev, int slave, int port)
2606 {
2607 	struct mlx4_priv *priv = mlx4_priv(dev);
2608 	struct counter_index *new_counter_index;
2609 
2610 	if (dev->caps.port_type[port] == MLX4_PORT_TYPE_IB) {
2611 		mlx4_dbg(dev, "%s: return counter index %d for slave %d port (MLX4_PORT_TYPE_IB) %d\n",
2612 			 __func__, MLX4_SINK_COUNTER_INDEX, slave, port);
2613 		return (u8)MLX4_SINK_COUNTER_INDEX;
2614 	}
2615 
2616 	mutex_lock(&priv->counters_table.mutex);
2617 	if (slave == 0) {
2618 		new_counter_index = list_entry(priv->counters_table.global_port_list[port - 1].next,
2619 					       struct counter_index,
2620 					       list);
2621 	} else {
2622 		new_counter_index = list_entry(priv->counters_table.vf_list[slave - 1][port - 1].next,
2623 					       struct counter_index,
2624 					       list);
2625 	}
2626 	mutex_unlock(&priv->counters_table.mutex);
2627 
2628 	mlx4_dbg(dev, "%s: return counter index %d for slave %d port %d\n",
2629 		 __func__, new_counter_index->index, slave, port);
2630 
2632 	return (u8)new_counter_index->index;
2633 }
2634 
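/*
 * Sum the extended (cnt_mode 1) QUERY_IF_STAT results of every counter
 * on the port's list into vport_stats.  The 'reset' argument lands in
 * bit 31 of the input modifier, which asks the firmware to clear the
 * counter after reading it (compare __mlx4_clear_if_stat()).
 */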
2635 int mlx4_get_vport_ethtool_stats(struct mlx4_dev *dev, int port,
2636 			 struct mlx4_en_vport_stats *vport_stats,
2637 			 int reset)
2638 {
2639 	struct mlx4_priv *priv = mlx4_priv(dev);
2640 	struct mlx4_cmd_mailbox *if_stat_mailbox = NULL;
2641 	union  mlx4_counter *counter;
2642 	int err = 0;
2643 	u32 if_stat_in_mod;
2644 	struct counter_index *vport, *tmp_vport;
2645 
2646 	if (!vport_stats)
2647 		return -EINVAL;
2648 
2649 	if_stat_mailbox = mlx4_alloc_cmd_mailbox(dev);
2650 	if (IS_ERR(if_stat_mailbox)) {
2651 		err = PTR_ERR(if_stat_mailbox);
2652 		return err;
2653 	}
2654 
2655 	mutex_lock(&priv->counters_table.mutex);
2656 	list_for_each_entry_safe(vport, tmp_vport,
2657 				 &priv->counters_table.global_port_list[port - 1],
2658 				 list) {
2659 		if (vport->index == MLX4_SINK_COUNTER_INDEX)
2660 			continue;
2661 
2662 		memset(if_stat_mailbox->buf, 0, sizeof(union  mlx4_counter));
2663 		if_stat_in_mod = (vport->index & 0xff) | ((u32)(reset & 1) << 31);
2664 		err = mlx4_cmd_box(dev, 0, if_stat_mailbox->dma,
2665 				   if_stat_in_mod, 0,
2666 				   MLX4_CMD_QUERY_IF_STAT,
2667 				   MLX4_CMD_TIME_CLASS_C,
2668 				   MLX4_CMD_NATIVE);
2669 		if (err) {
2670 			mlx4_dbg(dev, "%s: failed to read statistics for counter index %d\n",
2671 				 __func__, vport->index);
2672 			goto if_stat_out;
2673 		}
2674 		counter = (union mlx4_counter *)if_stat_mailbox->buf;
2675 		if ((counter->control.cnt_mode & 0xf) == 1) {
2676 			vport_stats->rx_broadcast_packets += be64_to_cpu(counter->ext.counters[0].IfRxBroadcastFrames);
2677 			vport_stats->rx_unicast_packets += be64_to_cpu(counter->ext.counters[0].IfRxUnicastFrames);
2678 			vport_stats->rx_multicast_packets += be64_to_cpu(counter->ext.counters[0].IfRxMulticastFrames);
2679 			vport_stats->tx_broadcast_packets += be64_to_cpu(counter->ext.counters[0].IfTxBroadcastFrames);
2680 			vport_stats->tx_unicast_packets += be64_to_cpu(counter->ext.counters[0].IfTxUnicastFrames);
2681 			vport_stats->tx_multicast_packets += be64_to_cpu(counter->ext.counters[0].IfTxMulticastFrames);
2682 			vport_stats->rx_broadcast_bytes += be64_to_cpu(counter->ext.counters[0].IfRxBroadcastOctets);
2683 			vport_stats->rx_unicast_bytes += be64_to_cpu(counter->ext.counters[0].IfRxUnicastOctets);
2684 			vport_stats->rx_multicast_bytes += be64_to_cpu(counter->ext.counters[0].IfRxMulticastOctets);
2685 			vport_stats->tx_broadcast_bytes += be64_to_cpu(counter->ext.counters[0].IfTxBroadcastOctets);
2686 			vport_stats->tx_unicast_bytes += be64_to_cpu(counter->ext.counters[0].IfTxUnicastOctets);
2687 			vport_stats->tx_multicast_bytes += be64_to_cpu(counter->ext.counters[0].IfTxMulticastOctets);
2688 			vport_stats->rx_errors += be64_to_cpu(counter->ext.counters[0].IfRxErrorFrames);
2689 			vport_stats->rx_dropped += be64_to_cpu(counter->ext.counters[0].IfRxNoBufferFrames);
2690 			vport_stats->tx_errors += be64_to_cpu(counter->ext.counters[0].IfTxDroppedFrames);
2691 		}
2692 	}
2693 
2694 if_stat_out:
2695 	mutex_unlock(&priv->counters_table.mutex);
2696 	mlx4_free_cmd_mailbox(dev, if_stat_mailbox);
2697 
2698 	return err;
2699 }
2700 EXPORT_SYMBOL_GPL(mlx4_get_vport_ethtool_stats);
2701 
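/*
 * Create the software resource tables (UAR, PD, XRCD, MR, MCG on the
 * PF, EQ, CQ, SRQ, QP, counters) in dependency order, switch the
 * command interface to event-driven mode and prove interrupt delivery
 * with a NOP command, then program per-port IB capabilities and MTU.
 * Every step unwinds through the error labels in reverse order.
 */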
2702 static int mlx4_setup_hca(struct mlx4_dev *dev)
2703 {
2704 	struct mlx4_priv *priv = mlx4_priv(dev);
2705 	int err;
2706 	int port;
2707 	__be32 ib_port_default_caps;
2708 
2709 	err = mlx4_init_uar_table(dev);
2710 	if (err) {
2711 		mlx4_err(dev, "Failed to initialize "
2712 			 "user access region table (err=%d), aborting.\n",
2713 			 err);
2714 		return err;
2715 	}
2716 
2717 	err = mlx4_uar_alloc(dev, &priv->driver_uar);
2718 	if (err) {
2719 		mlx4_err(dev, "Failed to allocate driver access region "
2720 			 "(err=%d), aborting.\n", err);
2721 		goto err_uar_table_free;
2722 	}
2723 
2724 	priv->kar = ioremap((phys_addr_t) priv->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
2725 	if (!priv->kar) {
2726 		mlx4_err(dev, "Couldn't map kernel access region, "
2727 			 "aborting.\n");
2728 		err = -ENOMEM;
2729 		goto err_uar_free;
2730 	}
2731 
2732 	err = mlx4_init_pd_table(dev);
2733 	if (err) {
2734 		mlx4_err(dev, "Failed to initialize "
2735 			 "protection domain table (err=%d), aborting.\n", err);
2736 		goto err_kar_unmap;
2737 	}
2738 
2739 	err = mlx4_init_xrcd_table(dev);
2740 	if (err) {
2741 		mlx4_err(dev, "Failed to initialize "
2742 			 "reliable connection domain table (err=%d), "
2743 			 "aborting.\n", err);
2744 		goto err_pd_table_free;
2745 	}
2746 
2747 	err = mlx4_init_mr_table(dev);
2748 	if (err) {
2749 		mlx4_err(dev, "Failed to initialize "
2750 			 "memory region table (err=%d), aborting.\n", err);
2751 		goto err_xrcd_table_free;
2752 	}
2753 
2754 	if (!mlx4_is_slave(dev)) {
2755 		err = mlx4_init_mcg_table(dev);
2756 		if (err) {
2757 			mlx4_err(dev, "Failed to initialize "
2758 				 "multicast group table (err=%d), aborting.\n",
2759 				 err);
2760 			goto err_mr_table_free;
2761 		}
2762 	}
2763 
2764 	err = mlx4_init_eq_table(dev);
2765 	if (err) {
2766 		mlx4_err(dev, "Failed to initialize "
2767 			 "event queue table (err=%d), aborting.\n", err);
2768 		goto err_mcg_table_free;
2769 	}
2770 
2771 	err = mlx4_cmd_use_events(dev);
2772 	if (err) {
2773 		mlx4_err(dev, "Failed to switch to event-driven "
2774 			 "firmware commands (err=%d), aborting.\n", err);
2775 		goto err_eq_table_free;
2776 	}
2777 
2778 	err = mlx4_NOP(dev);
2779 	if (err) {
2780 		if (dev->flags & MLX4_FLAG_MSI_X) {
2781 			mlx4_warn(dev, "NOP command failed to generate MSI-X "
2782 				  "interrupt (IRQ %d).\n",
2783 				  priv->eq_table.eq[dev->caps.num_comp_vectors].irq);
2784 			mlx4_warn(dev, "Trying again without MSI-X.\n");
2785 		} else {
2786 			mlx4_err(dev, "NOP command failed to generate interrupt "
2787 				 "(IRQ %d), aborting.\n",
2788 				 priv->eq_table.eq[dev->caps.num_comp_vectors].irq);
2789 			mlx4_err(dev, "BIOS or ACPI interrupt routing problem?\n");
2790 		}
2791 
2792 		goto err_cmd_poll;
2793 	}
2794 
2795 	mlx4_dbg(dev, "NOP command IRQ test passed\n");
2796 
2797 	err = mlx4_init_cq_table(dev);
2798 	if (err) {
2799 		mlx4_err(dev, "Failed to initialize "
2800 			 "completion queue table (err=%d), aborting.\n", err);
2801 		goto err_cmd_poll;
2802 	}
2803 
2804 	err = mlx4_init_srq_table(dev);
2805 	if (err) {
2806 		mlx4_err(dev, "Failed to initialize "
2807 			 "shared receive queue table (err=%d), aborting.\n",
2808 			 err);
2809 		goto err_cq_table_free;
2810 	}
2811 
2812 	err = mlx4_init_qp_table(dev);
2813 	if (err) {
2814 		mlx4_err(dev, "Failed to initialize "
2815 			 "queue pair table (err=%d), aborting.\n", err);
2816 		goto err_srq_table_free;
2817 	}
2818 
2819 	err = mlx4_init_counters_table(dev);
2820 	if (err && err != -ENOENT) {
2821 		mlx4_err(dev, "Failed to initialize counters table (err=%d), "
2822 			 "aborting.\n", err);
2823 		goto err_qp_table_free;
2824 	}
2825 
2826 	if (!mlx4_is_slave(dev)) {
2827 		for (port = 1; port <= dev->caps.num_ports; port++) {
2828 			ib_port_default_caps = 0;
2829 			err = mlx4_get_port_ib_caps(dev, port,
2830 						    &ib_port_default_caps);
2831 			if (err)
2832 				mlx4_warn(dev, "failed to get port %d default "
2833 					  "ib capabilities (%d). Continuing "
2834 					  "with caps = 0\n", port, err);
2835 			dev->caps.ib_port_def_cap[port] = ib_port_default_caps;
2836 
2837 			/* initialize per-slave default ib port capabilities */
2838 			if (mlx4_is_master(dev)) {
2839 				int i;
2840 				for (i = 0; i < dev->num_slaves; i++) {
2841 					if (i == mlx4_master_func_num(dev))
2842 						continue;
2843 					priv->mfunc.master.slave_state[i].ib_cap_mask[port] =
2844 							ib_port_default_caps;
2845 				}
2846 			}
2847 
2848 			dev->caps.port_ib_mtu[port] = IB_MTU_4096;
2849 
2850 			err = mlx4_SET_PORT(dev, port, mlx4_is_master(dev) ?
2851 					    dev->caps.pkey_table_len[port] : -1);
2852 			if (err) {
2853 				mlx4_err(dev, "Failed to set port %d (err=%d), "
2854 					 "aborting\n", port, err);
2855 				goto err_counters_table_free;
2856 			}
2857 		}
2858 	}
2859 
2860 	return 0;
2861 
2862 err_counters_table_free:
2863 	mlx4_cleanup_counters_table(dev);
2864 
2865 err_qp_table_free:
2866 	mlx4_cleanup_qp_table(dev);
2867 
2868 err_srq_table_free:
2869 	mlx4_cleanup_srq_table(dev);
2870 
2871 err_cq_table_free:
2872 	mlx4_cleanup_cq_table(dev);
2873 
2874 err_cmd_poll:
2875 	mlx4_cmd_use_polling(dev);
2876 
2877 err_eq_table_free:
2878 	mlx4_cleanup_eq_table(dev);
2879 
2880 err_mcg_table_free:
2881 	if (!mlx4_is_slave(dev))
2882 		mlx4_cleanup_mcg_table(dev);
2883 
2884 err_mr_table_free:
2885 	mlx4_cleanup_mr_table(dev);
2886 
2887 err_xrcd_table_free:
2888 	mlx4_cleanup_xrcd_table(dev);
2889 
2890 err_pd_table_free:
2891 	mlx4_cleanup_pd_table(dev);
2892 
2893 err_kar_unmap:
2894 	iounmap(priv->kar);
2895 
2896 err_uar_free:
2897 	mlx4_uar_free(dev, &priv->driver_uar);
2898 
2899 err_uar_table_free:
2900 	mlx4_cleanup_uar_table(dev);
2901 	return err;
2902 }
2903 
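/*
 * Try to enable MSI-X with roughly one completion vector per CPU per
 * port plus the legacy set, retrying with whatever smaller count the
 * platform says it can grant.  If fewer than the legacy minimum are
 * granted, all EQs are shared (comp_pool stays 0); if MSI-X cannot be
 * enabled at all, fall back to the plain PCI IRQ.
 */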
2904 static void mlx4_enable_msi_x(struct mlx4_dev *dev)
2905 {
2906 	struct mlx4_priv *priv = mlx4_priv(dev);
2907 	struct msix_entry *entries;
2908 	int nreq = min_t(int, dev->caps.num_ports *
2909 			 min_t(int, num_possible_cpus() + 1, MAX_MSIX_P_PORT)
2910 				+ MSIX_LEGACY_SZ, MAX_MSIX);
2911 	int err;
2912 	int i;
2913 
2914 	if (msi_x) {
2915 		nreq = min_t(int, dev->caps.num_eqs - dev->caps.reserved_eqs,
2916 			     nreq);
2917 
2918 		if (msi_x > 1 && !mlx4_is_mfunc(dev))
2919 			nreq = min_t(int, nreq, msi_x);
2920 
2921 		entries = kcalloc(nreq, sizeof *entries, GFP_KERNEL);
2922 		if (!entries)
2923 			goto no_msi;
2924 
2925 		for (i = 0; i < nreq; ++i)
2926 			entries[i].entry = i;
2927 
2928 	retry:
2929 		err = pci_enable_msix(dev->pdev, entries, nreq);
2930 		if (err) {
2931 			/* Try again if at least 2 vectors are available */
2932 			if (err > 1) {
2933 				mlx4_info(dev, "Requested %d vectors, "
2934 					  "but only %d MSI-X vectors available, "
2935 					  "trying again\n", nreq, err);
2936 				nreq = err;
2937 				goto retry;
2938 			}
2939 			kfree(entries);
2940 			/* if error, or can't alloc even 1 IRQ */
2941 			if (err < 0) {
2942 				mlx4_err(dev, "No IRQs left, device can't "
2943 				    "be started.\n");
2944 				goto no_irq;
2945 			}
2946 			goto no_msi;
2947 		}
2948 
2949 		if (nreq <
2950 		    MSIX_LEGACY_SZ + dev->caps.num_ports * MIN_MSIX_P_PORT) {
2951 			/* Working in legacy mode, all EQs shared */
2952 			dev->caps.comp_pool        = 0;
2953 			dev->caps.num_comp_vectors = nreq - 1;
2954 		} else {
2955 			dev->caps.comp_pool        = nreq - MSIX_LEGACY_SZ;
2956 			dev->caps.num_comp_vectors = MSIX_LEGACY_SZ - 1;
2957 		}
2958 		for (i = 0; i < nreq; ++i)
2959 			priv->eq_table.eq[i].irq = entries[i].vector;
2960 
2961 		dev->flags |= MLX4_FLAG_MSI_X;
2962 
2963 		kfree(entries);
2964 		return;
2965 	}
2966 
2967 no_msi:
2968 	dev->caps.num_comp_vectors = 1;
2969 	dev->caps.comp_pool	   = 0;
2970 
2971 	for (i = 0; i < 2; ++i)
2972 		priv->eq_table.eq[i].irq = dev->pdev->irq;
2973 	return;
2974 no_irq:
2975 	dev->caps.num_comp_vectors = 0;
2976 	dev->caps.comp_pool        = 0;
2977 	return;
2978 }
2979 
2980 static void
2981 mlx4_init_hca_info(struct mlx4_dev *dev)
2982 {
2983 	struct mlx4_hca_info *info = &mlx4_priv(dev)->hca_info;
2984 
2985 	info->dev = dev;
2986 
2987 	info->firmware_attr = (struct device_attribute)__ATTR(fw_ver, S_IRUGO,
2988 							show_firmware_version, NULL);
2989 	if (device_create_file(&dev->pdev->dev, &info->firmware_attr))
2990 		mlx4_err(dev, "Failed to add firmware version file\n");
2991 
2992 	info->hca_attr = (struct device_attribute)__ATTR(hca, S_IRUGO, show_hca,
2993 										NULL);
2994 	if (device_create_file(&dev->pdev->dev, &info->hca_attr))
2995 		mlx4_err(dev, "Failed to add HCA type file\n");
2996 
2997 	info->board_attr = (struct device_attribute)__ATTR(board_id, S_IRUGO,
2998 							    show_board, NULL);
2999 	if (device_create_file(&dev->pdev->dev, &info->board_attr))
3000 		mlx4_err(dev, "Failed to add board id file\n");
3001 }
3002 
3003 static int mlx4_init_port_info(struct mlx4_dev *dev, int port)
3004 {
3005 	struct mlx4_port_info *info = &mlx4_priv(dev)->port[port];
3006 	int err = 0;
3007 
3008 	info->dev = dev;
3009 	info->port = port;
3010 	if (!mlx4_is_slave(dev)) {
3011 		mlx4_init_mac_table(dev, &info->mac_table);
3012 		mlx4_init_vlan_table(dev, &info->vlan_table);
3013 		info->base_qpn = mlx4_get_base_qpn(dev, port);
3014 	}
3015 
3016 	sprintf(info->dev_name, "mlx4_port%d", port);
3017 	info->port_attr.attr.name = info->dev_name;
3018 	if (mlx4_is_mfunc(dev))
3019 		info->port_attr.attr.mode = S_IRUGO;
3020 	else {
3021 		info->port_attr.attr.mode = S_IRUGO | S_IWUSR;
3022 		info->port_attr.store     = set_port_type;
3023 	}
3024 	info->port_attr.show      = show_port_type;
3025 	sysfs_attr_init(&info->port_attr.attr);
3026 
3027 	err = device_create_file(&dev->pdev->dev, &info->port_attr);
3028 	if (err) {
3029 		mlx4_err(dev, "Failed to create file for port %d\n", port);
3030 		info->port = -1;
3031 	}
3032 
3033 	sprintf(info->dev_mtu_name, "mlx4_port%d_mtu", port);
3034 	info->port_mtu_attr.attr.name = info->dev_mtu_name;
3035 	if (mlx4_is_mfunc(dev))
3036 		info->port_mtu_attr.attr.mode = S_IRUGO;
3037 	else {
3038 		info->port_mtu_attr.attr.mode = S_IRUGO | S_IWUSR;
3039 		info->port_mtu_attr.store     = set_port_ib_mtu;
3040 	}
3041 	info->port_mtu_attr.show      = show_port_ib_mtu;
3042 	sysfs_attr_init(&info->port_mtu_attr.attr);
3043 
3044 	err = device_create_file(&dev->pdev->dev, &info->port_mtu_attr);
3045 	if (err) {
3046 		mlx4_err(dev, "Failed to create mtu file for port %d\n", port);
3047 		device_remove_file(&info->dev->pdev->dev, &info->port_attr);
3048 		info->port = -1;
3049 	}
3050 
3051 	return err;
3052 }
3053 
3054 static void
3055 mlx4_cleanup_hca_info(struct mlx4_hca_info *info)
3056 {
3057 	device_remove_file(&info->dev->pdev->dev, &info->firmware_attr);
3058 	device_remove_file(&info->dev->pdev->dev, &info->board_attr);
3059 	device_remove_file(&info->dev->pdev->dev, &info->hca_attr);
3060 }
3061 
3062 static void mlx4_cleanup_port_info(struct mlx4_port_info *info)
3063 {
3064 	if (info->port < 0)
3065 		return;
3066 
3067 	device_remove_file(&info->dev->pdev->dev, &info->port_attr);
3068 	device_remove_file(&info->dev->pdev->dev, &info->port_mtu_attr);
3069 }
3070 
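/*
 * Allocate the per-port steering state and initialize the promiscuous
 * QP and steering-entry lists for each steer type;
 * mlx4_clear_steering() below releases it all again.
 */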
3071 static int mlx4_init_steering(struct mlx4_dev *dev)
3072 {
3073 	struct mlx4_priv *priv = mlx4_priv(dev);
3074 	int num_entries = dev->caps.num_ports;
3075 	int i, j;
3076 
3077 	priv->steer = kcalloc(num_entries, sizeof(struct mlx4_steer), GFP_KERNEL);
3078 	if (!priv->steer)
3079 		return -ENOMEM;
3080 
3081 	for (i = 0; i < num_entries; i++)
3082 		for (j = 0; j < MLX4_NUM_STEERS; j++) {
3083 			INIT_LIST_HEAD(&priv->steer[i].promisc_qps[j]);
3084 			INIT_LIST_HEAD(&priv->steer[i].steer_entries[j]);
3085 		}
3086 	return 0;
3087 }
3088 
3089 static void mlx4_clear_steering(struct mlx4_dev *dev)
3090 {
3091 	struct mlx4_priv *priv = mlx4_priv(dev);
3092 	struct mlx4_steer_index *entry, *tmp_entry;
3093 	struct mlx4_promisc_qp *pqp, *tmp_pqp;
3094 	int num_entries = dev->caps.num_ports;
3095 	int i, j;
3096 
3097 	for (i = 0; i < num_entries; i++) {
3098 		for (j = 0; j < MLX4_NUM_STEERS; j++) {
3099 			list_for_each_entry_safe(pqp, tmp_pqp,
3100 						 &priv->steer[i].promisc_qps[j],
3101 						 list) {
3102 				list_del(&pqp->list);
3103 				kfree(pqp);
3104 			}
3105 			list_for_each_entry_safe(entry, tmp_entry,
3106 						 &priv->steer[i].steer_entries[j],
3107 						 list) {
3108 				list_del(&entry->list);
3109 				list_for_each_entry_safe(pqp, tmp_pqp,
3110 							 &entry->duplicates,
3111 							 list) {
3112 					list_del(&pqp->list);
3113 					kfree(pqp);
3114 				}
3115 				kfree(entry);
3116 			}
3117 		}
3118 	}
3119 	kfree(priv->steer);
3120 }
3121 
3122 static int extended_func_num(struct pci_dev *pdev)
3123 {
3124 	return PCI_SLOT(pdev->devfn) * 8 + PCI_FUNC(pdev->devfn);
3125 }
3126 
3127 #define MLX4_OWNER_BASE	0x8069c
3128 #define MLX4_OWNER_SIZE	4
3129 
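/*
 * The ownership semaphore is a single dword in BAR 0 at
 * MLX4_OWNER_BASE; reading back a nonzero value means another function
 * already owns the device.  Releasing ownership writes zero and then
 * sleeps for a second, presumably so other functions reliably observe
 * the release before we continue tearing down.
 */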
3130 static int mlx4_get_ownership(struct mlx4_dev *dev)
3131 {
3132 	void __iomem *owner;
3133 	u32 ret;
3134 
3135 	if (pci_channel_offline(dev->pdev))
3136 		return -EIO;
3137 
3138 	owner = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_OWNER_BASE,
3139 			MLX4_OWNER_SIZE);
3140 	if (!owner) {
3141 		mlx4_err(dev, "Failed to map ownership bit\n");
3142 		return -ENOMEM;
3143 	}
3144 
3145 	ret = readl(owner);
3146 	iounmap(owner);
3147 	return (int) !!ret;
3148 }
3149 
3150 static void mlx4_free_ownership(struct mlx4_dev *dev)
3151 {
3152 	void __iomem *owner;
3153 
3154 	if (pci_channel_offline(dev->pdev))
3155 		return;
3156 
3157 	owner = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_OWNER_BASE,
3158 			MLX4_OWNER_SIZE);
3159 	if (!owner) {
3160 		mlx4_err(dev, "Failed to map ownership bit\n");
3161 		return;
3162 	}
3163 	writel(0, owner);
3164 	msleep(1000);
3165 	iounmap(owner);
3166 }
3167 
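/*
 * Core probe path.  Enable the PCI device, validate the DCS and UAR
 * BARs and the DMA masks, then (for a PF) claim single-PF ownership,
 * optionally enable SR-IOV and reset the HCA.  After that, bring up
 * the command interface, the HCA itself, MSI-X, steering and the
 * per-port state, and finally register the device.  A PF that turns
 * out not to be the primary physical function (mlx4_init_hca() returns
 * -EACCES) restarts itself in slave mode via the slave_start label.
 */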
3168 static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data)
3169 {
3170 	struct mlx4_priv *priv;
3171 	struct mlx4_dev *dev;
3172 	int err;
3173 	int port;
3174 	int nvfs, prb_vf;
3175 
3176 	pr_info(DRV_NAME ": Initializing %s\n", pci_name(pdev));
3177 
3178 	err = pci_enable_device(pdev);
3179 	if (err) {
3180 		dev_err(&pdev->dev, "Cannot enable PCI device, "
3181 			"aborting.\n");
3182 		return err;
3183 	}
3184 
3185 	mlx4_get_val(num_vfs.dbdf2val.tbl, pci_physfn(pdev), 0, &nvfs);
3186 	mlx4_get_val(probe_vf.dbdf2val.tbl, pci_physfn(pdev), 0, &prb_vf);
3187 	if (nvfs > MLX4_MAX_NUM_VF) {
3188 		dev_err(&pdev->dev, "There are more VFs (%d) than allowed (%d)\n",
3189 			nvfs, MLX4_MAX_NUM_VF);
3190 		return -EINVAL;
3191 	}
3192 
3193 	if (nvfs < 0) {
3194 		dev_err(&pdev->dev, "num_vfs module parameter cannot be negative\n");
3195 		return -EINVAL;
3196 	}
3197 	/*
3198 	 * Check for BARs.
3199 	 */
3200 	if (!(pci_dev_data & MLX4_PCI_DEV_IS_VF) &&
3201 	    !(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
3202 		dev_err(&pdev->dev, "Missing DCS, aborting. "
3203 			"(driver_data: 0x%x, pci_resource_flags(pdev, 0):0x%x)\n",
3204 			pci_dev_data, pci_resource_flags(pdev, 0));
3205 		err = -ENODEV;
3206 		goto err_disable_pdev;
3207 	}
3208 	if (!(pci_resource_flags(pdev, 2) & IORESOURCE_MEM)) {
3209 		dev_err(&pdev->dev, "Missing UAR, aborting.\n");
3210 		err = -ENODEV;
3211 		goto err_disable_pdev;
3212 	}
3213 
3214 	err = pci_request_regions(pdev, DRV_NAME);
3215 	if (err) {
3216 		dev_err(&pdev->dev, "Couldn't get PCI resources, aborting\n");
3217 		goto err_disable_pdev;
3218 	}
3219 
3220 	pci_set_master(pdev);
3221 
3222 	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
3223 	if (err) {
3224 		dev_warn(&pdev->dev, "Couldn't set 64-bit PCI DMA mask.\n");
3225 		err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
3226 		if (err) {
3227 			dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting.\n");
3228 			goto err_release_regions;
3229 		}
3230 	}
3231 	err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
3232 	if (err) {
3233 		dev_warn(&pdev->dev, "Couldn't set 64-bit "
3234 			 "consistent PCI DMA mask.\n");
3235 		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
3236 		if (err) {
3237 			dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, "
3238 				"aborting.\n");
3239 			goto err_release_regions;
3240 		}
3241 	}
3242 
3243 	/* Allow large DMA segments, up to the firmware limit of 1 GB */
3244 	dma_set_max_seg_size(&pdev->dev, 1024 * 1024 * 1024);
3245 
3246 	priv = kzalloc(sizeof *priv, GFP_KERNEL);
3247 	if (!priv) {
3248 		dev_err(&pdev->dev, "Device struct alloc failed, "
3249 			"aborting.\n");
3250 		err = -ENOMEM;
3251 		goto err_release_regions;
3252 	}
3253 
3254 	dev       = &priv->dev;
3255 	dev->pdev = pdev;
3256 	INIT_LIST_HEAD(&priv->dev_list);
3257 	INIT_LIST_HEAD(&priv->ctx_list);
3258 	spin_lock_init(&priv->ctx_lock);
3259 
3260 	mutex_init(&priv->port_mutex);
3261 
3262 	INIT_LIST_HEAD(&priv->pgdir_list);
3263 	mutex_init(&priv->pgdir_mutex);
3264 
3265 	INIT_LIST_HEAD(&priv->bf_list);
3266 	mutex_init(&priv->bf_mutex);
3267 
3268 	dev->rev_id = pdev->revision;
3269 	dev->numa_node = dev_to_node(&pdev->dev);
3270 	/* Detect if this device is a virtual function */
3271 	if (pci_dev_data & MLX4_PCI_DEV_IS_VF) {
3272 		/* When acting as PF, we normally skip VFs unless explicitly
3273 		 * requested to probe them. */
3274 		if (nvfs && extended_func_num(pdev) > prb_vf) {
3275 			mlx4_warn(dev, "Skipping virtual function: %d\n",
3276 						extended_func_num(pdev));
3277 			err = -ENODEV;
3278 			goto err_free_dev;
3279 		}
3280 		mlx4_warn(dev, "Detected virtual function - running in slave mode\n");
3281 		dev->flags |= MLX4_FLAG_SLAVE;
3282 	} else {
3283 		/* We reset the device and enable SRIOV only for physical
3284 		 * devices.  Try to claim ownership on the device;
3285 		 * if already taken, skip -- do not allow multiple PFs */
3286 		err = mlx4_get_ownership(dev);
3287 		if (err) {
3288 			if (err < 0)
3289 				goto err_free_dev;
3290 			else {
3291 				mlx4_warn(dev, "Multiple PFs not yet supported."
3292 					  " Skipping PF.\n");
3293 				err = -EINVAL;
3294 				goto err_free_dev;
3295 			}
3296 		}
3297 
3298 		if (nvfs) {
3299 			mlx4_warn(dev, "Enabling SR-IOV with %d VFs\n", nvfs);
3300 			err = pci_enable_sriov(pdev, nvfs);
3301 			if (err) {
3302 				mlx4_err(dev, "Failed to enable SR-IOV, continuing without SR-IOV (err = %d).\n",
3303 					 err);
3304 				err = 0;
3305 			} else {
3306 				mlx4_warn(dev, "Running in master mode\n");
3307 				dev->flags |= MLX4_FLAG_SRIOV |
3308 					      MLX4_FLAG_MASTER;
3309 				dev->num_vfs = nvfs;
3310 			}
3311 		}
3312 
3313 		atomic_set(&priv->opreq_count, 0);
3314 		INIT_WORK(&priv->opreq_task, mlx4_opreq_action);
3315 
3316 		/*
3317 		 * Now reset the HCA before we touch the PCI capabilities or
3318 		 * attempt a firmware command, since a boot ROM may have left
3319 		 * the HCA in an undefined state.
3320 		 */
3321 		err = mlx4_reset(dev);
3322 		if (err) {
3323 			mlx4_err(dev, "Failed to reset HCA, aborting.\n");
3324 			goto err_sriov;
3325 		}
3326 	}
3327 
3328 slave_start:
3329 	err = mlx4_cmd_init(dev);
3330 	if (err) {
3331 		mlx4_err(dev, "Failed to init command interface, aborting.\n");
3332 		goto err_sriov;
3333 	}
3334 
3335 	/* In slave functions, the communication channel must be initialized
3336 	 * before posting commands. Also, init num_slaves before calling
3337 	 * mlx4_init_hca */
3338 	if (mlx4_is_mfunc(dev)) {
3339 		if (mlx4_is_master(dev))
3340 			dev->num_slaves = MLX4_MAX_NUM_SLAVES;
3341 		else {
3342 			dev->num_slaves = 0;
3343 			err = mlx4_multi_func_init(dev);
3344 			if (err) {
3345 				mlx4_err(dev, "Failed to init slave mfunc"
3346 					 " interface, aborting.\n");
3347 				goto err_cmd;
3348 			}
3349 		}
3350 	}
3351 
3352 	err = mlx4_init_hca(dev);
3353 	if (err) {
3354 		if (err == -EACCES) {
3355 			/* Not the primary physical function;
3356 			 * running in slave mode. */
3357 			mlx4_cmd_cleanup(dev);
3358 			dev->flags |= MLX4_FLAG_SLAVE;
3359 			dev->flags &= ~MLX4_FLAG_MASTER;
3360 			goto slave_start;
3361 		} else
3362 			goto err_mfunc;
3363 	}
3364 
3365 	/* In master functions, the communication channel must be initialized
3366 	 * after obtaining its address from fw */
3367 	if (mlx4_is_master(dev)) {
3368 		err = mlx4_multi_func_init(dev);
3369 		if (err) {
3370 			mlx4_err(dev, "Failed to init master mfunc"
3371 				 " interface, aborting.\n");
3372 			goto err_close;
3373 		}
3374 	}
3375 
3376 	err = mlx4_alloc_eq_table(dev);
3377 	if (err)
3378 		goto err_master_mfunc;
3379 
3380 	priv->msix_ctl.pool_bm = 0;
3381 	mutex_init(&priv->msix_ctl.pool_lock);
3382 
3383 	mlx4_enable_msi_x(dev);
3384 
3385 	/* no MSI-X and no shared IRQ */
3386 	if (!dev->caps.num_comp_vectors && !dev->caps.comp_pool) {
3387 		err = -ENOSPC;
3388 		goto err_free_eq;
3389 	}
3390 
3391 	if ((mlx4_is_mfunc(dev)) &&
3392 	    !(dev->flags & MLX4_FLAG_MSI_X)) {
3393 		err = -ENOSYS;
3394 		mlx4_err(dev, "INTx is not supported in multi-function mode,"
3395 			 " aborting.\n");
3396 		goto err_free_eq;
3397 	}
3398 
3399 	if (!mlx4_is_slave(dev)) {
3400 		err = mlx4_init_steering(dev);
3401 		if (err)
3402 			goto err_free_eq;
3403 	}
3404 
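	/* Set up HCA resources.  If this fails with -EBUSY while MSI-X is
	 * enabled on a single-function device, fall back to INTx with one
	 * completion vector and retry the setup once. */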
3405 	err = mlx4_setup_hca(dev);
3406 	if (err == -EBUSY && (dev->flags & MLX4_FLAG_MSI_X) &&
3407 	    !mlx4_is_mfunc(dev)) {
3408 		dev->flags &= ~MLX4_FLAG_MSI_X;
3409 		dev->caps.num_comp_vectors = 1;
3410 		dev->caps.comp_pool	   = 0;
3411 		pci_disable_msix(pdev);
3412 		err = mlx4_setup_hca(dev);
3413 	}
3414 
3415 	if (err)
3416 		goto err_steer;
3417 
3418 	mlx4_init_quotas(dev);
3419 	mlx4_init_hca_info(dev);
3420 
3421 	for (port = 1; port <= dev->caps.num_ports; port++) {
3422 		err = mlx4_init_port_info(dev, port);
3423 		if (err)
3424 			goto err_port;
3425 	}
3426 
3427 	err = mlx4_register_device(dev);
3428 	if (err)
3429 		goto err_port;
3430 
3431 	mlx4_request_modules(dev);
3432 
3433 	mlx4_sense_init(dev);
3434 	mlx4_start_sense(dev);
3435 
3436 	priv->pci_dev_data = pci_dev_data;
3437 	pci_set_drvdata(pdev, dev);
3438 
3439 	return 0;
3440 
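/* Error unwind: each label below releases what was set up before the
 * failing step, in reverse order of initialization. */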
3441 err_port:
3442 	for (--port; port >= 1; --port)
3443 		mlx4_cleanup_port_info(&priv->port[port]);
3444 
3445 	mlx4_cleanup_counters_table(dev);
3446 	mlx4_cleanup_qp_table(dev);
3447 	mlx4_cleanup_srq_table(dev);
3448 	mlx4_cleanup_cq_table(dev);
3449 	mlx4_cmd_use_polling(dev);
3450 	mlx4_cleanup_eq_table(dev);
3451 	mlx4_cleanup_mcg_table(dev);
3452 	mlx4_cleanup_mr_table(dev);
3453 	mlx4_cleanup_xrcd_table(dev);
3454 	mlx4_cleanup_pd_table(dev);
3455 	mlx4_cleanup_uar_table(dev);
3456 
3457 err_steer:
3458 	if (!mlx4_is_slave(dev))
3459 		mlx4_clear_steering(dev);
3460 
3461 err_free_eq:
3462 	mlx4_free_eq_table(dev);
3463 
3464 err_master_mfunc:
3465 	if (mlx4_is_master(dev)) {
3466 		mlx4_free_resource_tracker(dev, RES_TR_FREE_STRUCTS_ONLY);
3467 		mlx4_multi_func_cleanup(dev);
3468 	}
3469 
3470 	if (mlx4_is_slave(dev)) {
3471 		kfree(dev->caps.qp0_tunnel);
3472 		kfree(dev->caps.qp0_proxy);
3473 		kfree(dev->caps.qp1_tunnel);
3474 		kfree(dev->caps.qp1_proxy);
3475 	}
3476 
3477 err_close:
3478 	if (dev->flags & MLX4_FLAG_MSI_X)
3479 		pci_disable_msix(pdev);
3480 
3481 	mlx4_close_hca(dev);
3482 
3483 err_mfunc:
3484 	if (mlx4_is_slave(dev))
3485 		mlx4_multi_func_cleanup(dev);
3486 
3487 err_cmd:
3488 	mlx4_cmd_cleanup(dev);
3489 
3490 err_sriov:
3491 	if (dev->flags & MLX4_FLAG_SRIOV)
3492 		pci_disable_sriov(pdev);
3493 
3494 	if (!mlx4_is_slave(dev))
3495 		mlx4_free_ownership(dev);
3496 
3497 err_free_dev:
3498 	kfree(priv);
3499 
3500 err_release_regions:
3501 	pci_release_regions(pdev);
3502 
3503 err_disable_pdev:
3504 	pci_disable_device(pdev);
3505 	pci_set_drvdata(pdev, NULL);
3506 	return err;
3507 }
3508 
3509 static int __devinit mlx4_init_one(struct pci_dev *pdev,
3510 				   const struct pci_device_id *id)
3511 {
3512 	device_set_desc(pdev->dev.bsddev, mlx4_version);
3513 	return __mlx4_init_one(pdev, id->driver_data);
3514 }
3515 
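/* Detach path: stop port sensing, unregister the device, close all
 * ports, and then tear down in the reverse order of __mlx4_init_one(). */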
3516 static void mlx4_remove_one(struct pci_dev *pdev)
3517 {
3518 	struct mlx4_dev  *dev  = pci_get_drvdata(pdev);
3519 	struct mlx4_priv *priv = mlx4_priv(dev);
3520 	int p;
3521 
3522 	if (dev) {
3523 		/* In SR-IOV mode it is not allowed to unload the PF's
3524 		 * driver while there are active VFs. */
3525 		if (mlx4_is_master(dev)) {
3526 			if (mlx4_how_many_lives_vf(dev))
3527 				mlx4_err(dev, "Removing PF while there are assigned VFs!\n");
3528 		}
3529 		mlx4_stop_sense(dev);
3530 		mlx4_unregister_device(dev);
3531 
3532 		mlx4_cleanup_hca_info(&priv->hca_info);
3533 		for (p = 1; p <= dev->caps.num_ports; p++) {
3534 			mlx4_cleanup_port_info(&priv->port[p]);
3535 			mlx4_CLOSE_PORT(dev, p);
3536 		}
3537 
3538 		if (mlx4_is_master(dev))
3539 			mlx4_free_resource_tracker(dev,
3540 						   RES_TR_FREE_SLAVES_ONLY);
3541 
3542 		mlx4_cleanup_counters_table(dev);
3543 		mlx4_cleanup_qp_table(dev);
3544 		mlx4_cleanup_srq_table(dev);
3545 		mlx4_cleanup_cq_table(dev);
3546 		mlx4_cmd_use_polling(dev);
3547 		mlx4_cleanup_eq_table(dev);
3548 		mlx4_cleanup_mcg_table(dev);
3549 		mlx4_cleanup_mr_table(dev);
3550 		mlx4_cleanup_xrcd_table(dev);
3551 		mlx4_cleanup_pd_table(dev);
3552 
3553 		if (mlx4_is_master(dev))
3554 			mlx4_free_resource_tracker(dev,
3555 						   RES_TR_FREE_STRUCTS_ONLY);
3556 
3557 		iounmap(priv->kar);
3558 		mlx4_uar_free(dev, &priv->driver_uar);
3559 		mlx4_cleanup_uar_table(dev);
3560 		if (!mlx4_is_slave(dev))
3561 			mlx4_clear_steering(dev);
3562 		mlx4_free_eq_table(dev);
3563 		if (mlx4_is_master(dev))
3564 			mlx4_multi_func_cleanup(dev);
3565 		mlx4_close_hca(dev);
3566 		if (mlx4_is_slave(dev))
3567 			mlx4_multi_func_cleanup(dev);
3568 		mlx4_cmd_cleanup(dev);
3569 
3570 		if (dev->flags & MLX4_FLAG_MSI_X)
3571 			pci_disable_msix(pdev);
3572 		if (dev->flags & MLX4_FLAG_SRIOV) {
3573 			mlx4_warn(dev, "Disabling SR-IOV\n");
3574 			pci_disable_sriov(pdev);
3575 		}
3576 
3577 		if (!mlx4_is_slave(dev))
3578 			mlx4_free_ownership(dev);
3579 
3580 		kfree(dev->caps.qp0_tunnel);
3581 		kfree(dev->caps.qp0_proxy);
3582 		kfree(dev->caps.qp1_tunnel);
3583 		kfree(dev->caps.qp1_proxy);
3584 
3585 		kfree(priv);
3586 		pci_release_regions(pdev);
3587 		pci_disable_device(pdev);
3588 		pci_set_drvdata(pdev, NULL);
3589 	}
3590 }
3591 
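/* Re-apply saved port types after a restart; port sensing is stopped
 * while the types are being changed and restarted afterwards. */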
3592 static int restore_current_port_types(struct mlx4_dev *dev,
3593 				      enum mlx4_port_type *types,
3594 				      enum mlx4_port_type *poss_types)
3595 {
3596 	struct mlx4_priv *priv = mlx4_priv(dev);
3597 	int err, i;
3598 
3599 	mlx4_stop_sense(dev);
3600 	mutex_lock(&priv->port_mutex);
3601 	for (i = 0; i < dev->caps.num_ports; i++)
3602 		dev->caps.possible_type[i + 1] = poss_types[i];
3603 	err = mlx4_change_port_types(dev, types);
3604 	mlx4_start_sense(dev);
3605 	mutex_unlock(&priv->port_mutex);
3606 	return err;
3607 }
3608 
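/* Restart the device by removing and re-probing it, preserving the
 * current and possible port types across the cycle. */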
3609 int mlx4_restart_one(struct pci_dev *pdev)
3610 {
3611 	struct mlx4_dev	 *dev  = pci_get_drvdata(pdev);
3612 	struct mlx4_priv *priv = mlx4_priv(dev);
3613 	enum mlx4_port_type curr_type[MLX4_MAX_PORTS];
3614 	enum mlx4_port_type poss_type[MLX4_MAX_PORTS];
3615 	int pci_dev_data, err, i;
3616 
3617 	pci_dev_data = priv->pci_dev_data;
3618 	for (i = 0; i < dev->caps.num_ports; i++) {
3619 		curr_type[i] = dev->caps.port_type[i + 1];
3620 		poss_type[i] = dev->caps.possible_type[i + 1];
3621 	}
3622 
3623 	mlx4_remove_one(pdev);
3624 	err = __mlx4_init_one(pdev, pci_dev_data);
3625 	if (err)
3626 		return err;
3627 
3628 	dev = pci_get_drvdata(pdev);
3629 	err = restore_current_port_types(dev, curr_type, poss_type);
3630 	if (err)
3631 		mlx4_err(dev, "mlx4_restart_one: could not restore original port types (%d)\n",
3632 			 err);
3633 	return 0;
3634 }
3635 
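/* PCI IDs served by this driver.  MLX4_PCI_DEV_FORCE_SENSE_PORT
 * requests port-type sensing at start-up; MLX4_PCI_DEV_IS_VF marks
 * virtual-function IDs, which are probed in slave mode. */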
3636 static DEFINE_PCI_DEVICE_TABLE(mlx4_pci_table) = {
3637 	/* MT25408 "Hermon" SDR */
3638 	{ PCI_VDEVICE(MELLANOX, 0x6340), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3639 	/* MT25408 "Hermon" DDR */
3640 	{ PCI_VDEVICE(MELLANOX, 0x634a), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3641 	/* MT25408 "Hermon" QDR */
3642 	{ PCI_VDEVICE(MELLANOX, 0x6354), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3643 	/* MT25408 "Hermon" DDR PCIe gen2 */
3644 	{ PCI_VDEVICE(MELLANOX, 0x6732), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3645 	/* MT25408 "Hermon" QDR PCIe gen2 */
3646 	{ PCI_VDEVICE(MELLANOX, 0x673c), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3647 	/* MT25408 "Hermon" EN 10GigE */
3648 	{ PCI_VDEVICE(MELLANOX, 0x6368), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3649 	/* MT25408 "Hermon" EN 10GigE PCIe gen2 */
3650 	{ PCI_VDEVICE(MELLANOX, 0x6750), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3651 	/* MT25458 ConnectX EN 10GBASE-T 10GigE */
3652 	{ PCI_VDEVICE(MELLANOX, 0x6372), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3653 	/* MT25458 ConnectX EN 10GBASE-T+Gen2 10GigE */
3654 	{ PCI_VDEVICE(MELLANOX, 0x675a), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3655 	/* MT26468 ConnectX EN 10GigE PCIe gen2*/
3656 	{ PCI_VDEVICE(MELLANOX, 0x6764), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3657 	/* MT26438 ConnectX EN 40GigE PCIe gen2 5GT/s */
3658 	{ PCI_VDEVICE(MELLANOX, 0x6746), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3659 	/* MT26478 ConnectX2 40GigE PCIe gen2 */
3660 	{ PCI_VDEVICE(MELLANOX, 0x676e), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3661 	/* MT25400 Family [ConnectX-2 Virtual Function] */
3662 	{ PCI_VDEVICE(MELLANOX, 0x1002), MLX4_PCI_DEV_IS_VF },
3663 	/* MT27500 Family [ConnectX-3] */
3664 	{ PCI_VDEVICE(MELLANOX, 0x1003), 0 },
3665 	/* MT27500 Family [ConnectX-3 Virtual Function] */
3666 	{ PCI_VDEVICE(MELLANOX, 0x1004), MLX4_PCI_DEV_IS_VF },
3667 	{ PCI_VDEVICE(MELLANOX, 0x1005), 0 }, /* MT27510 Family */
3668 	{ PCI_VDEVICE(MELLANOX, 0x1006), 0 }, /* MT27511 Family */
3669 	{ PCI_VDEVICE(MELLANOX, 0x1007), 0 }, /* MT27520 Family */
3670 	{ PCI_VDEVICE(MELLANOX, 0x1008), 0 }, /* MT27521 Family */
3671 	{ PCI_VDEVICE(MELLANOX, 0x1009), 0 }, /* MT27530 Family */
3672 	{ PCI_VDEVICE(MELLANOX, 0x100a), 0 }, /* MT27531 Family */
3673 	{ PCI_VDEVICE(MELLANOX, 0x100b), 0 }, /* MT27540 Family */
3674 	{ PCI_VDEVICE(MELLANOX, 0x100c), 0 }, /* MT27541 Family */
3675 	{ PCI_VDEVICE(MELLANOX, 0x100d), 0 }, /* MT27550 Family */
3676 	{ PCI_VDEVICE(MELLANOX, 0x100e), 0 }, /* MT27551 Family */
3677 	{ PCI_VDEVICE(MELLANOX, 0x100f), 0 }, /* MT27560 Family */
3678 	{ PCI_VDEVICE(MELLANOX, 0x1010), 0 }, /* MT27561 Family */
3679 	{ 0, }
3680 };
3681 
3682 MODULE_DEVICE_TABLE(pci, mlx4_pci_table);
3683 
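/* PCI error handlers: a detected channel error removes the device; the
 * subsequent slot reset re-runs the normal probe path. */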
3684 static pci_ers_result_t mlx4_pci_err_detected(struct pci_dev *pdev,
3685 					      pci_channel_state_t state)
3686 {
3687 	mlx4_remove_one(pdev);
3688 
3689 	return state == pci_channel_io_perm_failure ?
3690 		PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
3691 }
3692 
3693 static pci_ers_result_t mlx4_pci_slot_reset(struct pci_dev *pdev)
3694 {
3695 	int ret = __mlx4_init_one(pdev, 0);
3696 
3697 	return ret ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
3698 }
3699 
3700 static const struct pci_error_handlers mlx4_err_handler = {
3701 	.error_detected = mlx4_pci_err_detected,
3702 	.slot_reset     = mlx4_pci_slot_reset,
3703 };
3704 
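/* Suspend and resume are implemented as a full detach and re-probe
 * rather than a lightweight power-state transition. */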
3705 static int suspend(struct pci_dev *pdev, pm_message_t state)
3706 {
3707 	mlx4_remove_one(pdev);
3708 
3709 	return 0;
3710 }
3711 
3712 static int resume(struct pci_dev *pdev)
3713 {
3714 	return __mlx4_init_one(pdev, 0);
3715 }
3716 
3717 static struct pci_driver mlx4_driver = {
3718 	.name		= DRV_NAME,
3719 	.id_table	= mlx4_pci_table,
3720 	.probe		= mlx4_init_one,
3721 	.remove		= __devexit_p(mlx4_remove_one),
3722 	.suspend	= suspend,
3723 	.resume		= resume,
3724 	.err_handler    = &mlx4_err_handler,
3725 };
3726 
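/* Validate module parameters at load time.  Most of the limits below
 * are log2 values (e.g. log_num_mac <= 7 allows up to 128 MACs); a
 * return of -1 aborts module initialization with -EINVAL. */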
3727 static int __init mlx4_verify_params(void)
3728 {
3729 	int status;
3730 
3731 	status = update_defaults(&port_type_array);
3732 	if (status == INVALID_STR) {
3733 		if (mlx4_fill_dbdf2val_tbl(&port_type_array.dbdf2val))
3734 			return -1;
3735 	} else if (status == INVALID_DATA) {
3736 		return -1;
3737 	}
3738 
3739 	status = update_defaults(&num_vfs);
3740 	if (status == INVALID_STR) {
3741 		if (mlx4_fill_dbdf2val_tbl(&num_vfs.dbdf2val))
3742 			return -1;
3743 	} else if (status == INVALID_DATA) {
3744 		return -1;
3745 	}
3746 
3747 	status = update_defaults(&probe_vf);
3748 	if (status == INVALID_STR) {
3749 		if (mlx4_fill_dbdf2val_tbl(&probe_vf.dbdf2val))
3750 			return -1;
3751 	} else if (status == INVALID_DATA) {
3752 		return -1;
3753 	}
3754 
3755 	if (msi_x < 0) {
3756 		pr_warn("mlx4_core: bad msi_x: %d\n", msi_x);
3757 		return -1;
3758 	}
3759 
3760 	if ((log_num_mac < 0) || (log_num_mac > 7)) {
3761 		pr_warning("mlx4_core: bad log_num_mac: %d\n", log_num_mac);
3762 		return -1;
3763 	}
3764 
3765 	if (log_num_vlan != 0)
3766 		pr_warning("mlx4_core: log_num_vlan - obsolete module param, using %d\n",
3767 			   MLX4_LOG_NUM_VLANS);
3768 
3769 	if (mlx4_set_4k_mtu != -1)
3770 		pr_warning("mlx4_core: set_4k_mtu - obsolete module param\n");
3771 
3772 	if ((log_mtts_per_seg < 0) || (log_mtts_per_seg > 7)) {
3773 		pr_warning("mlx4_core: bad log_mtts_per_seg: %d\n", log_mtts_per_seg);
3774 		return -1;
3775 	}
3776 
3777 	if (mlx4_log_num_mgm_entry_size != -1 &&
3778 	    (mlx4_log_num_mgm_entry_size < MLX4_MIN_MGM_LOG_ENTRY_SIZE ||
3779 	     mlx4_log_num_mgm_entry_size > MLX4_MAX_MGM_LOG_ENTRY_SIZE)) {
3780 		pr_warning("mlx4_core: mlx4_log_num_mgm_entry_size (%d) not "
3781 			   "in legal range (-1 or %d..%d)\n",
3782 			   mlx4_log_num_mgm_entry_size,
3783 			   MLX4_MIN_MGM_LOG_ENTRY_SIZE,
3784 			   MLX4_MAX_MGM_LOG_ENTRY_SIZE);
3785 		return -1;
3786 	}
3787 
3788 	if (mod_param_profile.num_qp < 18 || mod_param_profile.num_qp > 23) {
3789 		pr_warning("mlx4_core: bad log_num_qp: %d\n",
3790 			   mod_param_profile.num_qp);
3791 		return -1;
3792 	}
3793 
3794 	if (mod_param_profile.num_srq < 10) {
3795 		pr_warning("mlx4_core: too low log_num_srq: %d\n",
3796 			   mod_param_profile.num_srq);
3797 		return -1;
3798 	}
3799 
3800 	if (mod_param_profile.num_cq < 10) {
3801 		pr_warning("mlx4_core: too low log_num_cq: %d\n",
3802 			   mod_param_profile.num_cq);
3803 		return -1;
3804 	}
3805 
3806 	if (mod_param_profile.num_mpt < 10) {
3807 		pr_warning("mlx4_core: too low log_num_mpt: %d\n",
3808 			   mod_param_profile.num_mpt);
3809 		return -1;
3810 	}
3811 
3812 	if (mod_param_profile.num_mtt_segs &&
3813 	    mod_param_profile.num_mtt_segs < 15) {
3814 		pr_warning("mlx4_core: too low log_num_mtt: %d\n",
3815 			   mod_param_profile.num_mtt_segs);
3816 		return -1;
3817 	}
3818 
3819 	if (mod_param_profile.num_mtt_segs > MLX4_MAX_LOG_NUM_MTT) {
3820 		pr_warning("mlx4_core: too high log_num_mtt: %d\n",
3821 			   mod_param_profile.num_mtt_segs);
3822 		return -1;
3823 	}
3824 	return 0;
3825 }
3826 
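/* Module load: verify parameters, initialize catastrophic-error
 * handling, create the global workqueue, optionally apply the system
 * tuning, and finally register the PCI driver. */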
3827 static int __init mlx4_init(void)
3828 {
3829 	int ret;
3830 
3831 	if (mlx4_verify_params())
3832 		return -EINVAL;
3833 
3834 	mlx4_catas_init();
3835 
3836 	mlx4_wq = create_singlethread_workqueue("mlx4");
3837 	if (!mlx4_wq)
3838 		return -ENOMEM;
3839 
3840 	if (enable_sys_tune)
3841 		sys_tune_init();
3842 
3843 	ret = pci_register_driver(&mlx4_driver);
3844 	if (ret < 0)
3845 		goto err;
3846 
3847 	return 0;
3848 
3849 err:
3850 	if (enable_sys_tune)
3851 		sys_tune_fini();
3852 
3853 	destroy_workqueue(mlx4_wq);
3854 
3855 	return ret;
3856 }
3857 
3858 static void __exit mlx4_cleanup(void)
3859 {
3860 	if (enable_sys_tune)
3861 		sys_tune_fini();
3862 
3863 	pci_unregister_driver(&mlx4_driver);
3864 	destroy_workqueue(mlx4_wq);
3865 }
3866 
3867 module_init_order(mlx4_init, SI_ORDER_MIDDLE);
3868 module_exit(mlx4_cleanup);
3869 
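/* FreeBSD module glue: mlx4 is declared at SI_SUB_OFED_PREINIT with a
 * no-op event handler; the real initialization is done by mlx4_init(),
 * hooked in via module_init_order() above. */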
3870 static int
3871 mlx4_evhand(module_t mod, int event, void *arg)
3872 {
3873 	return (0);
3874 }
3875 
3876 static moduledata_t mlx4_mod = {
3877 	.name = "mlx4",
3878 	.evhand = mlx4_evhand,
3879 };
3880 MODULE_VERSION(mlx4, 1);
3881 DECLARE_MODULE(mlx4, mlx4_mod, SI_SUB_OFED_PREINIT, SI_ORDER_ANY);
3882 MODULE_DEPEND(mlx4, linuxkpi, 1, 1, 1);
3883 
3884