1 /* 2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. 3 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. 4 * Copyright (c) 2005, 2006, 2007, 2008, 2014 Mellanox Technologies. All rights reserved. 5 * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. 6 * 7 * This software is available to you under a choice of one of two 8 * licenses. You may choose to be licensed under the terms of the GNU 9 * General Public License (GPL) Version 2, available from the file 10 * COPYING in the main directory of this source tree, or the 11 * OpenIB.org BSD license below: 12 * 13 * Redistribution and use in source and binary forms, with or 14 * without modification, are permitted provided that the following 15 * conditions are met: 16 * 17 * - Redistributions of source code must retain the above 18 * copyright notice, this list of conditions and the following 19 * disclaimer. 20 * 21 * - Redistributions in binary form must reproduce the above 22 * copyright notice, this list of conditions and the following 23 * disclaimer in the documentation and/or other materials 24 * provided with the distribution. 25 * 26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 33 * SOFTWARE. 
34 */ 35 36 #define LINUXKPI_PARAM_PREFIX mlx4_ 37 38 #include <linux/kmod.h> 39 #include <linux/module.h> 40 #include <linux/errno.h> 41 #include <linux/pci.h> 42 #include <linux/dma-mapping.h> 43 #include <linux/slab.h> 44 #include <linux/io-mapping.h> 45 #include <linux/delay.h> 46 #include <linux/netdevice.h> 47 #include <linux/string.h> 48 #include <linux/fs.h> 49 50 #include <dev/mlx4/device.h> 51 #include <dev/mlx4/doorbell.h> 52 53 #include "mlx4.h" 54 #include "fw.h" 55 #include "icm.h" 56 #include <dev/mlx4/stats.h> 57 58 /* Mellanox ConnectX HCA low-level driver */ 59 60 struct workqueue_struct *mlx4_wq; 61 62 #ifdef CONFIG_MLX4_DEBUG 63 64 int mlx4_debug_level = 0; 65 module_param_named(debug_level, mlx4_debug_level, int, 0644); 66 MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0"); 67 68 #endif /* CONFIG_MLX4_DEBUG */ 69 70 #ifdef CONFIG_PCI_MSI 71 72 static int msi_x = 1; 73 module_param(msi_x, int, 0444); 74 MODULE_PARM_DESC(msi_x, "0 - don't use MSI-X, 1 - use MSI-X, >1 - limit number of MSI-X irqs to msi_x (non-SRIOV only)"); 75 76 #else /* CONFIG_PCI_MSI */ 77 78 #define msi_x (0) 79 80 #endif /* CONFIG_PCI_MSI */ 81 82 static int enable_sys_tune = 0; 83 module_param(enable_sys_tune, int, 0444); 84 MODULE_PARM_DESC(enable_sys_tune, "Tune the cpu's for better performance (default 0)"); 85 86 int mlx4_blck_lb = 1; 87 module_param_named(block_loopback, mlx4_blck_lb, int, 0644); 88 MODULE_PARM_DESC(block_loopback, "Block multicast loopback packets if > 0 " 89 "(default: 1)"); 90 enum { 91 DEFAULT_DOMAIN = 0, 92 BDF_STR_SIZE = 8, /* bb:dd.f- */ 93 DBDF_STR_SIZE = 13 /* mmmm:bb:dd.f- */ 94 }; 95 96 enum { 97 NUM_VFS, 98 PROBE_VF, 99 PORT_TYPE_ARRAY 100 }; 101 102 enum { 103 VALID_DATA, 104 INVALID_DATA, 105 INVALID_STR 106 }; 107 108 struct param_data { 109 int id; 110 struct mlx4_dbdf2val_lst dbdf2val; 111 }; 112 113 static struct param_data num_vfs = { 114 .id = NUM_VFS, 115 .dbdf2val = { 116 .name = "num_vfs param", 117 .num_vals = 1, 118 
.def_val = {0}, 119 .range = {0, MLX4_MAX_NUM_VF} 120 } 121 }; 122 module_param_string(num_vfs, num_vfs.dbdf2val.str, 123 sizeof(num_vfs.dbdf2val.str), 0444); 124 MODULE_PARM_DESC(num_vfs, 125 "Either single value (e.g. '5') to define uniform num_vfs value for all devices functions\n" 126 "\t\tor a string to map device function numbers to their num_vfs values (e.g. '0000:04:00.0-5,002b:1c:0b.a-15').\n" 127 "\t\tHexadecimal digits for the device function (e.g. 002b:1c:0b.a) and decimal for num_vfs value (e.g. 15)."); 128 129 static struct param_data probe_vf = { 130 .id = PROBE_VF, 131 .dbdf2val = { 132 .name = "probe_vf param", 133 .num_vals = 1, 134 .def_val = {0}, 135 .range = {0, MLX4_MAX_NUM_VF} 136 } 137 }; 138 module_param_string(probe_vf, probe_vf.dbdf2val.str, 139 sizeof(probe_vf.dbdf2val.str), 0444); 140 MODULE_PARM_DESC(probe_vf, 141 "Either single value (e.g. '3') to define uniform number of VFs to probe by the pf driver for all devices functions\n" 142 "\t\tor a string to map device function numbers to their probe_vf values (e.g. '0000:04:00.0-3,002b:1c:0b.a-13').\n" 143 "\t\tHexadecimal digits for the device function (e.g. 002b:1c:0b.a) and decimal for probe_vf value (e.g. 13)."); 144 145 int mlx4_log_num_mgm_entry_size = MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE; 146 147 module_param_named(log_num_mgm_entry_size, 148 mlx4_log_num_mgm_entry_size, int, 0444); 149 MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, that defines the num" 150 " of qp per mcg, for example:" 151 " 10 gives 248.range: 7 <=" 152 " log_num_mgm_entry_size <= 12." 
153 " To activate device managed" 154 " flow steering when available, set to -1"); 155 156 static int high_rate_steer; 157 module_param(high_rate_steer, int, 0444); 158 MODULE_PARM_DESC(high_rate_steer, "Enable steering mode for higher packet rate" 159 " (default off)"); 160 161 static int fast_drop; 162 module_param_named(fast_drop, fast_drop, int, 0444); 163 MODULE_PARM_DESC(fast_drop, 164 "Enable fast packet drop when no receive WQEs are posted"); 165 166 int mlx4_enable_64b_cqe_eqe = 1; 167 module_param_named(enable_64b_cqe_eqe, mlx4_enable_64b_cqe_eqe, int, 0644); 168 MODULE_PARM_DESC(enable_64b_cqe_eqe, 169 "Enable 64 byte CQEs/EQEs when the FW supports this if non-zero (default: 1)"); 170 171 #define HCA_GLOBAL_CAP_MASK 0 172 173 #define PF_CONTEXT_BEHAVIOUR_MASK MLX4_FUNC_CAP_64B_EQE_CQE 174 175 static char mlx4_version[] __devinitdata = 176 DRV_NAME ": Mellanox ConnectX VPI driver v" 177 DRV_VERSION " (" DRV_RELDATE ")\n"; 178 179 static int log_num_mac = 7; 180 module_param_named(log_num_mac, log_num_mac, int, 0444); 181 MODULE_PARM_DESC(log_num_mac, "Log2 max number of MACs per ETH port (1-7)"); 182 183 static int log_num_vlan; 184 module_param_named(log_num_vlan, log_num_vlan, int, 0444); 185 MODULE_PARM_DESC(log_num_vlan, 186 "(Obsolete) Log2 max number of VLANs per ETH port (0-7)"); 187 /* Log2 max number of VLANs per ETH port (0-7) */ 188 #define MLX4_LOG_NUM_VLANS 7 189 190 int log_mtts_per_seg = ilog2(1); 191 module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444); 192 MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment " 193 "(0-7) (default: 0)"); 194 195 static struct param_data port_type_array = { 196 .id = PORT_TYPE_ARRAY, 197 .dbdf2val = { 198 .name = "port_type_array param", 199 .num_vals = 2, 200 .def_val = {MLX4_PORT_TYPE_ETH, MLX4_PORT_TYPE_ETH}, 201 .range = {MLX4_PORT_TYPE_IB, MLX4_PORT_TYPE_NA} 202 } 203 }; 204 module_param_string(port_type_array, port_type_array.dbdf2val.str, 205 
sizeof(port_type_array.dbdf2val.str), 0444); 206 MODULE_PARM_DESC(port_type_array, 207 "Either pair of values (e.g. '1,2') to define uniform port1/port2 types configuration for all devices functions\n" 208 "\t\tor a string to map device function numbers to their pair of port types values (e.g. '0000:04:00.0-1;2,002b:1c:0b.a-1;1').\n" 209 "\t\tValid port types: 1-ib, 2-eth, 3-auto, 4-N/A\n" 210 "\t\tIn case that only one port is available use the N/A port type for port2 (e.g '1,4')."); 211 212 213 struct mlx4_port_config { 214 struct list_head list; 215 enum mlx4_port_type port_type[MLX4_MAX_PORTS + 1]; 216 struct pci_dev *pdev; 217 }; 218 219 #define MLX4_LOG_NUM_MTT 20 220 /* We limit to 30 as of a bit map issue which uses int and not uint. 221 see mlx4_buddy_init -> bitmap_zero which gets int. 222 */ 223 #define MLX4_MAX_LOG_NUM_MTT 30 224 static struct mlx4_profile mod_param_profile = { 225 .num_qp = 19, 226 .num_srq = 16, 227 .rdmarc_per_qp = 4, 228 .num_cq = 16, 229 .num_mcg = 13, 230 .num_mpt = 19, 231 .num_mtt_segs = 0, /* max(20, 2*MTTs for host memory)) */ 232 }; 233 234 module_param_named(log_num_qp, mod_param_profile.num_qp, int, 0444); 235 MODULE_PARM_DESC(log_num_qp, "log maximum number of QPs per HCA (default: 19)"); 236 237 module_param_named(log_num_srq, mod_param_profile.num_srq, int, 0444); 238 MODULE_PARM_DESC(log_num_srq, "log maximum number of SRQs per HCA " 239 "(default: 16)"); 240 241 module_param_named(log_rdmarc_per_qp, mod_param_profile.rdmarc_per_qp, int, 242 0444); 243 MODULE_PARM_DESC(log_rdmarc_per_qp, "log number of RDMARC buffers per QP " 244 "(default: 4)"); 245 246 module_param_named(log_num_cq, mod_param_profile.num_cq, int, 0444); 247 MODULE_PARM_DESC(log_num_cq, "log maximum number of CQs per HCA (default: 16)"); 248 249 module_param_named(log_num_mcg, mod_param_profile.num_mcg, int, 0444); 250 MODULE_PARM_DESC(log_num_mcg, "log maximum number of multicast groups per HCA " 251 "(default: 13)"); 252 253 
module_param_named(log_num_mpt, mod_param_profile.num_mpt, int, 0444); 254 MODULE_PARM_DESC(log_num_mpt, 255 "log maximum number of memory protection table entries per " 256 "HCA (default: 19)"); 257 258 module_param_named(log_num_mtt, mod_param_profile.num_mtt_segs, int, 0444); 259 MODULE_PARM_DESC(log_num_mtt, 260 "log maximum number of memory translation table segments per " 261 "HCA (default: max(20, 2*MTTs for register all of the host memory limited to 30))"); 262 263 enum { 264 MLX4_IF_STATE_BASIC, 265 MLX4_IF_STATE_EXTENDED 266 }; 267 268 static inline u64 dbdf_to_u64(int domain, int bus, int dev, int fn) 269 { 270 return (domain << 20) | (bus << 12) | (dev << 4) | fn; 271 } 272 273 static inline void pr_bdf_err(const char *dbdf, const char *pname) 274 { 275 pr_warn("mlx4_core: '%s' is not valid bdf in '%s'\n", dbdf, pname); 276 } 277 278 static inline void pr_val_err(const char *dbdf, const char *pname, 279 const char *val) 280 { 281 pr_warn("mlx4_core: value '%s' of bdf '%s' in '%s' is not valid\n" 282 , val, dbdf, pname); 283 } 284 285 static inline void pr_out_of_range_bdf(const char *dbdf, int val, 286 struct mlx4_dbdf2val_lst *dbdf2val) 287 { 288 pr_warn("mlx4_core: value %d in bdf '%s' of '%s' is out of its valid range (%d,%d)\n" 289 , val, dbdf, dbdf2val->name , dbdf2val->range.min, 290 dbdf2val->range.max); 291 } 292 293 static inline void pr_out_of_range(struct mlx4_dbdf2val_lst *dbdf2val) 294 { 295 pr_warn("mlx4_core: value of '%s' is out of its valid range (%d,%d)\n" 296 , dbdf2val->name , dbdf2val->range.min, dbdf2val->range.max); 297 } 298 299 static inline int is_in_range(int val, struct mlx4_range *r) 300 { 301 return (val >= r->min && val <= r->max); 302 } 303 304 static int update_defaults(struct param_data *pdata) 305 { 306 long int val[MLX4_MAX_BDF_VALS]; 307 int ret; 308 char *t, *p = pdata->dbdf2val.str; 309 char sval[32]; 310 int val_len; 311 312 if (!strlen(p) || strchr(p, ':') || strchr(p, '.') || strchr(p, ';')) 313 return 
INVALID_STR; 314 315 switch (pdata->id) { 316 case PORT_TYPE_ARRAY: 317 t = strchr(p, ','); 318 if (!t || t == p || (t - p) > sizeof(sval)) 319 return INVALID_STR; 320 321 val_len = t - p; 322 strncpy(sval, p, val_len); 323 sval[val_len] = 0; 324 325 ret = kstrtol(sval, 0, &val[0]); 326 if (ret == -EINVAL) 327 return INVALID_STR; 328 if (ret || !is_in_range(val[0], &pdata->dbdf2val.range)) { 329 pr_out_of_range(&pdata->dbdf2val); 330 return INVALID_DATA; 331 } 332 333 ret = kstrtol(t + 1, 0, &val[1]); 334 if (ret == -EINVAL) 335 return INVALID_STR; 336 if (ret || !is_in_range(val[1], &pdata->dbdf2val.range)) { 337 pr_out_of_range(&pdata->dbdf2val); 338 return INVALID_DATA; 339 } 340 341 pdata->dbdf2val.tbl[0].val[0] = val[0]; 342 pdata->dbdf2val.tbl[0].val[1] = val[1]; 343 break; 344 345 case NUM_VFS: 346 case PROBE_VF: 347 ret = kstrtol(p, 0, &val[0]); 348 if (ret == -EINVAL) 349 return INVALID_STR; 350 if (ret || !is_in_range(val[0], &pdata->dbdf2val.range)) { 351 pr_out_of_range(&pdata->dbdf2val); 352 return INVALID_DATA; 353 } 354 pdata->dbdf2val.tbl[0].val[0] = val[0]; 355 break; 356 } 357 pdata->dbdf2val.tbl[1].dbdf = MLX4_ENDOF_TBL; 358 359 return VALID_DATA; 360 } 361 362 int mlx4_fill_dbdf2val_tbl(struct mlx4_dbdf2val_lst *dbdf2val_lst) 363 { 364 int domain, bus, dev, fn; 365 u64 dbdf; 366 char *p, *t, *v; 367 char tmp[32]; 368 char sbdf[32]; 369 char sep = ','; 370 int j, k, str_size, i = 1; 371 int prfx_size; 372 373 p = dbdf2val_lst->str; 374 375 for (j = 0; j < dbdf2val_lst->num_vals; j++) 376 dbdf2val_lst->tbl[0].val[j] = dbdf2val_lst->def_val[j]; 377 dbdf2val_lst->tbl[1].dbdf = MLX4_ENDOF_TBL; 378 379 str_size = strlen(dbdf2val_lst->str); 380 381 if (str_size == 0) 382 return 0; 383 384 while (strlen(p)) { 385 prfx_size = BDF_STR_SIZE; 386 sbdf[prfx_size] = 0; 387 strncpy(sbdf, p, prfx_size); 388 domain = DEFAULT_DOMAIN; 389 if (sscanf(sbdf, "%02x:%02x.%x-", &bus, &dev, &fn) != 3) { 390 prfx_size = DBDF_STR_SIZE; 391 sbdf[prfx_size] = 0; 392 
strncpy(sbdf, p, prfx_size); 393 if (sscanf(sbdf, "%04x:%02x:%02x.%x-", &domain, &bus, 394 &dev, &fn) != 4) { 395 pr_bdf_err(sbdf, dbdf2val_lst->name); 396 goto err; 397 } 398 sprintf(tmp, "%04x:%02x:%02x.%x-", domain, bus, dev, 399 fn); 400 } else { 401 sprintf(tmp, "%02x:%02x.%x-", bus, dev, fn); 402 } 403 404 if (strnicmp(sbdf, tmp, sizeof(tmp))) { 405 pr_bdf_err(sbdf, dbdf2val_lst->name); 406 goto err; 407 } 408 409 dbdf = dbdf_to_u64(domain, bus, dev, fn); 410 411 for (j = 1; j < i; j++) 412 if (dbdf2val_lst->tbl[j].dbdf == dbdf) { 413 pr_warn("mlx4_core: in '%s', %s appears multiple times\n" 414 , dbdf2val_lst->name, sbdf); 415 goto err; 416 } 417 418 if (i >= MLX4_DEVS_TBL_SIZE) { 419 pr_warn("mlx4_core: Too many devices in '%s'\n" 420 , dbdf2val_lst->name); 421 goto err; 422 } 423 424 p += prfx_size; 425 t = strchr(p, sep); 426 t = t ? t : p + strlen(p); 427 if (p >= t) { 428 pr_val_err(sbdf, dbdf2val_lst->name, ""); 429 goto err; 430 } 431 432 for (k = 0; k < dbdf2val_lst->num_vals; k++) { 433 char sval[32]; 434 long int val; 435 int ret, val_len; 436 char vsep = ';'; 437 438 v = (k == dbdf2val_lst->num_vals - 1) ? 
t : strchr(p, vsep); 439 if (!v || v > t || v == p || (v - p) > sizeof(sval)) { 440 pr_val_err(sbdf, dbdf2val_lst->name, p); 441 goto err; 442 } 443 val_len = v - p; 444 strncpy(sval, p, val_len); 445 sval[val_len] = 0; 446 447 ret = kstrtol(sval, 0, &val); 448 if (ret) { 449 if (strchr(p, vsep)) 450 pr_warn("mlx4_core: too many vals in bdf '%s' of '%s'\n" 451 , sbdf, dbdf2val_lst->name); 452 else 453 pr_val_err(sbdf, dbdf2val_lst->name, 454 sval); 455 goto err; 456 } 457 if (!is_in_range(val, &dbdf2val_lst->range)) { 458 pr_out_of_range_bdf(sbdf, val, dbdf2val_lst); 459 goto err; 460 } 461 462 dbdf2val_lst->tbl[i].val[k] = val; 463 p = v; 464 if (p[0] == vsep) 465 p++; 466 } 467 468 dbdf2val_lst->tbl[i].dbdf = dbdf; 469 if (strlen(p)) { 470 if (p[0] != sep) { 471 pr_warn("mlx4_core: expect separator '%c' before '%s' in '%s'\n" 472 , sep, p, dbdf2val_lst->name); 473 goto err; 474 } 475 p++; 476 } 477 i++; 478 if (i < MLX4_DEVS_TBL_SIZE) 479 dbdf2val_lst->tbl[i].dbdf = MLX4_ENDOF_TBL; 480 } 481 482 return 0; 483 484 err: 485 dbdf2val_lst->tbl[1].dbdf = MLX4_ENDOF_TBL; 486 pr_warn("mlx4_core: The value of '%s' is incorrect. 
The value is discarded!\n" 487 , dbdf2val_lst->name); 488 489 return -EINVAL; 490 } 491 EXPORT_SYMBOL(mlx4_fill_dbdf2val_tbl); 492 493 int mlx4_get_val(struct mlx4_dbdf2val *tbl, struct pci_dev *pdev, int idx, 494 int *val) 495 { 496 u64 dbdf; 497 int i = 1; 498 499 *val = tbl[0].val[idx]; 500 if (!pdev) 501 return -EINVAL; 502 503 dbdf = dbdf_to_u64(pci_get_domain(pdev->dev.bsddev), pci_get_bus(pdev->dev.bsddev), 504 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); 505 506 while ((i < MLX4_DEVS_TBL_SIZE) && (tbl[i].dbdf != MLX4_ENDOF_TBL)) { 507 if (tbl[i].dbdf == dbdf) { 508 *val = tbl[i].val[idx]; 509 return 0; 510 } 511 i++; 512 } 513 514 return 0; 515 } 516 EXPORT_SYMBOL(mlx4_get_val); 517 518 static void process_mod_param_profile(struct mlx4_profile *profile) 519 { 520 vm_size_t hwphyssz; 521 hwphyssz = 0; 522 TUNABLE_ULONG_FETCH("hw.realmem", (u_long *) &hwphyssz); 523 524 profile->num_qp = 1 << mod_param_profile.num_qp; 525 profile->num_srq = 1 << mod_param_profile.num_srq; 526 profile->rdmarc_per_qp = 1 << mod_param_profile.rdmarc_per_qp; 527 profile->num_cq = 1 << mod_param_profile.num_cq; 528 profile->num_mcg = 1 << mod_param_profile.num_mcg; 529 profile->num_mpt = 1 << mod_param_profile.num_mpt; 530 /* 531 * We want to scale the number of MTTs with the size of the 532 * system memory, since it makes sense to register a lot of 533 * memory on a system with a lot of memory. As a heuristic, 534 * make sure we have enough MTTs to register twice the system 535 * memory (with PAGE_SIZE entries). 536 * 537 * This number has to be a power of two and fit into 32 bits 538 * due to device limitations. We cap this at 2^30 as of bit map 539 * limitation to work with int instead of uint (mlx4_buddy_init -> bitmap_zero) 540 * That limits us to 4TB of memory registration per HCA with 541 * 4KB pages, which is probably OK for the next few months. 
542 */ 543 if (mod_param_profile.num_mtt_segs) 544 profile->num_mtt_segs = 1 << mod_param_profile.num_mtt_segs; 545 else { 546 profile->num_mtt_segs = 547 roundup_pow_of_two(max_t(unsigned, 548 1 << (MLX4_LOG_NUM_MTT - log_mtts_per_seg), 549 min(1UL << 550 (MLX4_MAX_LOG_NUM_MTT - 551 log_mtts_per_seg), 552 (hwphyssz << 1) 553 >> log_mtts_per_seg))); 554 /* set the actual value, so it will be reflected to the user 555 using the sysfs */ 556 mod_param_profile.num_mtt_segs = ilog2(profile->num_mtt_segs); 557 } 558 } 559 560 int mlx4_check_port_params(struct mlx4_dev *dev, 561 enum mlx4_port_type *port_type) 562 { 563 int i; 564 565 for (i = 0; i < dev->caps.num_ports - 1; i++) { 566 if (port_type[i] != port_type[i + 1]) { 567 if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP)) { 568 mlx4_err(dev, "Only same port types supported " 569 "on this HCA, aborting.\n"); 570 return -EINVAL; 571 } 572 } 573 } 574 575 for (i = 0; i < dev->caps.num_ports; i++) { 576 if (!(port_type[i] & dev->caps.supported_type[i+1])) { 577 mlx4_err(dev, "Requested port type for port %d is not " 578 "supported on this HCA\n", i + 1); 579 return -EINVAL; 580 } 581 } 582 return 0; 583 } 584 585 static void mlx4_set_port_mask(struct mlx4_dev *dev) 586 { 587 int i; 588 589 for (i = 1; i <= dev->caps.num_ports; ++i) 590 dev->caps.port_mask[i] = dev->caps.port_type[i]; 591 } 592 593 static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) 594 { 595 int err; 596 int i; 597 598 err = mlx4_QUERY_DEV_CAP(dev, dev_cap); 599 if (err) { 600 mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n"); 601 return err; 602 } 603 604 if (dev_cap->min_page_sz > PAGE_SIZE) { 605 mlx4_err(dev, "HCA minimum page size of %d bigger than " 606 "kernel PAGE_SIZE of %d, aborting.\n", 607 dev_cap->min_page_sz, (int)PAGE_SIZE); 608 return -ENODEV; 609 } 610 if (dev_cap->num_ports > MLX4_MAX_PORTS) { 611 mlx4_err(dev, "HCA has %d ports, but we only support %d, " 612 "aborting.\n", 613 dev_cap->num_ports, 
MLX4_MAX_PORTS); 614 return -ENODEV; 615 } 616 617 if (dev_cap->uar_size > pci_resource_len(dev->pdev, 2)) { 618 mlx4_err(dev, "HCA reported UAR size of 0x%x bigger than " 619 "PCI resource 2 size of 0x%llx, aborting.\n", 620 dev_cap->uar_size, 621 (unsigned long long) pci_resource_len(dev->pdev, 2)); 622 return -ENODEV; 623 } 624 625 dev->caps.num_ports = dev_cap->num_ports; 626 dev->phys_caps.num_phys_eqs = MLX4_MAX_EQ_NUM; 627 for (i = 1; i <= dev->caps.num_ports; ++i) { 628 dev->caps.vl_cap[i] = dev_cap->max_vl[i]; 629 dev->caps.ib_mtu_cap[i] = dev_cap->ib_mtu[i]; 630 dev->phys_caps.gid_phys_table_len[i] = dev_cap->max_gids[i]; 631 dev->phys_caps.pkey_phys_table_len[i] = dev_cap->max_pkeys[i]; 632 /* set gid and pkey table operating lengths by default 633 * to non-sriov values */ 634 dev->caps.gid_table_len[i] = dev_cap->max_gids[i]; 635 dev->caps.pkey_table_len[i] = dev_cap->max_pkeys[i]; 636 dev->caps.port_width_cap[i] = dev_cap->max_port_width[i]; 637 dev->caps.eth_mtu_cap[i] = dev_cap->eth_mtu[i]; 638 dev->caps.def_mac[i] = dev_cap->def_mac[i]; 639 dev->caps.supported_type[i] = dev_cap->supported_port_types[i]; 640 dev->caps.suggested_type[i] = dev_cap->suggested_type[i]; 641 dev->caps.default_sense[i] = dev_cap->default_sense[i]; 642 dev->caps.trans_type[i] = dev_cap->trans_type[i]; 643 dev->caps.vendor_oui[i] = dev_cap->vendor_oui[i]; 644 dev->caps.wavelength[i] = dev_cap->wavelength[i]; 645 dev->caps.trans_code[i] = dev_cap->trans_code[i]; 646 } 647 648 dev->caps.uar_page_size = PAGE_SIZE; 649 dev->caps.num_uars = dev_cap->uar_size / PAGE_SIZE; 650 dev->caps.local_ca_ack_delay = dev_cap->local_ca_ack_delay; 651 dev->caps.bf_reg_size = dev_cap->bf_reg_size; 652 dev->caps.bf_regs_per_page = dev_cap->bf_regs_per_page; 653 dev->caps.max_sq_sg = dev_cap->max_sq_sg; 654 dev->caps.max_rq_sg = dev_cap->max_rq_sg; 655 dev->caps.max_wqes = dev_cap->max_qp_sz; 656 dev->caps.max_qp_init_rdma = dev_cap->max_requester_per_qp; 657 dev->caps.max_srq_wqes = 
dev_cap->max_srq_sz; 658 dev->caps.max_srq_sge = dev_cap->max_rq_sg - 1; 659 dev->caps.reserved_srqs = dev_cap->reserved_srqs; 660 dev->caps.max_sq_desc_sz = dev_cap->max_sq_desc_sz; 661 dev->caps.max_rq_desc_sz = dev_cap->max_rq_desc_sz; 662 /* 663 * Subtract 1 from the limit because we need to allocate a 664 * spare CQE to enable resizing the CQ 665 */ 666 dev->caps.max_cqes = dev_cap->max_cq_sz - 1; 667 dev->caps.reserved_cqs = dev_cap->reserved_cqs; 668 dev->caps.reserved_eqs = dev_cap->reserved_eqs; 669 dev->caps.reserved_mtts = dev_cap->reserved_mtts; 670 dev->caps.reserved_mrws = dev_cap->reserved_mrws; 671 672 /* The first 128 UARs are used for EQ doorbells */ 673 dev->caps.reserved_uars = max_t(int, 128, dev_cap->reserved_uars); 674 dev->caps.reserved_pds = dev_cap->reserved_pds; 675 dev->caps.reserved_xrcds = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ? 676 dev_cap->reserved_xrcds : 0; 677 dev->caps.max_xrcds = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ? 678 dev_cap->max_xrcds : 0; 679 dev->caps.mtt_entry_sz = dev_cap->mtt_entry_sz; 680 681 dev->caps.max_msg_sz = dev_cap->max_msg_sz; 682 dev->caps.page_size_cap = ~(u32) (dev_cap->min_page_sz - 1); 683 dev->caps.flags = dev_cap->flags; 684 dev->caps.flags2 = dev_cap->flags2; 685 dev->caps.bmme_flags = dev_cap->bmme_flags; 686 dev->caps.reserved_lkey = dev_cap->reserved_lkey; 687 dev->caps.stat_rate_support = dev_cap->stat_rate_support; 688 dev->caps.cq_timestamp = dev_cap->timestamp_support; 689 dev->caps.max_gso_sz = dev_cap->max_gso_sz; 690 dev->caps.max_rss_tbl_sz = dev_cap->max_rss_tbl_sz; 691 692 /* Sense port always allowed on supported devices for ConnectX-1 and -2 */ 693 if (mlx4_priv(dev)->pci_dev_data & MLX4_PCI_DEV_FORCE_SENSE_PORT) 694 dev->caps.flags |= MLX4_DEV_CAP_FLAG_SENSE_SUPPORT; 695 /* Don't do sense port on multifunction devices (for now at least) */ 696 if (mlx4_is_mfunc(dev)) 697 dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_SENSE_SUPPORT; 698 699 dev->caps.log_num_macs = log_num_mac; 700 
dev->caps.log_num_vlans = MLX4_LOG_NUM_VLANS; 701 702 dev->caps.fast_drop = fast_drop ? 703 !!(dev->caps.flags & MLX4_DEV_CAP_FLAG_FAST_DROP) : 704 0; 705 706 for (i = 1; i <= dev->caps.num_ports; ++i) { 707 dev->caps.port_type[i] = MLX4_PORT_TYPE_NONE; 708 if (dev->caps.supported_type[i]) { 709 /* if only ETH is supported - assign ETH */ 710 if (dev->caps.supported_type[i] == MLX4_PORT_TYPE_ETH) 711 dev->caps.port_type[i] = MLX4_PORT_TYPE_ETH; 712 /* if only IB is supported, assign IB */ 713 else if (dev->caps.supported_type[i] == 714 MLX4_PORT_TYPE_IB) 715 dev->caps.port_type[i] = MLX4_PORT_TYPE_IB; 716 else { 717 /* 718 * if IB and ETH are supported, we set the port 719 * type according to user selection of port type; 720 * if there is no user selection, take the FW hint 721 */ 722 int pta; 723 mlx4_get_val(port_type_array.dbdf2val.tbl, 724 pci_physfn(dev->pdev), i - 1, 725 &pta); 726 if (pta == MLX4_PORT_TYPE_NONE) { 727 dev->caps.port_type[i] = dev->caps.suggested_type[i] ? 728 MLX4_PORT_TYPE_ETH : MLX4_PORT_TYPE_IB; 729 } else if (pta == MLX4_PORT_TYPE_NA) { 730 mlx4_err(dev, "Port %d is valid port. " 731 "It is not allowed to configure its type to N/A(%d)\n", 732 i, MLX4_PORT_TYPE_NA); 733 return -EINVAL; 734 } else { 735 dev->caps.port_type[i] = pta; 736 } 737 } 738 } 739 /* 740 * Link sensing is allowed on the port if 3 conditions are true: 741 * 1. Both protocols are supported on the port. 742 * 2. Different types are supported on the port 743 * 3. 
FW declared that it supports link sensing 744 */ 745 mlx4_priv(dev)->sense.sense_allowed[i] = 746 ((dev->caps.supported_type[i] == MLX4_PORT_TYPE_AUTO) && 747 (dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) && 748 (dev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT)); 749 750 /* Disablling auto sense for default Eth ports support */ 751 mlx4_priv(dev)->sense.sense_allowed[i] = 0; 752 753 /* 754 * If "default_sense" bit is set, we move the port to "AUTO" mode 755 * and perform sense_port FW command to try and set the correct 756 * port type from beginning 757 */ 758 if (mlx4_priv(dev)->sense.sense_allowed[i] && dev->caps.default_sense[i]) { 759 enum mlx4_port_type sensed_port = MLX4_PORT_TYPE_NONE; 760 dev->caps.possible_type[i] = MLX4_PORT_TYPE_AUTO; 761 mlx4_SENSE_PORT(dev, i, &sensed_port); 762 if (sensed_port != MLX4_PORT_TYPE_NONE) 763 dev->caps.port_type[i] = sensed_port; 764 } else { 765 dev->caps.possible_type[i] = dev->caps.port_type[i]; 766 } 767 768 if (dev->caps.log_num_macs > dev_cap->log_max_macs[i]) { 769 dev->caps.log_num_macs = dev_cap->log_max_macs[i]; 770 mlx4_warn(dev, "Requested number of MACs is too much " 771 "for port %d, reducing to %d.\n", 772 i, 1 << dev->caps.log_num_macs); 773 } 774 if (dev->caps.log_num_vlans > dev_cap->log_max_vlans[i]) { 775 dev->caps.log_num_vlans = dev_cap->log_max_vlans[i]; 776 mlx4_warn(dev, "Requested number of VLANs is too much " 777 "for port %d, reducing to %d.\n", 778 i, 1 << dev->caps.log_num_vlans); 779 } 780 } 781 782 dev->caps.max_basic_counters = dev_cap->max_basic_counters; 783 dev->caps.max_extended_counters = dev_cap->max_extended_counters; 784 /* support extended counters if available */ 785 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS_EXT) 786 dev->caps.max_counters = dev->caps.max_extended_counters; 787 else 788 dev->caps.max_counters = dev->caps.max_basic_counters; 789 790 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] = dev_cap->reserved_qps; 791 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] = 
792 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] = 793 (1 << dev->caps.log_num_macs) * 794 (1 << dev->caps.log_num_vlans) * 795 dev->caps.num_ports; 796 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH] = MLX4_NUM_FEXCH; 797 798 dev->caps.reserved_qps = dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] + 799 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] + 800 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] + 801 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH]; 802 803 dev->caps.sync_qp = dev_cap->sync_qp; 804 if (dev->pdev->device == 0x1003) 805 dev->caps.cq_flags |= MLX4_DEV_CAP_CQ_FLAG_IO; 806 807 dev->caps.sqp_demux = (mlx4_is_master(dev)) ? MLX4_MAX_NUM_SLAVES : 0; 808 809 if (!mlx4_enable_64b_cqe_eqe && !mlx4_is_slave(dev)) { 810 if (dev_cap->flags & 811 (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) { 812 mlx4_warn(dev, "64B EQEs/CQEs supported by the device but not enabled\n"); 813 dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_CQE; 814 dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_EQE; 815 } 816 } 817 818 if ((dev->caps.flags & 819 (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) && 820 mlx4_is_master(dev)) 821 dev->caps.function_caps |= MLX4_FUNC_CAP_64B_EQE_CQE; 822 823 if (!mlx4_is_slave(dev)) { 824 for (i = 0; i < dev->caps.num_ports; ++i) 825 dev->caps.def_counter_index[i] = i << 1; 826 } 827 828 return 0; 829 } 830 /*The function checks if there are live vf, return the num of them*/ 831 static int mlx4_how_many_lives_vf(struct mlx4_dev *dev) 832 { 833 struct mlx4_priv *priv = mlx4_priv(dev); 834 struct mlx4_slave_state *s_state; 835 int i; 836 int ret = 0; 837 838 for (i = 1/*the ppf is 0*/; i < dev->num_slaves; ++i) { 839 s_state = &priv->mfunc.master.slave_state[i]; 840 if (s_state->active && s_state->last_cmd != 841 MLX4_COMM_CMD_RESET) { 842 mlx4_warn(dev, "%s: slave: %d is still active\n", 843 __func__, i); 844 ret++; 845 } 846 } 847 return ret; 848 } 849 850 int mlx4_get_parav_qkey(struct mlx4_dev *dev, u32 qpn, u32 
*qkey) 851 { 852 u32 qk = MLX4_RESERVED_QKEY_BASE; 853 854 if (qpn >= dev->phys_caps.base_tunnel_sqpn + 8 * MLX4_MFUNC_MAX || 855 qpn < dev->phys_caps.base_proxy_sqpn) 856 return -EINVAL; 857 858 if (qpn >= dev->phys_caps.base_tunnel_sqpn) 859 /* tunnel qp */ 860 qk += qpn - dev->phys_caps.base_tunnel_sqpn; 861 else 862 qk += qpn - dev->phys_caps.base_proxy_sqpn; 863 *qkey = qk; 864 return 0; 865 } 866 EXPORT_SYMBOL(mlx4_get_parav_qkey); 867 868 void mlx4_sync_pkey_table(struct mlx4_dev *dev, int slave, int port, int i, int val) 869 { 870 struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev); 871 872 if (!mlx4_is_master(dev)) 873 return; 874 875 priv->virt2phys_pkey[slave][port - 1][i] = val; 876 } 877 EXPORT_SYMBOL(mlx4_sync_pkey_table); 878 879 void mlx4_put_slave_node_guid(struct mlx4_dev *dev, int slave, __be64 guid) 880 { 881 struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev); 882 883 if (!mlx4_is_master(dev)) 884 return; 885 886 priv->slave_node_guids[slave] = guid; 887 } 888 EXPORT_SYMBOL(mlx4_put_slave_node_guid); 889 890 __be64 mlx4_get_slave_node_guid(struct mlx4_dev *dev, int slave) 891 { 892 struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev); 893 894 if (!mlx4_is_master(dev)) 895 return 0; 896 897 return priv->slave_node_guids[slave]; 898 } 899 EXPORT_SYMBOL(mlx4_get_slave_node_guid); 900 901 int mlx4_is_slave_active(struct mlx4_dev *dev, int slave) 902 { 903 struct mlx4_priv *priv = mlx4_priv(dev); 904 struct mlx4_slave_state *s_slave; 905 906 if (!mlx4_is_master(dev)) 907 return 0; 908 909 s_slave = &priv->mfunc.master.slave_state[slave]; 910 return !!s_slave->active; 911 } 912 EXPORT_SYMBOL(mlx4_is_slave_active); 913 914 static void slave_adjust_steering_mode(struct mlx4_dev *dev, 915 struct mlx4_dev_cap *dev_cap, 916 struct mlx4_init_hca_param *hca_param) 917 { 918 dev->caps.steering_mode = hca_param->steering_mode; 919 if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) 920 
dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry; 921 else 922 dev->caps.num_qp_per_mgm = 923 4 * ((1 << hca_param->log_mc_entry_sz)/16 - 2); 924 925 mlx4_dbg(dev, "Steering mode is: %s\n", 926 mlx4_steering_mode_str(dev->caps.steering_mode)); 927 } 928 929 static int mlx4_slave_cap(struct mlx4_dev *dev) 930 { 931 int err; 932 u32 page_size; 933 struct mlx4_dev_cap dev_cap; 934 struct mlx4_func_cap func_cap; 935 struct mlx4_init_hca_param hca_param; 936 int i; 937 938 memset(&hca_param, 0, sizeof(hca_param)); 939 err = mlx4_QUERY_HCA(dev, &hca_param); 940 if (err) { 941 mlx4_err(dev, "QUERY_HCA command failed, aborting.\n"); 942 return err; 943 } 944 945 /*fail if the hca has an unknown capability */ 946 if ((hca_param.global_caps | HCA_GLOBAL_CAP_MASK) != 947 HCA_GLOBAL_CAP_MASK) { 948 mlx4_err(dev, "Unknown hca global capabilities\n"); 949 return -ENOSYS; 950 } 951 952 mlx4_log_num_mgm_entry_size = hca_param.log_mc_entry_sz; 953 954 dev->caps.hca_core_clock = hca_param.hca_core_clock; 955 956 memset(&dev_cap, 0, sizeof(dev_cap)); 957 dev->caps.max_qp_dest_rdma = 1 << hca_param.log_rd_per_qp; 958 err = mlx4_dev_cap(dev, &dev_cap); 959 if (err) { 960 mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n"); 961 return err; 962 } 963 964 err = mlx4_QUERY_FW(dev); 965 if (err) 966 mlx4_err(dev, "QUERY_FW command failed: could not get FW version.\n"); 967 968 if (!hca_param.mw_enable) { 969 dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_MEM_WINDOW; 970 dev->caps.bmme_flags &= ~MLX4_BMME_FLAG_TYPE_2_WIN; 971 } 972 973 page_size = ~dev->caps.page_size_cap + 1; 974 mlx4_warn(dev, "HCA minimum page size:%d\n", page_size); 975 if (page_size > PAGE_SIZE) { 976 mlx4_err(dev, "HCA minimum page size of %d bigger than " 977 "kernel PAGE_SIZE of %d, aborting.\n", 978 page_size, (int)PAGE_SIZE); 979 return -ENODEV; 980 } 981 982 /* slave gets uar page size from QUERY_HCA fw command */ 983 dev->caps.uar_page_size = 1 << (hca_param.uar_page_sz + 12); 984 985 /* TODO: relax this 
assumption */ 986 if (dev->caps.uar_page_size != PAGE_SIZE) { 987 mlx4_err(dev, "UAR size:%d != kernel PAGE_SIZE of %d\n", 988 dev->caps.uar_page_size, (int)PAGE_SIZE); 989 return -ENODEV; 990 } 991 992 memset(&func_cap, 0, sizeof(func_cap)); 993 err = mlx4_QUERY_FUNC_CAP(dev, 0, &func_cap); 994 if (err) { 995 mlx4_err(dev, "QUERY_FUNC_CAP general command failed, aborting (%d).\n", 996 err); 997 return err; 998 } 999 1000 if ((func_cap.pf_context_behaviour | PF_CONTEXT_BEHAVIOUR_MASK) != 1001 PF_CONTEXT_BEHAVIOUR_MASK) { 1002 mlx4_err(dev, "Unknown pf context behaviour\n"); 1003 return -ENOSYS; 1004 } 1005 1006 dev->caps.num_ports = func_cap.num_ports; 1007 dev->quotas.qp = func_cap.qp_quota; 1008 dev->quotas.srq = func_cap.srq_quota; 1009 dev->quotas.cq = func_cap.cq_quota; 1010 dev->quotas.mpt = func_cap.mpt_quota; 1011 dev->quotas.mtt = func_cap.mtt_quota; 1012 dev->caps.num_qps = 1 << hca_param.log_num_qps; 1013 dev->caps.num_srqs = 1 << hca_param.log_num_srqs; 1014 dev->caps.num_cqs = 1 << hca_param.log_num_cqs; 1015 dev->caps.num_mpts = 1 << hca_param.log_mpt_sz; 1016 dev->caps.num_eqs = func_cap.max_eq; 1017 dev->caps.reserved_eqs = func_cap.reserved_eq; 1018 dev->caps.num_pds = MLX4_NUM_PDS; 1019 dev->caps.num_mgms = 0; 1020 dev->caps.num_amgms = 0; 1021 1022 if (dev->caps.num_ports > MLX4_MAX_PORTS) { 1023 mlx4_err(dev, "HCA has %d ports, but we only support %d, " 1024 "aborting.\n", dev->caps.num_ports, MLX4_MAX_PORTS); 1025 return -ENODEV; 1026 } 1027 1028 dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); 1029 dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); 1030 dev->caps.qp1_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); 1031 dev->caps.qp1_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); 1032 1033 if (!dev->caps.qp0_tunnel || !dev->caps.qp0_proxy || 1034 !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy) { 1035 err = -ENOMEM; 1036 goto err_mem; 1037 } 1038 1039 for 
(i = 1; i <= dev->caps.num_ports; ++i) { 1040 err = mlx4_QUERY_FUNC_CAP(dev, (u32) i, &func_cap); 1041 if (err) { 1042 mlx4_err(dev, "QUERY_FUNC_CAP port command failed for" 1043 " port %d, aborting (%d).\n", i, err); 1044 goto err_mem; 1045 } 1046 dev->caps.qp0_tunnel[i - 1] = func_cap.qp0_tunnel_qpn; 1047 dev->caps.qp0_proxy[i - 1] = func_cap.qp0_proxy_qpn; 1048 dev->caps.qp1_tunnel[i - 1] = func_cap.qp1_tunnel_qpn; 1049 dev->caps.qp1_proxy[i - 1] = func_cap.qp1_proxy_qpn; 1050 dev->caps.def_counter_index[i - 1] = func_cap.def_counter_index; 1051 1052 dev->caps.port_mask[i] = dev->caps.port_type[i]; 1053 err = mlx4_get_slave_pkey_gid_tbl_len(dev, i, 1054 &dev->caps.gid_table_len[i], 1055 &dev->caps.pkey_table_len[i]); 1056 if (err) 1057 goto err_mem; 1058 } 1059 1060 if (dev->caps.uar_page_size * (dev->caps.num_uars - 1061 dev->caps.reserved_uars) > 1062 pci_resource_len(dev->pdev, 2)) { 1063 mlx4_err(dev, "HCA reported UAR region size of 0x%x bigger than " 1064 "PCI resource 2 size of 0x%llx, aborting.\n", 1065 dev->caps.uar_page_size * dev->caps.num_uars, 1066 (unsigned long long) pci_resource_len(dev->pdev, 2)); 1067 err = -ENOMEM; 1068 goto err_mem; 1069 } 1070 1071 if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_EQE_ENABLED) { 1072 dev->caps.eqe_size = 64; 1073 dev->caps.eqe_factor = 1; 1074 } else { 1075 dev->caps.eqe_size = 32; 1076 dev->caps.eqe_factor = 0; 1077 } 1078 1079 if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_CQE_ENABLED) { 1080 dev->caps.cqe_size = 64; 1081 dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_64B_CQE; 1082 } else { 1083 dev->caps.cqe_size = 32; 1084 } 1085 1086 dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS; 1087 mlx4_warn(dev, "Timestamping is not supported in slave mode.\n"); 1088 1089 slave_adjust_steering_mode(dev, &dev_cap, &hca_param); 1090 1091 return 0; 1092 1093 err_mem: 1094 kfree(dev->caps.qp0_tunnel); 1095 kfree(dev->caps.qp0_proxy); 1096 kfree(dev->caps.qp1_tunnel); 1097 kfree(dev->caps.qp1_proxy); 1098 
dev->caps.qp0_tunnel = dev->caps.qp0_proxy = 1099 dev->caps.qp1_tunnel = dev->caps.qp1_proxy = NULL; 1100 1101 return err; 1102 } 1103 1104 static void mlx4_request_modules(struct mlx4_dev *dev) 1105 { 1106 int port; 1107 int has_ib_port = false; 1108 int has_eth_port = false; 1109 #define EN_DRV_NAME "mlx4_en" 1110 #define IB_DRV_NAME "mlx4_ib" 1111 1112 for (port = 1; port <= dev->caps.num_ports; port++) { 1113 if (dev->caps.port_type[port] == MLX4_PORT_TYPE_IB) 1114 has_ib_port = true; 1115 else if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) 1116 has_eth_port = true; 1117 } 1118 1119 if (has_ib_port) 1120 request_module_nowait(IB_DRV_NAME); 1121 if (has_eth_port) 1122 request_module_nowait(EN_DRV_NAME); 1123 } 1124 1125 /* 1126 * Change the port configuration of the device. 1127 * Every user of this function must hold the port mutex. 1128 */ 1129 int mlx4_change_port_types(struct mlx4_dev *dev, 1130 enum mlx4_port_type *port_types) 1131 { 1132 int err = 0; 1133 int change = 0; 1134 int port; 1135 1136 for (port = 0; port < dev->caps.num_ports; port++) { 1137 /* Change the port type only if the new type is different 1138 * from the current, and not set to Auto */ 1139 if (port_types[port] != dev->caps.port_type[port + 1]) 1140 change = 1; 1141 } 1142 if (change) { 1143 mlx4_unregister_device(dev); 1144 for (port = 1; port <= dev->caps.num_ports; port++) { 1145 mlx4_CLOSE_PORT(dev, port); 1146 dev->caps.port_type[port] = port_types[port - 1]; 1147 err = mlx4_SET_PORT(dev, port, -1); 1148 if (err) { 1149 mlx4_err(dev, "Failed to set port %d, " 1150 "aborting\n", port); 1151 goto out; 1152 } 1153 } 1154 mlx4_set_port_mask(dev); 1155 err = mlx4_register_device(dev); 1156 if (err) { 1157 mlx4_err(dev, "Failed to register device\n"); 1158 goto out; 1159 } 1160 mlx4_request_modules(dev); 1161 } 1162 1163 out: 1164 return err; 1165 } 1166 1167 static ssize_t show_port_type(struct device *dev, 1168 struct device_attribute *attr, 1169 char *buf) 1170 { 1171 struct 
mlx4_port_info *info = container_of(attr, struct mlx4_port_info, 1172 port_attr); 1173 struct mlx4_dev *mdev = info->dev; 1174 char type[8]; 1175 1176 sprintf(type, "%s", 1177 (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_IB) ? 1178 "ib" : "eth"); 1179 if (mdev->caps.possible_type[info->port] == MLX4_PORT_TYPE_AUTO) 1180 sprintf(buf, "auto (%s)\n", type); 1181 else 1182 sprintf(buf, "%s\n", type); 1183 1184 return strlen(buf); 1185 } 1186 1187 static ssize_t set_port_type(struct device *dev, 1188 struct device_attribute *attr, 1189 const char *buf, size_t count) 1190 { 1191 struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info, 1192 port_attr); 1193 struct mlx4_dev *mdev = info->dev; 1194 struct mlx4_priv *priv = mlx4_priv(mdev); 1195 enum mlx4_port_type types[MLX4_MAX_PORTS]; 1196 enum mlx4_port_type new_types[MLX4_MAX_PORTS]; 1197 int i; 1198 int err = 0; 1199 1200 if (!strcmp(buf, "ib\n")) 1201 info->tmp_type = MLX4_PORT_TYPE_IB; 1202 else if (!strcmp(buf, "eth\n")) 1203 info->tmp_type = MLX4_PORT_TYPE_ETH; 1204 else if (!strcmp(buf, "auto\n")) 1205 info->tmp_type = MLX4_PORT_TYPE_AUTO; 1206 else { 1207 mlx4_err(mdev, "%s is not supported port type\n", buf); 1208 return -EINVAL; 1209 } 1210 1211 if ((info->tmp_type & mdev->caps.supported_type[info->port]) != 1212 info->tmp_type) { 1213 mlx4_err(mdev, "Requested port type for port %d is not supported on this HCA\n", 1214 info->port); 1215 return -EINVAL; 1216 } 1217 1218 mlx4_stop_sense(mdev); 1219 mutex_lock(&priv->port_mutex); 1220 /* Possible type is always the one that was delivered */ 1221 mdev->caps.possible_type[info->port] = info->tmp_type; 1222 1223 for (i = 0; i < mdev->caps.num_ports; i++) { 1224 types[i] = priv->port[i+1].tmp_type ? 
priv->port[i+1].tmp_type : 1225 mdev->caps.possible_type[i+1]; 1226 if (types[i] == MLX4_PORT_TYPE_AUTO) 1227 types[i] = mdev->caps.port_type[i+1]; 1228 } 1229 1230 if (!(mdev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) && 1231 !(mdev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT)) { 1232 for (i = 1; i <= mdev->caps.num_ports; i++) { 1233 if (mdev->caps.possible_type[i] == MLX4_PORT_TYPE_AUTO) { 1234 mdev->caps.possible_type[i] = mdev->caps.port_type[i]; 1235 err = -EINVAL; 1236 } 1237 } 1238 } 1239 if (err) { 1240 mlx4_err(mdev, "Auto sensing is not supported on this HCA. " 1241 "Set only 'eth' or 'ib' for both ports " 1242 "(should be the same)\n"); 1243 goto out; 1244 } 1245 1246 mlx4_do_sense_ports(mdev, new_types, types); 1247 1248 err = mlx4_check_port_params(mdev, new_types); 1249 if (err) 1250 goto out; 1251 1252 /* We are about to apply the changes after the configuration 1253 * was verified, no need to remember the temporary types 1254 * any more */ 1255 for (i = 0; i < mdev->caps.num_ports; i++) 1256 priv->port[i + 1].tmp_type = 0; 1257 1258 err = mlx4_change_port_types(mdev, new_types); 1259 1260 out: 1261 mlx4_start_sense(mdev); 1262 mutex_unlock(&priv->port_mutex); 1263 return err ? 
err : count; 1264 } 1265 1266 enum ibta_mtu { 1267 IB_MTU_256 = 1, 1268 IB_MTU_512 = 2, 1269 IB_MTU_1024 = 3, 1270 IB_MTU_2048 = 4, 1271 IB_MTU_4096 = 5 1272 }; 1273 1274 static inline int int_to_ibta_mtu(int mtu) 1275 { 1276 switch (mtu) { 1277 case 256: return IB_MTU_256; 1278 case 512: return IB_MTU_512; 1279 case 1024: return IB_MTU_1024; 1280 case 2048: return IB_MTU_2048; 1281 case 4096: return IB_MTU_4096; 1282 default: return -1; 1283 } 1284 } 1285 1286 static inline int ibta_mtu_to_int(enum ibta_mtu mtu) 1287 { 1288 switch (mtu) { 1289 case IB_MTU_256: return 256; 1290 case IB_MTU_512: return 512; 1291 case IB_MTU_1024: return 1024; 1292 case IB_MTU_2048: return 2048; 1293 case IB_MTU_4096: return 4096; 1294 default: return -1; 1295 } 1296 } 1297 1298 static ssize_t 1299 show_board(struct device *device, struct device_attribute *attr, 1300 char *buf) 1301 { 1302 struct mlx4_hca_info *info = container_of(attr, struct mlx4_hca_info, 1303 board_attr); 1304 struct mlx4_dev *mdev = info->dev; 1305 1306 return sprintf(buf, "%.*s\n", MLX4_BOARD_ID_LEN, 1307 mdev->board_id); 1308 } 1309 1310 static ssize_t 1311 show_hca(struct device *device, struct device_attribute *attr, 1312 char *buf) 1313 { 1314 struct mlx4_hca_info *info = container_of(attr, struct mlx4_hca_info, 1315 hca_attr); 1316 struct mlx4_dev *mdev = info->dev; 1317 1318 return sprintf(buf, "MT%d\n", mdev->pdev->device); 1319 } 1320 1321 static ssize_t 1322 show_firmware_version(struct device *dev, 1323 struct device_attribute *attr, 1324 char *buf) 1325 { 1326 struct mlx4_hca_info *info = container_of(attr, struct mlx4_hca_info, 1327 firmware_attr); 1328 struct mlx4_dev *mdev = info->dev; 1329 1330 return sprintf(buf, "%d.%d.%d\n", (int)(mdev->caps.fw_ver >> 32), 1331 (int)(mdev->caps.fw_ver >> 16) & 0xffff, 1332 (int)mdev->caps.fw_ver & 0xffff); 1333 } 1334 1335 static ssize_t show_port_ib_mtu(struct device *dev, 1336 struct device_attribute *attr, 1337 char *buf) 1338 { 1339 struct mlx4_port_info 
*info = container_of(attr, struct mlx4_port_info, 1340 port_mtu_attr); 1341 struct mlx4_dev *mdev = info->dev; 1342 1343 /* When port type is eth, port mtu value isn't used. */ 1344 if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH) 1345 return -EINVAL; 1346 1347 sprintf(buf, "%d\n", 1348 ibta_mtu_to_int(mdev->caps.port_ib_mtu[info->port])); 1349 return strlen(buf); 1350 } 1351 1352 static ssize_t set_port_ib_mtu(struct device *dev, 1353 struct device_attribute *attr, 1354 const char *buf, size_t count) 1355 { 1356 struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info, 1357 port_mtu_attr); 1358 struct mlx4_dev *mdev = info->dev; 1359 struct mlx4_priv *priv = mlx4_priv(mdev); 1360 int err, port, mtu, ibta_mtu = -1; 1361 1362 if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH) { 1363 mlx4_warn(mdev, "port level mtu is only used for IB ports\n"); 1364 return -EINVAL; 1365 } 1366 1367 mtu = (int) simple_strtol(buf, NULL, 0); 1368 ibta_mtu = int_to_ibta_mtu(mtu); 1369 1370 if (ibta_mtu < 0) { 1371 mlx4_err(mdev, "%s is invalid IBTA mtu\n", buf); 1372 return -EINVAL; 1373 } 1374 1375 mdev->caps.port_ib_mtu[info->port] = ibta_mtu; 1376 1377 mlx4_stop_sense(mdev); 1378 mutex_lock(&priv->port_mutex); 1379 mlx4_unregister_device(mdev); 1380 for (port = 1; port <= mdev->caps.num_ports; port++) { 1381 mlx4_CLOSE_PORT(mdev, port); 1382 err = mlx4_SET_PORT(mdev, port, -1); 1383 if (err) { 1384 mlx4_err(mdev, "Failed to set port %d, " 1385 "aborting\n", port); 1386 goto err_set_port; 1387 } 1388 } 1389 err = mlx4_register_device(mdev); 1390 err_set_port: 1391 mutex_unlock(&priv->port_mutex); 1392 mlx4_start_sense(mdev); 1393 return err ? 
err : count; 1394 } 1395 1396 static int mlx4_load_fw(struct mlx4_dev *dev) 1397 { 1398 struct mlx4_priv *priv = mlx4_priv(dev); 1399 int err, unmap_flag = 0; 1400 1401 priv->fw.fw_icm = mlx4_alloc_icm(dev, priv->fw.fw_pages, 1402 GFP_HIGHUSER | __GFP_NOWARN, 0); 1403 if (!priv->fw.fw_icm) { 1404 mlx4_err(dev, "Couldn't allocate FW area, aborting.\n"); 1405 return -ENOMEM; 1406 } 1407 1408 err = mlx4_MAP_FA(dev, priv->fw.fw_icm); 1409 if (err) { 1410 mlx4_err(dev, "MAP_FA command failed, aborting.\n"); 1411 goto err_free; 1412 } 1413 1414 err = mlx4_RUN_FW(dev); 1415 if (err) { 1416 mlx4_err(dev, "RUN_FW command failed, aborting.\n"); 1417 goto err_unmap_fa; 1418 } 1419 1420 return 0; 1421 1422 err_unmap_fa: 1423 unmap_flag = mlx4_UNMAP_FA(dev); 1424 if (unmap_flag) 1425 pr_warn("mlx4_core: mlx4_UNMAP_FA failed.\n"); 1426 1427 err_free: 1428 if (!unmap_flag) 1429 mlx4_free_icm(dev, priv->fw.fw_icm, 0); 1430 return err; 1431 } 1432 1433 static int mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base, 1434 int cmpt_entry_sz) 1435 { 1436 struct mlx4_priv *priv = mlx4_priv(dev); 1437 int err; 1438 int num_eqs; 1439 1440 err = mlx4_init_icm_table(dev, &priv->qp_table.cmpt_table, 1441 cmpt_base + 1442 ((u64) (MLX4_CMPT_TYPE_QP * 1443 cmpt_entry_sz) << MLX4_CMPT_SHIFT), 1444 cmpt_entry_sz, dev->caps.num_qps, 1445 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 1446 0, 0); 1447 if (err) 1448 goto err; 1449 1450 err = mlx4_init_icm_table(dev, &priv->srq_table.cmpt_table, 1451 cmpt_base + 1452 ((u64) (MLX4_CMPT_TYPE_SRQ * 1453 cmpt_entry_sz) << MLX4_CMPT_SHIFT), 1454 cmpt_entry_sz, dev->caps.num_srqs, 1455 dev->caps.reserved_srqs, 0, 0); 1456 if (err) 1457 goto err_qp; 1458 1459 err = mlx4_init_icm_table(dev, &priv->cq_table.cmpt_table, 1460 cmpt_base + 1461 ((u64) (MLX4_CMPT_TYPE_CQ * 1462 cmpt_entry_sz) << MLX4_CMPT_SHIFT), 1463 cmpt_entry_sz, dev->caps.num_cqs, 1464 dev->caps.reserved_cqs, 0, 0); 1465 if (err) 1466 goto err_srq; 1467 1468 num_eqs = 
(mlx4_is_master(dev)) ? dev->phys_caps.num_phys_eqs : 1469 dev->caps.num_eqs; 1470 err = mlx4_init_icm_table(dev, &priv->eq_table.cmpt_table, 1471 cmpt_base + 1472 ((u64) (MLX4_CMPT_TYPE_EQ * 1473 cmpt_entry_sz) << MLX4_CMPT_SHIFT), 1474 cmpt_entry_sz, num_eqs, num_eqs, 0, 0); 1475 if (err) 1476 goto err_cq; 1477 1478 return 0; 1479 1480 err_cq: 1481 mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table); 1482 1483 err_srq: 1484 mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table); 1485 1486 err_qp: 1487 mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table); 1488 1489 err: 1490 return err; 1491 } 1492 1493 static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap, 1494 struct mlx4_init_hca_param *init_hca, u64 icm_size) 1495 { 1496 struct mlx4_priv *priv = mlx4_priv(dev); 1497 u64 aux_pages; 1498 int num_eqs; 1499 int err, unmap_flag = 0; 1500 1501 err = mlx4_SET_ICM_SIZE(dev, icm_size, &aux_pages); 1502 if (err) { 1503 mlx4_err(dev, "SET_ICM_SIZE command failed, aborting.\n"); 1504 return err; 1505 } 1506 1507 mlx4_dbg(dev, "%lld KB of HCA context requires %lld KB aux memory.\n", 1508 (unsigned long long) icm_size >> 10, 1509 (unsigned long long) aux_pages << 2); 1510 1511 priv->fw.aux_icm = mlx4_alloc_icm(dev, aux_pages, 1512 GFP_HIGHUSER | __GFP_NOWARN, 0); 1513 if (!priv->fw.aux_icm) { 1514 mlx4_err(dev, "Couldn't allocate aux memory, aborting.\n"); 1515 return -ENOMEM; 1516 } 1517 1518 err = mlx4_MAP_ICM_AUX(dev, priv->fw.aux_icm); 1519 if (err) { 1520 mlx4_err(dev, "MAP_ICM_AUX command failed, aborting.\n"); 1521 goto err_free_aux; 1522 } 1523 1524 err = mlx4_init_cmpt_table(dev, init_hca->cmpt_base, dev_cap->cmpt_entry_sz); 1525 if (err) { 1526 mlx4_err(dev, "Failed to map cMPT context memory, aborting.\n"); 1527 goto err_unmap_aux; 1528 } 1529 1530 1531 num_eqs = (mlx4_is_master(dev)) ? 
dev->phys_caps.num_phys_eqs : 1532 dev->caps.num_eqs; 1533 err = mlx4_init_icm_table(dev, &priv->eq_table.table, 1534 init_hca->eqc_base, dev_cap->eqc_entry_sz, 1535 num_eqs, num_eqs, 0, 0); 1536 if (err) { 1537 mlx4_err(dev, "Failed to map EQ context memory, aborting.\n"); 1538 goto err_unmap_cmpt; 1539 } 1540 1541 /* 1542 * Reserved MTT entries must be aligned up to a cacheline 1543 * boundary, since the FW will write to them, while the driver 1544 * writes to all other MTT entries. (The variable 1545 * dev->caps.mtt_entry_sz below is really the MTT segment 1546 * size, not the raw entry size) 1547 */ 1548 dev->caps.reserved_mtts = 1549 ALIGN(dev->caps.reserved_mtts * dev->caps.mtt_entry_sz, 1550 dma_get_cache_alignment()) / dev->caps.mtt_entry_sz; 1551 1552 err = mlx4_init_icm_table(dev, &priv->mr_table.mtt_table, 1553 init_hca->mtt_base, 1554 dev->caps.mtt_entry_sz, 1555 dev->caps.num_mtts, 1556 dev->caps.reserved_mtts, 1, 0); 1557 if (err) { 1558 mlx4_err(dev, "Failed to map MTT context memory, aborting.\n"); 1559 goto err_unmap_eq; 1560 } 1561 1562 err = mlx4_init_icm_table(dev, &priv->mr_table.dmpt_table, 1563 init_hca->dmpt_base, 1564 dev_cap->dmpt_entry_sz, 1565 dev->caps.num_mpts, 1566 dev->caps.reserved_mrws, 1, 1); 1567 if (err) { 1568 mlx4_err(dev, "Failed to map dMPT context memory, aborting.\n"); 1569 goto err_unmap_mtt; 1570 } 1571 1572 err = mlx4_init_icm_table(dev, &priv->qp_table.qp_table, 1573 init_hca->qpc_base, 1574 dev_cap->qpc_entry_sz, 1575 dev->caps.num_qps, 1576 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 1577 0, 0); 1578 if (err) { 1579 mlx4_err(dev, "Failed to map QP context memory, aborting.\n"); 1580 goto err_unmap_dmpt; 1581 } 1582 1583 err = mlx4_init_icm_table(dev, &priv->qp_table.auxc_table, 1584 init_hca->auxc_base, 1585 dev_cap->aux_entry_sz, 1586 dev->caps.num_qps, 1587 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 1588 0, 0); 1589 if (err) { 1590 mlx4_err(dev, "Failed to map AUXC context memory, aborting.\n"); 1591 goto 
err_unmap_qp; 1592 } 1593 1594 err = mlx4_init_icm_table(dev, &priv->qp_table.altc_table, 1595 init_hca->altc_base, 1596 dev_cap->altc_entry_sz, 1597 dev->caps.num_qps, 1598 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 1599 0, 0); 1600 if (err) { 1601 mlx4_err(dev, "Failed to map ALTC context memory, aborting.\n"); 1602 goto err_unmap_auxc; 1603 } 1604 1605 err = mlx4_init_icm_table(dev, &priv->qp_table.rdmarc_table, 1606 init_hca->rdmarc_base, 1607 dev_cap->rdmarc_entry_sz << priv->qp_table.rdmarc_shift, 1608 dev->caps.num_qps, 1609 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 1610 0, 0); 1611 if (err) { 1612 mlx4_err(dev, "Failed to map RDMARC context memory, aborting\n"); 1613 goto err_unmap_altc; 1614 } 1615 1616 err = mlx4_init_icm_table(dev, &priv->cq_table.table, 1617 init_hca->cqc_base, 1618 dev_cap->cqc_entry_sz, 1619 dev->caps.num_cqs, 1620 dev->caps.reserved_cqs, 0, 0); 1621 if (err) { 1622 mlx4_err(dev, "Failed to map CQ context memory, aborting.\n"); 1623 goto err_unmap_rdmarc; 1624 } 1625 1626 err = mlx4_init_icm_table(dev, &priv->srq_table.table, 1627 init_hca->srqc_base, 1628 dev_cap->srq_entry_sz, 1629 dev->caps.num_srqs, 1630 dev->caps.reserved_srqs, 0, 0); 1631 if (err) { 1632 mlx4_err(dev, "Failed to map SRQ context memory, aborting.\n"); 1633 goto err_unmap_cq; 1634 } 1635 1636 /* 1637 * For flow steering device managed mode it is required to use 1638 * mlx4_init_icm_table. For B0 steering mode it's not strictly 1639 * required, but for simplicity just map the whole multicast 1640 * group table now. The table isn't very big and it's a lot 1641 * easier than trying to track ref counts. 
1642 */ 1643 err = mlx4_init_icm_table(dev, &priv->mcg_table.table, 1644 init_hca->mc_base, 1645 mlx4_get_mgm_entry_size(dev), 1646 dev->caps.num_mgms + dev->caps.num_amgms, 1647 dev->caps.num_mgms + dev->caps.num_amgms, 1648 0, 0); 1649 if (err) { 1650 mlx4_err(dev, "Failed to map MCG context memory, aborting.\n"); 1651 goto err_unmap_srq; 1652 } 1653 1654 return 0; 1655 1656 err_unmap_srq: 1657 mlx4_cleanup_icm_table(dev, &priv->srq_table.table); 1658 1659 err_unmap_cq: 1660 mlx4_cleanup_icm_table(dev, &priv->cq_table.table); 1661 1662 err_unmap_rdmarc: 1663 mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table); 1664 1665 err_unmap_altc: 1666 mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table); 1667 1668 err_unmap_auxc: 1669 mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table); 1670 1671 err_unmap_qp: 1672 mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table); 1673 1674 err_unmap_dmpt: 1675 mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table); 1676 1677 err_unmap_mtt: 1678 mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table); 1679 1680 err_unmap_eq: 1681 mlx4_cleanup_icm_table(dev, &priv->eq_table.table); 1682 1683 err_unmap_cmpt: 1684 mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table); 1685 mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table); 1686 mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table); 1687 mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table); 1688 1689 err_unmap_aux: 1690 unmap_flag = mlx4_UNMAP_ICM_AUX(dev); 1691 if (unmap_flag) 1692 pr_warn("mlx4_core: mlx4_UNMAP_ICM_AUX failed.\n"); 1693 1694 err_free_aux: 1695 if (!unmap_flag) 1696 mlx4_free_icm(dev, priv->fw.aux_icm, 0); 1697 1698 return err; 1699 } 1700 1701 static void mlx4_free_icms(struct mlx4_dev *dev) 1702 { 1703 struct mlx4_priv *priv = mlx4_priv(dev); 1704 1705 mlx4_cleanup_icm_table(dev, &priv->mcg_table.table); 1706 mlx4_cleanup_icm_table(dev, &priv->srq_table.table); 1707 mlx4_cleanup_icm_table(dev, &priv->cq_table.table); 1708 
mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table); 1709 mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table); 1710 mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table); 1711 mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table); 1712 mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table); 1713 mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table); 1714 mlx4_cleanup_icm_table(dev, &priv->eq_table.table); 1715 mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table); 1716 mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table); 1717 mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table); 1718 mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table); 1719 1720 if (!mlx4_UNMAP_ICM_AUX(dev)) 1721 mlx4_free_icm(dev, priv->fw.aux_icm, 0); 1722 else 1723 pr_warn("mlx4_core: mlx4_UNMAP_ICM_AUX failed.\n"); 1724 } 1725 1726 static void mlx4_slave_exit(struct mlx4_dev *dev) 1727 { 1728 struct mlx4_priv *priv = mlx4_priv(dev); 1729 1730 mutex_lock(&priv->cmd.slave_cmd_mutex); 1731 if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_TIME)) 1732 mlx4_warn(dev, "Failed to close slave function.\n"); 1733 mutex_unlock(&priv->cmd.slave_cmd_mutex); 1734 } 1735 1736 static int map_bf_area(struct mlx4_dev *dev) 1737 { 1738 struct mlx4_priv *priv = mlx4_priv(dev); 1739 resource_size_t bf_start; 1740 resource_size_t bf_len; 1741 int err = 0; 1742 1743 if (!dev->caps.bf_reg_size) 1744 return -ENXIO; 1745 1746 bf_start = pci_resource_start(dev->pdev, 2) + 1747 (dev->caps.num_uars << PAGE_SHIFT); 1748 bf_len = pci_resource_len(dev->pdev, 2) - 1749 (dev->caps.num_uars << PAGE_SHIFT); 1750 priv->bf_mapping = io_mapping_create_wc(bf_start, bf_len); 1751 if (!priv->bf_mapping) 1752 err = -ENOMEM; 1753 1754 return err; 1755 } 1756 1757 static void unmap_bf_area(struct mlx4_dev *dev) 1758 { 1759 if (mlx4_priv(dev)->bf_mapping) 1760 io_mapping_free(mlx4_priv(dev)->bf_mapping); 1761 } 1762 1763 int mlx4_read_clock(struct mlx4_dev *dev) 1764 { 1765 u32 clockhi, clocklo, 
clockhi1; 1766 cycle_t cycles; 1767 int i; 1768 struct mlx4_priv *priv = mlx4_priv(dev); 1769 1770 if (!priv->clock_mapping) 1771 return -ENOTSUPP; 1772 1773 for (i = 0; i < 10; i++) { 1774 clockhi = swab32(readl(priv->clock_mapping)); 1775 clocklo = swab32(readl(priv->clock_mapping + 4)); 1776 clockhi1 = swab32(readl(priv->clock_mapping)); 1777 if (clockhi == clockhi1) 1778 break; 1779 } 1780 1781 cycles = (u64) clockhi << 32 | (u64) clocklo; 1782 1783 return cycles; 1784 } 1785 EXPORT_SYMBOL_GPL(mlx4_read_clock); 1786 1787 1788 static int map_internal_clock(struct mlx4_dev *dev) 1789 { 1790 struct mlx4_priv *priv = mlx4_priv(dev); 1791 1792 priv->clock_mapping = ioremap(pci_resource_start(dev->pdev, 1793 priv->fw.clock_bar) + 1794 priv->fw.clock_offset, MLX4_CLOCK_SIZE); 1795 1796 if (!priv->clock_mapping) 1797 return -ENOMEM; 1798 1799 return 0; 1800 } 1801 1802 1803 int mlx4_get_internal_clock_params(struct mlx4_dev *dev, 1804 struct mlx4_clock_params *params) 1805 { 1806 struct mlx4_priv *priv = mlx4_priv(dev); 1807 1808 if (mlx4_is_slave(dev)) 1809 return -ENOTSUPP; 1810 if (!params) 1811 return -EINVAL; 1812 1813 params->bar = priv->fw.clock_bar; 1814 params->offset = priv->fw.clock_offset; 1815 params->size = MLX4_CLOCK_SIZE; 1816 1817 return 0; 1818 } 1819 EXPORT_SYMBOL_GPL(mlx4_get_internal_clock_params); 1820 1821 static void unmap_internal_clock(struct mlx4_dev *dev) 1822 { 1823 struct mlx4_priv *priv = mlx4_priv(dev); 1824 1825 if (priv->clock_mapping) 1826 iounmap(priv->clock_mapping); 1827 } 1828 1829 static void mlx4_close_hca(struct mlx4_dev *dev) 1830 { 1831 unmap_internal_clock(dev); 1832 unmap_bf_area(dev); 1833 if (mlx4_is_slave(dev)) { 1834 mlx4_slave_exit(dev); 1835 } else { 1836 mlx4_CLOSE_HCA(dev, 0); 1837 mlx4_free_icms(dev); 1838 1839 if (!mlx4_UNMAP_FA(dev)) 1840 mlx4_free_icm(dev, mlx4_priv(dev)->fw.fw_icm, 0); 1841 else 1842 pr_warn("mlx4_core: mlx4_UNMAP_FA failed.\n"); 1843 } 1844 } 1845 1846 static int mlx4_init_slave(struct 
mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	u64 dma = (u64) priv->mfunc.vhcr_dma;
	int num_of_reset_retries = NUM_OF_RESET_RETRIES;
	int ret_from_reset = 0;
	u32 slave_read;
	u32 cmd_channel_ver;

	/* Reset the comm channel, verify the command-interface revision
	 * against the master, then hand the master our VHCR DMA address
	 * 16 bits at a time.  Returns 0 on success, -EIO on any failure. */
	mutex_lock(&priv->cmd.slave_cmd_mutex);
	priv->cmd.max_cmds = 1;
	mlx4_warn(dev, "Sending reset\n");
	ret_from_reset = mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0,
				       MLX4_COMM_TIME);
	/* if we are in the middle of flr the slave will try
	 * NUM_OF_RESET_RETRIES times before leaving.*/
	if (ret_from_reset) {
		if (MLX4_DELAY_RESET_SLAVE == ret_from_reset) {
			msleep(SLEEP_TIME_IN_RESET);
			while (ret_from_reset && num_of_reset_retries) {
				/* Fixed message: the original split string
				 * concatenated without spaces, printing
				 * "in themiddle ... retrying...(try". */
				mlx4_warn(dev, "slave is currently in the "
					  "middle of FLR. retrying... "
					  "(try num:%d)\n",
					  (NUM_OF_RESET_RETRIES -
					   num_of_reset_retries + 1));
				ret_from_reset =
					mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET,
						      0, MLX4_COMM_TIME);
				num_of_reset_retries = num_of_reset_retries - 1;
			}
		} else
			goto err;
	}

	/* check the driver version - the slave I/F revision
	 * must match the master's */
	slave_read = swab32(readl(&priv->mfunc.comm->slave_read));
	cmd_channel_ver = mlx4_comm_get_version();

	if (MLX4_COMM_GET_IF_REV(cmd_channel_ver) !=
	    MLX4_COMM_GET_IF_REV(slave_read)) {
		mlx4_err(dev, "slave driver version is not supported"
			 " by the master\n");
		goto err;
	}

	mlx4_warn(dev, "Sending vhcr0\n");
	if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR0, dma >> 48,
			  MLX4_COMM_TIME))
		goto err;
	if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR1, dma >> 32,
			  MLX4_COMM_TIME))
		goto err;
	if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR2, dma >> 16,
			  MLX4_COMM_TIME))
		goto err;
	if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_EN, dma, MLX4_COMM_TIME))
		goto err;

	mutex_unlock(&priv->cmd.slave_cmd_mutex);
	return 0;

err:
1909 mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, 0); 1910 mutex_unlock(&priv->cmd.slave_cmd_mutex); 1911 return -EIO; 1912 } 1913 1914 static void mlx4_parav_master_pf_caps(struct mlx4_dev *dev) 1915 { 1916 int i; 1917 1918 for (i = 1; i <= dev->caps.num_ports; i++) { 1919 if (dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH) 1920 dev->caps.gid_table_len[i] = 1921 mlx4_get_slave_num_gids(dev, 0); 1922 else 1923 dev->caps.gid_table_len[i] = 1; 1924 dev->caps.pkey_table_len[i] = 1925 dev->phys_caps.pkey_phys_table_len[i] - 1; 1926 } 1927 } 1928 1929 static int choose_log_fs_mgm_entry_size(int qp_per_entry) 1930 { 1931 int i = MLX4_MIN_MGM_LOG_ENTRY_SIZE; 1932 1933 for (i = MLX4_MIN_MGM_LOG_ENTRY_SIZE; i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE; 1934 i++) { 1935 if (qp_per_entry <= 4 * ((1 << i) / 16 - 2)) 1936 break; 1937 } 1938 1939 return (i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE) ? i : -1; 1940 } 1941 1942 static void choose_steering_mode(struct mlx4_dev *dev, 1943 struct mlx4_dev_cap *dev_cap) 1944 { 1945 int nvfs; 1946 1947 mlx4_get_val(num_vfs.dbdf2val.tbl, pci_physfn(dev->pdev), 0, &nvfs); 1948 if (high_rate_steer && !mlx4_is_mfunc(dev)) { 1949 dev->caps.flags &= ~(MLX4_DEV_CAP_FLAG_VEP_MC_STEER | 1950 MLX4_DEV_CAP_FLAG_VEP_UC_STEER); 1951 dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_FS_EN; 1952 } 1953 1954 if (mlx4_log_num_mgm_entry_size == -1 && 1955 dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_FS_EN && 1956 (!mlx4_is_mfunc(dev) || 1957 (dev_cap->fs_max_num_qp_per_entry >= (nvfs + 1))) && 1958 choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry) >= 1959 MLX4_MIN_MGM_LOG_ENTRY_SIZE) { 1960 dev->oper_log_mgm_entry_size = 1961 choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry); 1962 dev->caps.steering_mode = MLX4_STEERING_MODE_DEVICE_MANAGED; 1963 dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry; 1964 } else { 1965 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER && 1966 dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) 1967 dev->caps.steering_mode = 
			MLX4_STEERING_MODE_B0;
		else {
			dev->caps.steering_mode = MLX4_STEERING_MODE_A0;

			/* B0 steering needs both UC and MC steer caps; having
			 * only one of them is unusable, so warn and stay on A0. */
			if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER ||
			    dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER)
				mlx4_warn(dev, "Must have both UC_STEER and MC_STEER flags "
					  "set to use B0 steering. Falling back to A0 steering mode.\n");
		}
		dev->oper_log_mgm_entry_size =
			mlx4_log_num_mgm_entry_size > 0 ?
			mlx4_log_num_mgm_entry_size :
			MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE;
		dev->caps.num_qp_per_mgm = mlx4_get_qp_per_mgm(dev);
	}
	mlx4_dbg(dev, "Steering mode is: %s, oper_log_mgm_entry_size = %d, "
		 "log_num_mgm_entry_size = %d\n",
		 mlx4_steering_mode_str(dev->caps.steering_mode),
		 dev->oper_log_mgm_entry_size, mlx4_log_num_mgm_entry_size);
}

/*
 * Bring the HCA out of reset into an operational state.
 *
 * PF path: QUERY_FW -> load FW -> MOD_STAT_CFG -> QUERY_DEV_CAP ->
 * build profile -> map ICM -> INIT_HCA -> (optional) read core clock
 * for timestamping.  VF (slave) path: handshake with the master and
 * fetch capabilities from it instead.  Both paths then map the blue
 * flame area and run QUERY_ADAPTER.
 *
 * Returns 0 on success or a negative errno; on failure every resource
 * acquired so far is released via the goto unwind chain below.
 */
static int mlx4_init_hca(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	struct mlx4_dev_cap *dev_cap = NULL;
	struct mlx4_adapter adapter;
	struct mlx4_mod_stat_cfg mlx4_cfg;
	struct mlx4_profile profile;
	struct mlx4_init_hca_param init_hca;
	u64 icm_size;
	int err;

	if (!mlx4_is_slave(dev)) {
		err = mlx4_QUERY_FW(dev);
		if (err) {
			/* -EACCES means another PF owns the device; not fatal. */
			if (err == -EACCES)
				mlx4_info(dev, "non-primary physical function, skipping.\n");
			else
				mlx4_err(dev, "QUERY_FW command failed, aborting.\n");
			return err;
		}

		err = mlx4_load_fw(dev);
		if (err) {
			mlx4_err(dev, "Failed to start FW, aborting.\n");
			return err;
		}

		/* Force log_pg_sz = 0 (4K pages) in the FW stat config. */
		mlx4_cfg.log_pg_sz_m = 1;
		mlx4_cfg.log_pg_sz = 0;
		err = mlx4_MOD_STAT_CFG(dev, &mlx4_cfg);
		if (err)
			mlx4_warn(dev, "Failed to override log_pg_sz parameter\n");

		dev_cap = kzalloc(sizeof *dev_cap, GFP_KERNEL);
		if (!dev_cap) {
			mlx4_err(dev, "Failed to allocate memory for dev_cap\n");
			err = -ENOMEM;
			goto err_stop_fw;
		}

		err = mlx4_dev_cap(dev, dev_cap);
		if (err) {
			mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
			goto err_stop_fw;
		}

		choose_steering_mode(dev, dev_cap);

		if (mlx4_is_master(dev))
			mlx4_parav_master_pf_caps(dev);

		process_mod_param_profile(&profile);
		if (dev->caps.steering_mode ==
		    MLX4_STEERING_MODE_DEVICE_MANAGED)
			profile.num_mcg = MLX4_FS_NUM_MCG;

		/* mlx4_make_profile() returns the required ICM size, or a
		 * negative errno encoded in the u64 (hence the cast below). */
		icm_size = mlx4_make_profile(dev, &profile, dev_cap,
					     &init_hca);
		if ((long long) icm_size < 0) {
			err = icm_size;
			goto err_stop_fw;
		}

		dev->caps.max_fmr_maps = (1 << (32 - ilog2(dev->caps.num_mpts))) - 1;

		init_hca.log_uar_sz = ilog2(dev->caps.num_uars);
		init_hca.uar_page_sz = PAGE_SHIFT - 12;

		err = mlx4_init_icm(dev, dev_cap, &init_hca, icm_size);
		if (err)
			goto err_stop_fw;

		init_hca.mw_enable = 1;

		err = mlx4_INIT_HCA(dev, &init_hca);
		if (err) {
			mlx4_err(dev, "INIT_HCA command failed, aborting.\n");
			goto err_free_icm;
		}

		/*
		 * Read HCA frequency by QUERY_HCA command
		 */
		if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) {
			/* init_hca is reused as the QUERY_HCA output buffer;
			 * its INIT_HCA contents are no longer needed here. */
			memset(&init_hca, 0, sizeof(init_hca));
			err = mlx4_QUERY_HCA(dev, &init_hca);
			if (err) {
				mlx4_err(dev, "QUERY_HCA command failed, disable timestamp.\n");
				dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
			} else {
				dev->caps.hca_core_clock =
					init_hca.hca_core_clock;
			}

			/* In case we got HCA frequency 0 - disable timestamping
			 * to avoid dividing by zero
			 */
			if (!dev->caps.hca_core_clock) {
				dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
				mlx4_err(dev, "HCA frequency is 0. Timestamping is not supported.");
			} else if (map_internal_clock(dev)) {
				/* Map internal clock,
				 * in case of failure disable timestamping
				 */
				dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
				mlx4_err(dev, "Failed to map internal clock. Timestamping is not supported.\n");
			}
		}
	} else {
		err = mlx4_init_slave(dev);
		if (err) {
			mlx4_err(dev, "Failed to initialize slave\n");
			return err;
		}

		err = mlx4_slave_cap(dev);
		if (err) {
			mlx4_err(dev, "Failed to obtain slave caps\n");
			goto err_close;
		}
	}

	if (map_bf_area(dev))
		mlx4_dbg(dev, "Failed to map blue flame area\n");

	/* Only the master set the ports, all the rest got it from it.*/
	if (!mlx4_is_slave(dev))
		mlx4_set_port_mask(dev);

	err = mlx4_QUERY_ADAPTER(dev, &adapter);
	if (err) {
		mlx4_err(dev, "QUERY_ADAPTER command failed, aborting.\n");
		goto unmap_bf;
	}

	priv->eq_table.inta_pin = adapter.inta_pin;
	memcpy(dev->board_id, adapter.board_id, sizeof dev->board_id);
	memcpy(dev->vsd, adapter.vsd, sizeof(dev->vsd));
	dev->vsd_vendor_id = adapter.vsd_vendor_id;

	/* dev_cap was only allocated on the PF path. */
	if (!mlx4_is_slave(dev))
		kfree(dev_cap);

	return 0;

unmap_bf:
	if (!mlx4_is_slave(dev))
		unmap_internal_clock(dev);
	unmap_bf_area(dev);

	if (mlx4_is_slave(dev)) {
		kfree(dev->caps.qp0_tunnel);
		kfree(dev->caps.qp0_proxy);
		kfree(dev->caps.qp1_tunnel);
		kfree(dev->caps.qp1_proxy);
	}

err_close:
	if (mlx4_is_slave(dev))
		mlx4_slave_exit(dev);
	else
		mlx4_CLOSE_HCA(dev, 0);

err_free_icm:
	if (!mlx4_is_slave(dev))
		mlx4_free_icms(dev);

err_stop_fw:
	if (!mlx4_is_slave(dev)) {
		/* Only release the FW ICM if UNMAP_FA succeeded; otherwise
		 * the FW may still reference it. */
		if (!mlx4_UNMAP_FA(dev))
			mlx4_free_icm(dev, priv->fw.fw_icm, 0);
		else
			pr_warn("mlx4_core: mlx4_UNMAP_FA failed.\n");
		kfree(dev_cap);
	}
	return err;
}

/*
 * Populate the per-port (and, on the master, per-VF) default counter
 * lists and initialize the counter-index bitmap.  Returns 0, -ENOENT
 * when the device has no counter support, or a negative errno.
 */
static int mlx4_init_counters_table(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	int nent_pow2, port_indx, vf_index, num_counters;
	int res, index = 0;
	struct counter_index *new_counter_index;

	if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
		return -ENOENT;

	/* Switch the device into extended-counter mode when every basic
	 * counter also has an extended variant. */
	if (!mlx4_is_slave(dev) &&
	    dev->caps.max_counters == dev->caps.max_extended_counters) {
		res = mlx4_cmd(dev, MLX4_IF_STATE_EXTENDED, 0, 0,
			       MLX4_CMD_SET_IF_STAT,
			       MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
		if (res) {
			mlx4_err(dev, "Failed to set extended counters (err=%d)\n", res);
			return res;
		}
	}

	mutex_init(&priv->counters_table.mutex);

	if (mlx4_is_slave(dev)) {
		/* Slaves only record the default counter the master assigned
		 * per port (0xFF means "no default counter for this port"). */
		for (port_indx = 0; port_indx < dev->caps.num_ports; port_indx++) {
			INIT_LIST_HEAD(&priv->counters_table.global_port_list[port_indx]);
			if (dev->caps.def_counter_index[port_indx] != 0xFF) {
				new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL);
				/* NOTE(review): returning here leaks the nodes
				 * queued on earlier ports; callers do not run
				 * cleanup on this path — TODO confirm/fix. */
				if (!new_counter_index)
					return -ENOMEM;
				new_counter_index->index = dev->caps.def_counter_index[port_indx];
				list_add_tail(&new_counter_index->list, &priv->counters_table.global_port_list[port_indx]);
			}
		}
		mlx4_dbg(dev, "%s: slave allocated %d counters for %d ports\n",
			 __func__, dev->caps.num_ports, dev->caps.num_ports);
		return 0;
	}

	nent_pow2 = roundup_pow_of_two(dev->caps.max_counters);

	for (port_indx = 0; port_indx < dev->caps.num_ports; port_indx++) {
		INIT_LIST_HEAD(&priv->counters_table.global_port_list[port_indx]);
		/* allocating 2 counters per port for PFs */
		/* For the PF, the ETH default counters are 0,2; */
		/* and the RoCE default counters are 1,3 */
		for (num_counters = 0; num_counters < 2; num_counters++, index++) {
			new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL);
			if (!new_counter_index)
				return -ENOMEM;
			new_counter_index->index = index;
			list_add_tail(&new_counter_index->list,
				      &priv->counters_table.global_port_list[port_indx]);
		}
	}

	if (mlx4_is_master(dev)) {
		/* One default counter per VF per port; once the pool (minus
		 * two reserved slots) is exhausted, VFs share the sink index. */
		for (vf_index = 0; vf_index < dev->num_vfs; vf_index++) {
			for (port_indx = 0; port_indx < dev->caps.num_ports; port_indx++) {
				INIT_LIST_HEAD(&priv->counters_table.vf_list[vf_index][port_indx]);
				new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL);
				if (!new_counter_index)
					return -ENOMEM;
				if (index < nent_pow2 - 2) {
					new_counter_index->index = index;
					index++;
				} else {
					new_counter_index->index = MLX4_SINK_COUNTER_INDEX;
				}

				list_add_tail(&new_counter_index->list,
					      &priv->counters_table.vf_list[vf_index][port_indx]);
			}
		}

		/* Bitmap hands out the indices not consumed by defaults. */
		res = mlx4_bitmap_init(&priv->counters_table.bitmap,
				       nent_pow2, nent_pow2 - 1,
				       index, 1);
		mlx4_dbg(dev, "%s: master allocated %d counters for %d VFs\n",
			 __func__, index, dev->num_vfs);
	} else {
		res = mlx4_bitmap_init(&priv->counters_table.bitmap,
				       nent_pow2, nent_pow2 - 1,
				       index, 1);
		mlx4_dbg(dev, "%s: native allocated %d counters for %d ports\n",
			 __func__, index, dev->caps.num_ports);
	}

	/* NOTE(review): 'res' from mlx4_bitmap_init is logged nowhere and not
	 * returned — failure is silently ignored; TODO confirm intent. */
	return 0;

}

/*
 * Tear down every counter list built by mlx4_init_counters_table(),
 * resetting VF counter statistics on the way, and release the bitmap.
 */
static void mlx4_cleanup_counters_table(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	int i, j;
	struct counter_index *port, *tmp_port;
	struct counter_index *vf, *tmp_vf;

	mutex_lock(&priv->counters_table.mutex);

	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS) {
		for (i = 0; i < dev->caps.num_ports; i++) {
			list_for_each_entry_safe(port, tmp_port,
						 &priv->counters_table.global_port_list[i],
						 list) {
				list_del(&port->list);
				kfree(port);
			}
		}
		if (!mlx4_is_slave(dev)) {
			for (i = 0; i < dev->num_vfs; i++) {
				for (j = 0; j < dev->caps.num_ports; j++) {
					list_for_each_entry_safe(vf, tmp_vf,
								 &priv->counters_table.vf_list[i][j],
								 list) {
						/* clear the counter statistic */
						if (__mlx4_clear_if_stat(dev, vf->index))
							mlx4_dbg(dev, "%s: reset counter %d failed\n",
								 __func__, vf->index);
list_del(&vf->list); 2288 kfree(vf); 2289 } 2290 } 2291 } 2292 mlx4_bitmap_cleanup(&priv->counters_table.bitmap); 2293 } 2294 } 2295 mutex_unlock(&priv->counters_table.mutex); 2296 } 2297 2298 int __mlx4_slave_counters_free(struct mlx4_dev *dev, int slave) 2299 { 2300 struct mlx4_priv *priv = mlx4_priv(dev); 2301 int i, first; 2302 struct counter_index *vf, *tmp_vf; 2303 2304 /* clean VF's counters for the next useg */ 2305 if (slave > 0 && slave <= dev->num_vfs) { 2306 mlx4_dbg(dev, "%s: free counters of slave(%d)\n" 2307 , __func__, slave); 2308 2309 mutex_lock(&priv->counters_table.mutex); 2310 for (i = 0; i < dev->caps.num_ports; i++) { 2311 first = 0; 2312 list_for_each_entry_safe(vf, tmp_vf, 2313 &priv->counters_table.vf_list[slave - 1][i], 2314 list) { 2315 /* clear the counter statistic */ 2316 if (__mlx4_clear_if_stat(dev, vf->index)) 2317 mlx4_dbg(dev, "%s: reset counter %d failed\n", 2318 __func__, vf->index); 2319 if (first++ && vf->index != MLX4_SINK_COUNTER_INDEX) { 2320 mlx4_dbg(dev, "%s: delete counter index %d for slave %d and port %d\n" 2321 , __func__, vf->index, slave, i + 1); 2322 mlx4_bitmap_free(&priv->counters_table.bitmap, vf->index, MLX4_USE_RR); 2323 list_del(&vf->list); 2324 kfree(vf); 2325 } else { 2326 mlx4_dbg(dev, "%s: can't delete default counter index %d for slave %d and port %d\n" 2327 , __func__, vf->index, slave, i + 1); 2328 } 2329 } 2330 } 2331 mutex_unlock(&priv->counters_table.mutex); 2332 } 2333 2334 return 0; 2335 } 2336 2337 int __mlx4_counter_alloc(struct mlx4_dev *dev, int slave, int port, u32 *idx) 2338 { 2339 struct mlx4_priv *priv = mlx4_priv(dev); 2340 struct counter_index *new_counter_index; 2341 2342 if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS)) 2343 return -ENOENT; 2344 2345 if ((slave > MLX4_MAX_NUM_VF) || (slave < 0) || 2346 (port < 0) || (port > MLX4_MAX_PORTS)) { 2347 mlx4_dbg(dev, "%s: invalid slave(%d) or port(%d) index\n", 2348 __func__, slave, port); 2349 return -EINVAL; 2350 } 2351 2352 /* handle 
old guest request does not support request by port index */ 2353 if (port == 0) { 2354 *idx = MLX4_SINK_COUNTER_INDEX; 2355 mlx4_dbg(dev, "%s: allocated default counter index %d for slave %d port %d\n" 2356 , __func__, *idx, slave, port); 2357 return 0; 2358 } 2359 2360 mutex_lock(&priv->counters_table.mutex); 2361 2362 *idx = mlx4_bitmap_alloc(&priv->counters_table.bitmap); 2363 /* if no resources return the default counter of the slave and port */ 2364 if (*idx == -1) { 2365 if (slave == 0) { /* its the ethernet counter ?????? */ 2366 new_counter_index = list_entry(priv->counters_table.global_port_list[port - 1].next, 2367 struct counter_index, 2368 list); 2369 } else { 2370 new_counter_index = list_entry(priv->counters_table.vf_list[slave - 1][port - 1].next, 2371 struct counter_index, 2372 list); 2373 } 2374 2375 *idx = new_counter_index->index; 2376 mlx4_dbg(dev, "%s: allocated defualt counter index %d for slave %d port %d\n" 2377 , __func__, *idx, slave, port); 2378 goto out; 2379 } 2380 2381 if (slave == 0) { /* native or master */ 2382 new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL); 2383 if (!new_counter_index) 2384 goto no_mem; 2385 new_counter_index->index = *idx; 2386 list_add_tail(&new_counter_index->list, &priv->counters_table.global_port_list[port - 1]); 2387 } else { 2388 new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL); 2389 if (!new_counter_index) 2390 goto no_mem; 2391 new_counter_index->index = *idx; 2392 list_add_tail(&new_counter_index->list, &priv->counters_table.vf_list[slave - 1][port - 1]); 2393 } 2394 2395 mlx4_dbg(dev, "%s: allocated counter index %d for slave %d port %d\n" 2396 , __func__, *idx, slave, port); 2397 out: 2398 mutex_unlock(&priv->counters_table.mutex); 2399 return 0; 2400 2401 no_mem: 2402 mlx4_bitmap_free(&priv->counters_table.bitmap, *idx, MLX4_USE_RR); 2403 mutex_unlock(&priv->counters_table.mutex); 2404 *idx = MLX4_SINK_COUNTER_INDEX; 2405 mlx4_dbg(dev, "%s: failed err (%d)\n" 
2406 , __func__, -ENOMEM); 2407 return -ENOMEM; 2408 } 2409 2410 int mlx4_counter_alloc(struct mlx4_dev *dev, u8 port, u32 *idx) 2411 { 2412 u64 out_param; 2413 int err; 2414 struct mlx4_priv *priv = mlx4_priv(dev); 2415 struct counter_index *new_counter_index, *c_index; 2416 2417 if (mlx4_is_mfunc(dev)) { 2418 err = mlx4_cmd_imm(dev, 0, &out_param, 2419 ((u32) port) << 8 | (u32) RES_COUNTER, 2420 RES_OP_RESERVE, MLX4_CMD_ALLOC_RES, 2421 MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); 2422 if (!err) { 2423 *idx = get_param_l(&out_param); 2424 if (*idx == MLX4_SINK_COUNTER_INDEX) 2425 return -ENOSPC; 2426 2427 mutex_lock(&priv->counters_table.mutex); 2428 c_index = list_entry(priv->counters_table.global_port_list[port - 1].next, 2429 struct counter_index, 2430 list); 2431 mutex_unlock(&priv->counters_table.mutex); 2432 if (c_index->index == *idx) 2433 return -EEXIST; 2434 2435 if (mlx4_is_slave(dev)) { 2436 new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL); 2437 if (!new_counter_index) { 2438 mlx4_counter_free(dev, port, *idx); 2439 return -ENOMEM; 2440 } 2441 new_counter_index->index = *idx; 2442 mutex_lock(&priv->counters_table.mutex); 2443 list_add_tail(&new_counter_index->list, &priv->counters_table.global_port_list[port - 1]); 2444 mutex_unlock(&priv->counters_table.mutex); 2445 mlx4_dbg(dev, "%s: allocated counter index %d for port %d\n" 2446 , __func__, *idx, port); 2447 } 2448 } 2449 return err; 2450 } 2451 return __mlx4_counter_alloc(dev, 0, port, idx); 2452 } 2453 EXPORT_SYMBOL_GPL(mlx4_counter_alloc); 2454 2455 void __mlx4_counter_free(struct mlx4_dev *dev, int slave, int port, u32 idx) 2456 { 2457 /* check if native or slave and deletes accordingly */ 2458 struct mlx4_priv *priv = mlx4_priv(dev); 2459 struct counter_index *pf, *tmp_pf; 2460 struct counter_index *vf, *tmp_vf; 2461 int first; 2462 2463 2464 if (idx == MLX4_SINK_COUNTER_INDEX) { 2465 mlx4_dbg(dev, "%s: try to delete default counter index %d for port %d\n" 2466 , __func__, idx, 
port); 2467 return; 2468 } 2469 2470 if ((slave > MLX4_MAX_NUM_VF) || (slave < 0) || 2471 (port < 0) || (port > MLX4_MAX_PORTS)) { 2472 mlx4_warn(dev, "%s: deletion failed due to invalid slave(%d) or port(%d) index\n" 2473 , __func__, slave, idx); 2474 return; 2475 } 2476 2477 mutex_lock(&priv->counters_table.mutex); 2478 if (slave == 0) { 2479 first = 0; 2480 list_for_each_entry_safe(pf, tmp_pf, 2481 &priv->counters_table.global_port_list[port - 1], 2482 list) { 2483 /* the first 2 counters are reserved */ 2484 if (pf->index == idx) { 2485 /* clear the counter statistic */ 2486 if (__mlx4_clear_if_stat(dev, pf->index)) 2487 mlx4_dbg(dev, "%s: reset counter %d failed\n", 2488 __func__, pf->index); 2489 if (1 < first && idx != MLX4_SINK_COUNTER_INDEX) { 2490 list_del(&pf->list); 2491 kfree(pf); 2492 mlx4_dbg(dev, "%s: delete counter index %d for native device (%d) port %d\n" 2493 , __func__, idx, slave, port); 2494 mlx4_bitmap_free(&priv->counters_table.bitmap, idx, MLX4_USE_RR); 2495 goto out; 2496 } else { 2497 mlx4_dbg(dev, "%s: can't delete default counter index %d for native device (%d) port %d\n" 2498 , __func__, idx, slave, port); 2499 goto out; 2500 } 2501 } 2502 first++; 2503 } 2504 mlx4_dbg(dev, "%s: can't delete counter index %d for native device (%d) port %d\n" 2505 , __func__, idx, slave, port); 2506 } else { 2507 first = 0; 2508 list_for_each_entry_safe(vf, tmp_vf, 2509 &priv->counters_table.vf_list[slave - 1][port - 1], 2510 list) { 2511 /* the first element is reserved */ 2512 if (vf->index == idx) { 2513 /* clear the counter statistic */ 2514 if (__mlx4_clear_if_stat(dev, vf->index)) 2515 mlx4_dbg(dev, "%s: reset counter %d failed\n", 2516 __func__, vf->index); 2517 if (first) { 2518 list_del(&vf->list); 2519 kfree(vf); 2520 mlx4_dbg(dev, "%s: delete counter index %d for slave %d port %d\n", 2521 __func__, idx, slave, port); 2522 mlx4_bitmap_free(&priv->counters_table.bitmap, idx, MLX4_USE_RR); 2523 goto out; 2524 } else { 2525 mlx4_dbg(dev, "%s: 
can't delete default slave (%d) counter index %d for port %d\n" 2526 , __func__, slave, idx, port); 2527 goto out; 2528 } 2529 } 2530 first++; 2531 } 2532 mlx4_dbg(dev, "%s: can't delete slave (%d) counter index %d for port %d\n" 2533 , __func__, slave, idx, port); 2534 } 2535 2536 out: 2537 mutex_unlock(&priv->counters_table.mutex); 2538 } 2539 2540 void mlx4_counter_free(struct mlx4_dev *dev, u8 port, u32 idx) 2541 { 2542 u64 in_param = 0; 2543 struct mlx4_priv *priv = mlx4_priv(dev); 2544 struct counter_index *counter, *tmp_counter; 2545 int first = 0; 2546 2547 if (mlx4_is_mfunc(dev)) { 2548 set_param_l(&in_param, idx); 2549 mlx4_cmd(dev, in_param, 2550 ((u32) port) << 8 | (u32) RES_COUNTER, 2551 RES_OP_RESERVE, 2552 MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A, 2553 MLX4_CMD_WRAPPED); 2554 2555 if (mlx4_is_slave(dev) && idx != MLX4_SINK_COUNTER_INDEX) { 2556 mutex_lock(&priv->counters_table.mutex); 2557 list_for_each_entry_safe(counter, tmp_counter, 2558 &priv->counters_table.global_port_list[port - 1], 2559 list) { 2560 if (counter->index == idx && first++) { 2561 list_del(&counter->list); 2562 kfree(counter); 2563 mlx4_dbg(dev, "%s: delete counter index %d for port %d\n" 2564 , __func__, idx, port); 2565 mutex_unlock(&priv->counters_table.mutex); 2566 return; 2567 } 2568 } 2569 mutex_unlock(&priv->counters_table.mutex); 2570 } 2571 2572 return; 2573 } 2574 __mlx4_counter_free(dev, 0, port, idx); 2575 } 2576 EXPORT_SYMBOL_GPL(mlx4_counter_free); 2577 2578 int __mlx4_clear_if_stat(struct mlx4_dev *dev, 2579 u8 counter_index) 2580 { 2581 struct mlx4_cmd_mailbox *if_stat_mailbox = NULL; 2582 int err = 0; 2583 u32 if_stat_in_mod = (counter_index & 0xff) | (1 << 31); 2584 2585 if (counter_index == MLX4_SINK_COUNTER_INDEX) 2586 return -EINVAL; 2587 2588 if (mlx4_is_slave(dev)) 2589 return 0; 2590 2591 if_stat_mailbox = mlx4_alloc_cmd_mailbox(dev); 2592 if (IS_ERR(if_stat_mailbox)) { 2593 err = PTR_ERR(if_stat_mailbox); 2594 return err; 2595 } 2596 2597 err = 
	    mlx4_cmd_box(dev, 0, if_stat_mailbox->dma, if_stat_in_mod, 0,
			 MLX4_CMD_QUERY_IF_STAT, MLX4_CMD_TIME_CLASS_C,
			 MLX4_CMD_NATIVE);

	mlx4_free_cmd_mailbox(dev, if_stat_mailbox);
	return err;
}

/*
 * Return the default counter index for @slave on @port: the head of
 * the relevant per-port list, or the sink index for IB ports (which
 * have no Ethernet counters).  slave 0 means native/PF.
 */
u8 mlx4_get_default_counter_index(struct mlx4_dev *dev, int slave, int port)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	struct counter_index *new_counter_index;

	if (dev->caps.port_type[port] == MLX4_PORT_TYPE_IB) {
		mlx4_dbg(dev, "%s: return counter index %d for slave %d port (MLX4_PORT_TYPE_IB) %d\n",
			 __func__, MLX4_SINK_COUNTER_INDEX, slave, port);
		return (u8)MLX4_SINK_COUNTER_INDEX;
	}

	mutex_lock(&priv->counters_table.mutex);
	if (slave == 0) {
		new_counter_index = list_entry(priv->counters_table.global_port_list[port - 1].next,
					       struct counter_index,
					       list);
	} else {
		new_counter_index = list_entry(priv->counters_table.vf_list[slave - 1][port - 1].next,
					       struct counter_index,
					       list);
	}
	mutex_unlock(&priv->counters_table.mutex);

	mlx4_dbg(dev, "%s: return counter index %d for slave %d port %d\n",
		 __func__, new_counter_index->index, slave, port);


	return (u8)new_counter_index->index;
}

/*
 * Accumulate QUERY_IF_STAT extended counters from every counter on
 * @port into @vport_stats.  @reset non-zero also clears each counter
 * read.  Returns 0 or a negative errno from the first failing query.
 */
int mlx4_get_vport_ethtool_stats(struct mlx4_dev *dev, int port,
				 struct mlx4_en_vport_stats *vport_stats,
				 int reset)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	struct mlx4_cmd_mailbox *if_stat_mailbox = NULL;
	union mlx4_counter *counter;
	int err = 0;
	u32 if_stat_in_mod;
	struct counter_index *vport, *tmp_vport;

	if (!vport_stats)
		return -EINVAL;

	if_stat_mailbox = mlx4_alloc_cmd_mailbox(dev);
	if (IS_ERR(if_stat_mailbox)) {
		err = PTR_ERR(if_stat_mailbox);
		return err;
	}

	mutex_lock(&priv->counters_table.mutex);
	list_for_each_entry_safe(vport, tmp_vport,
				 &priv->counters_table.global_port_list[port - 1],
				 list) {
		/* the sink counter carries no real statistics */
		if (vport->index == MLX4_SINK_COUNTER_INDEX)
			continue;

		memset(if_stat_mailbox->buf, 0, sizeof(union mlx4_counter));
		/* bit 31 of the input modifier requests clear-after-read */
		if_stat_in_mod = (vport->index & 0xff) | ((reset & 1) << 31);
		err = mlx4_cmd_box(dev, 0, if_stat_mailbox->dma,
				   if_stat_in_mod, 0,
				   MLX4_CMD_QUERY_IF_STAT,
				   MLX4_CMD_TIME_CLASS_C,
				   MLX4_CMD_NATIVE);
		if (err) {
			mlx4_dbg(dev, "%s: failed to read statistics for counter index %d\n",
				 __func__, vport->index);
			goto if_stat_out;
		}
		counter = (union mlx4_counter *)if_stat_mailbox->buf;
		/* cnt_mode 1 = extended counter layout; others are skipped */
		if ((counter->control.cnt_mode & 0xf) == 1) {
			vport_stats->rx_broadcast_packets += be64_to_cpu(counter->ext.counters[0].IfRxBroadcastFrames);
			vport_stats->rx_unicast_packets += be64_to_cpu(counter->ext.counters[0].IfRxUnicastFrames);
			vport_stats->rx_multicast_packets += be64_to_cpu(counter->ext.counters[0].IfRxMulticastFrames);
			vport_stats->tx_broadcast_packets += be64_to_cpu(counter->ext.counters[0].IfTxBroadcastFrames);
			vport_stats->tx_unicast_packets += be64_to_cpu(counter->ext.counters[0].IfTxUnicastFrames);
			vport_stats->tx_multicast_packets += be64_to_cpu(counter->ext.counters[0].IfTxMulticastFrames);
			vport_stats->rx_broadcast_bytes += be64_to_cpu(counter->ext.counters[0].IfRxBroadcastOctets);
			vport_stats->rx_unicast_bytes += be64_to_cpu(counter->ext.counters[0].IfRxUnicastOctets);
			vport_stats->rx_multicast_bytes += be64_to_cpu(counter->ext.counters[0].IfRxMulticastOctets);
			vport_stats->tx_broadcast_bytes += be64_to_cpu(counter->ext.counters[0].IfTxBroadcastOctets);
			vport_stats->tx_unicast_bytes += be64_to_cpu(counter->ext.counters[0].IfTxUnicastOctets);
			vport_stats->tx_multicast_bytes += be64_to_cpu(counter->ext.counters[0].IfTxMulticastOctets);
			vport_stats->rx_errors += be64_to_cpu(counter->ext.counters[0].IfRxErrorFrames);
			vport_stats->rx_dropped += be64_to_cpu(counter->ext.counters[0].IfRxNoBufferFrames);
			vport_stats->tx_errors += be64_to_cpu(counter->ext.counters[0].IfTxDroppedFrames);
		}
	}

if_stat_out:
	mutex_unlock(&priv->counters_table.mutex);
	mlx4_free_cmd_mailbox(dev, if_stat_mailbox);

	return err;
}
EXPORT_SYMBOL_GPL(mlx4_get_vport_ethtool_stats);

/*
 * Initialize all software resource tables (UAR, PD, XRCD, MR, MCG, EQ,
 * CQ, SRQ, QP, counters), verify interrupt delivery with a NOP command
 * and program per-port IB capabilities.  Each step is unwound in
 * reverse order via the goto chain on failure.
 */
static int mlx4_setup_hca(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	int err;
	int port;
	__be32 ib_port_default_caps;

	err = mlx4_init_uar_table(dev);
	if (err) {
		mlx4_err(dev, "Failed to initialize "
			 "user access region table (err=%d), aborting.\n",
			 err);
		return err;
	}

	err = mlx4_uar_alloc(dev, &priv->driver_uar);
	if (err) {
		mlx4_err(dev, "Failed to allocate driver access region "
			 "(err=%d), aborting.\n", err);
		goto err_uar_table_free;
	}

	/* Map the driver UAR page so the kernel can ring doorbells. */
	priv->kar = ioremap((phys_addr_t) priv->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
	if (!priv->kar) {
		mlx4_err(dev, "Couldn't map kernel access region, "
			 "aborting.\n");
		err = -ENOMEM;
		goto err_uar_free;
	}

	err = mlx4_init_pd_table(dev);
	if (err) {
		mlx4_err(dev, "Failed to initialize "
			 "protection domain table (err=%d), aborting.\n", err);
		goto err_kar_unmap;
	}

	err = mlx4_init_xrcd_table(dev);
	if (err) {
		mlx4_err(dev, "Failed to initialize "
			 "reliable connection domain table (err=%d), "
			 "aborting.\n", err);
		goto err_pd_table_free;
	}

	err = mlx4_init_mr_table(dev);
	if (err) {
		mlx4_err(dev, "Failed to initialize "
			 "memory region table (err=%d), aborting.\n", err);
		goto err_xrcd_table_free;
	}

	/* Multicast groups are managed by the PF only; slaves proxy. */
	if (!mlx4_is_slave(dev)) {
		err = mlx4_init_mcg_table(dev);
		if (err) {
			mlx4_err(dev, "Failed to initialize "
				 "multicast group table (err=%d), aborting.\n",
				 err);
			goto err_mr_table_free;
		}
	}

	err = mlx4_init_eq_table(dev);
	if (err) {
		mlx4_err(dev, "Failed to initialize "
			 "event queue table (err=%d), aborting.\n", err);
		goto err_mcg_table_free;
	}

	err = mlx4_cmd_use_events(dev);
	if (err) {
		mlx4_err(dev, "Failed to switch to event-driven "
			 "firmware commands (err=%d), aborting.\n", err);
		goto err_eq_table_free;
	}

	/* NOP generates an async event; proves the IRQ path works. */
	err = mlx4_NOP(dev);
	if (err) {
		if (dev->flags & MLX4_FLAG_MSI_X) {
			mlx4_warn(dev, "NOP command failed to generate MSI-X "
				  "interrupt IRQ %d).\n",
				  priv->eq_table.eq[dev->caps.num_comp_vectors].irq);
			mlx4_warn(dev, "Trying again without MSI-X.\n");
		} else {
			mlx4_err(dev, "NOP command failed to generate interrupt "
				 "(IRQ %d), aborting.\n",
				 priv->eq_table.eq[dev->caps.num_comp_vectors].irq);
			mlx4_err(dev, "BIOS or ACPI interrupt routing problem?\n");
		}

		goto err_cmd_poll;
	}

	mlx4_dbg(dev, "NOP command IRQ test passed\n");

	err = mlx4_init_cq_table(dev);
	if (err) {
		mlx4_err(dev, "Failed to initialize "
			 "completion queue table (err=%d), aborting.\n", err);
		goto err_cmd_poll;
	}

	err = mlx4_init_srq_table(dev);
	if (err) {
		mlx4_err(dev, "Failed to initialize "
			 "shared receive queue table (err=%d), aborting.\n",
			 err);
		goto err_cq_table_free;
	}

	err = mlx4_init_qp_table(dev);
	if (err) {
		mlx4_err(dev, "Failed to initialize "
			 "queue pair table (err=%d), aborting.\n", err);
		goto err_srq_table_free;
	}

	/* -ENOENT just means the device lacks counters; keep going. */
	err = mlx4_init_counters_table(dev);
	if (err && err != -ENOENT) {
		mlx4_err(dev, "Failed to initialize counters table (err=%d), "
			 "aborting.\n", err);
		goto err_qp_table_free;
	}

	if (!mlx4_is_slave(dev)) {
		for (port = 1; port <= dev->caps.num_ports; port++) {
			ib_port_default_caps = 0;
			err = mlx4_get_port_ib_caps(dev, port,
						    &ib_port_default_caps);
			if (err)
				mlx4_warn(dev, "failed to get port %d default "
					  "ib capabilities (%d). Continuing "
					  "with caps = 0\n", port, err);
			dev->caps.ib_port_def_cap[port] = ib_port_default_caps;

			/* initialize per-slave default ib port capabilities */
			if (mlx4_is_master(dev)) {
				int i;
				for (i = 0; i < dev->num_slaves; i++) {
					if (i == mlx4_master_func_num(dev))
						continue;
					priv->mfunc.master.slave_state[i].ib_cap_mask[port] =
						ib_port_default_caps;
				}
			}

			dev->caps.port_ib_mtu[port] = IB_MTU_4096;

			err = mlx4_SET_PORT(dev, port, mlx4_is_master(dev) ?
					    dev->caps.pkey_table_len[port] : -1);
			if (err) {
				mlx4_err(dev, "Failed to set port %d (err=%d), "
					 "aborting\n", port, err);
				goto err_counters_table_free;
			}
		}
	}

	return 0;

err_counters_table_free:
	mlx4_cleanup_counters_table(dev);

err_qp_table_free:
	mlx4_cleanup_qp_table(dev);

err_srq_table_free:
	mlx4_cleanup_srq_table(dev);

err_cq_table_free:
	mlx4_cleanup_cq_table(dev);

err_cmd_poll:
	mlx4_cmd_use_polling(dev);

err_eq_table_free:
	mlx4_cleanup_eq_table(dev);

err_mcg_table_free:
	if (!mlx4_is_slave(dev))
		mlx4_cleanup_mcg_table(dev);

err_mr_table_free:
	mlx4_cleanup_mr_table(dev);

err_xrcd_table_free:
	mlx4_cleanup_xrcd_table(dev);

err_pd_table_free:
	mlx4_cleanup_pd_table(dev);

err_kar_unmap:
	iounmap(priv->kar);

err_uar_free:
	mlx4_uar_free(dev, &priv->driver_uar);

err_uar_table_free:
	mlx4_cleanup_uar_table(dev);
	return err;
}

/*
 * Try to enable MSI-X with one vector per CPU per port (plus the
 * legacy pool), retrying with fewer vectors if the request is too
 * large, and fall back to INTx (or no IRQ at all) on failure.
 */
static void mlx4_enable_msi_x(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	struct msix_entry *entries;
	int nreq = min_t(int, dev->caps.num_ports *
			 min_t(int, num_possible_cpus() + 1, MAX_MSIX_P_PORT)
			 + MSIX_LEGACY_SZ, MAX_MSIX);
int err; 2912 int i; 2913 2914 if (msi_x) { 2915 nreq = min_t(int, dev->caps.num_eqs - dev->caps.reserved_eqs, 2916 nreq); 2917 2918 if (msi_x > 1 && !mlx4_is_mfunc(dev)) 2919 nreq = min_t(int, nreq, msi_x); 2920 2921 entries = kcalloc(nreq, sizeof *entries, GFP_KERNEL); 2922 if (!entries) 2923 goto no_msi; 2924 2925 for (i = 0; i < nreq; ++i) 2926 entries[i].entry = i; 2927 2928 retry: 2929 err = pci_enable_msix(dev->pdev, entries, nreq); 2930 if (err) { 2931 /* Try again if at least 2 vectors are available */ 2932 if (err > 1) { 2933 mlx4_info(dev, "Requested %d vectors, " 2934 "but only %d MSI-X vectors available, " 2935 "trying again\n", nreq, err); 2936 nreq = err; 2937 goto retry; 2938 } 2939 kfree(entries); 2940 /* if error, or can't alloc even 1 IRQ */ 2941 if (err < 0) { 2942 mlx4_err(dev, "No IRQs left, device can't " 2943 "be started.\n"); 2944 goto no_irq; 2945 } 2946 goto no_msi; 2947 } 2948 2949 if (nreq < 2950 MSIX_LEGACY_SZ + dev->caps.num_ports * MIN_MSIX_P_PORT) { 2951 /*Working in legacy mode , all EQ's shared*/ 2952 dev->caps.comp_pool = 0; 2953 dev->caps.num_comp_vectors = nreq - 1; 2954 } else { 2955 dev->caps.comp_pool = nreq - MSIX_LEGACY_SZ; 2956 dev->caps.num_comp_vectors = MSIX_LEGACY_SZ - 1; 2957 } 2958 for (i = 0; i < nreq; ++i) 2959 priv->eq_table.eq[i].irq = entries[i].vector; 2960 2961 dev->flags |= MLX4_FLAG_MSI_X; 2962 2963 kfree(entries); 2964 return; 2965 } 2966 2967 no_msi: 2968 dev->caps.num_comp_vectors = 1; 2969 dev->caps.comp_pool = 0; 2970 2971 for (i = 0; i < 2; ++i) 2972 priv->eq_table.eq[i].irq = dev->pdev->irq; 2973 return; 2974 no_irq: 2975 dev->caps.num_comp_vectors = 0; 2976 dev->caps.comp_pool = 0; 2977 return; 2978 } 2979 2980 static void 2981 mlx4_init_hca_info(struct mlx4_dev *dev) 2982 { 2983 struct mlx4_hca_info *info = &mlx4_priv(dev)->hca_info; 2984 2985 info->dev = dev; 2986 2987 info->firmware_attr = (struct device_attribute)__ATTR(fw_ver, S_IRUGO, 2988 show_firmware_version, NULL); 2989 if 
	(device_create_file(&dev->pdev->dev, &info->firmware_attr))
		mlx4_err(dev, "Failed to add file firmware version");

	info->hca_attr = (struct device_attribute)__ATTR(hca, S_IRUGO, show_hca,
						NULL);
	if (device_create_file(&dev->pdev->dev, &info->hca_attr))
		mlx4_err(dev, "Failed to add file hca type");

	info->board_attr = (struct device_attribute)__ATTR(board_id, S_IRUGO,
						show_board, NULL);
	if (device_create_file(&dev->pdev->dev, &info->board_attr))
		mlx4_err(dev, "Failed to add file board id type");
}

/*
 * Initialize per-port state and create the per-port sysfs attributes
 * (mlx4_port%d for port type, mlx4_port%d_mtu for IB MTU).  The type
 * and MTU attributes are writable only on non-multi-function devices.
 * Returns 0 on success; on failure info->port is set to -1 so that
 * mlx4_cleanup_port_info() skips attribute removal.
 */
static int mlx4_init_port_info(struct mlx4_dev *dev, int port)
{
	struct mlx4_port_info *info = &mlx4_priv(dev)->port[port];
	int err = 0;

	info->dev = dev;
	info->port = port;
	/* MAC/VLAN tables are owned by the PF; slaves go through the PF. */
	if (!mlx4_is_slave(dev)) {
		mlx4_init_mac_table(dev, &info->mac_table);
		mlx4_init_vlan_table(dev, &info->vlan_table);
		info->base_qpn = mlx4_get_base_qpn(dev, port);
	}

	sprintf(info->dev_name, "mlx4_port%d", port);
	info->port_attr.attr.name = info->dev_name;
	if (mlx4_is_mfunc(dev))
		info->port_attr.attr.mode = S_IRUGO;
	else {
		info->port_attr.attr.mode = S_IRUGO | S_IWUSR;
		info->port_attr.store = set_port_type;
	}
	info->port_attr.show = show_port_type;
	sysfs_attr_init(&info->port_attr.attr);

	/*
	 * NOTE(review): if this first device_create_file() fails, err is
	 * overwritten by the second call below, so the function can return
	 * 0 with info->port == -1 — and since cleanup_port_info() bails on
	 * port < 0, a successfully created mtu attribute would never be
	 * removed.  Consider returning immediately on this failure.
	 */
	err = device_create_file(&dev->pdev->dev, &info->port_attr);
	if (err) {
		mlx4_err(dev, "Failed to create file for port %d\n", port);
		info->port = -1;
	}

	sprintf(info->dev_mtu_name, "mlx4_port%d_mtu", port);
	info->port_mtu_attr.attr.name = info->dev_mtu_name;
	if (mlx4_is_mfunc(dev))
		info->port_mtu_attr.attr.mode = S_IRUGO;
	else {
		info->port_mtu_attr.attr.mode = S_IRUGO | S_IWUSR;
		info->port_mtu_attr.store = set_port_ib_mtu;
	}
	info->port_mtu_attr.show = show_port_ib_mtu;
	sysfs_attr_init(&info->port_mtu_attr.attr);

	err = device_create_file(&dev->pdev->dev,
				 &info->port_mtu_attr);
	if (err) {
		mlx4_err(dev, "Failed to create mtu file for port %d\n", port);
		device_remove_file(&info->dev->pdev->dev, &info->port_attr);
		info->port = -1;
	}

	return err;
}

/*
 * Remove the per-HCA sysfs attributes created by mlx4_init_hca_info().
 */
static void
mlx4_cleanup_hca_info(struct mlx4_hca_info *info)
{
	device_remove_file(&info->dev->pdev->dev, &info->firmware_attr);
	device_remove_file(&info->dev->pdev->dev, &info->board_attr);
	device_remove_file(&info->dev->pdev->dev, &info->hca_attr);
}

/*
 * Remove the per-port sysfs attributes.  info->port < 0 marks a port
 * whose init failed (see mlx4_init_port_info()); nothing to remove then.
 */
static void mlx4_cleanup_port_info(struct mlx4_port_info *info)
{
	if (info->port < 0)
		return;

	device_remove_file(&info->dev->pdev->dev, &info->port_attr);
	device_remove_file(&info->dev->pdev->dev, &info->port_mtu_attr);
}

/*
 * Allocate the per-port steering state and initialize the promiscuous-QP
 * and steering-entry lists for every steering type.  Freed (with all
 * queued entries) by mlx4_clear_steering().  Returns 0 or -ENOMEM.
 */
static int mlx4_init_steering(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	int num_entries = dev->caps.num_ports;
	int i, j;

	priv->steer = kzalloc(sizeof(struct mlx4_steer) * num_entries, GFP_KERNEL);
	if (!priv->steer)
		return -ENOMEM;

	for (i = 0; i < num_entries; i++)
		for (j = 0; j < MLX4_NUM_STEERS; j++) {
			INIT_LIST_HEAD(&priv->steer[i].promisc_qps[j]);
			INIT_LIST_HEAD(&priv->steer[i].steer_entries[j]);
		}
	return 0;
}

/*
 * Free all remaining steering entries (including each entry's duplicate
 * list) and promiscuous QPs for every port/steer type, then the steer
 * array itself.  Inverse of mlx4_init_steering().
 */
static void mlx4_clear_steering(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	struct mlx4_steer_index *entry, *tmp_entry;
	struct mlx4_promisc_qp *pqp, *tmp_pqp;
	int num_entries = dev->caps.num_ports;
	int i, j;

	for (i = 0; i < num_entries; i++) {
		for (j = 0; j < MLX4_NUM_STEERS; j++) {
			list_for_each_entry_safe(pqp, tmp_pqp,
						 &priv->steer[i].promisc_qps[j],
						 list) {
				list_del(&pqp->list);
				kfree(pqp);
			}
			list_for_each_entry_safe(entry, tmp_entry,
						 &priv->steer[i].steer_entries[j],
						 list) {
				list_del(&entry->list);
				list_for_each_entry_safe(pqp, tmp_pqp,
3110 &entry->duplicates, 3111 list) { 3112 list_del(&pqp->list); 3113 kfree(pqp); 3114 } 3115 kfree(entry); 3116 } 3117 } 3118 } 3119 kfree(priv->steer); 3120 } 3121 3122 static int extended_func_num(struct pci_dev *pdev) 3123 { 3124 return PCI_SLOT(pdev->devfn) * 8 + PCI_FUNC(pdev->devfn); 3125 } 3126 3127 #define MLX4_OWNER_BASE 0x8069c 3128 #define MLX4_OWNER_SIZE 4 3129 3130 static int mlx4_get_ownership(struct mlx4_dev *dev) 3131 { 3132 void __iomem *owner; 3133 u32 ret; 3134 3135 if (pci_channel_offline(dev->pdev)) 3136 return -EIO; 3137 3138 owner = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_OWNER_BASE, 3139 MLX4_OWNER_SIZE); 3140 if (!owner) { 3141 mlx4_err(dev, "Failed to obtain ownership bit\n"); 3142 return -ENOMEM; 3143 } 3144 3145 ret = readl(owner); 3146 iounmap(owner); 3147 return (int) !!ret; 3148 } 3149 3150 static void mlx4_free_ownership(struct mlx4_dev *dev) 3151 { 3152 void __iomem *owner; 3153 3154 if (pci_channel_offline(dev->pdev)) 3155 return; 3156 3157 owner = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_OWNER_BASE, 3158 MLX4_OWNER_SIZE); 3159 if (!owner) { 3160 mlx4_err(dev, "Failed to obtain ownership bit\n"); 3161 return; 3162 } 3163 writel(0, owner); 3164 msleep(1000); 3165 iounmap(owner); 3166 } 3167 3168 static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data) 3169 { 3170 struct mlx4_priv *priv; 3171 struct mlx4_dev *dev; 3172 int err; 3173 int port; 3174 int nvfs, prb_vf; 3175 3176 pr_info(DRV_NAME ": Initializing %s\n", pci_name(pdev)); 3177 3178 err = pci_enable_device(pdev); 3179 if (err) { 3180 dev_err(&pdev->dev, "Cannot enable PCI device, " 3181 "aborting.\n"); 3182 return err; 3183 } 3184 3185 mlx4_get_val(num_vfs.dbdf2val.tbl, pci_physfn(pdev), 0, &nvfs); 3186 mlx4_get_val(probe_vf.dbdf2val.tbl, pci_physfn(pdev), 0, &prb_vf); 3187 if (nvfs > MLX4_MAX_NUM_VF) { 3188 dev_err(&pdev->dev, "There are more VF's (%d) than allowed(%d)\n", 3189 nvfs, MLX4_MAX_NUM_VF); 3190 return -EINVAL; 3191 } 3192 3193 if (nvfs < 
	    0) {
		dev_err(&pdev->dev, "num_vfs module parameter cannot be negative\n");
		/* NOTE(review): returns with the PCI device still enabled. */
		return -EINVAL;
	}
	/*
	 * Check for BARs.
	 */
	if (!(pci_dev_data & MLX4_PCI_DEV_IS_VF) &&
	    !(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
		dev_err(&pdev->dev, "Missing DCS, aborting."
			"(driver_data: 0x%x, pci_resource_flags(pdev, 0):0x%x)\n",
			pci_dev_data, pci_resource_flags(pdev, 0));
		err = -ENODEV;
		goto err_disable_pdev;
	}
	if (!(pci_resource_flags(pdev, 2) & IORESOURCE_MEM)) {
		dev_err(&pdev->dev, "Missing UAR, aborting.\n");
		err = -ENODEV;
		goto err_disable_pdev;
	}

	err = pci_request_regions(pdev, DRV_NAME);
	if (err) {
		dev_err(&pdev->dev, "Couldn't get PCI resources, aborting\n");
		goto err_disable_pdev;
	}

	pci_set_master(pdev);

	/* Prefer 64-bit DMA; fall back to 32-bit on failure. */
	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
	if (err) {
		dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask.\n");
		err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
		if (err) {
			dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting.\n");
			goto err_release_regions;
		}
	}
	err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
	if (err) {
		dev_warn(&pdev->dev, "Warning: couldn't set 64-bit "
			 "consistent PCI DMA mask.\n");
		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
		if (err) {
			dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, "
				"aborting.\n");
			goto err_release_regions;
		}
	}

	/* Allow large DMA segments, up to the firmware limit of 1 GB */
	dma_set_max_seg_size(&pdev->dev, 1024 * 1024 * 1024);

	priv = kzalloc(sizeof *priv, GFP_KERNEL);
	if (!priv) {
		dev_err(&pdev->dev, "Device struct alloc failed, "
			"aborting.\n");
		err = -ENOMEM;
		goto err_release_regions;
	}

	dev = &priv->dev;
	dev->pdev = pdev;
	INIT_LIST_HEAD(&priv->dev_list);
	INIT_LIST_HEAD(&priv->ctx_list);
	spin_lock_init(&priv->ctx_lock);

	mutex_init(&priv->port_mutex);

	INIT_LIST_HEAD(&priv->pgdir_list);
	mutex_init(&priv->pgdir_mutex);

	INIT_LIST_HEAD(&priv->bf_list);
	mutex_init(&priv->bf_mutex);

	dev->rev_id = pdev->revision;
	dev->numa_node = dev_to_node(&pdev->dev);
	/* Detect if this device is a virtual function */
	if (pci_dev_data & MLX4_PCI_DEV_IS_VF) {
		/* When acting as pf, we normally skip vfs unless explicitly
		 * requested to probe them. */
		if (nvfs && extended_func_num(pdev) > prb_vf) {
			mlx4_warn(dev, "Skipping virtual function:%d\n",
				  extended_func_num(pdev));
			err = -ENODEV;
			goto err_free_dev;
		}
		mlx4_warn(dev, "Detected virtual function - running in slave mode\n");
		dev->flags |= MLX4_FLAG_SLAVE;
	} else {
		/* We reset the device and enable SRIOV only for physical
		 * devices. Try to claim ownership on the device;
		 * if already taken, skip -- do not allow multiple PFs */
		err = mlx4_get_ownership(dev);
		if (err) {
			if (err < 0)
				goto err_free_dev;
			else {
				mlx4_warn(dev, "Multiple PFs not yet supported."
					  " Skipping PF.\n");
				err = -EINVAL;
				goto err_free_dev;
			}
		}

		if (nvfs) {
			mlx4_warn(dev, "Enabling SR-IOV with %d VFs\n", nvfs);
			/* SR-IOV failure is non-fatal: continue as plain PF. */
			err = pci_enable_sriov(pdev, nvfs);
			if (err) {
				mlx4_err(dev, "Failed to enable SR-IOV, continuing without SR-IOV (err = %d).\n",
					 err);
				err = 0;
			} else {
				mlx4_warn(dev, "Running in master mode\n");
				dev->flags |= MLX4_FLAG_SRIOV |
					      MLX4_FLAG_MASTER;
				dev->num_vfs = nvfs;
			}
		}

		atomic_set(&priv->opreq_count, 0);
		INIT_WORK(&priv->opreq_task, mlx4_opreq_action);

		/*
		 * Now reset the HCA before we touch the PCI capabilities or
		 * attempt a firmware command, since a boot ROM may have left
		 * the HCA in an undefined state.
		 */
		err = mlx4_reset(dev);
		if (err) {
			mlx4_err(dev, "Failed to reset HCA, aborting.\n");
			goto err_sriov;
		}
	}

	/* Re-entry point when mlx4_init_hca() demotes us to slave mode. */
slave_start:
	err = mlx4_cmd_init(dev);
	if (err) {
		mlx4_err(dev, "Failed to init command interface, aborting.\n");
		goto err_sriov;
	}

	/* In slave functions, the communication channel must be initialized
	 * before posting commands. Also, init num_slaves before calling
	 * mlx4_init_hca */
	if (mlx4_is_mfunc(dev)) {
		if (mlx4_is_master(dev))
			dev->num_slaves = MLX4_MAX_NUM_SLAVES;
		else {
			dev->num_slaves = 0;
			err = mlx4_multi_func_init(dev);
			if (err) {
				mlx4_err(dev, "Failed to init slave mfunc"
					 " interface, aborting.\n");
				goto err_cmd;
			}
		}
	}

	err = mlx4_init_hca(dev);
	if (err) {
		if (err == -EACCES) {
			/* Not primary Physical function
			 * Running in slave mode */
			mlx4_cmd_cleanup(dev);
			dev->flags |= MLX4_FLAG_SLAVE;
			dev->flags &= ~MLX4_FLAG_MASTER;
			goto slave_start;
		} else
			goto err_mfunc;
	}

	/* In master functions, the communication channel must be initialized
	 * after obtaining its address from fw */
	if (mlx4_is_master(dev)) {
		err = mlx4_multi_func_init(dev);
		if (err) {
			mlx4_err(dev, "Failed to init master mfunc"
				 "interface, aborting.\n");
			goto err_close;
		}
	}

	err = mlx4_alloc_eq_table(dev);
	if (err)
		goto err_master_mfunc;

	priv->msix_ctl.pool_bm = 0;
	mutex_init(&priv->msix_ctl.pool_lock);

	mlx4_enable_msi_x(dev);

	/* no MSIX and no shared IRQ */
	if (!dev->caps.num_comp_vectors && !dev->caps.comp_pool) {
		err = -ENOSPC;
		goto err_free_eq;
	}

	/* Multi-function mode requires MSI-X; INTx cannot be shared here. */
	if ((mlx4_is_mfunc(dev)) &&
	    !(dev->flags & MLX4_FLAG_MSI_X)) {
		err = -ENOSYS;
		mlx4_err(dev, "INTx is not supported in multi-function mode."
			 " aborting.\n");
		goto err_free_eq;
	}

	if (!mlx4_is_slave(dev)) {
		err = mlx4_init_steering(dev);
		if (err)
			goto err_free_eq;
	}

	/*
	 * -EBUSY from mlx4_setup_hca() after the MSI-X NOP test failed:
	 * retry once with MSI-X disabled (see mlx4_setup_hca()'s NOP check).
	 */
	err = mlx4_setup_hca(dev);
	if (err == -EBUSY && (dev->flags & MLX4_FLAG_MSI_X) &&
	    !mlx4_is_mfunc(dev)) {
		dev->flags &= ~MLX4_FLAG_MSI_X;
		dev->caps.num_comp_vectors = 1;
		dev->caps.comp_pool = 0;
		pci_disable_msix(pdev);
		err = mlx4_setup_hca(dev);
	}

	if (err)
		goto err_steer;

	mlx4_init_quotas(dev);
	mlx4_init_hca_info(dev);

	for (port = 1; port <= dev->caps.num_ports; port++) {
		err = mlx4_init_port_info(dev, port);
		if (err)
			goto err_port;
	}

	err = mlx4_register_device(dev);
	if (err)
		goto err_port;

	mlx4_request_modules(dev);

	mlx4_sense_init(dev);
	mlx4_start_sense(dev);

	priv->pci_dev_data = pci_dev_data;
	pci_set_drvdata(pdev, dev);

	return 0;

	/* Unwind chain: reverse order of the setup sequence above. */
err_port:
	for (--port; port >= 1; --port)
		mlx4_cleanup_port_info(&priv->port[port]);

	mlx4_cleanup_counters_table(dev);
	mlx4_cleanup_qp_table(dev);
	mlx4_cleanup_srq_table(dev);
	mlx4_cleanup_cq_table(dev);
	mlx4_cmd_use_polling(dev);
	mlx4_cleanup_eq_table(dev);
	mlx4_cleanup_mcg_table(dev);
	mlx4_cleanup_mr_table(dev);
	mlx4_cleanup_xrcd_table(dev);
	mlx4_cleanup_pd_table(dev);
	mlx4_cleanup_uar_table(dev);

err_steer:
	if (!mlx4_is_slave(dev))
		mlx4_clear_steering(dev);

err_free_eq:
	mlx4_free_eq_table(dev);

err_master_mfunc:
	if (mlx4_is_master(dev)) {
		mlx4_free_resource_tracker(dev, RES_TR_FREE_STRUCTS_ONLY);
		mlx4_multi_func_cleanup(dev);
	}

	if (mlx4_is_slave(dev)) {
		kfree(dev->caps.qp0_tunnel);
		kfree(dev->caps.qp0_proxy);
		kfree(dev->caps.qp1_tunnel);
		kfree(dev->caps.qp1_proxy);
	}

err_close:
	if (dev->flags &
	    MLX4_FLAG_MSI_X)
		pci_disable_msix(pdev);

	mlx4_close_hca(dev);

err_mfunc:
	if (mlx4_is_slave(dev))
		mlx4_multi_func_cleanup(dev);

err_cmd:
	mlx4_cmd_cleanup(dev);

err_sriov:
	if (dev->flags & MLX4_FLAG_SRIOV)
		pci_disable_sriov(pdev);

	if (!mlx4_is_slave(dev))
		mlx4_free_ownership(dev);

err_free_dev:
	kfree(priv);

err_release_regions:
	pci_release_regions(pdev);

err_disable_pdev:
	pci_disable_device(pdev);
	pci_set_drvdata(pdev, NULL);
	return err;
}

/*
 * PCI probe entry point: set the FreeBSD device description, then hand
 * off to the shared bring-up path with the table's driver_data flags.
 */
static int __devinit mlx4_init_one(struct pci_dev *pdev,
				   const struct pci_device_id *id)
{
	device_set_desc(pdev->dev.bsddev, mlx4_version);
	return __mlx4_init_one(pdev, id->driver_data);
}

/*
 * PCI remove entry point: tear down everything __mlx4_init_one() set up,
 * in reverse order.  Also used by restart, suspend and AER paths.
 */
static void mlx4_remove_one(struct pci_dev *pdev)
{
	struct mlx4_dev *dev = pci_get_drvdata(pdev);
	/* NOTE(review): mlx4_priv() is computed before the NULL check below —
	 * presumably pure pointer arithmetic (container_of), so safe, but
	 * fragile; consider moving it inside "if (dev)". */
	struct mlx4_priv *priv = mlx4_priv(dev);
	int p;

	if (dev) {
		/* in SRIOV it is not allowed to unload the pf's
		 * driver while there are alive vf's */
		if (mlx4_is_master(dev)) {
			if (mlx4_how_many_lives_vf(dev))
				mlx4_err(dev, "Removing PF when there are assigned VF's !!!\n");
		}
		mlx4_stop_sense(dev);
		mlx4_unregister_device(dev);

		mlx4_cleanup_hca_info(&priv->hca_info);
		for (p = 1; p <= dev->caps.num_ports; p++) {
			mlx4_cleanup_port_info(&priv->port[p]);
			mlx4_CLOSE_PORT(dev, p);
		}

		if (mlx4_is_master(dev))
			mlx4_free_resource_tracker(dev,
						   RES_TR_FREE_SLAVES_ONLY);

		mlx4_cleanup_counters_table(dev);
		mlx4_cleanup_qp_table(dev);
		mlx4_cleanup_srq_table(dev);
		mlx4_cleanup_cq_table(dev);
		mlx4_cmd_use_polling(dev);
		mlx4_cleanup_eq_table(dev);
		mlx4_cleanup_mcg_table(dev);
		mlx4_cleanup_mr_table(dev);
		mlx4_cleanup_xrcd_table(dev);
		mlx4_cleanup_pd_table(dev);

		if (mlx4_is_master(dev))
mlx4_free_resource_tracker(dev, 3555 RES_TR_FREE_STRUCTS_ONLY); 3556 3557 iounmap(priv->kar); 3558 mlx4_uar_free(dev, &priv->driver_uar); 3559 mlx4_cleanup_uar_table(dev); 3560 if (!mlx4_is_slave(dev)) 3561 mlx4_clear_steering(dev); 3562 mlx4_free_eq_table(dev); 3563 if (mlx4_is_master(dev)) 3564 mlx4_multi_func_cleanup(dev); 3565 mlx4_close_hca(dev); 3566 if (mlx4_is_slave(dev)) 3567 mlx4_multi_func_cleanup(dev); 3568 mlx4_cmd_cleanup(dev); 3569 3570 if (dev->flags & MLX4_FLAG_MSI_X) 3571 pci_disable_msix(pdev); 3572 if (dev->flags & MLX4_FLAG_SRIOV) { 3573 mlx4_warn(dev, "Disabling SR-IOV\n"); 3574 pci_disable_sriov(pdev); 3575 } 3576 3577 if (!mlx4_is_slave(dev)) 3578 mlx4_free_ownership(dev); 3579 3580 kfree(dev->caps.qp0_tunnel); 3581 kfree(dev->caps.qp0_proxy); 3582 kfree(dev->caps.qp1_tunnel); 3583 kfree(dev->caps.qp1_proxy); 3584 3585 kfree(priv); 3586 pci_release_regions(pdev); 3587 pci_disable_device(pdev); 3588 pci_set_drvdata(pdev, NULL); 3589 } 3590 } 3591 3592 static int restore_current_port_types(struct mlx4_dev *dev, 3593 enum mlx4_port_type *types, 3594 enum mlx4_port_type *poss_types) 3595 { 3596 struct mlx4_priv *priv = mlx4_priv(dev); 3597 int err, i; 3598 3599 mlx4_stop_sense(dev); 3600 mutex_lock(&priv->port_mutex); 3601 for (i = 0; i < dev->caps.num_ports; i++) 3602 dev->caps.possible_type[i + 1] = poss_types[i]; 3603 err = mlx4_change_port_types(dev, types); 3604 mlx4_start_sense(dev); 3605 mutex_unlock(&priv->port_mutex); 3606 return err; 3607 } 3608 3609 int mlx4_restart_one(struct pci_dev *pdev) 3610 { 3611 struct mlx4_dev *dev = pci_get_drvdata(pdev); 3612 struct mlx4_priv *priv = mlx4_priv(dev); 3613 enum mlx4_port_type curr_type[MLX4_MAX_PORTS]; 3614 enum mlx4_port_type poss_type[MLX4_MAX_PORTS]; 3615 int pci_dev_data, err, i; 3616 3617 pci_dev_data = priv->pci_dev_data; 3618 for (i = 0; i < dev->caps.num_ports; i++) { 3619 curr_type[i] = dev->caps.port_type[i + 1]; 3620 poss_type[i] = dev->caps.possible_type[i + 1]; 3621 } 3622 3623 
	mlx4_remove_one(pdev);
	err = __mlx4_init_one(pdev, pci_dev_data);
	if (err)
		return err;

	/* Probe replaced the drvdata; re-fetch before touching dev. */
	dev = pci_get_drvdata(pdev);
	err = restore_current_port_types(dev, curr_type, poss_type);
	if (err)
		mlx4_err(dev, "mlx4_restart_one: could not restore original port types (%d)\n",
			 err);
	return 0;
}

/*
 * PCI IDs handled by this driver.  driver_data carries MLX4_PCI_DEV_*
 * flags: FORCE_SENSE_PORT for ConnectX/ConnectX-2 parts, IS_VF for
 * virtual functions, 0 for ConnectX-3 family devices.
 */
static DEFINE_PCI_DEVICE_TABLE(mlx4_pci_table) = {
	/* MT25408 "Hermon" SDR */
	{ PCI_VDEVICE(MELLANOX, 0x6340), MLX4_PCI_DEV_FORCE_SENSE_PORT },
	/* MT25408 "Hermon" DDR */
	{ PCI_VDEVICE(MELLANOX, 0x634a), MLX4_PCI_DEV_FORCE_SENSE_PORT },
	/* MT25408 "Hermon" QDR */
	{ PCI_VDEVICE(MELLANOX, 0x6354), MLX4_PCI_DEV_FORCE_SENSE_PORT },
	/* MT25408 "Hermon" DDR PCIe gen2 */
	{ PCI_VDEVICE(MELLANOX, 0x6732), MLX4_PCI_DEV_FORCE_SENSE_PORT },
	/* MT25408 "Hermon" QDR PCIe gen2 */
	{ PCI_VDEVICE(MELLANOX, 0x673c), MLX4_PCI_DEV_FORCE_SENSE_PORT },
	/* MT25408 "Hermon" EN 10GigE */
	{ PCI_VDEVICE(MELLANOX, 0x6368), MLX4_PCI_DEV_FORCE_SENSE_PORT },
	/* MT25408 "Hermon" EN 10GigE PCIe gen2 */
	{ PCI_VDEVICE(MELLANOX, 0x6750), MLX4_PCI_DEV_FORCE_SENSE_PORT },
	/* MT25458 ConnectX EN 10GBASE-T 10GigE */
	{ PCI_VDEVICE(MELLANOX, 0x6372), MLX4_PCI_DEV_FORCE_SENSE_PORT },
	/* MT25458 ConnectX EN 10GBASE-T+Gen2 10GigE */
	{ PCI_VDEVICE(MELLANOX, 0x675a), MLX4_PCI_DEV_FORCE_SENSE_PORT },
	/* MT26468 ConnectX EN 10GigE PCIe gen2*/
	{ PCI_VDEVICE(MELLANOX, 0x6764), MLX4_PCI_DEV_FORCE_SENSE_PORT },
	/* MT26438 ConnectX EN 40GigE PCIe gen2 5GT/s */
	{ PCI_VDEVICE(MELLANOX, 0x6746), MLX4_PCI_DEV_FORCE_SENSE_PORT },
	/* MT26478 ConnectX2 40GigE PCIe gen2 */
	{ PCI_VDEVICE(MELLANOX, 0x676e), MLX4_PCI_DEV_FORCE_SENSE_PORT },
	/* MT25400 Family [ConnectX-2 Virtual Function] */
	{ PCI_VDEVICE(MELLANOX, 0x1002), MLX4_PCI_DEV_IS_VF },
	/* MT27500 Family [ConnectX-3] */
	{ PCI_VDEVICE(MELLANOX, 0x1003), 0 },
	/* MT27500 Family [ConnectX-3 Virtual Function] */
	{ PCI_VDEVICE(MELLANOX, 0x1004), MLX4_PCI_DEV_IS_VF },
	{ PCI_VDEVICE(MELLANOX, 0x1005), 0 }, /* MT27510 Family */
	{ PCI_VDEVICE(MELLANOX, 0x1006), 0 }, /* MT27511 Family */
	{ PCI_VDEVICE(MELLANOX, 0x1007), 0 }, /* MT27520 Family */
	{ PCI_VDEVICE(MELLANOX, 0x1008), 0 }, /* MT27521 Family */
	{ PCI_VDEVICE(MELLANOX, 0x1009), 0 }, /* MT27530 Family */
	{ PCI_VDEVICE(MELLANOX, 0x100a), 0 }, /* MT27531 Family */
	{ PCI_VDEVICE(MELLANOX, 0x100b), 0 }, /* MT27540 Family */
	{ PCI_VDEVICE(MELLANOX, 0x100c), 0 }, /* MT27541 Family */
	{ PCI_VDEVICE(MELLANOX, 0x100d), 0 }, /* MT27550 Family */
	{ PCI_VDEVICE(MELLANOX, 0x100e), 0 }, /* MT27551 Family */
	{ PCI_VDEVICE(MELLANOX, 0x100f), 0 }, /* MT27560 Family */
	{ PCI_VDEVICE(MELLANOX, 0x1010), 0 }, /* MT27561 Family */
	{ 0, }
};

MODULE_DEVICE_TABLE(pci, mlx4_pci_table);

/*
 * AER callback: tear the device down on any channel error; request a
 * slot reset unless the failure is permanent.
 */
static pci_ers_result_t mlx4_pci_err_detected(struct pci_dev *pdev,
					      pci_channel_state_t state)
{
	mlx4_remove_one(pdev);

	return state == pci_channel_io_perm_failure ?
		PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
}

/*
 * AER callback after slot reset: re-probe the device.
 * NOTE(review): pci_dev_data is passed as 0 here, dropping the
 * MLX4_PCI_DEV_* flags the device was originally probed with — confirm
 * this is intentional.
 */
static pci_ers_result_t mlx4_pci_slot_reset(struct pci_dev *pdev)
{
	int ret = __mlx4_init_one(pdev, 0);

	return ret ?
		PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
}

static const struct pci_error_handlers mlx4_err_handler = {
	.error_detected	= mlx4_pci_err_detected,
	.slot_reset	= mlx4_pci_slot_reset,
};

/*
 * PM suspend: implemented as a full device removal.
 * NOTE(review): resume() below re-probes with pci_dev_data == 0, losing
 * the original MLX4_PCI_DEV_* flags — confirm this is acceptable.
 */
static int suspend(struct pci_dev *pdev, pm_message_t state)
{
	mlx4_remove_one(pdev);

	return 0;
}

/* PM resume: re-probe the device from scratch. */
static int resume(struct pci_dev *pdev)
{
	return __mlx4_init_one(pdev, 0);
}

static struct pci_driver mlx4_driver = {
	.name		= DRV_NAME,
	.id_table	= mlx4_pci_table,
	.probe		= mlx4_init_one,
	.remove		= __devexit_p(mlx4_remove_one),
	.suspend	= suspend,
	.resume		= resume,
	.err_handler	= &mlx4_err_handler,
};

/*
 * Validate all module parameters at load time.  Returns 0 when every
 * parameter is in range, -1 otherwise (load is refused).  The dbdf2val
 * parameters (port_type_array, num_vfs, probe_vf) get their per-device
 * tables parsed/filled here as well.
 */
static int __init mlx4_verify_params(void)
{
	int status;

	status = update_defaults(&port_type_array);
	if (status == INVALID_STR) {
		if (mlx4_fill_dbdf2val_tbl(&port_type_array.dbdf2val))
			return -1;
	} else if (status == INVALID_DATA) {
		return -1;
	}

	status = update_defaults(&num_vfs);
	if (status == INVALID_STR) {
		if (mlx4_fill_dbdf2val_tbl(&num_vfs.dbdf2val))
			return -1;
	} else if (status == INVALID_DATA) {
		return -1;
	}

	status = update_defaults(&probe_vf);
	if (status == INVALID_STR) {
		if (mlx4_fill_dbdf2val_tbl(&probe_vf.dbdf2val))
			return -1;
	} else if (status == INVALID_DATA) {
		return -1;
	}

	if (msi_x < 0) {
		pr_warn("mlx4_core: bad msi_x: %d\n", msi_x);
		return -1;
	}

	if ((log_num_mac < 0) || (log_num_mac > 7)) {
		pr_warning("mlx4_core: bad num_mac: %d\n", log_num_mac);
		return -1;
	}

	/* Obsolete parameters are warned about, not rejected. */
	if (log_num_vlan != 0)
		pr_warning("mlx4_core: log_num_vlan - obsolete module param, using %d\n",
			   MLX4_LOG_NUM_VLANS);

	if (mlx4_set_4k_mtu != -1)
		pr_warning("mlx4_core: set_4k_mtu - obsolete module param\n");

	if ((log_mtts_per_seg < 0) || (log_mtts_per_seg > 7)) {
		pr_warning("mlx4_core: bad log_mtts_per_seg: %d\n", log_mtts_per_seg);
		return -1;
	}

	/* -1 means "use the default"; otherwise must be in the legal range. */
	if (mlx4_log_num_mgm_entry_size != -1 &&
	    (mlx4_log_num_mgm_entry_size < MLX4_MIN_MGM_LOG_ENTRY_SIZE ||
	     mlx4_log_num_mgm_entry_size > MLX4_MAX_MGM_LOG_ENTRY_SIZE)) {
		pr_warning("mlx4_core: mlx4_log_num_mgm_entry_size (%d) not "
			   "in legal range (-1 or %d..%d)\n",
			   mlx4_log_num_mgm_entry_size,
			   MLX4_MIN_MGM_LOG_ENTRY_SIZE,
			   MLX4_MAX_MGM_LOG_ENTRY_SIZE);
		return -1;
	}

	/* Profile values are log2 sizes; enforce sane minima/maxima. */
	if (mod_param_profile.num_qp < 18 || mod_param_profile.num_qp > 23) {
		pr_warning("mlx4_core: bad log_num_qp: %d\n",
			   mod_param_profile.num_qp);
		return -1;
	}

	if (mod_param_profile.num_srq < 10) {
		pr_warning("mlx4_core: too low log_num_srq: %d\n",
			   mod_param_profile.num_srq);
		return -1;
	}

	if (mod_param_profile.num_cq < 10) {
		pr_warning("mlx4_core: too low log_num_cq: %d\n",
			   mod_param_profile.num_cq);
		return -1;
	}

	if (mod_param_profile.num_mpt < 10) {
		pr_warning("mlx4_core: too low log_num_mpt: %d\n",
			   mod_param_profile.num_mpt);
		return -1;
	}

	/* 0 means "auto"; an explicit value must be at least 2^15. */
	if (mod_param_profile.num_mtt_segs &&
	    mod_param_profile.num_mtt_segs < 15) {
		pr_warning("mlx4_core: too low log_num_mtt: %d\n",
			   mod_param_profile.num_mtt_segs);
		return -1;
	}

	if (mod_param_profile.num_mtt_segs > MLX4_MAX_LOG_NUM_MTT) {
		pr_warning("mlx4_core: too high log_num_mtt: %d\n",
			   mod_param_profile.num_mtt_segs);
		return -1;
	}
	return 0;
}

/*
 * Module init: validate parameters, set up the catastrophic-error
 * handler and the driver workqueue, optionally tune the CPUs, then
 * register the PCI driver.
 */
static int __init mlx4_init(void)
{
	int ret;

	if (mlx4_verify_params())
		return -EINVAL;

	mlx4_catas_init();

	mlx4_wq = create_singlethread_workqueue("mlx4");
	if (!mlx4_wq)
		return -ENOMEM;

	if (enable_sys_tune)
		sys_tune_init();

	ret = pci_register_driver(&mlx4_driver);
	if (ret < 0)
3845 goto err; 3846 3847 return 0; 3848 3849 err: 3850 if (enable_sys_tune) 3851 sys_tune_fini(); 3852 3853 destroy_workqueue(mlx4_wq); 3854 3855 return ret; 3856 } 3857 3858 static void __exit mlx4_cleanup(void) 3859 { 3860 if (enable_sys_tune) 3861 sys_tune_fini(); 3862 3863 pci_unregister_driver(&mlx4_driver); 3864 destroy_workqueue(mlx4_wq); 3865 } 3866 3867 module_init_order(mlx4_init, SI_ORDER_MIDDLE); 3868 module_exit(mlx4_cleanup); 3869 3870 static int 3871 mlx4_evhand(module_t mod, int event, void *arg) 3872 { 3873 return (0); 3874 } 3875 3876 static moduledata_t mlx4_mod = { 3877 .name = "mlx4", 3878 .evhand = mlx4_evhand, 3879 }; 3880 MODULE_VERSION(mlx4, 1); 3881 DECLARE_MODULE(mlx4, mlx4_mod, SI_SUB_OFED_PREINIT, SI_ORDER_ANY); 3882 MODULE_DEPEND(mlx4, linuxkpi, 1, 1, 1); 3883 3884