1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB 2 /* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ 3 4 #include "eswitch.h" 5 #include "lib/mlx5.h" 6 #include "esw/qos.h" 7 #include "en/port.h" 8 #define CREATE_TRACE_POINTS 9 #include "diag/qos_tracepoint.h" 10 11 /* Minimum supported BW share value by the HW is 1 Mbit/sec */ 12 #define MLX5_MIN_BW_SHARE 1 13 14 #define MLX5_RATE_TO_BW_SHARE(rate, divider, limit) \ 15 min_t(u32, max_t(u32, DIV_ROUND_UP(rate, divider), MLX5_MIN_BW_SHARE), limit) 16 17 struct mlx5_esw_rate_group { 18 u32 tsar_ix; 19 u32 max_rate; 20 u32 min_rate; 21 u32 bw_share; 22 struct list_head list; 23 }; 24 25 static int esw_qos_tsar_config(struct mlx5_core_dev *dev, u32 *sched_ctx, 26 u32 tsar_ix, u32 max_rate, u32 bw_share) 27 { 28 u32 bitmask = 0; 29 30 if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling)) 31 return -EOPNOTSUPP; 32 33 MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate); 34 MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share); 35 bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW; 36 bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE; 37 38 return mlx5_modify_scheduling_element_cmd(dev, 39 SCHEDULING_HIERARCHY_E_SWITCH, 40 sched_ctx, 41 tsar_ix, 42 bitmask); 43 } 44 45 static int esw_qos_group_config(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group, 46 u32 max_rate, u32 bw_share, struct netlink_ext_ack *extack) 47 { 48 u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; 49 struct mlx5_core_dev *dev = esw->dev; 50 int err; 51 52 err = esw_qos_tsar_config(dev, sched_ctx, 53 group->tsar_ix, 54 max_rate, bw_share); 55 if (err) 56 NL_SET_ERR_MSG_MOD(extack, "E-Switch modify group TSAR element failed"); 57 58 trace_mlx5_esw_group_qos_config(dev, group, group->tsar_ix, bw_share, max_rate); 59 60 return err; 61 } 62 63 static int esw_qos_vport_config(struct mlx5_eswitch *esw, 64 struct mlx5_vport *vport, 65 u32 max_rate, u32 bw_share, 66 struct netlink_ext_ack *extack) 67 { 68 u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; 69 struct mlx5_core_dev *dev = esw->dev; 70 int err; 71 72 if (!vport->qos.enabled) 73 return -EIO; 74 75 err = esw_qos_tsar_config(dev, sched_ctx, vport->qos.esw_tsar_ix, 76 max_rate, bw_share); 77 if (err) { 78 esw_warn(esw->dev, 79 "E-Switch modify TSAR vport element failed (vport=%d,err=%d)\n", 80 vport->vport, err); 81 NL_SET_ERR_MSG_MOD(extack, "E-Switch modify TSAR vport element failed"); 82 return err; 83 } 84 85 trace_mlx5_esw_vport_qos_config(vport, bw_share, max_rate); 86 87 return 0; 88 } 89 90 static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw, 91 struct mlx5_esw_rate_group *group, 92 bool group_level) 93 { 94 u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); 95 struct mlx5_vport *evport; 96 u32 max_guarantee = 0; 97 unsigned long i; 98 99 if (group_level) { 100 struct mlx5_esw_rate_group *group; 101 102 list_for_each_entry(group, &esw->qos.groups, list) { 103 if (group->min_rate < max_guarantee) 104 continue; 105 max_guarantee = group->min_rate; 106 } 107 } else { 108 mlx5_esw_for_each_vport(esw, i, evport) { 109 if (!evport->enabled || !evport->qos.enabled || 110 evport->qos.group != group || evport->qos.min_rate < max_guarantee) 111 continue; 112 max_guarantee = evport->qos.min_rate; 113 } 114 } 115 116 if (max_guarantee) 117 return max_t(u32, max_guarantee / fw_max_bw_share, 1); 118 119 /* If vports min rate divider is 0 but their group has bw_share configured, then 120 * need to set bw_share for vports to minimal value. 121 */ 122 if (!group_level && !max_guarantee && group && group->bw_share) 123 return 1; 124 return 0; 125 } 126 127 static u32 esw_qos_calc_bw_share(u32 min_rate, u32 divider, u32 fw_max) 128 { 129 if (divider) 130 return MLX5_RATE_TO_BW_SHARE(min_rate, divider, fw_max); 131 132 return 0; 133 } 134 135 static int esw_qos_normalize_vports_min_rate(struct mlx5_eswitch *esw, 136 struct mlx5_esw_rate_group *group, 137 struct netlink_ext_ack *extack) 138 { 139 u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); 140 u32 divider = esw_qos_calculate_min_rate_divider(esw, group, false); 141 struct mlx5_vport *evport; 142 unsigned long i; 143 u32 bw_share; 144 int err; 145 146 mlx5_esw_for_each_vport(esw, i, evport) { 147 if (!evport->enabled || !evport->qos.enabled || evport->qos.group != group) 148 continue; 149 bw_share = esw_qos_calc_bw_share(evport->qos.min_rate, divider, fw_max_bw_share); 150 151 if (bw_share == evport->qos.bw_share) 152 continue; 153 154 err = esw_qos_vport_config(esw, evport, evport->qos.max_rate, bw_share, extack); 155 if (err) 156 return err; 157 158 evport->qos.bw_share = bw_share; 159 } 160 161 return 0; 162 } 163 164 static int esw_qos_normalize_groups_min_rate(struct mlx5_eswitch *esw, u32 divider, 165 struct netlink_ext_ack *extack) 166 { 167 u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); 168 struct mlx5_esw_rate_group *group; 169 u32 bw_share; 170 int err; 171 172 list_for_each_entry(group, &esw->qos.groups, list) { 173 bw_share = esw_qos_calc_bw_share(group->min_rate, divider, fw_max_bw_share); 174 175 if (bw_share == group->bw_share) 176 continue; 177 178 err = esw_qos_group_config(esw, group, group->max_rate, bw_share, extack); 179 if (err) 180 return err; 181 182 group->bw_share = bw_share; 183 184 /* All the group's vports need to be set with default bw_share 185 * to enable them with QOS 186 */ 187 err = esw_qos_normalize_vports_min_rate(esw, group, extack); 188 189 if (err) 190 return err; 191 } 192 193 return 0; 194 } 195 196 static int esw_qos_set_vport_min_rate(struct mlx5_eswitch *esw, struct mlx5_vport *evport, 197 u32 min_rate, struct netlink_ext_ack *extack) 198 { 199 u32 fw_max_bw_share, previous_min_rate; 200 bool min_rate_supported; 201 int err; 202 203 lockdep_assert_held(&esw->state_lock); 204 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); 205 min_rate_supported = MLX5_CAP_QOS(esw->dev, esw_bw_share) && 206 fw_max_bw_share >= MLX5_MIN_BW_SHARE; 207 if (min_rate && !min_rate_supported) 208 return -EOPNOTSUPP; 209 if (min_rate == evport->qos.min_rate) 210 return 0; 211 212 previous_min_rate = evport->qos.min_rate; 213 evport->qos.min_rate = min_rate; 214 err = esw_qos_normalize_vports_min_rate(esw, evport->qos.group, extack); 215 if (err) 216 evport->qos.min_rate = previous_min_rate; 217 218 return err; 219 } 220 221 static int esw_qos_set_vport_max_rate(struct mlx5_eswitch *esw, struct mlx5_vport *evport, 222 u32 max_rate, struct netlink_ext_ack *extack) 223 { 224 u32 act_max_rate = max_rate; 225 bool max_rate_supported; 226 int err; 227 228 lockdep_assert_held(&esw->state_lock); 229 max_rate_supported = MLX5_CAP_QOS(esw->dev, esw_rate_limit); 230 231 if (max_rate && !max_rate_supported) 232 return -EOPNOTSUPP; 233 if (max_rate == evport->qos.max_rate) 234 return 0; 235 236 /* If parent group has rate limit need to set to group 237 * value when new max rate is 0. 238 */ 239 if (evport->qos.group && !max_rate) 240 act_max_rate = evport->qos.group->max_rate; 241 242 err = esw_qos_vport_config(esw, evport, act_max_rate, evport->qos.bw_share, extack); 243 244 if (!err) 245 evport->qos.max_rate = max_rate; 246 247 return err; 248 } 249 250 static int esw_qos_set_group_min_rate(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group, 251 u32 min_rate, struct netlink_ext_ack *extack) 252 { 253 u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); 254 struct mlx5_core_dev *dev = esw->dev; 255 u32 previous_min_rate, divider; 256 int err; 257 258 if (!(MLX5_CAP_QOS(dev, esw_bw_share) && fw_max_bw_share >= MLX5_MIN_BW_SHARE)) 259 return -EOPNOTSUPP; 260 261 if (min_rate == group->min_rate) 262 return 0; 263 264 previous_min_rate = group->min_rate; 265 group->min_rate = min_rate; 266 divider = esw_qos_calculate_min_rate_divider(esw, group, true); 267 err = esw_qos_normalize_groups_min_rate(esw, divider, extack); 268 if (err) { 269 group->min_rate = previous_min_rate; 270 NL_SET_ERR_MSG_MOD(extack, "E-Switch group min rate setting failed"); 271 272 /* Attempt restoring previous configuration */ 273 divider = esw_qos_calculate_min_rate_divider(esw, group, true); 274 if (esw_qos_normalize_groups_min_rate(esw, divider, extack)) 275 NL_SET_ERR_MSG_MOD(extack, "E-Switch BW share restore failed"); 276 } 277 278 return err; 279 } 280 281 static int esw_qos_set_group_max_rate(struct mlx5_eswitch *esw, 282 struct mlx5_esw_rate_group *group, 283 u32 max_rate, struct netlink_ext_ack *extack) 284 { 285 struct mlx5_vport *vport; 286 unsigned long i; 287 int err; 288 289 if (group->max_rate == max_rate) 290 return 0; 291 292 err = esw_qos_group_config(esw, group, max_rate, group->bw_share, extack); 293 if (err) 294 return err; 295 296 group->max_rate = max_rate; 297 298 /* Any unlimited vports in the group should be set 299 * with the value of the group. 300 */ 301 mlx5_esw_for_each_vport(esw, i, vport) { 302 if (!vport->enabled || !vport->qos.enabled || 303 vport->qos.group != group || vport->qos.max_rate) 304 continue; 305 306 err = esw_qos_vport_config(esw, vport, max_rate, vport->qos.bw_share, extack); 307 if (err) 308 NL_SET_ERR_MSG_MOD(extack, 309 "E-Switch vport implicit rate limit setting failed"); 310 } 311 312 return err; 313 } 314 315 static int esw_qos_vport_create_sched_element(struct mlx5_eswitch *esw, 316 struct mlx5_vport *vport, 317 u32 max_rate, u32 bw_share) 318 { 319 u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; 320 struct mlx5_esw_rate_group *group = vport->qos.group; 321 struct mlx5_core_dev *dev = esw->dev; 322 u32 parent_tsar_ix; 323 void *vport_elem; 324 int err; 325 326 parent_tsar_ix = group ? group->tsar_ix : esw->qos.root_tsar_ix; 327 MLX5_SET(scheduling_context, sched_ctx, element_type, 328 SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT); 329 vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes); 330 MLX5_SET(vport_element, vport_elem, vport_number, vport->vport); 331 MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_tsar_ix); 332 MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate); 333 MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share); 334 335 err = mlx5_create_scheduling_element_cmd(dev, 336 SCHEDULING_HIERARCHY_E_SWITCH, 337 sched_ctx, 338 &vport->qos.esw_tsar_ix); 339 if (err) { 340 esw_warn(esw->dev, "E-Switch create TSAR vport element failed (vport=%d,err=%d)\n", 341 vport->vport, err); 342 return err; 343 } 344 345 return 0; 346 } 347 348 static int esw_qos_update_group_scheduling_element(struct mlx5_eswitch *esw, 349 struct mlx5_vport *vport, 350 struct mlx5_esw_rate_group *curr_group, 351 struct mlx5_esw_rate_group *new_group, 352 struct netlink_ext_ack *extack) 353 { 354 u32 max_rate; 355 int err; 356 357 err = mlx5_destroy_scheduling_element_cmd(esw->dev, 358 SCHEDULING_HIERARCHY_E_SWITCH, 359 vport->qos.esw_tsar_ix); 360 if (err) { 361 NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR vport element failed"); 362 return err; 363 } 364 365 vport->qos.group = new_group; 366 max_rate = vport->qos.max_rate ? vport->qos.max_rate : new_group->max_rate; 367 368 /* If vport is unlimited, we set the group's value. 369 * Therefore, if the group is limited it will apply to 370 * the vport as well and if not, vport will remain unlimited. 371 */ 372 err = esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share); 373 if (err) { 374 NL_SET_ERR_MSG_MOD(extack, "E-Switch vport group set failed."); 375 goto err_sched; 376 } 377 378 return 0; 379 380 err_sched: 381 vport->qos.group = curr_group; 382 max_rate = vport->qos.max_rate ? vport->qos.max_rate : curr_group->max_rate; 383 if (esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share)) 384 esw_warn(esw->dev, "E-Switch vport group restore failed (vport=%d)\n", 385 vport->vport); 386 387 return err; 388 } 389 390 static int esw_qos_vport_update_group(struct mlx5_eswitch *esw, 391 struct mlx5_vport *vport, 392 struct mlx5_esw_rate_group *group, 393 struct netlink_ext_ack *extack) 394 { 395 struct mlx5_esw_rate_group *new_group, *curr_group; 396 int err; 397 398 if (!vport->enabled) 399 return -EINVAL; 400 401 curr_group = vport->qos.group; 402 new_group = group ?: esw->qos.group0; 403 if (curr_group == new_group) 404 return 0; 405 406 err = esw_qos_update_group_scheduling_element(esw, vport, curr_group, new_group, extack); 407 if (err) 408 return err; 409 410 /* Recalculate bw share weights of old and new groups */ 411 if (vport->qos.bw_share || new_group->bw_share) { 412 esw_qos_normalize_vports_min_rate(esw, curr_group, extack); 413 esw_qos_normalize_vports_min_rate(esw, new_group, extack); 414 } 415 416 return 0; 417 } 418 419 static struct mlx5_esw_rate_group * 420 __esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) 421 { 422 u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; 423 struct mlx5_esw_rate_group *group; 424 u32 divider; 425 int err; 426 427 group = kzalloc(sizeof(*group), GFP_KERNEL); 428 if (!group) 429 return ERR_PTR(-ENOMEM); 430 431 MLX5_SET(scheduling_context, tsar_ctx, parent_element_id, 432 esw->qos.root_tsar_ix); 433 err = mlx5_create_scheduling_element_cmd(esw->dev, 434 SCHEDULING_HIERARCHY_E_SWITCH, 435 tsar_ctx, 436 &group->tsar_ix); 437 if (err) { 438 NL_SET_ERR_MSG_MOD(extack, "E-Switch create TSAR for group failed"); 439 goto err_sched_elem; 440 } 441 442 list_add_tail(&group->list, &esw->qos.groups); 443 444 divider = esw_qos_calculate_min_rate_divider(esw, group, true); 445 if (divider) { 446 err = esw_qos_normalize_groups_min_rate(esw, divider, extack); 447 if (err) { 448 NL_SET_ERR_MSG_MOD(extack, "E-Switch groups normalization failed"); 449 goto err_min_rate; 450 } 451 } 452 trace_mlx5_esw_group_qos_create(esw->dev, group, group->tsar_ix); 453 454 return group; 455 456 err_min_rate: 457 list_del(&group->list); 458 if (mlx5_destroy_scheduling_element_cmd(esw->dev, 459 SCHEDULING_HIERARCHY_E_SWITCH, 460 group->tsar_ix)) 461 NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR for group failed"); 462 err_sched_elem: 463 kfree(group); 464 return ERR_PTR(err); 465 } 466 467 static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack); 468 static void esw_qos_put(struct mlx5_eswitch *esw); 469 470 static struct mlx5_esw_rate_group * 471 esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) 472 { 473 struct mlx5_esw_rate_group *group; 474 int err; 475 476 if (!MLX5_CAP_QOS(esw->dev, log_esw_max_sched_depth)) 477 return ERR_PTR(-EOPNOTSUPP); 478 479 err = esw_qos_get(esw, extack); 480 if (err) 481 return ERR_PTR(err); 482 483 group = __esw_qos_create_rate_group(esw, extack); 484 if (IS_ERR(group)) 485 esw_qos_put(esw); 486 487 return group; 488 } 489 490 static int __esw_qos_destroy_rate_group(struct mlx5_eswitch *esw, 491 struct mlx5_esw_rate_group *group, 492 struct netlink_ext_ack *extack) 493 { 494 u32 divider; 495 int err; 496 497 list_del(&group->list); 498 499 divider = esw_qos_calculate_min_rate_divider(esw, NULL, true); 500 err = esw_qos_normalize_groups_min_rate(esw, divider, extack); 501 if (err) 502 NL_SET_ERR_MSG_MOD(extack, "E-Switch groups' normalization failed"); 503 504 err = mlx5_destroy_scheduling_element_cmd(esw->dev, 505 SCHEDULING_HIERARCHY_E_SWITCH, 506 group->tsar_ix); 507 if (err) 508 NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR_ID failed"); 509 510 trace_mlx5_esw_group_qos_destroy(esw->dev, group, group->tsar_ix); 511 512 kfree(group); 513 514 return err; 515 } 516 517 static int esw_qos_destroy_rate_group(struct mlx5_eswitch *esw, 518 struct mlx5_esw_rate_group *group, 519 struct netlink_ext_ack *extack) 520 { 521 int err; 522 523 err = __esw_qos_destroy_rate_group(esw, group, extack); 524 esw_qos_put(esw); 525 526 return err; 527 } 528 529 static bool esw_qos_element_type_supported(struct mlx5_core_dev *dev, int type) 530 { 531 switch (type) { 532 case SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR: 533 return MLX5_CAP_QOS(dev, esw_element_type) & 534 ELEMENT_TYPE_CAP_MASK_TASR; 535 case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT: 536 return MLX5_CAP_QOS(dev, esw_element_type) & 537 ELEMENT_TYPE_CAP_MASK_VPORT; 538 case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC: 539 return MLX5_CAP_QOS(dev, esw_element_type) & 540 ELEMENT_TYPE_CAP_MASK_VPORT_TC; 541 case SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC: 542 return MLX5_CAP_QOS(dev, esw_element_type) & 543 ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC; 544 } 545 return false; 546 } 547 548 static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) 549 { 550 u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; 551 struct mlx5_core_dev *dev = esw->dev; 552 __be32 *attr; 553 int err; 554 555 if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling)) 556 return -EOPNOTSUPP; 557 558 if (!esw_qos_element_type_supported(dev, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR)) 559 return -EOPNOTSUPP; 560 561 MLX5_SET(scheduling_context, tsar_ctx, element_type, 562 SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR); 563 564 attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes); 565 *attr = cpu_to_be32(TSAR_ELEMENT_TSAR_TYPE_DWRR << 16); 566 567 err = mlx5_create_scheduling_element_cmd(dev, 568 SCHEDULING_HIERARCHY_E_SWITCH, 569 tsar_ctx, 570 &esw->qos.root_tsar_ix); 571 if (err) { 572 esw_warn(dev, "E-Switch create root TSAR failed (%d)\n", err); 573 return err; 574 } 575 576 INIT_LIST_HEAD(&esw->qos.groups); 577 if (MLX5_CAP_QOS(dev, log_esw_max_sched_depth)) { 578 esw->qos.group0 = __esw_qos_create_rate_group(esw, extack); 579 if (IS_ERR(esw->qos.group0)) { 580 esw_warn(dev, "E-Switch create rate group 0 failed (%ld)\n", 581 PTR_ERR(esw->qos.group0)); 582 err = PTR_ERR(esw->qos.group0); 583 goto err_group0; 584 } 585 } 586 refcount_set(&esw->qos.refcnt, 1); 587 588 return 0; 589 590 err_group0: 591 if (mlx5_destroy_scheduling_element_cmd(esw->dev, SCHEDULING_HIERARCHY_E_SWITCH, 592 esw->qos.root_tsar_ix)) 593 esw_warn(esw->dev, "E-Switch destroy root TSAR failed.\n"); 594 595 return err; 596 } 597 598 static void esw_qos_destroy(struct mlx5_eswitch *esw) 599 { 600 int err; 601 602 if (esw->qos.group0) 603 __esw_qos_destroy_rate_group(esw, esw->qos.group0, NULL); 604 605 err = mlx5_destroy_scheduling_element_cmd(esw->dev, 606 SCHEDULING_HIERARCHY_E_SWITCH, 607 esw->qos.root_tsar_ix); 608 if (err) 609 esw_warn(esw->dev, "E-Switch destroy root TSAR failed (%d)\n", err); 610 } 611 612 static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) 613 { 614 int err = 0; 615 616 lockdep_assert_held(&esw->state_lock); 617 618 if (!refcount_inc_not_zero(&esw->qos.refcnt)) { 619 /* esw_qos_create() set refcount to 1 only on success. 620 * No need to decrement on failure. 621 */ 622 err = esw_qos_create(esw, extack); 623 } 624 625 return err; 626 } 627 628 static void esw_qos_put(struct mlx5_eswitch *esw) 629 { 630 lockdep_assert_held(&esw->state_lock); 631 if (refcount_dec_and_test(&esw->qos.refcnt)) 632 esw_qos_destroy(esw); 633 } 634 635 static int esw_qos_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport, 636 u32 max_rate, u32 bw_share, struct netlink_ext_ack *extack) 637 { 638 int err; 639 640 lockdep_assert_held(&esw->state_lock); 641 if (vport->qos.enabled) 642 return 0; 643 644 err = esw_qos_get(esw, extack); 645 if (err) 646 return err; 647 648 vport->qos.group = esw->qos.group0; 649 650 err = esw_qos_vport_create_sched_element(esw, vport, max_rate, bw_share); 651 if (err) 652 goto err_out; 653 654 vport->qos.enabled = true; 655 trace_mlx5_esw_vport_qos_create(vport, bw_share, max_rate); 656 657 return 0; 658 659 err_out: 660 esw_qos_put(esw); 661 662 return err; 663 } 664 665 void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vport) 666 { 667 int err; 668 669 lockdep_assert_held(&esw->state_lock); 670 if (!vport->qos.enabled) 671 return; 672 WARN(vport->qos.group && vport->qos.group != esw->qos.group0, 673 "Disabling QoS on port before detaching it from group"); 674 675 err = mlx5_destroy_scheduling_element_cmd(esw->dev, 676 SCHEDULING_HIERARCHY_E_SWITCH, 677 vport->qos.esw_tsar_ix); 678 if (err) 679 esw_warn(esw->dev, "E-Switch destroy TSAR vport element failed (vport=%d,err=%d)\n", 680 vport->vport, err); 681 682 memset(&vport->qos, 0, sizeof(vport->qos)); 683 trace_mlx5_esw_vport_qos_destroy(vport); 684 685 esw_qos_put(esw); 686 } 687 688 int mlx5_esw_qos_set_vport_rate(struct mlx5_eswitch *esw, struct mlx5_vport *vport, 689 u32 max_rate, u32 min_rate) 690 { 691 int err; 692 693 lockdep_assert_held(&esw->state_lock); 694 err = esw_qos_vport_enable(esw, vport, 0, 0, NULL); 695 if (err) 696 return err; 697 698 err = esw_qos_set_vport_min_rate(esw, vport, min_rate, NULL); 699 if (!err) 700 err = esw_qos_set_vport_max_rate(esw, vport, max_rate, NULL); 701 702 return err; 703 } 704 705 static u32 mlx5_esw_qos_lag_link_speed_get_locked(struct mlx5_core_dev *mdev) 706 { 707 struct ethtool_link_ksettings lksettings; 708 struct net_device *slave, *master; 709 u32 speed = SPEED_UNKNOWN; 710 711 /* Lock ensures a stable reference to master and slave netdevice 712 * while port speed of master is queried. 713 */ 714 ASSERT_RTNL(); 715 716 slave = mlx5_uplink_netdev_get(mdev); 717 if (!slave) 718 goto out; 719 720 master = netdev_master_upper_dev_get(slave); 721 if (master && !__ethtool_get_link_ksettings(master, &lksettings)) 722 speed = lksettings.base.speed; 723 724 out: 725 return speed; 726 } 727 728 static int mlx5_esw_qos_max_link_speed_get(struct mlx5_core_dev *mdev, u32 *link_speed_max, 729 bool hold_rtnl_lock, struct netlink_ext_ack *extack) 730 { 731 int err; 732 733 if (!mlx5_lag_is_active(mdev)) 734 goto skip_lag; 735 736 if (hold_rtnl_lock) 737 rtnl_lock(); 738 739 *link_speed_max = mlx5_esw_qos_lag_link_speed_get_locked(mdev); 740 741 if (hold_rtnl_lock) 742 rtnl_unlock(); 743 744 if (*link_speed_max != (u32)SPEED_UNKNOWN) 745 return 0; 746 747 skip_lag: 748 err = mlx5_port_max_linkspeed(mdev, link_speed_max); 749 if (err) 750 NL_SET_ERR_MSG_MOD(extack, "Failed to get link maximum speed"); 751 752 return err; 753 } 754 755 static int mlx5_esw_qos_link_speed_verify(struct mlx5_core_dev *mdev, 756 const char *name, u32 link_speed_max, 757 u64 value, struct netlink_ext_ack *extack) 758 { 759 if (value > link_speed_max) { 760 pr_err("%s rate value %lluMbps exceed link maximum speed %u.\n", 761 name, value, link_speed_max); 762 NL_SET_ERR_MSG_MOD(extack, "TX rate value exceed link maximum speed"); 763 return -EINVAL; 764 } 765 766 return 0; 767 } 768 769 int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 rate_mbps) 770 { 771 u32 ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; 772 struct mlx5_vport *vport; 773 u32 link_speed_max; 774 u32 bitmask; 775 int err; 776 777 vport = mlx5_eswitch_get_vport(esw, vport_num); 778 if (IS_ERR(vport)) 779 return PTR_ERR(vport); 780 781 if (rate_mbps) { 782 err = mlx5_esw_qos_max_link_speed_get(esw->dev, &link_speed_max, false, NULL); 783 if (err) 784 return err; 785 786 err = mlx5_esw_qos_link_speed_verify(esw->dev, "Police", 787 link_speed_max, rate_mbps, NULL); 788 if (err) 789 return err; 790 } 791 792 mutex_lock(&esw->state_lock); 793 if (!vport->qos.enabled) { 794 /* Eswitch QoS wasn't enabled yet. Enable it and vport QoS. */ 795 err = esw_qos_vport_enable(esw, vport, rate_mbps, vport->qos.bw_share, NULL); 796 } else { 797 MLX5_SET(scheduling_context, ctx, max_average_bw, rate_mbps); 798 799 bitmask = MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW; 800 err = mlx5_modify_scheduling_element_cmd(esw->dev, 801 SCHEDULING_HIERARCHY_E_SWITCH, 802 ctx, 803 vport->qos.esw_tsar_ix, 804 bitmask); 805 } 806 mutex_unlock(&esw->state_lock); 807 808 return err; 809 } 810 811 #define MLX5_LINKSPEED_UNIT 125000 /* 1Mbps in Bps */ 812 813 /* Converts bytes per second value passed in a pointer into megabits per 814 * second, rewriting last. If converted rate exceed link speed or is not a 815 * fraction of Mbps - returns error. 816 */ 817 static int esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev *mdev, const char *name, 818 u64 *rate, struct netlink_ext_ack *extack) 819 { 820 u32 link_speed_max, remainder; 821 u64 value; 822 int err; 823 824 value = div_u64_rem(*rate, MLX5_LINKSPEED_UNIT, &remainder); 825 if (remainder) { 826 pr_err("%s rate value %lluBps not in link speed units of 1Mbps.\n", 827 name, *rate); 828 NL_SET_ERR_MSG_MOD(extack, "TX rate value not in link speed units of 1Mbps"); 829 return -EINVAL; 830 } 831 832 err = mlx5_esw_qos_max_link_speed_get(mdev, &link_speed_max, true, extack); 833 if (err) 834 return err; 835 836 err = mlx5_esw_qos_link_speed_verify(mdev, name, link_speed_max, value, extack); 837 if (err) 838 return err; 839 840 *rate = value; 841 return 0; 842 } 843 844 /* Eswitch devlink rate API */ 845 846 int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv, 847 u64 tx_share, struct netlink_ext_ack *extack) 848 { 849 struct mlx5_vport *vport = priv; 850 struct mlx5_eswitch *esw; 851 int err; 852 853 esw = vport->dev->priv.eswitch; 854 if (!mlx5_esw_allowed(esw)) 855 return -EPERM; 856 857 err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_share", &tx_share, extack); 858 if (err) 859 return err; 860 861 mutex_lock(&esw->state_lock); 862 err = esw_qos_vport_enable(esw, vport, 0, 0, extack); 863 if (err) 864 goto unlock; 865 866 err = esw_qos_set_vport_min_rate(esw, vport, tx_share, extack); 867 unlock: 868 mutex_unlock(&esw->state_lock); 869 return err; 870 } 871 872 int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *priv, 873 u64 tx_max, struct netlink_ext_ack *extack) 874 { 875 struct mlx5_vport *vport = priv; 876 struct mlx5_eswitch *esw; 877 int err; 878 879 esw = vport->dev->priv.eswitch; 880 if (!mlx5_esw_allowed(esw)) 881 return -EPERM; 882 883 err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_max", &tx_max, extack); 884 if (err) 885 return err; 886 887 mutex_lock(&esw->state_lock); 888 err = esw_qos_vport_enable(esw, vport, 0, 0, extack); 889 if (err) 890 goto unlock; 891 892 err = esw_qos_set_vport_max_rate(esw, vport, tx_max, extack); 893 unlock: 894 mutex_unlock(&esw->state_lock); 895 return err; 896 } 897 898 int mlx5_esw_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv, 899 u64 tx_share, struct netlink_ext_ack *extack) 900 { 901 struct mlx5_core_dev *dev = devlink_priv(rate_node->devlink); 902 struct mlx5_eswitch *esw = dev->priv.eswitch; 903 struct mlx5_esw_rate_group *group = priv; 904 int err; 905 906 err = esw_qos_devlink_rate_to_mbps(dev, "tx_share", &tx_share, extack); 907 if (err) 908 return err; 909 910 mutex_lock(&esw->state_lock); 911 err = esw_qos_set_group_min_rate(esw, group, tx_share, extack); 912 mutex_unlock(&esw->state_lock); 913 return err; 914 } 915 916 int mlx5_esw_devlink_rate_node_tx_max_set(struct devlink_rate *rate_node, void *priv, 917 u64 tx_max, struct netlink_ext_ack *extack) 918 { 919 struct mlx5_core_dev *dev = devlink_priv(rate_node->devlink); 920 struct mlx5_eswitch *esw = dev->priv.eswitch; 921 struct mlx5_esw_rate_group *group = priv; 922 int err; 923 924 err = esw_qos_devlink_rate_to_mbps(dev, "tx_max", &tx_max, extack); 925 if (err) 926 return err; 927 928 mutex_lock(&esw->state_lock); 929 err = esw_qos_set_group_max_rate(esw, group, tx_max, extack); 930 mutex_unlock(&esw->state_lock); 931 return err; 932 } 933 934 int mlx5_esw_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv, 935 struct netlink_ext_ack *extack) 936 { 937 struct mlx5_esw_rate_group *group; 938 struct mlx5_eswitch *esw; 939 int err = 0; 940 941 esw = mlx5_devlink_eswitch_get(rate_node->devlink); 942 if (IS_ERR(esw)) 943 return PTR_ERR(esw); 944 945 mutex_lock(&esw->state_lock); 946 if (esw->mode != MLX5_ESWITCH_OFFLOADS) { 947 NL_SET_ERR_MSG_MOD(extack, 948 "Rate node creation supported only in switchdev mode"); 949 err = -EOPNOTSUPP; 950 goto unlock; 951 } 952 953 group = esw_qos_create_rate_group(esw, extack); 954 if (IS_ERR(group)) { 955 err = PTR_ERR(group); 956 goto unlock; 957 } 958 959 *priv = group; 960 unlock: 961 mutex_unlock(&esw->state_lock); 962 return err; 963 } 964 965 int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv, 966 struct netlink_ext_ack *extack) 967 { 968 struct mlx5_esw_rate_group *group = priv; 969 struct mlx5_eswitch *esw; 970 int err; 971 972 esw = mlx5_devlink_eswitch_get(rate_node->devlink); 973 if (IS_ERR(esw)) 974 return PTR_ERR(esw); 975 976 mutex_lock(&esw->state_lock); 977 err = esw_qos_destroy_rate_group(esw, group, extack); 978 mutex_unlock(&esw->state_lock); 979 return err; 980 } 981 982 int mlx5_esw_qos_vport_update_group(struct mlx5_eswitch *esw, 983 struct mlx5_vport *vport, 984 struct mlx5_esw_rate_group *group, 985 struct netlink_ext_ack *extack) 986 { 987 int err = 0; 988 989 mutex_lock(&esw->state_lock); 990 if (!vport->qos.enabled && !group) 991 goto unlock; 992 993 err = esw_qos_vport_enable(esw, vport, 0, 0, extack); 994 if (!err) 995 err = esw_qos_vport_update_group(esw, vport, group, extack); 996 unlock: 997 mutex_unlock(&esw->state_lock); 998 return err; 999 } 1000 1001 int mlx5_esw_devlink_rate_parent_set(struct devlink_rate *devlink_rate, 1002 struct devlink_rate *parent, 1003 void *priv, void *parent_priv, 1004 struct netlink_ext_ack *extack) 1005 { 1006 struct mlx5_esw_rate_group *group; 1007 struct mlx5_vport *vport = priv; 1008 1009 if (!parent) 1010 return mlx5_esw_qos_vport_update_group(vport->dev->priv.eswitch, 1011 vport, NULL, extack); 1012 1013 group = parent_priv; 1014 return mlx5_esw_qos_vport_update_group(vport->dev->priv.eswitch, vport, group, extack); 1015 } 1016