xref: /freebsd/sys/dev/ena/ena_sysctl.c (revision abd87254)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2015-2023 Amazon.com, Inc. or its affiliates.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  *
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  */
30 
31 #include <sys/param.h>
32 #include "opt_rss.h"
33 
34 #include "ena_rss.h"
35 #include "ena_sysctl.h"
36 
/*
 * Forward declarations of the file-local helpers.  The ena_sysctl_add_*()
 * functions register groups of nodes in the per-device sysctl tree; the
 * functions taking SYSCTL_HANDLER_ARGS are the handlers attached to the
 * procedure (SYSCTL_ADD_PROC) nodes.
 */
static void ena_sysctl_add_wd(struct ena_adapter *);
static void ena_sysctl_add_stats(struct ena_adapter *);
static void ena_sysctl_add_eni_metrics(struct ena_adapter *);
static void ena_sysctl_add_customer_metrics(struct ena_adapter *);
static void ena_sysctl_add_srd_info(struct ena_adapter *);
static void ena_sysctl_add_tuneables(struct ena_adapter *);
static void ena_sysctl_add_irq_affinity(struct ena_adapter *);
/* Kernel option RSS prevents manipulation of key hash and indirection table. */
#ifndef RSS
static void ena_sysctl_add_rss(struct ena_adapter *);
#endif
static int ena_sysctl_buf_ring_size(SYSCTL_HANDLER_ARGS);
static int ena_sysctl_rx_queue_size(SYSCTL_HANDLER_ARGS);
static int ena_sysctl_io_queues_nb(SYSCTL_HANDLER_ARGS);
static int ena_sysctl_irq_base_cpu(SYSCTL_HANDLER_ARGS);
static int ena_sysctl_irq_cpu_stride(SYSCTL_HANDLER_ARGS);
static int ena_sysctl_metrics_interval(SYSCTL_HANDLER_ARGS);
#ifndef RSS
static int ena_sysctl_rss_key(SYSCTL_HANDLER_ARGS);
static int ena_sysctl_rss_indir_table(SYSCTL_HANDLER_ARGS);
#endif
58 
/* Upper bound, in seconds, of the metrics sampling interval (one hour). */
#define ENA_METRICS_MAX_SAMPLE_INTERVAL 3600
/*
 * Twice ENA_HASH_KEY_SIZE plus one.
 * NOTE(review): presumably two hex characters per key byte plus the
 * terminating NUL - confirm against the RSS key handler.
 */
#define ENA_HASH_KEY_MSG_SIZE (ENA_HASH_KEY_SIZE * 2 + 1)

/* Size of the name and description buffers in the metric string tables. */
#define SYSCTL_GSTRING_LEN 128

/*
 * Initializer for one struct ena_hw_metrics entry.  Both the name and the
 * description are produced by stringifying the macro arguments.
 */
#define ENA_METRIC_ENI_ENTRY(stat, desc) { \
        .name = #stat, \
        .description = #desc, \
}

/*
 * Initializer for a statistic stored in struct ena_admin_<stat_type>.
 * stat_offset is the position of the field counted in u64-sized units
 * (byte offset divided by sizeof(u64)), not in bytes.
 */
#define ENA_STAT_ENTRY(stat, desc, stat_type) { \
        .name = #stat, \
        .description = #desc, \
        .stat_offset = offsetof(struct ena_admin_##stat_type, stat) / sizeof(u64), \
}

/* ENA_STAT_ENTRY for the fields of struct ena_admin_ena_srd_stats. */
#define ENA_STAT_ENA_SRD_ENTRY(stat, desc) \
	ENA_STAT_ENTRY(stat, desc, ena_srd_stats)
78 
/* Sysctl leaf name and description of one customer (hardware) metric. */
struct ena_hw_metrics {
        char name[SYSCTL_GSTRING_LEN];
        char description[SYSCTL_GSTRING_LEN];
};

/*
 * Sysctl leaf name and description of one ENA SRD statistic, together with
 * the location of its value.
 */
struct ena_srd_metrics {
        char name[SYSCTL_GSTRING_LEN];
        char description[SYSCTL_GSTRING_LEN];
        /* Index of the value, in u64 units, from the start of the stats struct. */
        int stat_offset;
};
89 
/*
 * ENA SRD statistics exported under the "ena_srd_info" sysctl node.  Each
 * entry names a field of struct ena_admin_ena_srd_stats; the description is
 * the stringified second macro argument.
 */
static const struct ena_srd_metrics ena_srd_stats_strings[] = {
        ENA_STAT_ENA_SRD_ENTRY(
	    ena_srd_tx_pkts, Number of packets transmitted over ENA SRD),
        ENA_STAT_ENA_SRD_ENTRY(
	    ena_srd_eligible_tx_pkts, Number of packets transmitted or could
	    have been transmitted over ENA SRD),
        ENA_STAT_ENA_SRD_ENTRY(
	    ena_srd_rx_pkts, Number of packets received over ENA SRD),
        ENA_STAT_ENA_SRD_ENTRY(
	    ena_srd_resource_utilization, Percentage of the ENA SRD resources
	    that are in use),
};
102 
103 static const struct ena_hw_metrics ena_hw_stats_strings[] = {
104         ENA_METRIC_ENI_ENTRY(
105 	    bw_in_allowance_exceeded, Inbound BW allowance exceeded),
106         ENA_METRIC_ENI_ENTRY(
107 	    bw_out_allowance_exceeded, Outbound BW allowance exceeded),
108         ENA_METRIC_ENI_ENTRY(
109 	    pps_allowance_exceeded, PPS allowance exceeded),
110         ENA_METRIC_ENI_ENTRY(
111 	    conntrack_allowance_exceeded, Connection tracking allowance exceeded),
112         ENA_METRIC_ENI_ENTRY(
113 	    linklocal_allowance_exceeded, Linklocal packet rate allowance),
114         ENA_METRIC_ENI_ENTRY(
115 	    conntrack_allowance_available, Number of available conntracks),
116 };
117 
/*
 * Number of elements of a true array (not a pointer).  The argument is
 * parenthesized so that any expression naming an array expands correctly.
 */
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
#endif

/* Entry counts of the two metric string tables. */
#define ENA_CUSTOMER_METRICS_ARRAY_SIZE      ARRAY_SIZE(ena_hw_stats_strings)
#define ENA_SRD_METRICS_ARRAY_SIZE           ARRAY_SIZE(ena_srd_stats_strings)
124 
/* Root of the driver-wide (not per-device) sysctls: hw.ena. */
static SYSCTL_NODE(_hw, OID_AUTO, ena, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "ENA driver parameters");

/*
 * Logging level controlling the verbosity of the driver output.  Writable at
 * run time and settable as a tunable (CTLFLAG_RWTUN).
 */
int ena_log_level = ENA_INFO;
SYSCTL_INT(_hw_ena, OID_AUTO, log_level, CTLFLAG_RWTUN, &ena_log_level, 0,
    "Logging level indicating verbosity of the logs");

/* Read-only string carrying ENA_DRV_MODULE_VERSION. */
SYSCTL_CONST_STRING(_hw_ena, OID_AUTO, driver_version, CTLFLAG_RD,
    ENA_DRV_MODULE_VERSION, "ENA driver version");

/*
 * Use 9k mbufs for the Rx buffers. Defaults to 0 (use page size mbufs
 * instead).
 * Using 9k mbufs in low memory conditions might cause allocations to take a
 * long time and lead to OS instability, as the allocator needs to find
 * contiguous pages.
 * However, page size mbufs have slightly lower throughput than 9k mbufs, so
 * if network performance is the priority, 9k mbufs can be used.
 */
int ena_enable_9k_mbufs = 0;
SYSCTL_INT(_hw_ena, OID_AUTO, enable_9k_mbufs, CTLFLAG_RDTUN,
    &ena_enable_9k_mbufs, 0, "Use 9 kB mbufs for Rx descriptors");
149 
150 /*
151  * Force the driver to use large LLQ (Low Latency Queue) header. Defaults to
152  * false. This option may be important for platforms, which often handle packet
153  * headers on Tx with total header size greater than 96B, as it may
154  * reduce the latency.
155  * It also reduces the maximum Tx queue size by half, so it may cause more Tx
156  * packet drops.
157  */
158 bool ena_force_large_llq_header = false;
159 SYSCTL_BOOL(_hw_ena, OID_AUTO, force_large_llq_header, CTLFLAG_RDTUN,
160     &ena_force_large_llq_header, 0,
161     "Increases maximum supported header size in LLQ mode to 224 bytes, while reducing the maximum Tx queue size by half.\n");
162 
/*
 * Holds ENA_RX_RSS_TABLE_SIZE in a variable so that it can be exported
 * read-only as the per-device "rss.indir_table_size" sysctl.
 */
int ena_rss_table_size = ENA_RX_RSS_TABLE_SIZE;
164 
165 int ena_sysctl_allocate_customer_metrics_buffer(struct ena_adapter *adapter)
166 {
167 	int rc = 0;
168 
169 	adapter->customer_metrics_array = malloc((sizeof(u64) * ENA_CUSTOMER_METRICS_ARRAY_SIZE),
170 	    M_DEVBUF, M_NOWAIT | M_ZERO);
171 	if (unlikely(adapter->customer_metrics_array == NULL))
172 		rc = ENOMEM;
173 
174 	return rc;
175 }
176 void
177 ena_sysctl_add_nodes(struct ena_adapter *adapter)
178 {
179 	struct ena_com_dev *dev = adapter->ena_dev;
180 
181 	if (ena_com_get_cap(dev, ENA_ADMIN_CUSTOMER_METRICS))
182 		ena_sysctl_add_customer_metrics(adapter);
183 	else if (ena_com_get_cap(dev, ENA_ADMIN_ENI_STATS))
184 		ena_sysctl_add_eni_metrics(adapter);
185 
186 	if (ena_com_get_cap(adapter->ena_dev, ENA_ADMIN_ENA_SRD_INFO))
187 		ena_sysctl_add_srd_info(adapter);
188 
189 	ena_sysctl_add_wd(adapter);
190 	ena_sysctl_add_stats(adapter);
191 	ena_sysctl_add_tuneables(adapter);
192 	ena_sysctl_add_irq_affinity(adapter);
193 #ifndef RSS
194 	ena_sysctl_add_rss(adapter);
195 #endif
196 }
197 
198 static void
199 ena_sysctl_add_wd(struct ena_adapter *adapter)
200 {
201 	device_t dev;
202 
203 	struct sysctl_ctx_list *ctx;
204 	struct sysctl_oid *tree;
205 	struct sysctl_oid_list *child;
206 
207 	dev = adapter->pdev;
208 
209 	ctx = device_get_sysctl_ctx(dev);
210 	tree = device_get_sysctl_tree(dev);
211 	child = SYSCTL_CHILDREN(tree);
212 
213 	/* Sysctl calls for Watchdog service */
214 	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "wd_active", CTLFLAG_RWTUN,
215 	    &adapter->wd_active, 0, "Watchdog is active");
216 
217 	SYSCTL_ADD_QUAD(ctx, child, OID_AUTO, "keep_alive_timeout",
218 	    CTLFLAG_RWTUN, &adapter->keep_alive_timeout,
219 	    "Timeout for Keep Alive messages");
220 
221 	SYSCTL_ADD_QUAD(ctx, child, OID_AUTO, "missing_tx_timeout",
222 	    CTLFLAG_RWTUN, &adapter->missing_tx_timeout,
223 	    "Timeout for TX completion");
224 
225 	SYSCTL_ADD_U32(ctx, child, OID_AUTO, "missing_tx_max_queues",
226 	    CTLFLAG_RWTUN, &adapter->missing_tx_max_queues, 0,
227 	    "Number of TX queues to check per run");
228 
229 	SYSCTL_ADD_U32(ctx, child, OID_AUTO, "missing_tx_threshold",
230 	    CTLFLAG_RWTUN, &adapter->missing_tx_threshold, 0,
231 	    "Max number of timeouted packets");
232 }
233 
234 static void
235 ena_sysctl_add_stats(struct ena_adapter *adapter)
236 {
237 	device_t dev;
238 
239 	struct ena_ring *tx_ring;
240 	struct ena_ring *rx_ring;
241 
242 	struct ena_hw_stats *hw_stats;
243 	struct ena_stats_dev *dev_stats;
244 	struct ena_stats_tx *tx_stats;
245 	struct ena_stats_rx *rx_stats;
246 	struct ena_com_stats_admin *admin_stats;
247 
248 	struct sysctl_ctx_list *ctx;
249 	struct sysctl_oid *tree;
250 	struct sysctl_oid_list *child;
251 
252 	struct sysctl_oid *queue_node, *tx_node, *rx_node, *hw_node;
253 	struct sysctl_oid *admin_node;
254 	struct sysctl_oid_list *queue_list, *tx_list, *rx_list, *hw_list;
255 	struct sysctl_oid_list *admin_list;
256 
257 #define QUEUE_NAME_LEN 32
258 	char namebuf[QUEUE_NAME_LEN];
259 	int i;
260 
261 	dev = adapter->pdev;
262 
263 	ctx = device_get_sysctl_ctx(dev);
264 	tree = device_get_sysctl_tree(dev);
265 	child = SYSCTL_CHILDREN(tree);
266 
267 	tx_ring = adapter->tx_ring;
268 	rx_ring = adapter->rx_ring;
269 
270 	hw_stats = &adapter->hw_stats;
271 	dev_stats = &adapter->dev_stats;
272 	admin_stats = &adapter->ena_dev->admin_queue.stats;
273 
274 	SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "wd_expired", CTLFLAG_RD,
275 	    &dev_stats->wd_expired, "Watchdog expiry count");
276 	SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "interface_up", CTLFLAG_RD,
277 	    &dev_stats->interface_up, "Network interface up count");
278 	SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "interface_down",
279 	    CTLFLAG_RD, &dev_stats->interface_down,
280 	    "Network interface down count");
281 	SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "admin_q_pause",
282 	    CTLFLAG_RD, &dev_stats->admin_q_pause, "Admin queue pauses");
283 
284 	for (i = 0; i < adapter->num_io_queues; ++i, ++tx_ring, ++rx_ring) {
285 		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
286 
287 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
288 		    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Queue Name");
289 		queue_list = SYSCTL_CHILDREN(queue_node);
290 
291 		adapter->que[i].oid = queue_node;
292 
293 #ifdef RSS
294 		/* Common stats */
295 		SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "cpu", CTLFLAG_RD,
296 		    &adapter->que[i].cpu, 0, "CPU affinity");
297 		SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "domain", CTLFLAG_RD,
298 		    &adapter->que[i].domain, 0, "NUMA domain");
299 #endif
300 
301 		/* TX specific stats */
302 		tx_node = SYSCTL_ADD_NODE(ctx, queue_list, OID_AUTO, "tx_ring",
303 		    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "TX ring");
304 		tx_list = SYSCTL_CHILDREN(tx_node);
305 
306 		tx_stats = &tx_ring->tx_stats;
307 
308 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "count",
309 		    CTLFLAG_RD, &tx_stats->cnt, "Packets sent");
310 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "bytes",
311 		    CTLFLAG_RD, &tx_stats->bytes, "Bytes sent");
312 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
313 		    "prepare_ctx_err", CTLFLAG_RD, &tx_stats->prepare_ctx_err,
314 		    "TX buffer preparation failures");
315 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
316 		    "dma_mapping_err", CTLFLAG_RD, &tx_stats->dma_mapping_err,
317 		    "DMA mapping failures");
318 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "doorbells",
319 		    CTLFLAG_RD, &tx_stats->doorbells, "Queue doorbells");
320 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
321 		    "missing_tx_comp", CTLFLAG_RD, &tx_stats->missing_tx_comp,
322 		    "TX completions missed");
323 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "bad_req_id",
324 		    CTLFLAG_RD, &tx_stats->bad_req_id, "Bad request id count");
325 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "mbuf_collapses",
326 		    CTLFLAG_RD, &tx_stats->collapse, "Mbuf collapse count");
327 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
328 		    "mbuf_collapse_err", CTLFLAG_RD, &tx_stats->collapse_err,
329 		    "Mbuf collapse failures");
330 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "queue_wakeups",
331 		    CTLFLAG_RD, &tx_stats->queue_wakeup, "Queue wakeups");
332 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "queue_stops",
333 		    CTLFLAG_RD, &tx_stats->queue_stop, "Queue stops");
334 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
335 		    "llq_buffer_copy", CTLFLAG_RD, &tx_stats->llq_buffer_copy,
336 		    "Header copies for llq transaction");
337 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
338 		    "unmask_interrupt_num", CTLFLAG_RD,
339 		    &tx_stats->unmask_interrupt_num,
340 		    "Unmasked interrupt count");
341 
342 		/* RX specific stats */
343 		rx_node = SYSCTL_ADD_NODE(ctx, queue_list, OID_AUTO, "rx_ring",
344 		    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "RX ring");
345 		rx_list = SYSCTL_CHILDREN(rx_node);
346 
347 		rx_stats = &rx_ring->rx_stats;
348 
349 		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "count",
350 		    CTLFLAG_RD, &rx_stats->cnt, "Packets received");
351 		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "bytes",
352 		    CTLFLAG_RD, &rx_stats->bytes, "Bytes received");
353 		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "refil_partial",
354 		    CTLFLAG_RD, &rx_stats->refil_partial,
355 		    "Partial refilled mbufs");
356 		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "csum_bad",
357 		    CTLFLAG_RD, &rx_stats->csum_bad, "Bad RX checksum");
358 		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
359 		    "mbuf_alloc_fail", CTLFLAG_RD, &rx_stats->mbuf_alloc_fail,
360 		    "Failed mbuf allocs");
361 		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
362 		    "mjum_alloc_fail", CTLFLAG_RD, &rx_stats->mjum_alloc_fail,
363 		    "Failed jumbo mbuf allocs");
364 		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
365 		    "dma_mapping_err", CTLFLAG_RD, &rx_stats->dma_mapping_err,
366 		    "DMA mapping errors");
367 		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "bad_desc_num",
368 		    CTLFLAG_RD, &rx_stats->bad_desc_num,
369 		    "Bad descriptor count");
370 		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "bad_req_id",
371 		    CTLFLAG_RD, &rx_stats->bad_req_id, "Bad request id count");
372 		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "empty_rx_ring",
373 		    CTLFLAG_RD, &rx_stats->empty_rx_ring,
374 		    "RX descriptors depletion count");
375 		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "csum_good",
376 		    CTLFLAG_RD, &rx_stats->csum_good,
377 		    "Valid RX checksum calculations");
378 	}
379 
380 	/* Stats read from device */
381 	hw_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "hw_stats",
382 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Statistics from hardware");
383 	hw_list = SYSCTL_CHILDREN(hw_node);
384 
385 	SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_packets", CTLFLAG_RD,
386 	    &hw_stats->rx_packets, "Packets received");
387 	SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_packets", CTLFLAG_RD,
388 	    &hw_stats->tx_packets, "Packets transmitted");
389 	SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_bytes", CTLFLAG_RD,
390 	    &hw_stats->rx_bytes, "Bytes received");
391 	SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_bytes", CTLFLAG_RD,
392 	    &hw_stats->tx_bytes, "Bytes transmitted");
393 	SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_drops", CTLFLAG_RD,
394 	    &hw_stats->rx_drops, "Receive packet drops");
395 	SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_drops", CTLFLAG_RD,
396 	    &hw_stats->tx_drops, "Transmit packet drops");
397 
398 	/* ENA Admin queue stats */
399 	admin_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "admin_stats",
400 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "ENA Admin Queue statistics");
401 	admin_list = SYSCTL_CHILDREN(admin_node);
402 
403 	SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "aborted_cmd", CTLFLAG_RD,
404 	    &admin_stats->aborted_cmd, 0, "Aborted commands");
405 	SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "sumbitted_cmd", CTLFLAG_RD,
406 	    &admin_stats->submitted_cmd, 0, "Submitted commands");
407 	SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "completed_cmd", CTLFLAG_RD,
408 	    &admin_stats->completed_cmd, 0, "Completed commands");
409 	SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "out_of_space", CTLFLAG_RD,
410 	    &admin_stats->out_of_space, 0, "Queue out of space");
411 	SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "no_completion", CTLFLAG_RD,
412 	    &admin_stats->no_completion, 0, "Commands not completed");
413 }
414 
415 static void
416 ena_sysctl_add_srd_info(struct ena_adapter *adapter)
417 {
418 	device_t dev;
419 
420 	struct sysctl_oid *ena_srd_info;
421 	struct sysctl_oid_list *srd_list;
422 
423 	struct sysctl_ctx_list *ctx;
424 	struct sysctl_oid *tree;
425 	struct sysctl_oid_list *child;
426 
427 	struct ena_admin_ena_srd_stats *srd_stats_ptr;
428 	struct ena_srd_metrics cur_stat_strings;
429 
430 	int i;
431 
432 	dev = adapter->pdev;
433 
434 	ctx = device_get_sysctl_ctx(dev);
435 	tree = device_get_sysctl_tree(dev);
436 	child = SYSCTL_CHILDREN(tree);
437 
438 	ena_srd_info = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "ena_srd_info",
439 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "ENA's SRD information");
440 	srd_list = SYSCTL_CHILDREN(ena_srd_info);
441 
442 	SYSCTL_ADD_U64(ctx, srd_list, OID_AUTO, "ena_srd_mode",
443             CTLFLAG_RD, &adapter->ena_srd_info.flags, 0,
444             "Describes which ENA-express features are enabled");
445 
446 	srd_stats_ptr = &adapter->ena_srd_info.ena_srd_stats;
447 
448 	for (i = 0 ; i < ENA_SRD_METRICS_ARRAY_SIZE; i++) {
449 		cur_stat_strings = ena_srd_stats_strings[i];
450 		SYSCTL_ADD_U64(ctx, srd_list, OID_AUTO, cur_stat_strings.name,
451 		    CTLFLAG_RD, (u64 *)srd_stats_ptr + cur_stat_strings.stat_offset,
452 		    0, cur_stat_strings.description);
453 	}
454 }
455 
456 static void
457 ena_sysctl_add_customer_metrics(struct ena_adapter *adapter)
458 {
459 	device_t dev;
460 	struct ena_com_dev *ena_dev;
461 
462 	struct sysctl_ctx_list *ctx;
463 	struct sysctl_oid *tree;
464 	struct sysctl_oid_list *child;
465 
466 	struct sysctl_oid *customer_metric;
467 	struct sysctl_oid_list *customer_list;
468 
469 	int i;
470 
471 	dev = adapter->pdev;
472 	ena_dev = adapter->ena_dev;
473 
474 	ctx = device_get_sysctl_ctx(dev);
475 	tree = device_get_sysctl_tree(dev);
476 	child = SYSCTL_CHILDREN(tree);
477 	customer_metric = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "customer_metrics",
478 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "ENA's customer metrics");
479 	customer_list = SYSCTL_CHILDREN(customer_metric);
480 
481 	for (i = 0; i < ENA_CUSTOMER_METRICS_ARRAY_SIZE; i++) {
482 	        if (ena_com_get_customer_metric_support(ena_dev, i)) {
483 	                SYSCTL_ADD_U64(ctx, customer_list, OID_AUTO, ena_hw_stats_strings[i].name,
484 	                    CTLFLAG_RD, &adapter->customer_metrics_array[i], 0,
485 	                    ena_hw_stats_strings[i].description);
486 	         }
487 	 }
488 }
489 
490 static void
491 ena_sysctl_add_eni_metrics(struct ena_adapter *adapter)
492 {
493 	device_t dev;
494 	struct ena_admin_eni_stats *eni_metrics;
495 
496 	struct sysctl_ctx_list *ctx;
497 	struct sysctl_oid *tree;
498 	struct sysctl_oid_list *child;
499 
500 	struct sysctl_oid *eni_node;
501 	struct sysctl_oid_list *eni_list;
502 
503 	dev = adapter->pdev;
504 
505 	ctx = device_get_sysctl_ctx(dev);
506 	tree = device_get_sysctl_tree(dev);
507 	child = SYSCTL_CHILDREN(tree);
508 
509 	eni_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "eni_metrics",
510 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "ENA's ENI metrics");
511 	eni_list = SYSCTL_CHILDREN(eni_node);
512 
513 	eni_metrics = &adapter->eni_metrics;
514 
515 	SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "bw_in_allowance_exceeded",
516 	    CTLFLAG_RD, &eni_metrics->bw_in_allowance_exceeded, 0,
517 	    "Inbound BW allowance exceeded");
518 	SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "bw_out_allowance_exceeded",
519 	    CTLFLAG_RD, &eni_metrics->bw_out_allowance_exceeded, 0,
520 	    "Outbound BW allowance exceeded");
521 	SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "pps_allowance_exceeded",
522 	    CTLFLAG_RD, &eni_metrics->pps_allowance_exceeded, 0,
523 	    "PPS allowance exceeded");
524 	SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "conntrack_allowance_exceeded",
525 	    CTLFLAG_RD, &eni_metrics->conntrack_allowance_exceeded, 0,
526 	    "Connection tracking allowance exceeded");
527 	SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "linklocal_allowance_exceeded",
528 	    CTLFLAG_RD, &eni_metrics->linklocal_allowance_exceeded, 0,
529 	    "Linklocal packet rate allowance exceeded");
530 }
531 
532 static void
533 ena_sysctl_add_tuneables(struct ena_adapter *adapter)
534 {
535 	device_t dev;
536 
537 	struct sysctl_ctx_list *ctx;
538 	struct sysctl_oid *tree;
539 	struct sysctl_oid_list *child;
540 
541 	dev = adapter->pdev;
542 
543 	ctx = device_get_sysctl_ctx(dev);
544 	tree = device_get_sysctl_tree(dev);
545 	child = SYSCTL_CHILDREN(tree);
546 
547 	/* Tuneable number of buffers in the buf-ring (drbr) */
548 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "buf_ring_size",
549 	    CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
550 	    ena_sysctl_buf_ring_size, "I",
551 	    "Size of the Tx buffer ring (drbr).");
552 
553 	/* Tuneable number of the Rx ring size */
554 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_queue_size",
555 	    CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
556 	    ena_sysctl_rx_queue_size, "I",
557 	    "Size of the Rx ring. The size should be a power of 2.");
558 
559 	/* Tuneable number of IO queues */
560 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "io_queues_nb",
561 	    CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
562 	    ena_sysctl_io_queues_nb, "I", "Number of IO queues.");
563 
564 	/*
565 	 * Tuneable, which determines how often ENA metrics will be read.
566 	 * 0 means it's turned off. Maximum allowed value is limited by:
567 	 * ENA_METRICS_MAX_SAMPLE_INTERVAL.
568 	 */
569 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "stats_sample_interval",
570 	    CTLTYPE_U16 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
571 	    ena_sysctl_metrics_interval, "SU",
572 	    "Interval in seconds for updating Netword interface metrics. 0 turns off the update.");
573 }
574 
575 /* Kernel option RSS prevents manipulation of key hash and indirection table. */
576 #ifndef RSS
577 static void
578 ena_sysctl_add_rss(struct ena_adapter *adapter)
579 {
580 	device_t dev;
581 
582 	struct sysctl_ctx_list *ctx;
583 	struct sysctl_oid *tree;
584 	struct sysctl_oid_list *child;
585 
586 	dev = adapter->pdev;
587 
588 	ctx = device_get_sysctl_ctx(dev);
589 	tree = device_get_sysctl_tree(dev);
590 	child = SYSCTL_CHILDREN(tree);
591 
592 	/* RSS options */
593 	tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "rss",
594 	    CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, "Receive Side Scaling options.");
595 	child = SYSCTL_CHILDREN(tree);
596 
597 	/* RSS hash key */
598 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "key",
599 	    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
600 	    ena_sysctl_rss_key, "A", "RSS key.");
601 
602 	/* Tuneable RSS indirection table */
603 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "indir_table",
604 	    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
605 	    ena_sysctl_rss_indir_table, "A", "RSS indirection table.");
606 
607 	/* RSS indirection table size */
608 	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "indir_table_size",
609 	    CTLFLAG_RD | CTLFLAG_MPSAFE, &ena_rss_table_size, 0,
610 	    "RSS indirection table size.");
611 }
612 #endif /* RSS */
613 
614 static void
615 ena_sysctl_add_irq_affinity(struct ena_adapter *adapter)
616 {
617 	device_t dev;
618 
619 	struct sysctl_ctx_list *ctx;
620 	struct sysctl_oid *tree;
621 	struct sysctl_oid_list *child;
622 
623 	dev = adapter->pdev;
624 
625 	ctx = device_get_sysctl_ctx(dev);
626 	tree = device_get_sysctl_tree(dev);
627 	child = SYSCTL_CHILDREN(tree);
628 
629 	tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "irq_affinity",
630 	    CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, "Decide base CPU and stride for irqs affinity.");
631 	child = SYSCTL_CHILDREN(tree);
632 
633 	/* Add base cpu leaf */
634 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "base_cpu",
635 	    CTLTYPE_S32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
636 	    ena_sysctl_irq_base_cpu, "I", "Base cpu index for setting irq affinity.");
637 
638 	/* Add cpu stride leaf */
639 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "cpu_stride",
640 	    CTLTYPE_S32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
641 	    ena_sysctl_irq_cpu_stride, "I", "Distance between irqs when setting affinity.");
642 }
643 
644 
645 /*
646  * ena_sysctl_update_queue_node_nb - Register/unregister sysctl queue nodes.
647  *
648  * Whether the nodes are registered or unregistered depends on a delta between
649  * the `old` and `new` parameters, representing the number of queues.
650  *
651  * This function is used to hide sysctl attributes for queue nodes which aren't
652  * currently used by the HW (e.g. after a call to `ena_sysctl_io_queues_nb`).
653  *
654  * NOTE:
655  * All unregistered nodes must be registered again at detach, i.e. by a call to
656  * this function.
657  */
658 void
659 ena_sysctl_update_queue_node_nb(struct ena_adapter *adapter, int old, int new)
660 {
661 	struct sysctl_oid *oid;
662 	int min, max, i;
663 
664 	min = MIN(old, new);
665 	max = MIN(MAX(old, new), adapter->max_num_io_queues);
666 
667 	for (i = min; i < max; ++i) {
668 		oid = adapter->que[i].oid;
669 
670 		sysctl_wlock();
671 		if (old > new)
672 			sysctl_unregister_oid(oid);
673 		else
674 			sysctl_register_oid(oid);
675 		sysctl_wunlock();
676 	}
677 }
678 
679 static int
680 ena_sysctl_buf_ring_size(SYSCTL_HANDLER_ARGS)
681 {
682 	struct ena_adapter *adapter = arg1;
683 	uint32_t val;
684 	int error;
685 
686 	ENA_LOCK_LOCK();
687 	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
688 		error = EINVAL;
689 		goto unlock;
690 	}
691 
692 	val = 0;
693 	error = sysctl_wire_old_buffer(req, sizeof(val));
694 	if (error == 0) {
695 		val = adapter->buf_ring_size;
696 		error = sysctl_handle_32(oidp, &val, 0, req);
697 	}
698 	if (error != 0 || req->newptr == NULL)
699 		goto unlock;
700 
701 	if (!powerof2(val) || val == 0) {
702 		ena_log(adapter->pdev, ERR,
703 		    "Requested new Tx buffer ring size (%u) is not a power of 2\n",
704 		    val);
705 		error = EINVAL;
706 		goto unlock;
707 	}
708 
709 	if (val != adapter->buf_ring_size) {
710 		ena_log(adapter->pdev, INFO,
711 		    "Requested new Tx buffer ring size: %d. Old size: %d\n",
712 		    val, adapter->buf_ring_size);
713 
714 		error = ena_update_buf_ring_size(adapter, val);
715 	} else {
716 		ena_log(adapter->pdev, ERR,
717 		    "New Tx buffer ring size is the same as already used: %u\n",
718 		    adapter->buf_ring_size);
719 	}
720 
721 unlock:
722 	ENA_LOCK_UNLOCK();
723 
724 	return (error);
725 }
726 
727 static int
728 ena_sysctl_rx_queue_size(SYSCTL_HANDLER_ARGS)
729 {
730 	struct ena_adapter *adapter = arg1;
731 	uint32_t val;
732 	int error;
733 
734 	ENA_LOCK_LOCK();
735 	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
736 		error = EINVAL;
737 		goto unlock;
738 	}
739 
740 	val = 0;
741 	error = sysctl_wire_old_buffer(req, sizeof(val));
742 	if (error == 0) {
743 		val = adapter->requested_rx_ring_size;
744 		error = sysctl_handle_32(oidp, &val, 0, req);
745 	}
746 	if (error != 0 || req->newptr == NULL)
747 		goto unlock;
748 
749 	if (val < ENA_MIN_RING_SIZE || val > adapter->max_rx_ring_size) {
750 		ena_log(adapter->pdev, ERR,
751 		    "Requested new Rx queue size (%u) is out of range: [%u, %u]\n",
752 		    val, ENA_MIN_RING_SIZE, adapter->max_rx_ring_size);
753 		error = EINVAL;
754 		goto unlock;
755 	}
756 
757 	/* Check if the parameter is power of 2 */
758 	if (!powerof2(val)) {
759 		ena_log(adapter->pdev, ERR,
760 		    "Requested new Rx queue size (%u) is not a power of 2\n",
761 		    val);
762 		error = EINVAL;
763 		goto unlock;
764 	}
765 
766 	if (val != adapter->requested_rx_ring_size) {
767 		ena_log(adapter->pdev, INFO,
768 		    "Requested new Rx queue size: %u. Old size: %u\n", val,
769 		    adapter->requested_rx_ring_size);
770 
771 		error = ena_update_queue_size(adapter,
772 		    adapter->requested_tx_ring_size, val);
773 	} else {
774 		ena_log(adapter->pdev, ERR,
775 		    "New Rx queue size is the same as already used: %u\n",
776 		    adapter->requested_rx_ring_size);
777 	}
778 
779 unlock:
780 	ENA_LOCK_UNLOCK();
781 
782 	return (error);
783 }
784 
785 /*
786  * Change number of effectively used IO queues adapter->num_io_queues
787  */
788 static int
789 ena_sysctl_io_queues_nb(SYSCTL_HANDLER_ARGS)
790 {
791 	struct ena_adapter *adapter = arg1;
792 	uint32_t old_num_queues, tmp = 0;
793 	int error;
794 
795 	ENA_LOCK_LOCK();
796 	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
797 		error = EINVAL;
798 		goto unlock;
799 	}
800 
801 	error = sysctl_wire_old_buffer(req, sizeof(tmp));
802 	if (error == 0) {
803 		tmp = adapter->num_io_queues;
804 		error = sysctl_handle_int(oidp, &tmp, 0, req);
805 	}
806 	if (error != 0 || req->newptr == NULL)
807 		goto unlock;
808 
809 	if (tmp == 0) {
810 		ena_log(adapter->pdev, ERR,
811 		    "Requested number of IO queues is zero\n");
812 		error = EINVAL;
813 		goto unlock;
814 	}
815 
816 	/*
817 	 * The adapter::max_num_io_queues is the HW capability. The system
818 	 * resources availability may potentially be a tighter limit. Therefore
819 	 * the relation `adapter::max_num_io_queues >= adapter::msix_vecs`
820 	 * always holds true, while the `adapter::msix_vecs` is variable across
821 	 * device reset (`ena_destroy_device()` + `ena_restore_device()`).
822 	 */
823 	if (tmp > (adapter->msix_vecs - ENA_ADMIN_MSIX_VEC)) {
824 		ena_log(adapter->pdev, ERR,
825 		    "Requested number of IO queues is higher than maximum allowed (%u)\n",
826 		    adapter->msix_vecs - ENA_ADMIN_MSIX_VEC);
827 		error = EINVAL;
828 		goto unlock;
829 	}
830 	if (tmp == adapter->num_io_queues) {
831 		ena_log(adapter->pdev, ERR,
832 		    "Requested number of IO queues is equal to current value "
833 		    "(%u)\n",
834 		    adapter->num_io_queues);
835 	} else {
836 		ena_log(adapter->pdev, INFO,
837 		    "Requested new number of IO queues: %u, current value: "
838 		    "%u\n",
839 		    tmp, adapter->num_io_queues);
840 
841 		old_num_queues = adapter->num_io_queues;
842 		error = ena_update_io_queue_nb(adapter, tmp);
843 		if (error != 0)
844 			return (error);
845 
846 		ena_sysctl_update_queue_node_nb(adapter, old_num_queues, tmp);
847 	}
848 
849 unlock:
850 	ENA_LOCK_UNLOCK();
851 
852 	return (error);
853 }
854 
855 static int
856 ena_sysctl_metrics_interval(SYSCTL_HANDLER_ARGS)
857 {
858 	struct ena_adapter *adapter = arg1;
859 	uint16_t interval;
860 	int error;
861 
862 	ENA_LOCK_LOCK();
863 	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
864 		error = EINVAL;
865 		goto unlock;
866 	}
867 
868 	error = sysctl_wire_old_buffer(req, sizeof(interval));
869 	if (error == 0) {
870 		interval = adapter->metrics_sample_interval;
871 		error = sysctl_handle_16(oidp, &interval, 0, req);
872 	}
873 	if (error != 0 || req->newptr == NULL)
874 		goto unlock;
875 
876 	if (interval > ENA_METRICS_MAX_SAMPLE_INTERVAL) {
877 		ena_log(adapter->pdev, ERR,
878 		    "ENA metrics update interval is out of range - maximum allowed value: %d seconds\n",
879 		    ENA_METRICS_MAX_SAMPLE_INTERVAL);
880 		error = EINVAL;
881 		goto unlock;
882 	}
883 
884 	if (interval == 0) {
885 		ena_log(adapter->pdev, INFO,
886 		    "ENA metrics update is now turned off\n");
887 		bzero(&adapter->eni_metrics, sizeof(adapter->eni_metrics));
888 	} else {
889 		ena_log(adapter->pdev, INFO,
890 		    "ENA metrics update interval is set to: %" PRIu16
891 		    " seconds\n",
892 		    interval);
893 	}
894 
895 	adapter->metrics_sample_interval = interval;
896 
897 unlock:
898 	ENA_LOCK_UNLOCK();
899 
900 	return (0);
901 }
902 
903 static int
904 ena_sysctl_irq_base_cpu(SYSCTL_HANDLER_ARGS)
905 {
906 	struct ena_adapter *adapter = arg1;
907 	int irq_base_cpu = 0;
908 	int error;
909 
910 	ENA_LOCK_LOCK();
911 	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
912 		error = ENODEV;
913 		goto unlock;
914 	}
915 
916 	error = sysctl_wire_old_buffer(req, sizeof(irq_base_cpu));
917 	if (error == 0) {
918 		irq_base_cpu = adapter->irq_cpu_base;
919 		error = sysctl_handle_int(oidp, &irq_base_cpu, 0, req);
920 	}
921 	if (error != 0 || req->newptr == NULL)
922 		goto unlock;
923 
924 	if (irq_base_cpu <= ENA_BASE_CPU_UNSPECIFIED) {
925 		ena_log(adapter->pdev, ERR,
926 		    "Requested base CPU is less than zero.\n");
927 		error = EINVAL;
928 		goto unlock;
929 	}
930 
931 	if (irq_base_cpu > mp_ncpus) {
932 		ena_log(adapter->pdev, INFO,
933 		    "Requested base CPU is larger than the number of available CPUs. \n");
934 		error = EINVAL;
935 		goto unlock;
936 
937 	}
938 
939 	if (irq_base_cpu == adapter->irq_cpu_base) {
940 		ena_log(adapter->pdev, INFO,
941 		    "Requested IRQ base CPU is equal to current value "
942 		    "(%d)\n",
943 		    adapter->irq_cpu_base);
944 		goto unlock;
945 	}
946 
947 	ena_log(adapter->pdev, INFO,
948 	    "Requested new IRQ base CPU: %d, current value: %d\n",
949 	    irq_base_cpu, adapter->irq_cpu_base);
950 
951 	error = ena_update_base_cpu(adapter, irq_base_cpu);
952 
953 unlock:
954 	ENA_LOCK_UNLOCK();
955 
956 	return (error);
957 }
958 
959 static int
960 ena_sysctl_irq_cpu_stride(SYSCTL_HANDLER_ARGS)
961 {
962 	struct ena_adapter *adapter = arg1;
963 	int32_t irq_cpu_stride = 0;
964 	int error;
965 
966 	ENA_LOCK_LOCK();
967 	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
968 		error = ENODEV;
969 		goto unlock;
970 	}
971 
972 	error = sysctl_wire_old_buffer(req, sizeof(irq_cpu_stride));
973 	if (error == 0) {
974 		irq_cpu_stride = adapter->irq_cpu_stride;
975 		error = sysctl_handle_int(oidp, &irq_cpu_stride, 0, req);
976 	}
977 	if (error != 0 || req->newptr == NULL)
978 		goto unlock;
979 
980 	if (irq_cpu_stride < 0) {
981 		ena_log(adapter->pdev, ERR,
982 		    "Requested IRQ stride is less than zero.\n");
983 		error = EINVAL;
984 		goto unlock;
985 	}
986 
987 	if (irq_cpu_stride > mp_ncpus) {
988 		ena_log(adapter->pdev, INFO,
989 		    "Warning: Requested IRQ stride is larger than the number of available CPUs.\n");
990 	}
991 
992 	if (irq_cpu_stride == adapter->irq_cpu_stride) {
993 		ena_log(adapter->pdev, INFO,
994 		    "Requested IRQ CPU stride is equal to current value "
995 		    "(%u)\n",
996 		    adapter->irq_cpu_stride);
997 		goto unlock;
998 	}
999 
1000 	ena_log(adapter->pdev, INFO,
1001 	    "Requested new IRQ CPU stride: %u, current value: %u\n",
1002 	    irq_cpu_stride, adapter->irq_cpu_stride);
1003 
1004 	error = ena_update_cpu_stride(adapter, irq_cpu_stride);
1005 	if (error != 0)
1006 		goto unlock;
1007 
1008 unlock:
1009 	ENA_LOCK_UNLOCK();
1010 
1011 	return (error);
1012 }
1013 
1014 #ifndef RSS
1015 /*
1016  * Change the Receive Side Scaling hash key.
1017  */
1018 static int
1019 ena_sysctl_rss_key(SYSCTL_HANDLER_ARGS)
1020 {
1021 	struct ena_adapter *adapter = arg1;
1022 	struct ena_com_dev *ena_dev = adapter->ena_dev;
1023 	enum ena_admin_hash_functions ena_func;
1024 	char msg[ENA_HASH_KEY_MSG_SIZE];
1025 	char elem[3] = { 0 };
1026 	char *endp;
1027 	u8 rss_key[ENA_HASH_KEY_SIZE];
1028 	int error, i;
1029 
1030 	ENA_LOCK_LOCK();
1031 	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
1032 		error = EINVAL;
1033 		goto unlock;
1034 	}
1035 
1036 	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) {
1037 		error = ENOTSUP;
1038 		goto unlock;
1039 	}
1040 
1041 	error = sysctl_wire_old_buffer(req, sizeof(msg));
1042 	if (error != 0)
1043 		goto unlock;
1044 
1045 	error = ena_com_get_hash_function(adapter->ena_dev, &ena_func);
1046 	if (error != 0) {
1047 		device_printf(adapter->pdev, "Cannot get hash function\n");
1048 		goto unlock;
1049 	}
1050 
1051 	if (ena_func != ENA_ADMIN_TOEPLITZ) {
1052 		error = EINVAL;
1053 		device_printf(adapter->pdev, "Unsupported hash algorithm\n");
1054 		goto unlock;
1055 	}
1056 
1057 	error = ena_rss_get_hash_key(ena_dev, rss_key);
1058 	if (error != 0) {
1059 		device_printf(adapter->pdev, "Cannot get hash key\n");
1060 		goto unlock;
1061 	}
1062 
1063 	for (i = 0; i < ENA_HASH_KEY_SIZE; ++i)
1064 		snprintf(&msg[i * 2], 3, "%02x", rss_key[i]);
1065 
1066 	error = sysctl_handle_string(oidp, msg, sizeof(msg), req);
1067 	if (error != 0 || req->newptr == NULL)
1068 		goto unlock;
1069 
1070 	if (strlen(msg) != sizeof(msg) - 1) {
1071 		error = EINVAL;
1072 		device_printf(adapter->pdev, "Invalid key size\n");
1073 		goto unlock;
1074 	}
1075 
1076 	for (i = 0; i < ENA_HASH_KEY_SIZE; ++i) {
1077 		strncpy(elem, &msg[i * 2], 2);
1078 		rss_key[i] = strtol(elem, &endp, 16);
1079 
1080 		/* Both hex nibbles in the string must be valid to continue. */
1081 		if (endp == elem || *endp != '\0' || rss_key[i] < 0) {
1082 			error = EINVAL;
1083 			device_printf(adapter->pdev,
1084 			    "Invalid key hex value: '%c'\n", *endp);
1085 			goto unlock;
1086 		}
1087 	}
1088 
1089 	error = ena_rss_set_hash(ena_dev, rss_key);
1090 	if (error != 0)
1091 		device_printf(adapter->pdev, "Cannot fill hash key\n");
1092 
1093 unlock:
1094 	ENA_LOCK_UNLOCK();
1095 
1096 	return (error);
1097 }
1098 
1099 /*
1100  * Change the Receive Side Scaling indirection table.
1101  *
1102  * The sysctl entry string consists of one or more `x:y` keypairs, where
1103  * x stands for the table index and y for its new value.
1104  * Table indices that don't need to be updated can be omitted from the string
1105  * and will retain their existing values. If an index is entered more than once,
1106  * the last value is used.
1107  *
1108  * Example:
1109  * To update two selected indices in the RSS indirection table, e.g. setting
1110  * index 0 to queue 5 and then index 5 to queue 0, the below command should be
1111  * used:
1112  *   sysctl dev.ena.0.rss.indir_table="0:5 5:0"
1113  */
1114 static int
1115 ena_sysctl_rss_indir_table(SYSCTL_HANDLER_ARGS)
1116 {
1117 	int num_queues, error;
1118 	struct ena_adapter *adapter = arg1;
1119 	struct ena_indir *indir;
1120 	char *msg, *buf, *endp;
1121 	uint32_t idx, value;
1122 
1123 	ENA_LOCK_LOCK();
1124 	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
1125 		error = EINVAL;
1126 		goto unlock;
1127 	}
1128 
1129 	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) {
1130 		error = ENOTSUP;
1131 		goto unlock;
1132 	}
1133 
1134 	indir = adapter->rss_indir;
1135 	msg = indir->sysctl_buf;
1136 
1137 	if (unlikely(indir == NULL)) {
1138 		error = ENOTSUP;
1139 		goto unlock;
1140 	}
1141 
1142 	error = sysctl_handle_string(oidp, msg, sizeof(indir->sysctl_buf), req);
1143 	if (error != 0 || req->newptr == NULL)
1144 		goto unlock;
1145 
1146 	num_queues = adapter->num_io_queues;
1147 
1148 	/*
1149 	 * This sysctl expects msg to be a list of `x:y` record pairs,
1150 	 * where x is the indirection table index and y is its value.
1151 	 */
1152 	for (buf = msg; *buf != '\0'; buf = endp) {
1153 		idx = strtol(buf, &endp, 10);
1154 
1155 		if (endp == buf || idx < 0) {
1156 			device_printf(adapter->pdev, "Invalid index: %s\n",
1157 			    buf);
1158 			error = EINVAL;
1159 			break;
1160 		}
1161 
1162 		if (idx >= ENA_RX_RSS_TABLE_SIZE) {
1163 			device_printf(adapter->pdev, "Index %d out of range\n",
1164 			    idx);
1165 			error = ERANGE;
1166 			break;
1167 		}
1168 
1169 		buf = endp;
1170 
1171 		if (*buf++ != ':') {
1172 			device_printf(adapter->pdev, "Missing ':' separator\n");
1173 			error = EINVAL;
1174 			break;
1175 		}
1176 
1177 		value = strtol(buf, &endp, 10);
1178 
1179 		if (endp == buf || value < 0) {
1180 			device_printf(adapter->pdev, "Invalid value: %s\n",
1181 			    buf);
1182 			error = EINVAL;
1183 			break;
1184 		}
1185 
1186 		if (value >= num_queues) {
1187 			device_printf(adapter->pdev, "Value %d out of range\n",
1188 			    value);
1189 			error = ERANGE;
1190 			break;
1191 		}
1192 
1193 		indir->table[idx] = value;
1194 	}
1195 
1196 	if (error != 0) /* Reload indirection table with last good data. */
1197 		ena_rss_indir_get(adapter, indir->table);
1198 
1199 	/* At this point msg has been clobbered by sysctl_handle_string. */
1200 	ena_rss_copy_indir_buf(msg, indir->table);
1201 
1202 	if (error == 0)
1203 		error = ena_rss_indir_set(adapter, indir->table);
1204 
1205 unlock:
1206 	ENA_LOCK_UNLOCK();
1207 
1208 	return (error);
1209 }
1210 #endif /* RSS */
1211