/*
 * AF_XDP network backend.
 *
 * Copyright (c) 2023 Red Hat, Inc.
 *
 * Authors:
 * Ilya Maximets <i.maximets@ovn.org>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */
12cb039ef3SIlya Maximets
13cb039ef3SIlya Maximets
14cb039ef3SIlya Maximets #include "qemu/osdep.h"
15cb039ef3SIlya Maximets #include <bpf/bpf.h>
16cb039ef3SIlya Maximets #include <linux/if_link.h>
17cb039ef3SIlya Maximets #include <linux/if_xdp.h>
18cb039ef3SIlya Maximets #include <net/if.h>
19cb039ef3SIlya Maximets #include <xdp/xsk.h>
20cb039ef3SIlya Maximets
21cb039ef3SIlya Maximets #include "clients.h"
22cb039ef3SIlya Maximets #include "monitor/monitor.h"
23cb039ef3SIlya Maximets #include "net/net.h"
24cb039ef3SIlya Maximets #include "qapi/error.h"
25cb039ef3SIlya Maximets #include "qemu/cutils.h"
26cb039ef3SIlya Maximets #include "qemu/error-report.h"
27cb039ef3SIlya Maximets #include "qemu/iov.h"
28cb039ef3SIlya Maximets #include "qemu/main-loop.h"
29cb039ef3SIlya Maximets #include "qemu/memalign.h"
30cb039ef3SIlya Maximets
31cb039ef3SIlya Maximets
32cb039ef3SIlya Maximets typedef struct AFXDPState {
33cb039ef3SIlya Maximets NetClientState nc;
34cb039ef3SIlya Maximets
35cb039ef3SIlya Maximets struct xsk_socket *xsk;
36cb039ef3SIlya Maximets struct xsk_ring_cons rx;
37cb039ef3SIlya Maximets struct xsk_ring_prod tx;
38cb039ef3SIlya Maximets struct xsk_ring_cons cq;
39cb039ef3SIlya Maximets struct xsk_ring_prod fq;
40cb039ef3SIlya Maximets
41cb039ef3SIlya Maximets char ifname[IFNAMSIZ];
42cb039ef3SIlya Maximets int ifindex;
43cb039ef3SIlya Maximets bool read_poll;
44cb039ef3SIlya Maximets bool write_poll;
45cb039ef3SIlya Maximets uint32_t outstanding_tx;
46cb039ef3SIlya Maximets
47cb039ef3SIlya Maximets uint64_t *pool;
48cb039ef3SIlya Maximets uint32_t n_pool;
49cb039ef3SIlya Maximets char *buffer;
50cb039ef3SIlya Maximets struct xsk_umem *umem;
51cb039ef3SIlya Maximets
52cb039ef3SIlya Maximets uint32_t n_queues;
53cb039ef3SIlya Maximets uint32_t xdp_flags;
54cb039ef3SIlya Maximets bool inhibit;
55cb039ef3SIlya Maximets } AFXDPState;
56cb039ef3SIlya Maximets
/* Maximum number of Rx descriptors handled per af_xdp_send() call. */
#define AF_XDP_BATCH_SIZE 64

/* fd callbacks, defined below; needed by af_xdp_update_fd_handler(). */
static void af_xdp_send(void *opaque);
static void af_xdp_writable(void *opaque);
61cb039ef3SIlya Maximets
62cb039ef3SIlya Maximets /* Set the event-loop handlers for the af-xdp backend. */
af_xdp_update_fd_handler(AFXDPState * s)63cb039ef3SIlya Maximets static void af_xdp_update_fd_handler(AFXDPState *s)
64cb039ef3SIlya Maximets {
65cb039ef3SIlya Maximets qemu_set_fd_handler(xsk_socket__fd(s->xsk),
66cb039ef3SIlya Maximets s->read_poll ? af_xdp_send : NULL,
67cb039ef3SIlya Maximets s->write_poll ? af_xdp_writable : NULL,
68cb039ef3SIlya Maximets s);
69cb039ef3SIlya Maximets }
70cb039ef3SIlya Maximets
71cb039ef3SIlya Maximets /* Update the read handler. */
af_xdp_read_poll(AFXDPState * s,bool enable)72cb039ef3SIlya Maximets static void af_xdp_read_poll(AFXDPState *s, bool enable)
73cb039ef3SIlya Maximets {
74cb039ef3SIlya Maximets if (s->read_poll != enable) {
75cb039ef3SIlya Maximets s->read_poll = enable;
76cb039ef3SIlya Maximets af_xdp_update_fd_handler(s);
77cb039ef3SIlya Maximets }
78cb039ef3SIlya Maximets }
79cb039ef3SIlya Maximets
80cb039ef3SIlya Maximets /* Update the write handler. */
af_xdp_write_poll(AFXDPState * s,bool enable)81cb039ef3SIlya Maximets static void af_xdp_write_poll(AFXDPState *s, bool enable)
82cb039ef3SIlya Maximets {
83cb039ef3SIlya Maximets if (s->write_poll != enable) {
84cb039ef3SIlya Maximets s->write_poll = enable;
85cb039ef3SIlya Maximets af_xdp_update_fd_handler(s);
86cb039ef3SIlya Maximets }
87cb039ef3SIlya Maximets }
88cb039ef3SIlya Maximets
/*
 * The poll() callback of NetClientInfo: switch both the read and the write
 * handler on or off at once.
 */
static void af_xdp_poll(NetClientState *nc, bool enable)
{
    AFXDPState *s = DO_UPCAST(AFXDPState, nc, nc);

    if (s->read_poll == enable && s->write_poll == enable) {
        /* Both directions already in the requested state. */
        return;
    }

    s->read_poll = enable;
    s->write_poll = enable;
    af_xdp_update_fd_handler(s);
}
99cb039ef3SIlya Maximets
/*
 * Reclaim transmitted frames: move every address found on the completion
 * ring back to the free pool and account for it in outstanding_tx.
 */
static void af_xdp_complete_tx(AFXDPState *s)
{
    uint32_t idx = 0;
    uint32_t n_done, k;

    n_done = xsk_ring_cons__peek(&s->cq, XSK_RING_CONS__DEFAULT_NUM_DESCS,
                                 &idx);
    if (!n_done) {
        return;
    }

    for (k = 0; k < n_done; k++, idx++) {
        const uint64_t *addr;

        addr = (const void *) xsk_ring_cons__comp_addr(&s->cq, idx);
        s->pool[s->n_pool] = *addr;
        s->n_pool++;
    }
    s->outstanding_tx -= n_done;

    xsk_ring_cons__release(&s->cq, n_done);
}
118cb039ef3SIlya Maximets
119cb039ef3SIlya Maximets /*
120cb039ef3SIlya Maximets * The fd_write() callback, invoked if the fd is marked as writable
121cb039ef3SIlya Maximets * after a poll.
122cb039ef3SIlya Maximets */
af_xdp_writable(void * opaque)123cb039ef3SIlya Maximets static void af_xdp_writable(void *opaque)
124cb039ef3SIlya Maximets {
125cb039ef3SIlya Maximets AFXDPState *s = opaque;
126cb039ef3SIlya Maximets
127cb039ef3SIlya Maximets /* Try to recover buffers that are already sent. */
128cb039ef3SIlya Maximets af_xdp_complete_tx(s);
129cb039ef3SIlya Maximets
130cb039ef3SIlya Maximets /*
131cb039ef3SIlya Maximets * Unregister the handler, unless we still have packets to transmit
132cb039ef3SIlya Maximets * and kernel needs a wake up.
133cb039ef3SIlya Maximets */
134cb039ef3SIlya Maximets if (!s->outstanding_tx || !xsk_ring_prod__needs_wakeup(&s->tx)) {
135cb039ef3SIlya Maximets af_xdp_write_poll(s, false);
136cb039ef3SIlya Maximets }
137cb039ef3SIlya Maximets
138cb039ef3SIlya Maximets /* Flush any buffered packets. */
139cb039ef3SIlya Maximets qemu_flush_queued_packets(&s->nc);
140cb039ef3SIlya Maximets }
141cb039ef3SIlya Maximets
/*
 * The receive() callback of NetClientInfo: transmit one packet coming
 * from the peer through the AF_XDP socket.
 *
 * Returns 'size' when the packet was submitted (or dropped because it
 * does not fit into a frame), and 0 when there is currently no free
 * buffer or Tx ring slot; in the latter case the write handler is armed.
 */
static ssize_t af_xdp_receive(NetClientState *nc,
                              const uint8_t *buf, size_t size)
{
    AFXDPState *s = DO_UPCAST(AFXDPState, nc, nc);
    struct xdp_desc *desc;
    uint64_t frame;
    uint32_t idx;

    /* Pick up buffers of packets that have already been sent. */
    af_xdp_complete_tx(s);

    if (size > XSK_UMEM__DEFAULT_FRAME_SIZE) {
        /* Larger than one frame: cannot be sent, report it as consumed. */
        return size;
    }

    if (s->n_pool == 0 || xsk_ring_prod__reserve(&s->tx, 1, &idx) == 0) {
        /*
         * Out of buffers or space in tx ring.  Poll until we can write.
         * This will also kick the Tx, if it was waiting on CQ.
         */
        af_xdp_write_poll(s, true);
        return 0;
    }

    /* Take a free frame and copy the packet into it. */
    frame = s->pool[--s->n_pool];
    memcpy(xsk_umem__get_data(s->buffer, frame), buf, size);

    desc = xsk_ring_prod__tx_desc(&s->tx, idx);
    desc->addr = frame;
    desc->len = size;

    xsk_ring_prod__submit(&s->tx, 1);
    s->outstanding_tx++;

    if (xsk_ring_prod__needs_wakeup(&s->tx)) {
        af_xdp_write_poll(s, true);
    }

    return size;
}
183cb039ef3SIlya Maximets
184cb039ef3SIlya Maximets /*
185cb039ef3SIlya Maximets * Complete a previous send (backend --> guest) and enable the
186cb039ef3SIlya Maximets * fd_read callback.
187cb039ef3SIlya Maximets */
af_xdp_send_completed(NetClientState * nc,ssize_t len)188cb039ef3SIlya Maximets static void af_xdp_send_completed(NetClientState *nc, ssize_t len)
189cb039ef3SIlya Maximets {
190cb039ef3SIlya Maximets AFXDPState *s = DO_UPCAST(AFXDPState, nc, nc);
191cb039ef3SIlya Maximets
192cb039ef3SIlya Maximets af_xdp_read_poll(s, true);
193cb039ef3SIlya Maximets }
194cb039ef3SIlya Maximets
/*
 * Move up to 'n' free frames from the pool to the fill ring so the kernel
 * can use them for reception.  Nothing is done if the pool is empty or
 * the fill ring cannot take the whole batch.
 */
static void af_xdp_fq_refill(AFXDPState *s, uint32_t n)
{
    uint32_t idx = 0;
    uint32_t left;

    /*
     * Leave one packet for Tx, just in case.
     * NOTE(review): when the pool holds fewer than n + 1 frames this
     * clamps to the whole pool, so no frame is actually held back --
     * confirm whether that is intended.
     */
    if (s->n_pool < n + 1) {
        n = s->n_pool;
    }

    if (n == 0) {
        return;
    }
    if (xsk_ring_prod__reserve(&s->fq, n, &idx) == 0) {
        return;
    }

    for (left = n; left > 0; left--, idx++) {
        s->n_pool--;
        *xsk_ring_prod__fill_addr(&s->fq, idx) = s->pool[s->n_pool];
    }
    xsk_ring_prod__submit(&s->fq, n);

    if (xsk_ring_prod__needs_wakeup(&s->fq)) {
        /* Receive was blocked by not having enough buffers.  Wake it up. */
        af_xdp_read_poll(s, true);
    }
}
218cb039ef3SIlya Maximets
af_xdp_send(void * opaque)219cb039ef3SIlya Maximets static void af_xdp_send(void *opaque)
220cb039ef3SIlya Maximets {
221cb039ef3SIlya Maximets uint32_t i, n_rx, idx = 0;
222cb039ef3SIlya Maximets AFXDPState *s = opaque;
223cb039ef3SIlya Maximets
224cb039ef3SIlya Maximets n_rx = xsk_ring_cons__peek(&s->rx, AF_XDP_BATCH_SIZE, &idx);
225cb039ef3SIlya Maximets if (!n_rx) {
226cb039ef3SIlya Maximets return;
227cb039ef3SIlya Maximets }
228cb039ef3SIlya Maximets
229cb039ef3SIlya Maximets for (i = 0; i < n_rx; i++) {
230cb039ef3SIlya Maximets const struct xdp_desc *desc;
231cb039ef3SIlya Maximets struct iovec iov;
232cb039ef3SIlya Maximets
233cb039ef3SIlya Maximets desc = xsk_ring_cons__rx_desc(&s->rx, idx++);
234cb039ef3SIlya Maximets
235cb039ef3SIlya Maximets iov.iov_base = xsk_umem__get_data(s->buffer, desc->addr);
236cb039ef3SIlya Maximets iov.iov_len = desc->len;
237cb039ef3SIlya Maximets
238cb039ef3SIlya Maximets s->pool[s->n_pool++] = desc->addr;
239cb039ef3SIlya Maximets
240cb039ef3SIlya Maximets if (!qemu_sendv_packet_async(&s->nc, &iov, 1,
241cb039ef3SIlya Maximets af_xdp_send_completed)) {
242cb039ef3SIlya Maximets /*
243cb039ef3SIlya Maximets * The peer does not receive anymore. Packet is queued, stop
244cb039ef3SIlya Maximets * reading from the backend until af_xdp_send_completed().
245cb039ef3SIlya Maximets */
246cb039ef3SIlya Maximets af_xdp_read_poll(s, false);
247cb039ef3SIlya Maximets
248cb039ef3SIlya Maximets /* Return unused descriptors to not break the ring cache. */
249cb039ef3SIlya Maximets xsk_ring_cons__cancel(&s->rx, n_rx - i - 1);
250cb039ef3SIlya Maximets n_rx = i + 1;
251cb039ef3SIlya Maximets break;
252cb039ef3SIlya Maximets }
253cb039ef3SIlya Maximets }
254cb039ef3SIlya Maximets
255cb039ef3SIlya Maximets /* Release actually sent descriptors and try to re-fill. */
256cb039ef3SIlya Maximets xsk_ring_cons__release(&s->rx, n_rx);
257cb039ef3SIlya Maximets af_xdp_fq_refill(s, AF_XDP_BATCH_SIZE);
258cb039ef3SIlya Maximets }
259cb039ef3SIlya Maximets
260cb039ef3SIlya Maximets /* Flush and close. */
af_xdp_cleanup(NetClientState * nc)261cb039ef3SIlya Maximets static void af_xdp_cleanup(NetClientState *nc)
262cb039ef3SIlya Maximets {
263cb039ef3SIlya Maximets AFXDPState *s = DO_UPCAST(AFXDPState, nc, nc);
264cb039ef3SIlya Maximets
265cb039ef3SIlya Maximets qemu_purge_queued_packets(nc);
266cb039ef3SIlya Maximets
267cb039ef3SIlya Maximets af_xdp_poll(nc, false);
268cb039ef3SIlya Maximets
269cb039ef3SIlya Maximets xsk_socket__delete(s->xsk);
270cb039ef3SIlya Maximets s->xsk = NULL;
271cb039ef3SIlya Maximets g_free(s->pool);
272cb039ef3SIlya Maximets s->pool = NULL;
273cb039ef3SIlya Maximets xsk_umem__delete(s->umem);
274cb039ef3SIlya Maximets s->umem = NULL;
275cb039ef3SIlya Maximets qemu_vfree(s->buffer);
276cb039ef3SIlya Maximets s->buffer = NULL;
277cb039ef3SIlya Maximets
278cb039ef3SIlya Maximets /* Remove the program if it's the last open queue. */
279cb039ef3SIlya Maximets if (!s->inhibit && nc->queue_index == s->n_queues - 1 && s->xdp_flags
280cb039ef3SIlya Maximets && bpf_xdp_detach(s->ifindex, s->xdp_flags, NULL) != 0) {
281cb039ef3SIlya Maximets fprintf(stderr,
282cb039ef3SIlya Maximets "af-xdp: unable to remove XDP program from '%s', ifindex: %d\n",
283cb039ef3SIlya Maximets s->ifname, s->ifindex);
284cb039ef3SIlya Maximets }
285cb039ef3SIlya Maximets }
286cb039ef3SIlya Maximets
af_xdp_umem_create(AFXDPState * s,int sock_fd,Error ** errp)287cb039ef3SIlya Maximets static int af_xdp_umem_create(AFXDPState *s, int sock_fd, Error **errp)
288cb039ef3SIlya Maximets {
289cb039ef3SIlya Maximets struct xsk_umem_config config = {
290cb039ef3SIlya Maximets .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
291cb039ef3SIlya Maximets .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
292cb039ef3SIlya Maximets .frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
293cb039ef3SIlya Maximets .frame_headroom = 0,
294cb039ef3SIlya Maximets };
295cb039ef3SIlya Maximets uint64_t n_descs;
296cb039ef3SIlya Maximets uint64_t size;
297cb039ef3SIlya Maximets int64_t i;
298cb039ef3SIlya Maximets int ret;
299cb039ef3SIlya Maximets
300cb039ef3SIlya Maximets /* Number of descriptors if all 4 queues (rx, tx, cq, fq) are full. */
301cb039ef3SIlya Maximets n_descs = (XSK_RING_PROD__DEFAULT_NUM_DESCS
302cb039ef3SIlya Maximets + XSK_RING_CONS__DEFAULT_NUM_DESCS) * 2;
303cb039ef3SIlya Maximets size = n_descs * XSK_UMEM__DEFAULT_FRAME_SIZE;
304cb039ef3SIlya Maximets
305cb039ef3SIlya Maximets s->buffer = qemu_memalign(qemu_real_host_page_size(), size);
306cb039ef3SIlya Maximets memset(s->buffer, 0, size);
307cb039ef3SIlya Maximets
308cb039ef3SIlya Maximets if (sock_fd < 0) {
309cb039ef3SIlya Maximets ret = xsk_umem__create(&s->umem, s->buffer, size,
310cb039ef3SIlya Maximets &s->fq, &s->cq, &config);
311cb039ef3SIlya Maximets } else {
312cb039ef3SIlya Maximets ret = xsk_umem__create_with_fd(&s->umem, sock_fd, s->buffer, size,
313cb039ef3SIlya Maximets &s->fq, &s->cq, &config);
314cb039ef3SIlya Maximets }
315cb039ef3SIlya Maximets
316cb039ef3SIlya Maximets if (ret) {
317cb039ef3SIlya Maximets qemu_vfree(s->buffer);
318cb039ef3SIlya Maximets error_setg_errno(errp, errno,
319cb039ef3SIlya Maximets "failed to create umem for %s queue_index: %d",
320cb039ef3SIlya Maximets s->ifname, s->nc.queue_index);
321cb039ef3SIlya Maximets return -1;
322cb039ef3SIlya Maximets }
323cb039ef3SIlya Maximets
324cb039ef3SIlya Maximets s->pool = g_new(uint64_t, n_descs);
325cb039ef3SIlya Maximets /* Fill the pool in the opposite order, because it's a LIFO queue. */
326cb039ef3SIlya Maximets for (i = n_descs; i >= 0; i--) {
327cb039ef3SIlya Maximets s->pool[i] = i * XSK_UMEM__DEFAULT_FRAME_SIZE;
328cb039ef3SIlya Maximets }
329cb039ef3SIlya Maximets s->n_pool = n_descs;
330cb039ef3SIlya Maximets
331cb039ef3SIlya Maximets af_xdp_fq_refill(s, XSK_RING_PROD__DEFAULT_NUM_DESCS);
332cb039ef3SIlya Maximets
333cb039ef3SIlya Maximets return 0;
334cb039ef3SIlya Maximets }
335cb039ef3SIlya Maximets
af_xdp_socket_create(AFXDPState * s,const NetdevAFXDPOptions * opts,Error ** errp)336cb039ef3SIlya Maximets static int af_xdp_socket_create(AFXDPState *s,
337cb039ef3SIlya Maximets const NetdevAFXDPOptions *opts, Error **errp)
338cb039ef3SIlya Maximets {
339cb039ef3SIlya Maximets struct xsk_socket_config cfg = {
340cb039ef3SIlya Maximets .rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
341cb039ef3SIlya Maximets .tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
342cb039ef3SIlya Maximets .libxdp_flags = 0,
343cb039ef3SIlya Maximets .bind_flags = XDP_USE_NEED_WAKEUP,
344cb039ef3SIlya Maximets .xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST,
345cb039ef3SIlya Maximets };
346cb039ef3SIlya Maximets int queue_id, error = 0;
347cb039ef3SIlya Maximets
348cb039ef3SIlya Maximets s->inhibit = opts->has_inhibit && opts->inhibit;
349cb039ef3SIlya Maximets if (s->inhibit) {
350cb039ef3SIlya Maximets cfg.libxdp_flags |= XSK_LIBXDP_FLAGS__INHIBIT_PROG_LOAD;
351cb039ef3SIlya Maximets }
352cb039ef3SIlya Maximets
353cb039ef3SIlya Maximets if (opts->has_force_copy && opts->force_copy) {
354cb039ef3SIlya Maximets cfg.bind_flags |= XDP_COPY;
355cb039ef3SIlya Maximets }
356cb039ef3SIlya Maximets
357cb039ef3SIlya Maximets queue_id = s->nc.queue_index;
358cb039ef3SIlya Maximets if (opts->has_start_queue && opts->start_queue > 0) {
359cb039ef3SIlya Maximets queue_id += opts->start_queue;
360cb039ef3SIlya Maximets }
361cb039ef3SIlya Maximets
362cb039ef3SIlya Maximets if (opts->has_mode) {
363cb039ef3SIlya Maximets /* Specific mode requested. */
364cb039ef3SIlya Maximets cfg.xdp_flags |= (opts->mode == AFXDP_MODE_NATIVE)
365cb039ef3SIlya Maximets ? XDP_FLAGS_DRV_MODE : XDP_FLAGS_SKB_MODE;
366cb039ef3SIlya Maximets if (xsk_socket__create(&s->xsk, s->ifname, queue_id,
367cb039ef3SIlya Maximets s->umem, &s->rx, &s->tx, &cfg)) {
368cb039ef3SIlya Maximets error = errno;
369cb039ef3SIlya Maximets }
370cb039ef3SIlya Maximets } else {
371cb039ef3SIlya Maximets /* No mode requested, try native first. */
372cb039ef3SIlya Maximets cfg.xdp_flags |= XDP_FLAGS_DRV_MODE;
373cb039ef3SIlya Maximets
374cb039ef3SIlya Maximets if (xsk_socket__create(&s->xsk, s->ifname, queue_id,
375cb039ef3SIlya Maximets s->umem, &s->rx, &s->tx, &cfg)) {
376cb039ef3SIlya Maximets /* Can't use native mode, try skb. */
377cb039ef3SIlya Maximets cfg.xdp_flags &= ~XDP_FLAGS_DRV_MODE;
378cb039ef3SIlya Maximets cfg.xdp_flags |= XDP_FLAGS_SKB_MODE;
379cb039ef3SIlya Maximets
380cb039ef3SIlya Maximets if (xsk_socket__create(&s->xsk, s->ifname, queue_id,
381cb039ef3SIlya Maximets s->umem, &s->rx, &s->tx, &cfg)) {
382cb039ef3SIlya Maximets error = errno;
383cb039ef3SIlya Maximets }
384cb039ef3SIlya Maximets }
385cb039ef3SIlya Maximets }
386cb039ef3SIlya Maximets
387cb039ef3SIlya Maximets if (error) {
388cb039ef3SIlya Maximets error_setg_errno(errp, error,
389cb039ef3SIlya Maximets "failed to create AF_XDP socket for %s queue_id: %d",
390cb039ef3SIlya Maximets s->ifname, queue_id);
391cb039ef3SIlya Maximets return -1;
392cb039ef3SIlya Maximets }
393cb039ef3SIlya Maximets
394cb039ef3SIlya Maximets s->xdp_flags = cfg.xdp_flags;
395cb039ef3SIlya Maximets
396cb039ef3SIlya Maximets return 0;
397cb039ef3SIlya Maximets }
398cb039ef3SIlya Maximets
399cb039ef3SIlya Maximets /* NetClientInfo methods. */
400cb039ef3SIlya Maximets static NetClientInfo net_af_xdp_info = {
401cb039ef3SIlya Maximets .type = NET_CLIENT_DRIVER_AF_XDP,
402cb039ef3SIlya Maximets .size = sizeof(AFXDPState),
403cb039ef3SIlya Maximets .receive = af_xdp_receive,
404cb039ef3SIlya Maximets .poll = af_xdp_poll,
405cb039ef3SIlya Maximets .cleanup = af_xdp_cleanup,
406cb039ef3SIlya Maximets };
407cb039ef3SIlya Maximets
parse_socket_fds(const char * sock_fds_str,int64_t n_expected,Error ** errp)408cb039ef3SIlya Maximets static int *parse_socket_fds(const char *sock_fds_str,
409cb039ef3SIlya Maximets int64_t n_expected, Error **errp)
410cb039ef3SIlya Maximets {
411cb039ef3SIlya Maximets gchar **substrings = g_strsplit(sock_fds_str, ":", -1);
412cb039ef3SIlya Maximets int64_t i, n_sock_fds = g_strv_length(substrings);
413cb039ef3SIlya Maximets int *sock_fds = NULL;
414cb039ef3SIlya Maximets
415cb039ef3SIlya Maximets if (n_sock_fds != n_expected) {
416cb039ef3SIlya Maximets error_setg(errp, "expected %"PRIi64" socket fds, got %"PRIi64,
417cb039ef3SIlya Maximets n_expected, n_sock_fds);
418cb039ef3SIlya Maximets goto exit;
419cb039ef3SIlya Maximets }
420cb039ef3SIlya Maximets
421cb039ef3SIlya Maximets sock_fds = g_new(int, n_sock_fds);
422cb039ef3SIlya Maximets
423cb039ef3SIlya Maximets for (i = 0; i < n_sock_fds; i++) {
424cb039ef3SIlya Maximets sock_fds[i] = monitor_fd_param(monitor_cur(), substrings[i], errp);
425cb039ef3SIlya Maximets if (sock_fds[i] < 0) {
426cb039ef3SIlya Maximets g_free(sock_fds);
427cb039ef3SIlya Maximets sock_fds = NULL;
428cb039ef3SIlya Maximets goto exit;
429cb039ef3SIlya Maximets }
430cb039ef3SIlya Maximets }
431cb039ef3SIlya Maximets
432cb039ef3SIlya Maximets exit:
433cb039ef3SIlya Maximets g_strfreev(substrings);
434cb039ef3SIlya Maximets return sock_fds;
435cb039ef3SIlya Maximets }
436cb039ef3SIlya Maximets
437cb039ef3SIlya Maximets /*
438cb039ef3SIlya Maximets * The exported init function.
439cb039ef3SIlya Maximets *
440cb039ef3SIlya Maximets * ... -netdev af-xdp,ifname="..."
441cb039ef3SIlya Maximets */
net_init_af_xdp(const Netdev * netdev,const char * name,NetClientState * peer,Error ** errp)442cb039ef3SIlya Maximets int net_init_af_xdp(const Netdev *netdev,
443cb039ef3SIlya Maximets const char *name, NetClientState *peer, Error **errp)
444cb039ef3SIlya Maximets {
445cb039ef3SIlya Maximets const NetdevAFXDPOptions *opts = &netdev->u.af_xdp;
446cb039ef3SIlya Maximets NetClientState *nc, *nc0 = NULL;
447cb039ef3SIlya Maximets unsigned int ifindex;
448cb039ef3SIlya Maximets uint32_t prog_id = 0;
449*bed150beSPeter Maydell g_autofree int *sock_fds = NULL;
450cb039ef3SIlya Maximets int64_t i, queues;
451cb039ef3SIlya Maximets Error *err = NULL;
452cb039ef3SIlya Maximets AFXDPState *s;
453cb039ef3SIlya Maximets
454cb039ef3SIlya Maximets ifindex = if_nametoindex(opts->ifname);
455cb039ef3SIlya Maximets if (!ifindex) {
456cb039ef3SIlya Maximets error_setg_errno(errp, errno, "failed to get ifindex for '%s'",
457cb039ef3SIlya Maximets opts->ifname);
458cb039ef3SIlya Maximets return -1;
459cb039ef3SIlya Maximets }
460cb039ef3SIlya Maximets
461cb039ef3SIlya Maximets queues = opts->has_queues ? opts->queues : 1;
462cb039ef3SIlya Maximets if (queues < 1) {
463cb039ef3SIlya Maximets error_setg(errp, "invalid number of queues (%" PRIi64 ") for '%s'",
464cb039ef3SIlya Maximets queues, opts->ifname);
465cb039ef3SIlya Maximets return -1;
466cb039ef3SIlya Maximets }
467cb039ef3SIlya Maximets
468cb039ef3SIlya Maximets if ((opts->has_inhibit && opts->inhibit) != !!opts->sock_fds) {
469cb039ef3SIlya Maximets error_setg(errp, "'inhibit=on' requires 'sock-fds' and vice versa");
470cb039ef3SIlya Maximets return -1;
471cb039ef3SIlya Maximets }
472cb039ef3SIlya Maximets
473cb039ef3SIlya Maximets if (opts->sock_fds) {
474cb039ef3SIlya Maximets sock_fds = parse_socket_fds(opts->sock_fds, queues, errp);
475cb039ef3SIlya Maximets if (!sock_fds) {
476cb039ef3SIlya Maximets return -1;
477cb039ef3SIlya Maximets }
478cb039ef3SIlya Maximets }
479cb039ef3SIlya Maximets
480cb039ef3SIlya Maximets for (i = 0; i < queues; i++) {
481cb039ef3SIlya Maximets nc = qemu_new_net_client(&net_af_xdp_info, peer, "af-xdp", name);
482cb039ef3SIlya Maximets qemu_set_info_str(nc, "af-xdp%"PRIi64" to %s", i, opts->ifname);
483cb039ef3SIlya Maximets nc->queue_index = i;
484cb039ef3SIlya Maximets
485cb039ef3SIlya Maximets if (!nc0) {
486cb039ef3SIlya Maximets nc0 = nc;
487cb039ef3SIlya Maximets }
488cb039ef3SIlya Maximets
489cb039ef3SIlya Maximets s = DO_UPCAST(AFXDPState, nc, nc);
490cb039ef3SIlya Maximets
491cb039ef3SIlya Maximets pstrcpy(s->ifname, sizeof(s->ifname), opts->ifname);
492cb039ef3SIlya Maximets s->ifindex = ifindex;
493cb039ef3SIlya Maximets s->n_queues = queues;
494cb039ef3SIlya Maximets
495cb039ef3SIlya Maximets if (af_xdp_umem_create(s, sock_fds ? sock_fds[i] : -1, errp)
496cb039ef3SIlya Maximets || af_xdp_socket_create(s, opts, errp)) {
497cb039ef3SIlya Maximets /* Make sure the XDP program will be removed. */
498cb039ef3SIlya Maximets s->n_queues = i;
499cb039ef3SIlya Maximets error_propagate(errp, err);
500cb039ef3SIlya Maximets goto err;
501cb039ef3SIlya Maximets }
502cb039ef3SIlya Maximets }
503cb039ef3SIlya Maximets
504cb039ef3SIlya Maximets if (nc0) {
505cb039ef3SIlya Maximets s = DO_UPCAST(AFXDPState, nc, nc0);
506cb039ef3SIlya Maximets if (bpf_xdp_query_id(s->ifindex, s->xdp_flags, &prog_id) || !prog_id) {
507cb039ef3SIlya Maximets error_setg_errno(errp, errno,
508cb039ef3SIlya Maximets "no XDP program loaded on '%s', ifindex: %d",
509cb039ef3SIlya Maximets s->ifname, s->ifindex);
510cb039ef3SIlya Maximets goto err;
511cb039ef3SIlya Maximets }
512cb039ef3SIlya Maximets }
513cb039ef3SIlya Maximets
514cb039ef3SIlya Maximets af_xdp_read_poll(s, true); /* Initially only poll for reads. */
515cb039ef3SIlya Maximets
516cb039ef3SIlya Maximets return 0;
517cb039ef3SIlya Maximets
518cb039ef3SIlya Maximets err:
519cb039ef3SIlya Maximets if (nc0) {
520cb039ef3SIlya Maximets qemu_del_net_client(nc0);
521cb039ef3SIlya Maximets }
522cb039ef3SIlya Maximets
523cb039ef3SIlya Maximets return -1;
524cb039ef3SIlya Maximets }
525