xref: /freebsd/tools/tools/netmap/bridge.c (revision 9768746b)
1 /*
2  * (C) 2011-2014 Luigi Rizzo, Matteo Landi
3  *
4  * BSD license
5  *
6  * A netmap application to bridge two network interfaces,
7  * or one interface and the host stack.
8  *
9  * $FreeBSD$
10  */
11 
12 #include <libnetmap.h>
13 #include <signal.h>
14 #include <stdio.h>
15 #include <sys/poll.h>
16 #include <sys/ioctl.h>
17 #include <stdlib.h>
18 #include <unistd.h>
19 
/* On Windows the main loop busy-waits with sync ioctls instead of poll(). */
#if defined(_WIN32)
#define BUSYWAIT
#endif

/* Verbosity level; every -v on the command line increments it. */
static int verbose = 0;

/*
 * Set by the SIGINT handler to ask the main loop to terminate.
 * It is written from a signal handler, so it has to be a
 * volatile sig_atomic_t for that access to be well defined.
 */
static volatile sig_atomic_t do_abort = 0;
static int zerocopy = 1; /* enable zerocopy if possible */

/*
 * SIGINT handler: flag the main loop to stop and restore the default
 * disposition, so that a second SIGINT is no longer caught by us.
 */
static void
sigint_h(int sig)
{
	(void)sig;	/* UNUSED */
	do_abort = 1;
	signal(SIGINT, SIG_DFL);
}
36 
37 
38 /*
39  * How many slots do we (user application) have on this
40  * set of queues ?
41  */
42 static int
43 rx_slots_avail(struct nmport_d *d)
44 {
45 	u_int i, tot = 0;
46 
47 	for (i = d->first_rx_ring; i <= d->last_rx_ring; i++) {
48 		tot += nm_ring_space(NETMAP_RXRING(d->nifp, i));
49 	}
50 
51 	return tot;
52 }
53 
54 static int
55 tx_slots_avail(struct nmport_d *d)
56 {
57 	u_int i, tot = 0;
58 
59 	for (i = d->first_tx_ring; i <= d->last_tx_ring; i++) {
60 		tot += nm_ring_space(NETMAP_TXRING(d->nifp, i));
61 	}
62 
63 	return tot;
64 }
65 
66 /*
67  * Move up to 'limit' pkts from rxring to txring, swapping buffers
68  * if zerocopy is possible. Otherwise fall back on packet copying.
69  */
70 static int
71 rings_move(struct netmap_ring *rxring, struct netmap_ring *txring,
72 	      u_int limit, const char *msg)
73 {
74 	u_int j, k, m = 0;
75 
76 	/* print a warning if any of the ring flags is set (e.g. NM_REINIT) */
77 	if (rxring->flags || txring->flags)
78 		D("%s rxflags %x txflags %x",
79 		    msg, rxring->flags, txring->flags);
80 	j = rxring->head; /* RX */
81 	k = txring->head; /* TX */
82 	m = nm_ring_space(rxring);
83 	if (m < limit)
84 		limit = m;
85 	m = nm_ring_space(txring);
86 	if (m < limit)
87 		limit = m;
88 	m = limit;
89 	while (limit-- > 0) {
90 		struct netmap_slot *rs = &rxring->slot[j];
91 		struct netmap_slot *ts = &txring->slot[k];
92 
93 		if (ts->buf_idx < 2 || rs->buf_idx < 2) {
94 			RD(2, "wrong index rxr[%d] = %d  -> txr[%d] = %d",
95 			    j, rs->buf_idx, k, ts->buf_idx);
96 			sleep(2);
97 		}
98 		/* Copy the packet length. */
99 		if (rs->len > rxring->nr_buf_size) {
100 			RD(2,  "%s: invalid len %u, rxr[%d] -> txr[%d]",
101 			    msg, rs->len, j, k);
102 			rs->len = 0;
103 		} else if (verbose > 1) {
104 			D("%s: fwd len %u, rx[%d] -> tx[%d]",
105 			    msg, rs->len, j, k);
106 		}
107 		ts->len = rs->len;
108 		if (zerocopy) {
109 			uint32_t pkt = ts->buf_idx;
110 			ts->buf_idx = rs->buf_idx;
111 			rs->buf_idx = pkt;
112 			/* report the buffer change. */
113 			ts->flags |= NS_BUF_CHANGED;
114 			rs->flags |= NS_BUF_CHANGED;
115 		} else {
116 			char *rxbuf = NETMAP_BUF(rxring, rs->buf_idx);
117 			char *txbuf = NETMAP_BUF(txring, ts->buf_idx);
118 			nm_pkt_copy(rxbuf, txbuf, ts->len);
119 		}
120 		/*
121 		 * Copy the NS_MOREFRAG from rs to ts, leaving any
122 		 * other flags unchanged.
123 		 */
124 		ts->flags = (ts->flags & ~NS_MOREFRAG) | (rs->flags & NS_MOREFRAG);
125 		j = nm_ring_next(rxring, j);
126 		k = nm_ring_next(txring, k);
127 	}
128 	rxring->head = rxring->cur = j;
129 	txring->head = txring->cur = k;
130 	if (verbose && m > 0)
131 		D("%s fwd %d packets: rxring %u --> txring %u",
132 		    msg, m, rxring->ringid, txring->ringid);
133 
134 	return (m);
135 }
136 
137 /* Move packets from source port to destination port. */
138 static int
139 ports_move(struct nmport_d *src, struct nmport_d *dst, u_int limit,
140 	const char *msg)
141 {
142 	struct netmap_ring *txring, *rxring;
143 	u_int m = 0, si = src->first_rx_ring, di = dst->first_tx_ring;
144 
145 	while (si <= src->last_rx_ring && di <= dst->last_tx_ring) {
146 		rxring = NETMAP_RXRING(src->nifp, si);
147 		txring = NETMAP_TXRING(dst->nifp, di);
148 		if (nm_ring_empty(rxring)) {
149 			si++;
150 			continue;
151 		}
152 		if (nm_ring_empty(txring)) {
153 			di++;
154 			continue;
155 		}
156 		m += rings_move(rxring, txring, limit, msg);
157 	}
158 
159 	return (m);
160 }
161 
162 
/*
 * Print the command line synopsis on stderr and terminate the
 * program with exit status 1. This function does not return.
 */
static void
usage(void)
{
	fprintf(stderr,
		"netmap bridge program: forward packets between two "
			"netmap ports\n"
		"    usage(1): bridge [-v] [-c] [-i ifa] [-i ifb] [-b burst] "
			"[-w wait_time] [-L]\n"
		"    usage(2): bridge [-v] [-c] [-w wait_time] [-L] "
			"[ifa [ifb [burst]]]\n"
		"\n"
		"    ifa and ifb are specified using the nmport_open() syntax.\n"
		"    When ifb is missing (or is equal to ifa), bridge will\n"
		"    forward between ifa and the host stack if -L\n"
		"    is not specified, otherwise loopback traffic on ifa.\n"
		"    -c disables zerocopy, -h prints this help.\n"
		"\n"
		"    example: bridge -w 10 -i netmap:eth3 -i netmap:eth1\n"
		"\n"
		"    If ifa and ifb are two interfaces, they must be in\n"
		"    promiscuous mode. Otherwise, if bridging with the \n"
		"    host stack, the interface must have the offloads \n"
		"    disabled.\n"
		);
	exit(1);
}
188 
189 /*
190  * bridge [-v] if1 [if2]
191  *
192  * If only one name, or the two interfaces are the same,
193  * bridges userland and the adapter. Otherwise bridge
194  * two intefaces.
195  */
196 int
197 main(int argc, char **argv)
198 {
199 	char msg_a2b[256], msg_b2a[256];
200 	struct pollfd pollfd[2];
201 	u_int burst = 1024, wait_link = 4;
202 	struct nmport_d *pa = NULL, *pb = NULL;
203 	char *ifa = NULL, *ifb = NULL;
204 	char ifabuf[64] = { 0 };
205 	int pa_sw_rings, pb_sw_rings;
206 	int loopback = 0;
207 	int ch;
208 
209 	while ((ch = getopt(argc, argv, "hb:ci:vw:L")) != -1) {
210 		switch (ch) {
211 		default:
212 			D("bad option %c %s", ch, optarg);
213 			/* fallthrough */
214 		case 'h':
215 			usage();
216 			break;
217 		case 'b':	/* burst */
218 			burst = atoi(optarg);
219 			break;
220 		case 'i':	/* interface */
221 			if (ifa == NULL)
222 				ifa = optarg;
223 			else if (ifb == NULL)
224 				ifb = optarg;
225 			else
226 				D("%s ignored, already have 2 interfaces",
227 					optarg);
228 			break;
229 		case 'c':
230 			zerocopy = 0; /* do not zerocopy */
231 			break;
232 		case 'v':
233 			verbose++;
234 			break;
235 		case 'w':
236 			wait_link = atoi(optarg);
237 			break;
238 		case 'L':
239 			loopback = 1;
240 			break;
241 		}
242 
243 	}
244 
245 	argc -= optind;
246 	argv += optind;
247 
248 	if (argc > 0)
249 		ifa = argv[0];
250 	if (argc > 1)
251 		ifb = argv[1];
252 	if (argc > 2)
253 		burst = atoi(argv[2]);
254 	if (!ifb)
255 		ifb = ifa;
256 	if (!ifa) {
257 		D("missing interface");
258 		usage();
259 	}
260 	if (burst < 1 || burst > 8192) {
261 		D("invalid burst %d, set to 1024", burst);
262 		burst = 1024;
263 	}
264 	if (wait_link > 100) {
265 		D("invalid wait_link %d, set to 4", wait_link);
266 		wait_link = 4;
267 	}
268 	if (!strcmp(ifa, ifb)) {
269 		if (!loopback) {
270 			D("same interface, endpoint 0 goes to host");
271 			snprintf(ifabuf, sizeof(ifabuf) - 1, "%s^", ifa);
272 			ifa = ifabuf;
273 		} else {
274 			D("same interface, loopbacking traffic");
275 		}
276 	} else {
277 		/* two different interfaces. Take all rings on if1 */
278 	}
279 	pa = nmport_open(ifa);
280 	if (pa == NULL) {
281 		D("cannot open %s", ifa);
282 		return (1);
283 	}
284 	/* try to reuse the mmap() of the first interface, if possible */
285 	pb = nmport_open(ifb);
286 	if (pb == NULL) {
287 		D("cannot open %s", ifb);
288 		nmport_close(pa);
289 		return (1);
290 	}
291 	zerocopy = zerocopy && (pa->mem == pb->mem);
292 	D("------- zerocopy %ssupported", zerocopy ? "" : "NOT ");
293 
294 	/* setup poll(2) array */
295 	memset(pollfd, 0, sizeof(pollfd));
296 	pollfd[0].fd = pa->fd;
297 	pollfd[1].fd = pb->fd;
298 
299 	D("Wait %d secs for link to come up...", wait_link);
300 	sleep(wait_link);
301 	D("Ready to go, %s 0x%x/%d <-> %s 0x%x/%d.",
302 		pa->hdr.nr_name, pa->first_rx_ring, pa->reg.nr_rx_rings,
303 		pb->hdr.nr_name, pb->first_rx_ring, pb->reg.nr_rx_rings);
304 
305 	pa_sw_rings = (pa->reg.nr_mode == NR_REG_SW ||
306 	    pa->reg.nr_mode == NR_REG_ONE_SW);
307 	pb_sw_rings = (pb->reg.nr_mode == NR_REG_SW ||
308 	    pb->reg.nr_mode == NR_REG_ONE_SW);
309 
310 	snprintf(msg_a2b, sizeof(msg_a2b), "%s:%s --> %s:%s",
311 			pa->hdr.nr_name, pa_sw_rings ? "host" : "nic",
312 			pb->hdr.nr_name, pb_sw_rings ? "host" : "nic");
313 
314 	snprintf(msg_b2a, sizeof(msg_b2a), "%s:%s --> %s:%s",
315 			pb->hdr.nr_name, pb_sw_rings ? "host" : "nic",
316 			pa->hdr.nr_name, pa_sw_rings ? "host" : "nic");
317 
318 	/* main loop */
319 	signal(SIGINT, sigint_h);
320 	while (!do_abort) {
321 		int n0, n1, ret;
322 		pollfd[0].events = pollfd[1].events = 0;
323 		pollfd[0].revents = pollfd[1].revents = 0;
324 		n0 = rx_slots_avail(pa);
325 		n1 = rx_slots_avail(pb);
326 #ifdef BUSYWAIT
327 		if (n0) {
328 			pollfd[1].revents = POLLOUT;
329 		} else {
330 			ioctl(pollfd[0].fd, NIOCRXSYNC, NULL);
331 		}
332 		if (n1) {
333 			pollfd[0].revents = POLLOUT;
334 		} else {
335 			ioctl(pollfd[1].fd, NIOCRXSYNC, NULL);
336 		}
337 		ret = 1;
338 #else  /* !defined(BUSYWAIT) */
339 		if (n0)
340 			pollfd[1].events |= POLLOUT;
341 		else
342 			pollfd[0].events |= POLLIN;
343 		if (n1)
344 			pollfd[0].events |= POLLOUT;
345 		else
346 			pollfd[1].events |= POLLIN;
347 
348 		/* poll() also cause kernel to txsync/rxsync the NICs */
349 		ret = poll(pollfd, 2, 2500);
350 #endif /* !defined(BUSYWAIT) */
351 		if (ret <= 0 || verbose)
352 		    D("poll %s [0] ev %x %x rx %d@%d tx %d,"
353 			     " [1] ev %x %x rx %d@%d tx %d",
354 				ret <= 0 ? "timeout" : "ok",
355 				pollfd[0].events,
356 				pollfd[0].revents,
357 				rx_slots_avail(pa),
358 				NETMAP_RXRING(pa->nifp, pa->cur_rx_ring)->head,
359 				tx_slots_avail(pa),
360 				pollfd[1].events,
361 				pollfd[1].revents,
362 				rx_slots_avail(pb),
363 				NETMAP_RXRING(pb->nifp, pb->cur_rx_ring)->head,
364 				tx_slots_avail(pb)
365 			);
366 		if (ret < 0)
367 			continue;
368 		if (pollfd[0].revents & POLLERR) {
369 			struct netmap_ring *rx = NETMAP_RXRING(pa->nifp, pa->cur_rx_ring);
370 			D("error on fd0, rx [%d,%d,%d)",
371 			    rx->head, rx->cur, rx->tail);
372 		}
373 		if (pollfd[1].revents & POLLERR) {
374 			struct netmap_ring *rx = NETMAP_RXRING(pb->nifp, pb->cur_rx_ring);
375 			D("error on fd1, rx [%d,%d,%d)",
376 			    rx->head, rx->cur, rx->tail);
377 		}
378 		if (pollfd[0].revents & POLLOUT) {
379 			ports_move(pb, pa, burst, msg_b2a);
380 #ifdef BUSYWAIT
381 			ioctl(pollfd[0].fd, NIOCTXSYNC, NULL);
382 #endif
383 		}
384 
385 		if (pollfd[1].revents & POLLOUT) {
386 			ports_move(pa, pb, burst, msg_a2b);
387 #ifdef BUSYWAIT
388 			ioctl(pollfd[1].fd, NIOCTXSYNC, NULL);
389 #endif
390 		}
391 
392 		/*
393 		 * We don't need ioctl(NIOCTXSYNC) on the two file descriptors.
394 		 * here. The kernel will txsync on next poll().
395 		 */
396 	}
397 	nmport_close(pb);
398 	nmport_close(pa);
399 
400 	return (0);
401 }
402