1 /* $NetBSD: if_laggproto.c,v 1.6 2022/03/31 07:59:05 yamaguchi Exp $ */
2
3 /*-
4 * SPDX-License-Identifier: BSD-2-Clause-NetBSD
5 *
6 * Copyright (c)2021 Internet Initiative Japan, Inc.
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31 #include <sys/cdefs.h>
32 __KERNEL_RCSID(0, "$NetBSD: if_laggproto.c,v 1.6 2022/03/31 07:59:05 yamaguchi Exp $");
33
34 #include <sys/param.h>
35 #include <sys/types.h>
36
37 #include <sys/evcnt.h>
38 #include <sys/kmem.h>
39 #include <sys/mbuf.h>
40 #include <sys/mutex.h>
41 #include <sys/pslist.h>
42 #include <sys/syslog.h>
43 #include <sys/workqueue.h>
44
45 #include <net/if.h>
46 #include <net/if_ether.h>
47 #include <net/if_media.h>
48
49 #include <net/lagg/if_lagg.h>
50 #include <net/lagg/if_laggproto.h>
51
/*
 * Per-protocol-instance state shared by all aggregation protocols.
 * Allocated by lagg_proto_alloc() and released by lagg_proto_free().
 */
struct lagg_proto_softc {
	struct lagg_softc	*psc_softc;	/* backpointer to the parent lagg */
	struct pslist_head	 psc_ports;	/* protocol ports; pserialized reads */
	kmutex_t		 psc_lock;	/* adaptive mutex (see locking notes) */
	pserialize_t		 psc_psz;	/* pserialize for psc_ports readers */
	size_t			 psc_ctxsiz;	/* size of psc_ctx, 0 if none */
	void			*psc_ctx;	/* protocol private context */
	size_t			 psc_nactports;	/* count of active (link-up) ports */
};
61
62 /*
63 * Locking notes:
64 * - Items of struct lagg_proto_softc is protected by
65 * psc_lock (an adaptive mutex)
66 * - psc_ports is protected by pserialize (psc_psz)
67 * - Updates of psc_ports is serialized by sc_lock in
68 * struct lagg_softc
69 * - Other locking notes are described in if_laggproto.h
70 */
71
/* Private context for LAGG_PROTO_FAILOVER. */
struct lagg_failover {
	bool fo_rx_all;		/* true: receive on any port, not just the active one */
};
75
/* A snapshot of distributable ports used for load-balance hashing. */
struct lagg_portmap {
	struct lagg_port	*pm_ports[LAGG_MAX_PORTS];
	size_t			 pm_nports;	/* valid entries in pm_ports */
};
80
/*
 * Double-buffered port maps: one map is active for lock-free readers
 * while the other is rebuilt by writers, then switched in.
 */
struct lagg_portmaps {
	struct lagg_portmap	 maps_pmap[2];
	size_t			 maps_activepmap;	/* index (0/1) of the active map */
};
85
/* Private context for LAGG_PROTO_LOADBALANCE. */
struct lagg_lb {
	struct lagg_portmaps	lb_pmaps;
};
89
/* Per-port protocol state, linked on psc_ports via lpp_entry. */
struct lagg_proto_port {
	struct pslist_entry	 lpp_entry;
	struct lagg_port	*lpp_laggport;	/* the underlying lagg port */
	bool			 lpp_active;	/* link up; read lock-free by lagg_link_active() */
};
95
/* Helpers for the protocol softc mutex (psc_lock). */
#define LAGG_PROTO_LOCK(_psc)	mutex_enter(&(_psc)->psc_lock)
#define LAGG_PROTO_UNLOCK(_psc)	mutex_exit(&(_psc)->psc_lock)
#define LAGG_PROTO_LOCKED(_psc)	mutex_owned(&(_psc)->psc_lock)
99
100 static struct lagg_proto_softc *
101 lagg_proto_alloc(lagg_proto, struct lagg_softc *);
102 static void lagg_proto_free(struct lagg_proto_softc *);
103 static void lagg_proto_insert_port(struct lagg_proto_softc *,
104 struct lagg_proto_port *);
105 static void lagg_proto_remove_port(struct lagg_proto_softc *,
106 struct lagg_proto_port *);
107 static struct lagg_port *
108 lagg_link_active(struct lagg_proto_softc *psc,
109 struct lagg_proto_port *, struct psref *);
110
111 static inline struct lagg_portmap *
lagg_portmap_active(struct lagg_portmaps * maps)112 lagg_portmap_active(struct lagg_portmaps *maps)
113 {
114 size_t i;
115
116 i = atomic_load_consume(&maps->maps_activepmap);
117
118 return &maps->maps_pmap[i];
119 }
120
121 static inline struct lagg_portmap *
lagg_portmap_next(struct lagg_portmaps * maps)122 lagg_portmap_next(struct lagg_portmaps *maps)
123 {
124 size_t i;
125
126 i = atomic_load_consume(&maps->maps_activepmap);
127 i &= 0x1;
128 i ^= 0x1;
129
130 return &maps->maps_pmap[i];
131 }
132
133 static inline void
lagg_portmap_switch(struct lagg_portmaps * maps)134 lagg_portmap_switch(struct lagg_portmaps *maps)
135 {
136 size_t i;
137
138 i = atomic_load_consume(&maps->maps_activepmap);
139 i &= 0x1;
140 i ^= 0x1;
141
142 atomic_store_release(&maps->maps_activepmap, i);
143 }
144
145 static struct lagg_proto_softc *
lagg_proto_alloc(lagg_proto pr,struct lagg_softc * sc)146 lagg_proto_alloc(lagg_proto pr, struct lagg_softc *sc)
147 {
148 struct lagg_proto_softc *psc;
149 size_t ctxsiz;
150
151 switch (pr) {
152 case LAGG_PROTO_FAILOVER:
153 ctxsiz = sizeof(struct lagg_failover);
154 break;
155 case LAGG_PROTO_LOADBALANCE:
156 ctxsiz = sizeof(struct lagg_lb);
157 break;
158 default:
159 ctxsiz = 0;
160 }
161
162 psc = kmem_zalloc(sizeof(*psc), KM_NOSLEEP);
163 if (psc == NULL)
164 return NULL;
165
166 if (ctxsiz > 0) {
167 psc->psc_ctx = kmem_zalloc(ctxsiz, KM_NOSLEEP);
168 if (psc->psc_ctx == NULL) {
169 kmem_free(psc, sizeof(*psc));
170 return NULL;
171 }
172
173 psc->psc_ctxsiz = ctxsiz;
174 }
175
176 PSLIST_INIT(&psc->psc_ports);
177 psc->psc_psz = pserialize_create();
178 mutex_init(&psc->psc_lock, MUTEX_DEFAULT, IPL_SOFTNET);
179 psc->psc_softc = sc;
180
181 return psc;
182 }
183
184 static void
lagg_proto_free(struct lagg_proto_softc * psc)185 lagg_proto_free(struct lagg_proto_softc *psc)
186 {
187
188 pserialize_destroy(psc->psc_psz);
189 mutex_destroy(&psc->psc_lock);
190
191 if (psc->psc_ctxsiz > 0)
192 kmem_free(psc->psc_ctx, psc->psc_ctxsiz);
193
194 kmem_free(psc, sizeof(*psc));
195 }
196
197 static struct lagg_port *
lagg_link_active(struct lagg_proto_softc * psc,struct lagg_proto_port * pport,struct psref * psref)198 lagg_link_active(struct lagg_proto_softc *psc,
199 struct lagg_proto_port *pport, struct psref *psref)
200 {
201 struct lagg_port *lp;
202 int s;
203
204 lp = NULL;
205 s = pserialize_read_enter();
206
207 for (;pport != NULL;
208 pport = PSLIST_READER_NEXT(pport,
209 struct lagg_proto_port, lpp_entry)) {
210 if (atomic_load_relaxed(&pport->lpp_active)) {
211 lp = pport->lpp_laggport;
212 goto done;
213 }
214 }
215
216 PSLIST_READER_FOREACH(pport, &psc->psc_ports,
217 struct lagg_proto_port, lpp_entry) {
218 if (atomic_load_relaxed(&pport->lpp_active)) {
219 lp = pport->lpp_laggport;
220 break;
221 }
222 }
223 done:
224 if (lp != NULL)
225 lagg_port_getref(lp, psref);
226 pserialize_read_exit(s);
227
228 return lp;
229 }
230
231 int
lagg_common_allocport(struct lagg_proto_softc * psc,struct lagg_port * lp)232 lagg_common_allocport(struct lagg_proto_softc *psc, struct lagg_port *lp)
233 {
234 struct lagg_proto_port *pport;
235
236 KASSERT(LAGG_LOCKED(psc->psc_softc));
237
238 pport = kmem_zalloc(sizeof(*pport), KM_NOSLEEP);
239 if (pport == NULL)
240 return ENOMEM;
241
242 PSLIST_ENTRY_INIT(pport, lpp_entry);
243 pport->lpp_laggport = lp;
244 lp->lp_proto_ctx = (void *)pport;
245 return 0;
246 }
247
248 void
lagg_common_freeport(struct lagg_proto_softc * psc,struct lagg_port * lp)249 lagg_common_freeport(struct lagg_proto_softc *psc, struct lagg_port *lp)
250 {
251 struct lagg_proto_port *pport;
252
253 pport = lp->lp_proto_ctx;
254 lp->lp_proto_ctx = NULL;
255
256 kmem_free(pport, sizeof(*pport));
257 }
258
259 static void
lagg_proto_insert_port(struct lagg_proto_softc * psc,struct lagg_proto_port * pport)260 lagg_proto_insert_port(struct lagg_proto_softc *psc,
261 struct lagg_proto_port *pport)
262 {
263 struct lagg_proto_port *pport0;
264 struct lagg_port *lp, *lp0;
265 bool insert_after;
266
267 insert_after = false;
268 lp = pport->lpp_laggport;
269
270 LAGG_PROTO_LOCK(psc);
271 PSLIST_WRITER_FOREACH(pport0, &psc->psc_ports,
272 struct lagg_proto_port, lpp_entry) {
273 lp0 = pport0->lpp_laggport;
274 if (lp0->lp_prio > lp->lp_prio)
275 break;
276
277 if (PSLIST_WRITER_NEXT(pport0,
278 struct lagg_proto_port, lpp_entry) == NULL) {
279 insert_after = true;
280 break;
281 }
282 }
283
284 if (pport0 == NULL) {
285 PSLIST_WRITER_INSERT_HEAD(&psc->psc_ports, pport,
286 lpp_entry);
287 } else if (insert_after) {
288 PSLIST_WRITER_INSERT_AFTER(pport0, pport, lpp_entry);
289 } else {
290 PSLIST_WRITER_INSERT_BEFORE(pport0, pport, lpp_entry);
291 }
292 LAGG_PROTO_UNLOCK(psc);
293 }
294
295 static void
lagg_proto_remove_port(struct lagg_proto_softc * psc,struct lagg_proto_port * pport)296 lagg_proto_remove_port(struct lagg_proto_softc *psc,
297 struct lagg_proto_port *pport)
298 {
299
300 LAGG_PROTO_LOCK(psc);
301 PSLIST_WRITER_REMOVE(pport, lpp_entry);
302 pserialize_perform(psc->psc_psz);
303 LAGG_PROTO_UNLOCK(psc);
304 }
305
306 void
lagg_common_startport(struct lagg_proto_softc * psc,struct lagg_port * lp)307 lagg_common_startport(struct lagg_proto_softc *psc, struct lagg_port *lp)
308 {
309 struct lagg_proto_port *pport;
310
311 pport = lp->lp_proto_ctx;
312 lagg_proto_insert_port(psc, pport);
313
314 lagg_common_linkstate(psc, lp);
315 }
316
317 void
lagg_common_stopport(struct lagg_proto_softc * psc,struct lagg_port * lp)318 lagg_common_stopport(struct lagg_proto_softc *psc, struct lagg_port *lp)
319 {
320 struct lagg_proto_port *pport;
321 struct ifnet *ifp;
322
323 pport = lp->lp_proto_ctx;
324 lagg_proto_remove_port(psc, pport);
325
326 if (pport->lpp_active) {
327 KASSERT(psc->psc_nactports > 0);
328 psc->psc_nactports--;
329
330 if (psc->psc_nactports == 0) {
331 ifp = &psc->psc_softc->sc_if;
332 if_link_state_change(ifp, LINK_STATE_DOWN);
333 }
334
335 pport->lpp_active = false;
336 }
337 }
338
339 void
lagg_common_linkstate(struct lagg_proto_softc * psc,struct lagg_port * lp)340 lagg_common_linkstate(struct lagg_proto_softc *psc, struct lagg_port *lp)
341 {
342 struct lagg_proto_port *pport;
343 struct ifnet *ifp;
344 bool is_active;
345
346 pport = lp->lp_proto_ctx;
347 is_active = lagg_portactive(lp);
348
349 if (pport->lpp_active == is_active)
350 return;
351
352 ifp = &psc->psc_softc->sc_if;
353 if (is_active) {
354 psc->psc_nactports++;
355 if (psc->psc_nactports == 1)
356 if_link_state_change(ifp, LINK_STATE_UP);
357 } else {
358 KASSERT(psc->psc_nactports > 0);
359 psc->psc_nactports--;
360
361 if (psc->psc_nactports == 0)
362 if_link_state_change(ifp, LINK_STATE_DOWN);
363 }
364
365 atomic_store_relaxed(&pport->lpp_active, is_active);
366 }
367
368 void
lagg_common_detach(struct lagg_proto_softc * psc)369 lagg_common_detach(struct lagg_proto_softc *psc)
370 {
371
372 lagg_proto_free(psc);
373 }
374
375 int
lagg_none_attach(struct lagg_softc * sc,struct lagg_proto_softc ** pscp)376 lagg_none_attach(struct lagg_softc *sc, struct lagg_proto_softc **pscp)
377 {
378
379 *pscp = NULL;
380 return 0;
381 }
382
383 int
lagg_fail_attach(struct lagg_softc * sc,struct lagg_proto_softc ** xpsc)384 lagg_fail_attach(struct lagg_softc *sc, struct lagg_proto_softc **xpsc)
385 {
386 struct lagg_proto_softc *psc;
387 struct lagg_failover *fovr;
388
389 psc = lagg_proto_alloc(LAGG_PROTO_FAILOVER, sc);
390 if (psc == NULL)
391 return ENOMEM;
392
393 fovr = psc->psc_ctx;
394 fovr->fo_rx_all = true;
395
396 *xpsc = psc;
397 return 0;
398 }
399
400 int
lagg_fail_transmit(struct lagg_proto_softc * psc,struct mbuf * m)401 lagg_fail_transmit(struct lagg_proto_softc *psc, struct mbuf *m)
402 {
403 struct ifnet *ifp;
404 struct lagg_port *lp;
405 struct psref psref;
406
407 lp = lagg_link_active(psc, NULL, &psref);
408 if (lp == NULL) {
409 ifp = &psc->psc_softc->sc_if;
410 if_statinc(ifp, if_oerrors);
411 m_freem(m);
412 return ENOENT;
413 }
414
415 lagg_output(psc->psc_softc, lp, m);
416 lagg_port_putref(lp, &psref);
417 return 0;
418 }
419
420 struct mbuf *
lagg_fail_input(struct lagg_proto_softc * psc,struct lagg_port * lp,struct mbuf * m)421 lagg_fail_input(struct lagg_proto_softc *psc, struct lagg_port *lp,
422 struct mbuf *m)
423 {
424 struct lagg_failover *fovr;
425 struct lagg_port *lp0;
426 struct ifnet *ifp;
427 struct psref psref;
428
429 fovr = psc->psc_ctx;
430 if (atomic_load_relaxed(&fovr->fo_rx_all))
431 return m;
432
433 lp0 = lagg_link_active(psc, NULL, &psref);
434 if (lp0 == NULL) {
435 goto drop;
436 }
437
438 if (lp0 != lp) {
439 lagg_port_putref(lp0, &psref);
440 goto drop;
441 }
442
443 lagg_port_putref(lp0, &psref);
444
445 return m;
446 drop:
447 ifp = &psc->psc_softc->sc_if;
448 if_statinc(ifp, if_ierrors);
449 m_freem(m);
450 return NULL;
451 }
452
453 void
lagg_fail_portstat(struct lagg_proto_softc * psc,struct lagg_port * lp,struct laggreqport * resp)454 lagg_fail_portstat(struct lagg_proto_softc *psc, struct lagg_port *lp,
455 struct laggreqport *resp)
456 {
457 struct lagg_failover *fovr;
458 struct lagg_proto_port *pport;
459 struct lagg_port *lp0;
460 struct psref psref;
461
462 fovr = psc->psc_ctx;
463 pport = lp->lp_proto_ctx;
464
465 if (pport->lpp_active) {
466 lp0 = lagg_link_active(psc, NULL, &psref);
467 if (lp0 == lp) {
468 SET(resp->rp_flags,
469 (LAGG_PORT_ACTIVE |
470 LAGG_PORT_COLLECTING |
471 LAGG_PORT_DISTRIBUTING));
472 } else {
473 if (fovr->fo_rx_all) {
474 SET(resp->rp_flags,
475 LAGG_PORT_COLLECTING);
476 }
477 }
478
479 if (lp0 != NULL)
480 lagg_port_putref(lp0, &psref);
481 }
482 }
483
484 int
lagg_fail_ioctl(struct lagg_proto_softc * psc,struct laggreqproto * lreq)485 lagg_fail_ioctl(struct lagg_proto_softc *psc, struct laggreqproto *lreq)
486 {
487 struct lagg_failover *fovr;
488 struct laggreq_fail *rpfail;
489 int error;
490 bool set;
491
492 error = 0;
493 fovr = psc->psc_ctx;
494 rpfail = &lreq->rp_fail;
495
496 switch (rpfail->command) {
497 case LAGGIOC_FAILSETFLAGS:
498 case LAGGIOC_FAILCLRFLAGS:
499 set = (rpfail->command == LAGGIOC_FAILSETFLAGS) ?
500 true : false;
501
502 if (ISSET(rpfail->flags, LAGGREQFAIL_RXALL))
503 fovr->fo_rx_all = set;
504 break;
505 default:
506 error = ENOTTY;
507 break;
508 }
509
510 return error;
511 }
512
513 int
lagg_lb_attach(struct lagg_softc * sc,struct lagg_proto_softc ** xpsc)514 lagg_lb_attach(struct lagg_softc *sc, struct lagg_proto_softc **xpsc)
515 {
516 struct lagg_proto_softc *psc;
517 struct lagg_lb *lb;
518
519 psc = lagg_proto_alloc(LAGG_PROTO_LOADBALANCE, sc);
520 if (psc == NULL)
521 return ENOMEM;
522
523 lb = psc->psc_ctx;
524 lb->lb_pmaps.maps_activepmap = 0;
525
526 *xpsc = psc;
527 return 0;
528 }
529
530 void
lagg_lb_startport(struct lagg_proto_softc * psc,struct lagg_port * lp)531 lagg_lb_startport(struct lagg_proto_softc *psc, struct lagg_port *lp)
532 {
533 struct lagg_lb *lb;
534 struct lagg_portmap *pm_act, *pm_next;
535 size_t n;
536
537 lb = psc->psc_ctx;
538 lagg_common_startport(psc, lp);
539
540 LAGG_PROTO_LOCK(psc);
541 pm_act = lagg_portmap_active(&lb->lb_pmaps);
542 pm_next = lagg_portmap_next(&lb->lb_pmaps);
543
544 *pm_next = *pm_act;
545
546 n = pm_next->pm_nports;
547 pm_next->pm_ports[n] = lp;
548
549 n++;
550 pm_next->pm_nports = n;
551
552 lagg_portmap_switch(&lb->lb_pmaps);
553 pserialize_perform(psc->psc_psz);
554 LAGG_PROTO_UNLOCK(psc);
555 }
556
557 void
lagg_lb_stopport(struct lagg_proto_softc * psc,struct lagg_port * lp)558 lagg_lb_stopport(struct lagg_proto_softc *psc, struct lagg_port *lp)
559 {
560 struct lagg_lb *lb;
561 struct lagg_portmap *pm_act, *pm_next;
562 size_t i, n;
563
564 lb = psc->psc_ctx;
565
566 LAGG_PROTO_LOCK(psc);
567 pm_act = lagg_portmap_active(&lb->lb_pmaps);
568 pm_next = lagg_portmap_next(&lb->lb_pmaps);
569 n = 0;
570
571 for (i = 0; i < pm_act->pm_nports; i++) {
572 if (pm_act->pm_ports[i] == lp)
573 continue;
574
575 pm_next->pm_ports[n] = pm_act->pm_ports[i];
576 n++;
577 }
578
579 lagg_portmap_switch(&lb->lb_pmaps);
580 pserialize_perform(psc->psc_psz);
581 LAGG_PROTO_UNLOCK(psc);
582
583 lagg_common_stopport(psc, lp);
584 }
585
586 int
lagg_lb_transmit(struct lagg_proto_softc * psc,struct mbuf * m)587 lagg_lb_transmit(struct lagg_proto_softc *psc, struct mbuf *m)
588 {
589 struct lagg_lb *lb;
590 struct lagg_portmap *pm;
591 struct lagg_port *lp, *lp0;
592 struct ifnet *ifp;
593 struct psref psref;
594 uint32_t hash;
595 int s;
596
597 lb = psc->psc_ctx;
598 hash = lagg_hashmbuf(psc->psc_softc, m);
599
600 s = pserialize_read_enter();
601
602 pm = lagg_portmap_active(&lb->lb_pmaps);
603 hash %= pm->pm_nports;
604 lp0 = pm->pm_ports[hash];
605 lp = lagg_link_active(psc, lp0->lp_proto_ctx, &psref);
606
607 pserialize_read_exit(s);
608
609 if (__predict_false(lp == NULL)) {
610 ifp = &psc->psc_softc->sc_if;
611 if_statinc(ifp, if_oerrors);
612 m_freem(m);
613 return ENOENT;
614 }
615
616 lagg_output(psc->psc_softc, lp, m);
617 lagg_port_putref(lp, &psref);
618
619 return 0;
620 }
621
622 struct mbuf *
lagg_lb_input(struct lagg_proto_softc * psc __unused,struct lagg_port * lp __unused,struct mbuf * m)623 lagg_lb_input(struct lagg_proto_softc *psc __unused,
624 struct lagg_port *lp __unused, struct mbuf *m)
625 {
626
627 return m;
628 }
629
630 void
lagg_lb_portstat(struct lagg_proto_softc * psc,struct lagg_port * lp,struct laggreqport * resp)631 lagg_lb_portstat(struct lagg_proto_softc *psc, struct lagg_port *lp,
632 struct laggreqport *resp)
633 {
634 struct lagg_proto_port *pport;
635
636 pport = lp->lp_proto_ctx;
637
638 if (pport->lpp_active) {
639 SET(resp->rp_flags, LAGG_PORT_ACTIVE |
640 LAGG_PORT_COLLECTING | LAGG_PORT_DISTRIBUTING);
641 }
642 }
643