/*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2020 Alexander V. Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define RTDEBUG #include "rte_lpm6.h"
/*
 * NOTE(review): most #include directives on the line above carry no header
 * name in this copy of the file, and the whole preamble sits on a single
 * line.  It is left untouched here; restore the header names (one directive
 * per line) before building.
 */

#define	LPM6_MIN_TBL8	8		/* 2 pages of memory */
#define	LPM6_MAX_TBL8	(65536 * 16)	/* 256M */

struct fib_algo_calldata {
	void *lookup;
	void *arg;
};

/*
 * Per-instance state of the lookup module.  One instance is allocated by
 * build_table() and released by destroy_dd().
 */
struct dpdk_lpm6_data {
	struct rte_lpm6 *lpm6;		/* table from rte_lpm6_create() */
	uint64_t	routes_added;	/* successful adds during RIB dump */
	uint64_t	routes_failed;	/* failed adds during RIB dump */
	uint32_t	number_tbl8s;	/* tbl8 count the table was sized with */
	uint32_t	fibnum;		/* FIB number this instance serves */
	uint8_t		hit_tables;	/* set when an add returned -ENOSPC */
	struct fib_data	*fd;		/* framework handle (logging, nhops) */
};

/*
 * Looks up a link-local destination.  Link-local prefixes are not stored in
 * the LPM table (see add_route_cb()), so the request is forwarded to
 * fib6_radix_lookup_nh() using the FIB number kept in the table's external
 * header.
 */
static struct nhop_object *
lookup_ptr_ll(const struct rte_lpm6 *lpm6, const struct in6_addr *dst6,
    uint32_t scopeid)
{
	const struct rte_lpm6_external *rte_ext;

	rte_ext = (const struct rte_lpm6_external *)lpm6;

	return (fib6_radix_lookup_nh(rte_ext->fibnum, dst6, scopeid));
}

/*
 * Main datapath routing.
 *
 * algo_data is the struct rte_lpm6 pointer published by
 * check_dump_success().  Link-local destinations go through
 * lookup_ptr_ll(); everything else is resolved in the LPM table, falling
 * back to the default route when the table has no match.  Returns NULL
 * when neither a match nor a default route exists.
 */
static struct nhop_object *
lookup_ptr(void *algo_data, const struct flm_lookup_key key, uint32_t scopeid)
{
	const struct rte_lpm6 *lpm6;
	const struct rte_lpm6_external *rte_ext;
	const struct in6_addr *addr6;
	uint32_t nhidx = 0;
	int ret;

	lpm6 = (const struct rte_lpm6 *)algo_data;
	addr6 = key.addr6;
	rte_ext = (const struct rte_lpm6_external *)lpm6;

	if (IN6_IS_SCOPE_LINKLOCAL(addr6)) {
		/* LL */
		return (lookup_ptr_ll(lpm6, addr6, scopeid));
	}

	ret = rte_lpm6_lookup(lpm6, (const uint8_t *)addr6, &nhidx);
	if (ret == 0) {
		/* Success! */
		return (rte_ext->nh_idx[nhidx]);
	}

	/* Not found. Check default route */
	if (rte_ext->default_idx > 0)
		return (rte_ext->nh_idx[rte_ext->default_idx]);

	return (NULL);
}

/*
 * Returns the preference value of this module for a routing table with
 * rinfo->num_prefixes prefixes: 1 below 10 prefixes, then growing with the
 * prefix count up to 250 at 100000 prefixes and above.
 */
static uint8_t
rte6_get_pref(const struct rib_rtable_info *rinfo)
{

	if (rinfo->num_prefixes < 10)
		return (1);
	else if (rinfo->num_prefixes < 1000)
		return (rinfo->num_prefixes / 10);
	else if (rinfo->num_prefixes < 100000)
		return (100 + rinfo->num_prefixes / 667);
	else
		return (250);
}

/*
 * Applies a change of the default route (prefix length 0).  The default
 * route is not stored in the LPM table; only its nexthop index is kept in
 * the external header, where 0 means "no default route".
 *
 * Returns FLM_REBUILD when no nexthop index could be obtained.
 */
static enum flm_op_result
handle_default_change(struct dpdk_lpm6_data *dd, struct rib_cmd_info *rc)
{
	struct rte_lpm6_external *rte_ext;

	rte_ext = (struct rte_lpm6_external *)dd->lpm6;

	if (rc->rc_cmd != RTM_DELETE) {
		/* Reference new */
		uint32_t nhidx = fib_get_nhop_idx(dd->fd, rc->rc_nh_new);

		if (nhidx == 0)
			return (FLM_REBUILD);
		rte_ext->default_idx = nhidx;
	} else {
		/* No default route */
		rte_ext->default_idx = 0;
	}

	return (FLM_SUCCESS);
}

/*
 * Handles a change of a link-local prefix.  Nothing is stored for these
 * prefixes, so this is a no-op that always succeeds.
 */
static enum flm_op_result
handle_ll_change(struct dpdk_lpm6_data *dd, struct rib_cmd_info *rc,
    const struct in6_addr addr6, int plen, uint32_t scopeid)
{

	return (FLM_SUCCESS);
}

/*
 * Builds, inside the caller-supplied buffer, the LPM rule describing the
 * parent (less specific) route of addr6/plen as reported by
 * rt_get_inet6_parent().
 *
 * *pplen and *pnhop_idx receive the parent's prefix length and nexthop
 * index; both are 0 when there is no usable parent.  Returns the rule, or
 * NULL when no parent exists or the parent is the default route.
 */
static struct rte_lpm6_rule *
pack_parent_rule(struct dpdk_lpm6_data *dd, const struct in6_addr *addr6,
    int plen, int *pplen, uint32_t *pnhop_idx, char *buffer)
{
	struct rte_lpm6_rule *lsp_rule = NULL;
	struct rtentry *rt;

	*pnhop_idx = 0;
	*pplen = 0;

	rt = rt_get_inet6_parent(dd->fibnum, addr6, plen);
	/* plen = 0 means default route and it's out of scope */
	if (rt != NULL) {
		uint32_t nhop_idx, scopeid;
		struct in6_addr new_addr6;
		int parent_plen;

		rt_get_inet6_prefix_plen(rt, &new_addr6, &parent_plen,
		    &scopeid);
		if (parent_plen > 0) {
			/*
			 * NOTE(review): a zero index is not treated as a
			 * failure here, unlike the other fib_get_nhop_idx()
			 * callers in this file -- confirm this is intended.
			 */
			nhop_idx = fib_get_nhop_idx(dd->fd,
			    rt_get_raw_nhop(rt));
			lsp_rule = fill_rule6(buffer, (uint8_t *)&new_addr6,
			    parent_plen, nhop_idx);
			*pnhop_idx = nhop_idx;
			*pplen = parent_plen;
		}
	}

	return (lsp_rule);
}

/*
 * Applies a change of a non-link-local, non-default prefix to the LPM
 * table.
 *
 * Additions and updates install the new nexthop index with
 * rte_lpm6_add().  Deletions look up the parent route first and hand it to
 * rte_lpm6_delete() together with the prefix being removed.
 *
 * Returns FLM_REBUILD when no nexthop index is available or the table
 * reports -ENOSPC, FLM_ERROR on any other table error.
 */
static enum flm_op_result
handle_gu_change(struct dpdk_lpm6_data *dd, const struct rib_cmd_info *rc,
    const struct in6_addr *addr6, int plen)
{
	char abuf[INET6_ADDRSTRLEN];
	int ret;

	inet_ntop(AF_INET6, addr6, abuf, sizeof(abuf));

	/* So we get sin6, plen and nhidx */
	if (rc->rc_cmd != RTM_DELETE) {
		/*
		 * Addition or change. Save nhop in the internal table
		 * and get index.
		 */
		uint32_t nhidx = fib_get_nhop_idx(dd->fd, rc->rc_nh_new);

		if (nhidx == 0) {
			FIB_PRINTF(LOG_INFO, dd->fd,
			    "nhop limit reached, need rebuild");
			return (FLM_REBUILD);
		}

		ret = rte_lpm6_add(dd->lpm6, (const uint8_t *)addr6,
		    plen, nhidx, (rc->rc_cmd == RTM_ADD) ? 1 : 0);
		FIB_PRINTF(LOG_DEBUG, dd->fd,
		    "DPDK GU: %s %s/%d nhop %u -> %u ret: %d",
		    (rc->rc_cmd == RTM_ADD) ? "ADD" : "UPDATE",
		    abuf, plen,
		    rc->rc_nh_old != NULL ?
		    fib_get_nhop_idx(dd->fd, rc->rc_nh_old) : 0,
		    nhidx, ret);
	} else {
		/*
		 * Need to lookup parent. Assume deletion happened already
		 */
		char buffer[RTE_LPM6_RULE_SIZE];
		struct rte_lpm6_rule *lsp_rule = NULL;
		int parent_plen;
		uint32_t parent_nhop_idx;

		lsp_rule = pack_parent_rule(dd, addr6, plen, &parent_plen,
		    &parent_nhop_idx, buffer);

		ret = rte_lpm6_delete(dd->lpm6, (const uint8_t *)addr6,
		    plen, lsp_rule);
		FIB_PRINTF(LOG_DEBUG, dd->fd,
		    "DPDK GU: %s %s/%d -> /%d nhop %u -> %u ret: %d",
		    "DEL", abuf, plen, parent_plen,
		    fib_get_nhop_idx(dd->fd, rc->rc_nh_old),
		    parent_nhop_idx, ret);
	}

	if (ret != 0) {
		FIB_PRINTF(LOG_INFO, dd->fd, "error: %d", ret);
		if (ret == -ENOSPC)
			return (FLM_REBUILD);
		return (FLM_ERROR);
	}

	return (FLM_SUCCESS);
}

/*
 * Dispatches a single RIB change to the link-local, default-route or
 * generic handler, based on the prefix of rc->rc_rt.
 */
static enum flm_op_result
handle_any_change(struct dpdk_lpm6_data *dd, struct rib_cmd_info *rc)
{
	enum flm_op_result ret;
	struct in6_addr addr6;
	uint32_t scopeid;
	int plen;

	rt_get_inet6_prefix_plen(rc->rc_rt, &addr6, &plen, &scopeid);

	if (IN6_IS_SCOPE_LINKLOCAL(&addr6))
		ret = handle_ll_change(dd, rc, addr6, plen, scopeid);
	else if (plen == 0)
		ret = handle_default_change(dd, rc);
	else
		ret = handle_gu_change(dd, rc, &addr6, plen);

	if (ret != FLM_SUCCESS)
		FIB_PRINTF(LOG_INFO, dd->fd, "error handling route");

	return (ret);
}

/*
 * RIB change callback registered as flm_change_rib_item_cb; _data is the
 * struct dpdk_lpm6_data of the instance.
 */
static enum flm_op_result
handle_rtable_change_cb(struct rib_head *rnh, struct rib_cmd_info *rc,
    void *_data)
{
	struct dpdk_lpm6_data *dd;

	dd = (struct dpdk_lpm6_data *)_data;

	return (handle_any_change(dd, rc));
}

/*
 * Releases the LPM table (when one was created) and the instance itself.
 */
static void
destroy_dd(struct dpdk_lpm6_data *dd)
{

	FIB_PRINTF(LOG_INFO, dd->fd, "destroy dd %p", dd);
	if (dd->lpm6 != NULL)
		rte_lpm6_free(dd->lpm6);
	free(dd, M_TEMP);
}

/*
 * Destroy callback registered as flm_destroy_cb.
 */
static void
destroy_table(void *_data)
{

	destroy_dd((struct dpdk_lpm6_data *)_data);
}

/*
 * RIB dump callback registered as flm_dump_rib_item_cb: inserts one
 * existing route into the instance being built.
 *
 * Link-local routes only get their nexthop referenced, the default route
 * is recorded through handle_default_change(), all other prefixes are
 * added to the LPM table.  -ENOSPC from the table sets dd->hit_tables and
 * requests a rebuild; other table errors are counted in dd->routes_failed.
 */
static enum flm_op_result
add_route_cb(struct rtentry *rt, void *_data)
{
	struct dpdk_lpm6_data *dd = (struct dpdk_lpm6_data *)_data;
	char abuf[INET6_ADDRSTRLEN];
	struct in6_addr addr6;
	struct nhop_object *nh;
	uint32_t scopeid, nhidx;
	int plen;
	int ret;

	rt_get_inet6_prefix_plen(rt, &addr6, &plen, &scopeid);
	nh = rt_get_raw_nhop(rt);

	if (IN6_IS_SCOPE_LINKLOCAL(&addr6)) {
		/*
		 * We don't operate on LL directly, however
		 * reference them to maintain guarantee on
		 * ability to refcount nhops in epoch.
		 */
		fib_get_nhop_idx(dd->fd, nh);
		return (FLM_SUCCESS);
	}

	inet_ntop(AF_INET6, &addr6, abuf, sizeof(abuf));
	FIB_PRINTF(LOG_DEBUG, dd->fd, "Operating on %s/%d", abuf, plen);

	if (plen == 0) {
		struct rib_cmd_info rc = {
			.rc_cmd = RTM_ADD,
			.rc_nh_new = nh,
		};

		FIB_PRINTF(LOG_DEBUG, dd->fd, "Adding default route");
		return (handle_default_change(dd, &rc));
	}

	nhidx = fib_get_nhop_idx(dd->fd, nh);
	if (nhidx == 0) {
		FIB_PRINTF(LOG_INFO, dd->fd, "unable to get nhop index");
		return (FLM_REBUILD);
	}

	ret = rte_lpm6_add(dd->lpm6, (const uint8_t *)&addr6, plen, nhidx, 1);
	FIB_PRINTF(LOG_DEBUG, dd->fd, "ADD %p %s/%d nh %u = %d",
	    dd->lpm6, abuf, plen, nhidx, ret);

	if (ret != 0) {
		FIB_PRINTF(LOG_INFO, dd->fd, "rte_lpm6_add() returned %d", ret);
		if (ret == -ENOSPC) {
			dd->hit_tables = 1;
			return (FLM_REBUILD);
		}
		dd->routes_failed++;
		return (FLM_ERROR);
	} else
		dd->routes_added++;

	return (FLM_SUCCESS);
}

/*
 * End-of-dump callback registered as flm_dump_end_cb.  Requests a rebuild
 * when the dump ran out of table space or any route failed to load;
 * otherwise publishes lookup_ptr() and the LPM table through dp.
 */
static enum flm_op_result
check_dump_success(void *_data, struct fib_dp *dp)
{
	struct dpdk_lpm6_data *dd;

	dd = (struct dpdk_lpm6_data *)_data;

	/* The counters are uint64_t: print them as uintmax_t, not size_t. */
	FIB_PRINTF(LOG_INFO, dd->fd, "scan completed. added: %ju failed: %ju",
	    (uintmax_t)dd->routes_added, (uintmax_t)dd->routes_failed);
	if (dd->hit_tables || dd->routes_failed > 0)
		return (FLM_REBUILD);

	FIB_PRINTF(LOG_INFO, dd->fd,
	    "DPDK lookup engine synced with IPv6 RIB id %u, %ju routes",
	    dd->fibnum, (uintmax_t)dd->routes_added);

	dp->f = lookup_ptr;
	dp->arg = dd->lpm6;

	return (FLM_SUCCESS);
}

/*
 * Chooses the tbl8 count for a new instance: twice the count of dd_src
 * when dd_src ran out of table space, the same count otherwise.
 */
static void
estimate_scale(const struct dpdk_lpm6_data *dd_src, struct dpdk_lpm6_data *dd)
{

	/* XXX: update at 75% capacity */
	if (dd_src->hit_tables)
		dd->number_tbl8s = dd_src->number_tbl8s * 2;
	else
		dd->number_tbl8s = dd_src->number_tbl8s;

	/* TODO: look into the appropriate RIB to adjust */
}

/*
 * Allocates a new instance sized from dd_prev and creates its LPM table.
 * Returns NULL when either allocation fails.
 */
static struct dpdk_lpm6_data *
build_table(struct dpdk_lpm6_data *dd_prev, struct fib_data *fd)
{
	struct dpdk_lpm6_data *dd;
	struct rte_lpm6_external *ext;
	struct rte_lpm6 *lpm6;

	dd = malloc(sizeof(struct dpdk_lpm6_data), M_TEMP, M_NOWAIT | M_ZERO);
	if (dd == NULL) {
		FIB_PRINTF(LOG_INFO, fd, "Unable to allocate base datastructure");
		return (NULL);
	}
	dd->fibnum = dd_prev->fibnum;
	dd->fd = fd;

	estimate_scale(dd_prev, dd);

	struct rte_lpm6_config cfg = {.number_tbl8s = dd->number_tbl8s};
	lpm6 = rte_lpm6_create("test", 0, &cfg);
	if (lpm6 == NULL) {
		FIB_PRINTF(LOG_INFO, fd, "unable to create lpm6");
		free(dd, M_TEMP);
		return (NULL);
	}
	dd->lpm6 = lpm6;

	ext = (struct rte_lpm6_external *)lpm6;
	ext->nh_idx = fib_get_nhop_array(dd->fd);
	/*
	 * lookup_ptr_ll() takes the FIB number from the external header and
	 * nothing else in this file stores it there, so record it now.
	 */
	ext->fibnum = dd->fibnum;

	FIB_PRINTF(LOG_INFO, fd, "allocated %u tbl8s", dd->number_tbl8s);

	return (dd);
}

/*
 * Init callback registered as flm_init_cb.  Builds a fresh instance for
 * fibnum, sized from _old_data when a previous instance is supplied and
 * from LPM6_MIN_TBL8 otherwise.  On success the new instance is stored in
 * *data; FLM_REBUILD is returned when the instance cannot be created.
 */
static enum flm_op_result
init_table(uint32_t fibnum, struct fib_data *fd, void *_old_data, void **data)
{
	struct dpdk_lpm6_data *dd, dd_base;

	if (_old_data == NULL) {
		bzero(&dd_base, sizeof(struct dpdk_lpm6_data));
		dd_base.fibnum = fibnum;
		/* TODO: get rib statistics */
		dd_base.number_tbl8s = LPM6_MIN_TBL8;
		dd = &dd_base;
	} else {
		FIB_PRINTF(LOG_INFO, fd, "Starting with old data");
		dd = (struct dpdk_lpm6_data *)_old_data;
	}

	/* Guaranteed to be in epoch */
	dd = build_table(dd, fd);
	if (dd == NULL) {
		FIB_PRINTF(LOG_INFO, fd, "table creation failed");
		return (FLM_REBUILD);
	}

	*data = dd;
	return (FLM_SUCCESS);
}

/* Callback table handed to the fib_algo framework. */
static struct fib_lookup_module dpdk_lpm6 = {
	.flm_name = "dpdk_lpm6",
	.flm_family = AF_INET6,
	.flm_init_cb = init_table,
	.flm_destroy_cb = destroy_table,
	.flm_dump_rib_item_cb = add_route_cb,
	.flm_dump_end_cb = check_dump_success,
	.flm_change_rib_item_cb = handle_rtable_change_cb,
	.flm_get_pref = rte6_get_pref,
};

/*
 * Module event handler: registers the lookup module on load and
 * unregisters it on unload; every other event yields EOPNOTSUPP.
 */
static int
lpm6_modevent(module_t mod, int type, void *unused)
{
	int error = 0;

	switch (type) {
	case MOD_LOAD:
		fib_module_register(&dpdk_lpm6);
		break;
	case MOD_UNLOAD:
		error = fib_module_unregister(&dpdk_lpm6);
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}
	return (error);
}

static moduledata_t lpm6mod = {
	"dpdk_lpm6",
	lpm6_modevent,
	0
};

DECLARE_MODULE(lpm6mod, lpm6mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
MODULE_VERSION(lpm6mod, 1);