/*	$OpenBSD: radix.c,v 1.61 2022/01/02 22:36:04 jsg Exp $	*/
/*	$NetBSD: radix.c,v 1.20 2003/08/07 16:32:56 agc Exp $	*/

/*
 * Copyright (c) 1988, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)radix.c	8.6 (Berkeley) 10/17/95
 */

/*
 * Routines to build and maintain radix trees for routing lookups.
 */

#ifndef _KERNEL
#include "kern_compat.h"
#else
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/syslog.h>
#include <sys/pool.h>
#endif

#include <net/radix.h>

#define SALEN(sa) (*(u_char *)(sa))

/*
 * Read-only variables, allocated & filled during rn_init().
 */
static char *rn_zeros;			/* array of 0s */
static char *rn_ones;			/* array of 1s */
static unsigned int max_keylen;		/* size of the above arrays */
#define KEYLEN_LIMIT	64		/* maximum allowed keylen */


struct radix_node_head *mask_rnhead;	/* head of shared mask tree */
struct pool rtmask_pool;		/* pool for radix_mask structures */

static inline int rn_satisfies_leaf(char *, struct radix_node *, int);
static inline int rn_lexobetter(void *, void *);
static inline struct radix_mask *rn_new_radix_mask(struct radix_node *,
    struct radix_mask *);

int rn_refines(void *, void *);
int rn_inithead0(struct radix_node_head *, int);
struct radix_node *rn_addmask(void *, int, int);
struct radix_node *rn_insert(void *, struct radix_node_head *, int *,
    struct radix_node [2]);
struct radix_node *rn_newpair(void *, int, struct radix_node[2]);
void rn_link_dupedkey(struct radix_node *, struct radix_node *, int);

static inline struct radix_node *rn_search(void *, struct radix_node *);
struct radix_node *rn_search_m(void *, struct radix_node *, void *);
int rn_add_dupedkey(struct radix_node *, struct radix_node_head *,
    struct radix_node [2], u_int8_t);
void rn_fixup_nodes(struct radix_node *);
static inline struct radix_node *rn_lift_node(struct radix_node *);
void rn_add_radix_mask(struct radix_node *, int);
int rn_del_radix_mask(struct radix_node *);
static inline void rn_swap_nodes(struct radix_node *, struct radix_node *);

/*
 * The data structure for the keys is a radix tree with one way
 * branching removed.  The index rn_b at an internal node n represents a bit
 * position to be tested.  The tree is arranged so that all descendants
 * of a node n have keys whose bits all agree up to position rn_b - 1.
 * (We say the index of n is rn_b.)
 *
 * There is at least one descendant which has a one bit at position rn_b,
 * and at least one with a zero there.
 *
 * A route is determined by a pair of key and mask.  We require that the
 * bitwise logical AND of the key and mask be the key.
 * We define the index of a route associated with the mask to be
 * the first bit number in the mask where 0 occurs (with bit number 0
 * representing the highest order bit).
 *
 * We say a mask is normal if every bit is 0 past the index of the mask.
 * If a node n has a descendant (k, m) with index(m) == index(n) == rn_b,
 * and m is a normal mask, then the route applies to every descendant of n.
 * If index(m) < rn_b, this implies that the last few bits of k
 * before bit b are all 0 (and hence the same is true of every descendant
 * of n), so the route applies to all descendants of the node as well.
 *
 * Similar logic shows that a non-normal mask m such that
 * index(m) <= index(n) could potentially apply to many children of n.
 * Thus, for each non-host route, we attach its mask to a list at an internal
 * node as high in the tree as we can go.
 *
 * The present version of the code makes use of normal routes in short-
 * circuiting an explicit mask and compare operation when testing whether
 * a key satisfies a normal route, and also in remembering the unique leaf
 * that governs a subtree.
 */
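
/*
 * Illustration (editor's sketch, not compiled into the kernel): the
 * index/normalcy rules described above, restated as standalone code.
 * index(m) is the position of the first 0 bit in the mask, counting
 * from the high-order bit, and a mask is "normal" when every bit past
 * that index is also 0, i.e. the mask is a contiguous run of 1s.  All
 * names below are local to the example.
 */
#if 0
#include <stdio.h>

static int
mask_index(const unsigned char *m, int len)
{
	int i, b;

	for (i = 0; i < len; i++) {
		if (m[i] == 0xff)
			continue;
		for (b = 0; b < 8; b++)
			if ((m[i] & (0x80 >> b)) == 0)
				return (i << 3) + b;
	}
	return len << 3;	/* all ones: index is past the key */
}

static int
mask_is_normal(const unsigned char *m, int len)
{
	int b, idx = mask_index(m, len);

	/* every bit past the index must be 0 */
	for (b = idx; b < len << 3; b++)
		if (m[b >> 3] & (0x80 >> (b & 7)))
			return 0;
	return 1;
}

int
main(void)
{
	unsigned char normal[4] = { 0xff, 0xff, 0xc0, 0x00 };	/* /18 */
	unsigned char odd[4]    = { 0xff, 0x00, 0xff, 0x00 };	/* gap */

	printf("index %d normal %d\n", mask_index(normal, 4),
	    mask_is_normal(normal, 4));		/* index 18 normal 1 */
	printf("index %d normal %d\n", mask_index(odd, 4),
	    mask_is_normal(odd, 4));		/* index 8 normal 0 */
	return 0;
}
#endif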

static inline struct radix_node *
rn_search(void *v_arg, struct radix_node *head)
{
	struct radix_node *x = head;
	caddr_t v = v_arg;

	while (x->rn_b >= 0) {
		if (x->rn_bmask & v[x->rn_off])
			x = x->rn_r;
		else
			x = x->rn_l;
	}
	return (x);
}

struct radix_node *
rn_search_m(void *v_arg, struct radix_node *head, void *m_arg)
{
	struct radix_node *x = head;
	caddr_t v = v_arg;
	caddr_t m = m_arg;

	while (x->rn_b >= 0) {
		if ((x->rn_bmask & m[x->rn_off]) &&
		    (x->rn_bmask & v[x->rn_off]))
			x = x->rn_r;
		else
			x = x->rn_l;
	}
	return x;
}

int
rn_refines(void *m_arg, void *n_arg)
{
	caddr_t m = m_arg;
	caddr_t n = n_arg;
	caddr_t lim, lim2;
	int longer;
	int masks_are_equal = 1;

	lim2 = lim = n + *(u_char *)n;
	longer = (*(u_char *)n++) - (int)(*(u_char *)m++);
	if (longer > 0)
		lim -= longer;
	while (n < lim) {
		if (*n & ~(*m))
			return 0;
		if (*n++ != *m++)
			masks_are_equal = 0;
	}
	while (n < lim2)
		if (*n++)
			return 0;
	if (masks_are_equal && (longer < 0))
		for (lim2 = m - longer; m < lim2; )
			if (*m++)
				return 1;
	return (!masks_are_equal);
}

/* return a perfect match if m_arg is set, else do a regular rn_match */
struct radix_node *
rn_lookup(void *v_arg, void *m_arg, struct radix_node_head *head)
{
	struct radix_node *x, *tm;
	caddr_t netmask = 0;

	if (m_arg) {
		tm = rn_addmask(m_arg, 1, head->rnh_treetop->rn_off);
		if (tm == NULL)
			return (NULL);
		netmask = tm->rn_key;
	}
	x = rn_match(v_arg, head);
	if (x && netmask) {
		while (x && x->rn_mask != netmask)
			x = x->rn_dupedkey;
	}
	/* Never return internal nodes to the upper layer. */
	if (x && (x->rn_flags & RNF_ROOT))
		return (NULL);
	return x;
}

static inline int
rn_satisfies_leaf(char *trial, struct radix_node *leaf, int skip)
{
	char *cp = trial;
	char *cp2 = leaf->rn_key;
	char *cp3 = leaf->rn_mask;
	char *cplim;
	int length;

	length = min(SALEN(cp), SALEN(cp2));
	if (cp3 == NULL)
		cp3 = rn_ones;
	else
		length = min(length, SALEN(cp3));
	cplim = cp + length;
	cp += skip;
	cp2 += skip;
	cp3 += skip;
	while (cp < cplim) {
		if ((*cp ^ *cp2) & *cp3)
			return 0;
		cp++, cp2++, cp3++;
	}
	return 1;
}

struct radix_node *
rn_match(void *v_arg, struct radix_node_head *head)
{
	caddr_t v = v_arg;
	caddr_t cp, cp2, cplim;
	struct radix_node *top = head->rnh_treetop;
	struct radix_node *saved_t, *t;
	int off = top->rn_off;
	int vlen, matched_off;
	int test, b, rn_b;

	t = rn_search(v, top);
	/*
	 * See if we match exactly as a host destination
	 * or at least learn how many bits match, for normal mask finesse.
	 *
	 * It doesn't hurt to limit how many bytes we check
	 * to the length of the mask, since if it matches we had a genuine
	 * match and the leaf we have is the most specific one anyway;
	 * if it didn't match with a shorter length it would fail
	 * with a long one.  This wins big for class B and C netmasks which
	 * are probably the most common case...
	 */
	if (t->rn_mask)
		vlen = SALEN(t->rn_mask);
	else
		vlen = SALEN(v);
	cp = v + off;
	cp2 = t->rn_key + off;
	cplim = v + vlen;
	for (; cp < cplim; cp++, cp2++)
		if (*cp != *cp2)
			goto on1;
	/*
	 * This extra grot is in case we are explicitly asked
	 * to look up the default.  Ugh!
	 */
	if (t->rn_flags & RNF_ROOT)
		t = t->rn_dupedkey;

	KASSERT(t == NULL || (t->rn_flags & RNF_ROOT) == 0);
	return t;
on1:
	test = (*cp ^ *cp2) & 0xff;	/* find first bit that differs */
	for (b = 7; (test >>= 1) > 0;)
		b--;
	matched_off = cp - v;
	b += matched_off << 3;
	rn_b = -1 - b;
	/*
	 * If there is a host route in a duped-key chain, it will be first.
	 */
	saved_t = t;
	if (t->rn_mask == NULL)
		t = t->rn_dupedkey;
	for (; t; t = t->rn_dupedkey)
		/*
		 * Even if we don't match exactly as a host,
		 * we may match if the leaf we wound up at is
		 * a route to a net.
		 */
		if (t->rn_flags & RNF_NORMAL) {
			if (rn_b <= t->rn_b) {
				KASSERT((t->rn_flags & RNF_ROOT) == 0);
				return t;
			}
		} else if (rn_satisfies_leaf(v, t, matched_off)) {
			KASSERT((t->rn_flags & RNF_ROOT) == 0);
			return t;
		}
	t = saved_t;
	/* start searching up the tree */
	do {
		struct radix_mask *m;
		t = t->rn_p;
		m = t->rn_mklist;
		while (m) {
			/*
			 * If non-contiguous masks ever become important
			 * we can restore the masking and open coding of
			 * the search and satisfaction test and put the
			 * calculation of "off" back before the "do".
			 */
			if (m->rm_flags & RNF_NORMAL) {
				if (rn_b <= m->rm_b) {
					KASSERT((m->rm_leaf->rn_flags &
					    RNF_ROOT) == 0);
					return (m->rm_leaf);
				}
			} else {
				struct radix_node *x;
				off = min(t->rn_off, matched_off);
				x = rn_search_m(v, t, m->rm_mask);
				while (x && x->rn_mask != m->rm_mask)
					x = x->rn_dupedkey;
				if (x && rn_satisfies_leaf(v, x, off)) {
					KASSERT((x->rn_flags & RNF_ROOT) == 0);
					return x;
				}
			}
			m = m->rm_mklist;
		}
	} while (t != top);
	return NULL;
}

struct radix_node *
rn_newpair(void *v, int b, struct radix_node nodes[2])
{
	struct radix_node *tt = nodes, *t = nodes + 1;
	t->rn_b = b;
	t->rn_bmask = 0x80 >> (b & 7);
	t->rn_l = tt;
	t->rn_off = b >> 3;
	tt->rn_b = -1;
	tt->rn_key = v;
	tt->rn_p = t;
	tt->rn_flags = t->rn_flags = RNF_ACTIVE;
	return t;
}

struct radix_node *
rn_insert(void *v_arg, struct radix_node_head *head,
    int *dupentry, struct radix_node nodes[2])
{
	caddr_t v = v_arg;
	struct radix_node *top = head->rnh_treetop;
	struct radix_node *t, *tt;
	int off = top->rn_off;
	int b;

	t = rn_search(v_arg, top);
	/*
	 * Find first bit at which v and t->rn_key differ
	 */
	{
		caddr_t cp, cp2, cplim;
		int vlen, cmp_res;

		vlen = SALEN(v);
		cp = v + off;
		cp2 = t->rn_key + off;
		cplim = v + vlen;

		while (cp < cplim)
			if (*cp2++ != *cp++)
				goto on1;
		*dupentry = 1;
		return t;
on1:
		*dupentry = 0;
		cmp_res = (cp[-1] ^ cp2[-1]) & 0xff;
		for (b = (cp - v) << 3; cmp_res; b--)
			cmp_res >>= 1;
	}
	{
		struct radix_node *p, *x = top;
		caddr_t cp = v;
		do {
			p = x;
			if (cp[x->rn_off] & x->rn_bmask)
				x = x->rn_r;
			else
				x = x->rn_l;
		} while (b > (unsigned int) x->rn_b);
		/* x->rn_b < b && x->rn_b >= 0 */
		t = rn_newpair(v_arg, b, nodes);
		tt = t->rn_l;
		if ((cp[p->rn_off] & p->rn_bmask) == 0)
			p->rn_l = t;
		else
			p->rn_r = t;
		x->rn_p = t;
		t->rn_p = p;	/* frees x, p as temp vars below */
		if ((cp[t->rn_off] & t->rn_bmask) == 0) {
			t->rn_r = x;
		} else {
			t->rn_r = tt;
			t->rn_l = x;
		}
	}
	return (tt);
}
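
/*
 * Illustration (editor's sketch, not compiled into the kernel): the
 * descent loop used by rn_search() on a toy tree.  Internal nodes
 * carry a byte offset and a single-bit mask; leaves have b < 0.  The
 * simplified node type and names below are local to the example.
 */
#if 0
#include <stdio.h>

struct toy_node {
	int b;				/* bit index; < 0 for a leaf */
	int off;			/* byte offset to test */
	unsigned char bmask;		/* 0x80 >> (b & 7) */
	struct toy_node *l, *r;
	const char *name;		/* leaf label */
};

static struct toy_node *
toy_search(const unsigned char *key, struct toy_node *x)
{
	/* walk down, testing one bit per internal node */
	while (x->b >= 0)
		x = (key[x->off] & x->bmask) ? x->r : x->l;
	return x;
}

int
main(void)
{
	struct toy_node la = { -1, 0, 0, NULL, NULL, "10.0.0.0" };
	struct toy_node lb = { -1, 0, 0, NULL, NULL, "192.168.0.0" };
	/* test bit 0 of byte 0: 10.x has it clear, 192.x has it set */
	struct toy_node top = { 0, 0, 0x80, &la, &lb, NULL };
	unsigned char k1[4] = { 10, 1, 2, 3 };
	unsigned char k2[4] = { 192, 168, 1, 1 };

	printf("%s\n", toy_search(k1, &top)->name);	/* 10.0.0.0 */
	printf("%s\n", toy_search(k2, &top)->name);	/* 192.168.0.0 */
	return 0;
}
#endif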

struct radix_node *
rn_addmask(void *n_arg, int search, int skip)
{
	caddr_t netmask = n_arg;
	struct radix_node *tm, *saved_tm;
	caddr_t cp, cplim;
	int b = 0, mlen, j;
	int maskduplicated, m0, isnormal;
	char addmask_key[KEYLEN_LIMIT];

	if ((mlen = SALEN(netmask)) > max_keylen)
		mlen = max_keylen;
	if (skip == 0)
		skip = 1;
	if (mlen <= skip)
		return (mask_rnhead->rnh_nodes);	/* rn_zero root node */
	if (skip > 1)
		memcpy(addmask_key + 1, rn_ones + 1, skip - 1);
	if ((m0 = mlen) > skip)
		memcpy(addmask_key + skip, netmask + skip, mlen - skip);
	/*
	 * Trim trailing zeroes.
	 */
	for (cp = addmask_key + mlen; (cp > addmask_key) && cp[-1] == 0;)
		cp--;
	mlen = cp - addmask_key;
	if (mlen <= skip)
		return (mask_rnhead->rnh_nodes);
	memset(addmask_key + m0, 0, max_keylen - m0);
	SALEN(addmask_key) = mlen;
	tm = rn_search(addmask_key, mask_rnhead->rnh_treetop);
	if (memcmp(addmask_key, tm->rn_key, mlen) != 0)
		tm = NULL;
	if (tm || search)
		return (tm);
	tm = malloc(max_keylen + 2 * sizeof(*tm), M_RTABLE, M_NOWAIT | M_ZERO);
	if (tm == NULL)
		return (0);
	saved_tm = tm;
	netmask = cp = (caddr_t)(tm + 2);
	memcpy(cp, addmask_key, mlen);
	tm = rn_insert(cp, mask_rnhead, &maskduplicated, tm);
	if (maskduplicated) {
		log(LOG_ERR, "%s: mask impossibly already in tree\n",
		    __func__);
		free(saved_tm, M_RTABLE, max_keylen + 2 * sizeof(*saved_tm));
		return (tm);
	}
	/*
	 * Calculate index of mask, and check for normalcy.
	 */
	cplim = netmask + mlen;
	isnormal = 1;
	for (cp = netmask + skip; (cp < cplim) && *(u_char *)cp == 0xff;)
		cp++;
	if (cp != cplim) {
		static const char normal_chars[] = {
			0, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, -1
		};
		for (j = 0x80; (j & *cp) != 0; j >>= 1)
			b++;
		if (*cp != normal_chars[b] || cp != (cplim - 1))
			isnormal = 0;
	}
	b += (cp - netmask) << 3;
	tm->rn_b = -1 - b;
	if (isnormal)
		tm->rn_flags |= RNF_NORMAL;
	return (tm);
}

/* rn_lexobetter: return an arbitrary ordering for non-contiguous masks */
static inline int
rn_lexobetter(void *m_arg, void *n_arg)
{
	u_char *mp = m_arg, *np = n_arg;

	/*
	 * Longer masks might not really be lexicographically better,
	 * but longer masks always have precedence since they must be checked
	 * first.  The netmasks were normalized before calling this function
	 * and don't have unneeded trailing zeros.
	 */
	if (SALEN(mp) > SALEN(np))
		return 1;
	if (SALEN(mp) < SALEN(np))
		return 0;
	/*
	 * Must return the first difference between the masks
	 * to ensure deterministic sorting.
	 */
	return (memcmp(mp, np, *mp) > 0);
}

static inline struct radix_mask *
rn_new_radix_mask(struct radix_node *tt, struct radix_mask *next)
{
	struct radix_mask *m;

	m = pool_get(&rtmask_pool, PR_NOWAIT | PR_ZERO);
	if (m == NULL) {
		log(LOG_ERR, "Mask for route not entered\n");
		return (0);
	}
	m->rm_b = tt->rn_b;
	m->rm_flags = tt->rn_flags;
	if (tt->rn_flags & RNF_NORMAL)
		m->rm_leaf = tt;
	else
		m->rm_mask = tt->rn_mask;
	m->rm_mklist = next;
	tt->rn_mklist = m;
	return m;
}

/*
 * Find the point where the rn_mklist needs to be changed.
 */
static inline struct radix_node *
rn_lift_node(struct radix_node *t)
{
	struct radix_node *x = t;
	int b = -1 - t->rn_b;

	/* rewind possible dupedkey list to head */
	while (t->rn_b < 0)
		t = t->rn_p;

	/* can't lift node above head of dupedkey list, give up */
	if (b > t->rn_b)
		return (NULL);

	do {
		x = t;
		t = t->rn_p;
	} while (b <= t->rn_b && x != t);

	return (x);
}

void
rn_add_radix_mask(struct radix_node *tt, int keyduplicated)
{
	caddr_t netmask, mmask;
	struct radix_node *x;
	struct radix_mask *m, **mp;
	int b_leaf = tt->rn_b;

	/* Add new route to highest possible ancestor's list */
	if (tt->rn_mask == NULL)
		return;				/* can't lift at all */
	x = rn_lift_node(tt);
	if (x == NULL)
		return;				/* didn't lift either */

	/*
	 * Search through routes associated with node to
	 * insert new route according to index.
	 * Need same criteria as when sorting dupedkeys to avoid
	 * double loop on deletion.
	 */
	netmask = tt->rn_mask;
	for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist) {
		if (m->rm_b < b_leaf)
			continue;
		if (m->rm_b > b_leaf)
			break;
		if (m->rm_flags & RNF_NORMAL) {
			if (keyduplicated) {
				if (m->rm_leaf->rn_p == tt)
					/* new route is better */
					m->rm_leaf = tt;
#ifdef DIAGNOSTIC
				else {
					struct radix_node *t;

					for (t = m->rm_leaf;
					    t && t->rn_mklist == m;
					    t = t->rn_dupedkey)
						if (t == tt)
							break;
					if (t == NULL) {
						log(LOG_ERR, "Non-unique "
						    "normal route on "
						    "dupedkey, mask not "
						    "entered\n");
						return;
					}
				}
#endif
				m->rm_refs++;
				tt->rn_mklist = m;
				return;
			} else if (tt->rn_flags & RNF_NORMAL) {
				log(LOG_ERR, "Non-unique normal route,"
				    " mask not entered\n");
				return;
			}
			mmask = m->rm_leaf->rn_mask;
		} else
			mmask = m->rm_mask;
		if (mmask == netmask) {
			m->rm_refs++;
			tt->rn_mklist = m;
			return;
		}
		if (rn_refines(netmask, mmask) ||
		    rn_lexobetter(netmask, mmask))
			break;
	}
	*mp = rn_new_radix_mask(tt, *mp);
}
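
/*
 * Illustration (editor's sketch, not compiled into the kernel): the
 * "refines" relation used to sort masks, restated for fixed-length
 * masks without the SALEN length prefix.  Mask m refines mask n when
 * every bit set in n is also set in m and the masks are not equal;
 * for contiguous masks this reduces to a longer prefix.  Names below
 * are local to the example.
 */
#if 0
#include <stdio.h>

static int
refines(const unsigned char *m, const unsigned char *n, int len)
{
	int i, equal = 1;

	for (i = 0; i < len; i++) {
		if (n[i] & ~m[i])
			return 0;	/* n has a bit that m lacks */
		if (n[i] != m[i])
			equal = 0;
	}
	return !equal;			/* strict: a mask doesn't refine itself */
}

int
main(void)
{
	unsigned char m24[4] = { 0xff, 0xff, 0xff, 0x00 };
	unsigned char m16[4] = { 0xff, 0xff, 0x00, 0x00 };

	printf("%d\n", refines(m24, m16, 4));	/* 1: /24 refines /16 */
	printf("%d\n", refines(m16, m24, 4));	/* 0 */
	return 0;
}
#endif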

int
rn_add_dupedkey(struct radix_node *saved_tt, struct radix_node_head *head,
    struct radix_node *tt, u_int8_t prio)
{
	caddr_t netmask = tt->rn_mask;
	struct radix_node *x = saved_tt, *xp;
	int before = -1;
	int b_leaf = 0;

	if (netmask)
		b_leaf = tt->rn_b;

	for (xp = x; x; xp = x, x = x->rn_dupedkey) {
		if (x->rn_mask == netmask)
			return (-1);
		if (netmask == NULL ||
		    (x->rn_mask &&
		    ((b_leaf < x->rn_b) || /* index(netmask) > node */
		    rn_refines(netmask, x->rn_mask) ||
		    rn_lexobetter(netmask, x->rn_mask))))
			break;
	}
	/*
	 * If the mask is not duplicated, we wouldn't
	 * find it among possible duplicate key entries
	 * anyway, so the above test doesn't hurt.
	 *
	 * We sort the masks for a duplicated key the same way as
	 * in a masklist -- most specific to least specific.
	 * This may require the unfortunate nuisance of relocating
	 * the head of the list.
	 *
	 * We also reverse, or doubly link the list through the
	 * parent pointer.
	 */

	if ((x == saved_tt && before) || before == 1)
		before = 1;
	else
		before = 0;
	rn_link_dupedkey(tt, xp, before);

	return (0);
}

/*
 * Insert tt after x or in place of x if before is true.
 */
void
rn_link_dupedkey(struct radix_node *tt, struct radix_node *x, int before)
{
	if (before) {
		if (x->rn_p->rn_b > 0) {
			/* link in at head of list */
			tt->rn_dupedkey = x;
			tt->rn_flags = x->rn_flags;
			tt->rn_p = x->rn_p;
			x->rn_p = tt;
			if (tt->rn_p->rn_l == x)
				tt->rn_p->rn_l = tt;
			else
				tt->rn_p->rn_r = tt;
		} else {
			tt->rn_dupedkey = x;
			x->rn_p->rn_dupedkey = tt;
			tt->rn_p = x->rn_p;
			x->rn_p = tt;
		}
	} else {
		tt->rn_dupedkey = x->rn_dupedkey;
		x->rn_dupedkey = tt;
		tt->rn_p = x;
		if (tt->rn_dupedkey)
			tt->rn_dupedkey->rn_p = tt;
	}
}

/*
 * This function ensures that routes are properly promoted upwards.
 * It adjusts the rn_mklist of the parent node to make sure overlapping
 * routes can be found.
 *
 * There are two cases:
 * - leaf nodes with a possible rn_dupedkey list
 * - internal nodes that may have their own mklist
 * If the mask of the route is bigger than the current branch bit then
 * an rn_mklist entry needs to be made.
 */
void
rn_fixup_nodes(struct radix_node *tt)
{
	struct radix_node *tp, *x;
	struct radix_mask *m, **mp;
	int b_leaf;

	tp = tt->rn_p;
	if (tp->rn_r == tt)
		x = tp->rn_l;
	else
		x = tp->rn_r;

	b_leaf = -1 - tp->rn_b;
	if (x->rn_b < 0) {	/* x is a leaf node */
		struct radix_node *xx = NULL;

		for (mp = &tp->rn_mklist; x; xx = x, x = x->rn_dupedkey) {
			if (xx && xx->rn_mklist && xx->rn_mask == x->rn_mask &&
			    x->rn_mklist == 0) {
				/* multipath route */
				x->rn_mklist = xx->rn_mklist;
				x->rn_mklist->rm_refs++;
			}
			if (x->rn_mask && (x->rn_b >= b_leaf) &&
			    x->rn_mklist == 0) {
				*mp = m = rn_new_radix_mask(x, 0);
				if (m)
					mp = &m->rm_mklist;
			}
		}
	} else if (x->rn_mklist) {	/* x is an internal node */
		/*
		 * Skip over masks whose index is > that of new node
		 */
		for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist)
			if (m->rm_b >= b_leaf)
				break;
		tp->rn_mklist = m;
		*mp = 0;
	}
}

struct radix_node *
rn_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
    struct radix_node treenodes[2], u_int8_t prio)
{
	caddr_t v = v_arg;
	struct radix_node *top = head->rnh_treetop;
	struct radix_node *tt, *saved_tt, *tm = NULL;
	int keyduplicated;

	/*
	 * In dealing with non-contiguous masks, there may be
	 * many different routes which have the same mask.
	 * We will find it useful to have a unique pointer to
	 * the mask to speed avoiding duplicate references at
	 * nodes and possibly save time in calculating indices.
	 */
	if (n_arg) {
		if ((tm = rn_addmask(n_arg, 0, top->rn_off)) == 0)
			return (0);
	}

	tt = rn_insert(v, head, &keyduplicated, treenodes);

	if (keyduplicated) {
		saved_tt = tt;
		tt = treenodes;

		tt->rn_key = v_arg;
		tt->rn_b = -1;
		tt->rn_flags = RNF_ACTIVE;
	}

	/* Put mask into the node. */
	if (tm) {
		tt->rn_mask = tm->rn_key;
		tt->rn_b = tm->rn_b;
		tt->rn_flags |= tm->rn_flags & RNF_NORMAL;
	}

	/* Either insert into dupedkey list or as a leaf node. */
	if (keyduplicated) {
		if (rn_add_dupedkey(saved_tt, head, tt, prio))
			return (NULL);
	} else {
		rn_fixup_nodes(tt);
	}

	/* finally insert a radix_mask element if needed */
	rn_add_radix_mask(tt, keyduplicated);
	return (tt);
}
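
/*
 * Illustration (editor's sketch, not compiled into the kernel): a
 * hypothetical userland caller, assuming the kern_compat.h shim seen
 * at the top of this file provides the malloc/pool/log stubs.  Keys
 * and masks follow the sockaddr convention used throughout: the
 * first byte is the total length (SALEN).  The prio value of 0 and
 * the byte offset of 1 (skipping the length byte) are assumptions of
 * the sketch, and the key/mask/dst buffers must outlive the tree
 * because leaves keep pointers into them.
 */
#if 0
static void
example(void)
{
	struct radix_node_head *rnh = NULL;
	static struct radix_node nodes[2];	/* zeroed storage for the leaf pair */
	/* length byte first, then 4 address bytes: 10.0.0.0/8 */
	static unsigned char key[5]  = { 5, 10, 0, 0, 0 };
	static unsigned char mask[5] = { 5, 0xff, 0, 0, 0 };
	static unsigned char dst[5]  = { 5, 10, 1, 2, 3 };
	struct radix_node *rn;

	rn_init(5);			/* largest key length, in bytes */
	rn_inithead((void **)&rnh, 1);	/* byte offset 1: skip the length */
	rn = rn_addroute(key, mask, rnh, nodes, 0);
	rn = rn_match(dst, rnh);	/* returns the 10.0.0.0/8 leaf */
}
#endif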

/*
 * Cleanup mask list, tt points to route that needs to be cleaned
 */
int
rn_del_radix_mask(struct radix_node *tt)
{
	struct radix_node *x;
	struct radix_mask *m, *saved_m, **mp;

	/*
	 * Cleanup mask list from possible references to this route.
	 */
	saved_m = m = tt->rn_mklist;
	if (tt->rn_mask == NULL || m == NULL)
		return (0);

	if (tt->rn_flags & RNF_NORMAL) {
		if (m->rm_leaf != tt && m->rm_refs == 0) {
			log(LOG_ERR, "rn_delete: inconsistent normal "
			    "annotation\n");
			return (-1);
		}
		if (m->rm_leaf != tt) {
			if (--m->rm_refs >= 0)
				return (0);
			else
				log(LOG_ERR, "rn_delete: "
				    "inconsistent mklist refcount\n");
		}
		/*
		 * If we end up here tt should be m->rm_leaf and therefore
		 * tt should be the head of a multipath chain.
		 * If this is not the case the table is no longer consistent.
		 */
		if (m->rm_refs > 0) {
			if (tt->rn_dupedkey == NULL ||
			    tt->rn_dupedkey->rn_mklist != m) {
				log(LOG_ERR, "rn_delete: inconsistent "
				    "dupedkey list\n");
				return (-1);
			}
			m->rm_leaf = tt->rn_dupedkey;
			--m->rm_refs;
			return (0);
		}
		/* else tt is last and only route */
	} else {
		if (m->rm_mask != tt->rn_mask) {
			log(LOG_ERR, "rn_delete: inconsistent annotation\n");
			return (0);
		}
		if (--m->rm_refs >= 0)
			return (0);
	}

	/*
	 * No other references are held to the radix_mask; remove it
	 * from the tree.
	 */
	x = rn_lift_node(tt);
	if (x == NULL)
		return (0);		/* Wasn't lifted at all */

	/* Finally eliminate the radix_mask from the tree */
	for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist)
		if (m == saved_m) {
			*mp = m->rm_mklist;
			pool_put(&rtmask_pool, m);
			break;
		}

	if (m == NULL) {
		log(LOG_ERR, "rn_delete: couldn't find our annotation\n");
		if (tt->rn_flags & RNF_NORMAL)
			return (-1);	/* Dangling ref to us */
	}

	return (0);
}

/* swap two internal nodes and fixup the parent and child pointers */
static inline void
rn_swap_nodes(struct radix_node *from, struct radix_node *to)
{
	*to = *from;
	if (from->rn_p->rn_l == from)
		from->rn_p->rn_l = to;
	else
		from->rn_p->rn_r = to;

	to->rn_l->rn_p = to;
	to->rn_r->rn_p = to;
}

struct radix_node *
rn_delete(void *v_arg, void *n_arg, struct radix_node_head *head,
    struct radix_node *rn)
{
	caddr_t v = v_arg;
	caddr_t netmask = n_arg;
	struct radix_node *top = head->rnh_treetop;
	struct radix_node *tt, *tp, *pp, *x;
	struct radix_node *dupedkey_tt, *saved_tt;
	int off = top->rn_off;
	int vlen;

	vlen = SALEN(v);

	/*
	 * Implement a lookup similar to rn_lookup but we need to save
	 * the radix leaf node (where the rn_dupedkey list starts) so
	 * it is not possible to use rn_lookup.
	 */
	tt = rn_search(v, top);
	/* make sure the key is a perfect match */
	if (memcmp(v + off, tt->rn_key + off, vlen - off))
		return (NULL);

	/*
	 * Here, tt is the deletion target, and
	 * saved_tt is the head of the dupedkey chain.
	 * dupedkey_tt will point to the start of the multipath chain.
	 */
	saved_tt = tt;

	/*
	 * make tt point to the start of the rn_dupedkey list of multipath
	 * routes.
	 */
	if (netmask) {
		struct radix_node *tm;

		if ((tm = rn_addmask(netmask, 1, off)) == NULL)
			return (NULL);
		netmask = tm->rn_key;
		while (tt->rn_mask != netmask)
			if ((tt = tt->rn_dupedkey) == NULL)
				return (NULL);
	}

	/* save start of multi path chain for later use */
	dupedkey_tt = tt;

	KASSERT((tt->rn_flags & RNF_ROOT) == 0);

	/* remove possible radix_mask */
	if (rn_del_radix_mask(tt))
		return (NULL);

	/*
	 * Finally eliminate us from tree
	 */
	tp = tt->rn_p;
	if (saved_tt->rn_dupedkey) {
		if (tt == saved_tt) {
			x = saved_tt->rn_dupedkey;
			x->rn_p = tp;
			if (tp->rn_l == tt)
				tp->rn_l = x;
			else
				tp->rn_r = x;
			/* head changed adjust dupedkey pointer */
			dupedkey_tt = x;
		} else {
			x = saved_tt;
			/* dupedkey will change so adjust pointer */
			if (dupedkey_tt == tt)
				dupedkey_tt = tt->rn_dupedkey;
			tp->rn_dupedkey = tt->rn_dupedkey;
			if (tt->rn_dupedkey)
				tt->rn_dupedkey->rn_p = tp;
		}

		/*
		 * We may be holding an active internal node in the tree.
		 */
		if (tt[1].rn_flags & RNF_ACTIVE)
			rn_swap_nodes(&tt[1], &x[1]);

		/* over and out */
		goto out;
	}

	/* non-rn_dupedkey case, remove tt and tp node from the tree */
	if (tp->rn_l == tt)
		x = tp->rn_r;
	else
		x = tp->rn_l;
	pp = tp->rn_p;
	if (pp->rn_r == tp)
		pp->rn_r = x;
	else
		pp->rn_l = x;
	x->rn_p = pp;

	/*
	 * Demote routes attached to us (actually on the internal parent node).
	 */
	if (tp->rn_mklist) {
		struct radix_mask *m, **mp;
		if (x->rn_b >= 0) {
			for (mp = &x->rn_mklist; (m = *mp);)
				mp = &m->rm_mklist;
			*mp = tp->rn_mklist;
		} else {
			/*
			 * If there are any (key, mask) pairs in a sibling
			 * duped-key chain, some subset will appear sorted
			 * in the same order attached to our mklist.
			 */
			for (m = tp->rn_mklist; m && x; x = x->rn_dupedkey)
				if (m == x->rn_mklist) {
					struct radix_mask *mm = m->rm_mklist;
					x->rn_mklist = 0;
					if (--(m->rm_refs) < 0)
						pool_put(&rtmask_pool, m);
					else if (m->rm_flags & RNF_NORMAL)
						/*
						 * don't progress because this
						 * is a multipath route.  Next
						 * route will use the same m.
						 */
						mm = m;
					m = mm;
				}
			if (m)
				log(LOG_ERR, "%s %p at %p\n",
				    "rn_delete: Orphaned Mask", m, x);
		}
	}

	/*
	 * We may be holding an active internal node in the tree.
	 * If so swap our internal node (tt[1]) with the parent node (tp)
	 * since that one was just removed from the tree.
	 */
	if (tp != &tt[1])
		rn_swap_nodes(&tt[1], tp);

	/* no rn_dupedkey list so no need to fixup multipath chains */
out:
	tt[0].rn_flags &= ~RNF_ACTIVE;
	tt[1].rn_flags &= ~RNF_ACTIVE;
	return (tt);
}

int
rn_walktree(struct radix_node_head *h, int (*f)(struct radix_node *, void *,
    u_int), void *w)
{
	int error;
	struct radix_node *base, *next;
	struct radix_node *rn = h->rnh_treetop;

	/*
	 * This gets complicated because we may delete the node
	 * while applying the function f to it, so we need to calculate
	 * the successor node in advance.
	 */
	/* First time through node, go left */
	while (rn->rn_b >= 0)
		rn = rn->rn_l;
	for (;;) {
		base = rn;
		/* If at right child go back up, otherwise, go right */
		while (rn->rn_p->rn_r == rn && (rn->rn_flags & RNF_ROOT) == 0)
			rn = rn->rn_p;
		/* Find the next *leaf* since next node might vanish, too */
		for (rn = rn->rn_p->rn_r; rn->rn_b >= 0;)
			rn = rn->rn_l;
		next = rn;
		/* Process leaves */
		while ((rn = base) != NULL) {
			base = rn->rn_dupedkey;
			if (!(rn->rn_flags & RNF_ROOT) &&
			    (error = (*f)(rn, w, h->rnh_rtableid)))
				return (error);
		}
		rn = next;
		if (rn->rn_flags & RNF_ROOT)
			return (0);
	}
	/* NOTREACHED */
}
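
/*
 * Illustration (editor's sketch, not compiled into the kernel): a
 * hypothetical rn_walktree() callback that counts leaves.  The walker
 * passes each leaf, the opaque argument, and the head's rtable id;
 * a non-zero return aborts the walk and is propagated to the caller.
 * The function names below are assumptions of the sketch.
 */
#if 0
static int
count_leaf(struct radix_node *rn, void *w, u_int rtableid)
{
	(*(int *)w)++;		/* duped-key leaves are visited too */
	return (0);		/* keep walking */
}

static int
count_routes(struct radix_node_head *rnh)
{
	int count = 0;

	rn_walktree(rnh, count_leaf, &count);
	return (count);
}
#endif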

int
rn_initmask(void)
{
	if (mask_rnhead != NULL)
		return (0);

	KASSERT(max_keylen > 0);

	mask_rnhead = malloc(sizeof(*mask_rnhead), M_RTABLE, M_NOWAIT);
	if (mask_rnhead == NULL)
		return (1);

	rn_inithead0(mask_rnhead, 0);
	return (0);
}

int
rn_inithead(void **head, int off)
{
	struct radix_node_head *rnh;

	if (*head != NULL)
		return (1);

	if (rn_initmask())
		panic("failed to initialize the mask tree");

	rnh = malloc(sizeof(*rnh), M_RTABLE, M_NOWAIT);
	if (rnh == NULL)
		return (0);
	*head = rnh;
	rn_inithead0(rnh, off);
	return (1);
}

int
rn_inithead0(struct radix_node_head *rnh, int offset)
{
	struct radix_node *t, *tt, *ttt;
	int off = offset * NBBY;

	memset(rnh, 0, sizeof(*rnh));
	t = rn_newpair(rn_zeros, off, rnh->rnh_nodes);
	ttt = rnh->rnh_nodes + 2;
	t->rn_r = ttt;
	t->rn_p = t;
	tt = t->rn_l;
	tt->rn_flags = t->rn_flags = RNF_ROOT | RNF_ACTIVE;
	tt->rn_b = -1 - off;
	*ttt = *tt;
	ttt->rn_key = rn_ones;
	rnh->rnh_treetop = t;
	return (1);
}

/*
 * rn_init() can be called multiple times with a different key length
 * as long as no radix tree head has been allocated.
 */
void
rn_init(unsigned int keylen)
{
	char *cp, *cplim;

	KASSERT(keylen <= KEYLEN_LIMIT);

	if (max_keylen == 0) {
		pool_init(&rtmask_pool, sizeof(struct radix_mask), 0,
		    IPL_SOFTNET, 0, "rtmask", NULL);
	}

	if (keylen <= max_keylen)
		return;

	KASSERT(mask_rnhead == NULL);

	free(rn_zeros, M_RTABLE, 2 * max_keylen);
	rn_zeros = mallocarray(2, keylen, M_RTABLE, M_NOWAIT | M_ZERO);
	if (rn_zeros == NULL)
		panic("cannot initialize a radix tree without memory");
	max_keylen = keylen;

	cp = rn_ones = rn_zeros + max_keylen;
	cplim = rn_ones + max_keylen;
	while (cp < cplim)
		*cp++ = -1;
}
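
/*
 * Illustration (editor's sketch, not compiled into the kernel, sizes
 * hypothetical): growing the shared rn_zeros/rn_ones arrays is only
 * legal before any tree head exists.
 */
#if 0
	rn_init(4);	/* ok */
	rn_init(8);	/* ok: grows rn_zeros/rn_ones, no head yet */
	rn_inithead((void **)&rnh, 1);
	rn_init(16);	/* would trip KASSERT(mask_rnhead == NULL) */
#endif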