1 /* 2 * Copyright (c) 1988, 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. Neither the name of the University nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * @(#)radix.c 8.4 (Berkeley) 11/2/94 30 * $FreeBSD: src/sys/net/radix.c,v 1.20.2.3 2002/04/28 05:40:25 suz Exp $ 31 */ 32 33 /* 34 * Routines to build and maintain radix trees for routing lookups. 35 */ 36 37 #include <sys/param.h> 38 #ifdef _KERNEL 39 #include <sys/systm.h> 40 #include <sys/domain.h> 41 #include <sys/globaldata.h> 42 #include <sys/malloc.h> 43 #include <sys/queue.h> 44 #include <sys/syslog.h> 45 #include <sys/thread.h> 46 #include <net/netisr2.h> 47 #include <net/netmsg2.h> 48 #else 49 #include <stdio.h> 50 #include <stdlib.h> 51 #include <strings.h> 52 #include <syslog.h> 53 #endif 54 #include <net/radix.h> 55 56 #ifndef _KERNEL 57 #undef MAXCPU 58 #define MAXCPU 1 59 #define mycpuid 0 60 #define log(l, ...) syslog(l, __VA_ARGS__) 61 #define kprintf(fmt, ...) printf(fmt, ##__VA_ARGS__) 62 #define print_backtrace(...) /* nothing */ 63 #define panic(fmt, ...) \ 64 do { \ 65 fprintf(stderr, "PANIC: " fmt "\n", ##__VA_ARGS__); \ 66 abort(); \ 67 } while (0) 68 #endif 69 70 /* 71 * The arguments to the radix functions are really counted byte arrays with 72 * the length in the first byte. struct sockaddr's fit this type structurally. 73 * Cast the result to int as this is the dominant usage. 74 */ 75 #define clen(c) (int)(*(const u_char *)(c)) 76 77 78 static struct radix_mask *rn_mkfreelist[MAXCPU]; 79 static struct radix_node_head *mask_rnheads[MAXCPU]; 80 81 static const u_char rn_zeros[RN_MAXKEYLEN]; 82 static const u_char rn_ones[RN_MAXKEYLEN] = RN_MAXKEYONES; 83 84 #ifdef RN_DEBUG 85 static int rn_nodenum; 86 static struct radix_node *rn_clist; 87 static int rn_saveinfo; 88 static bool rn_debug = true; 89 #endif 90 91 92 static __inline struct radix_mask * 93 MKGet(struct radix_mask **l) 94 { 95 struct radix_mask *m; 96 97 if (*l != NULL) { 98 m = *l; 99 *l = m->rm_next; 100 } else { 101 R_Malloc(m, struct radix_mask *, sizeof *m); 102 } 103 return m; 104 } 105 106 static __inline void 107 MKFree(struct radix_mask **l, struct radix_mask *m) 108 { 109 m->rm_next = *l; 110 *l = m; 111 } 112 113 /* 114 * The data structure for the keys is a radix tree with one way 115 * branching removed. The index rn_bit at an internal node n represents a bit 116 * position to be tested. The tree is arranged so that all descendants 117 * of a node n have keys whose bits all agree up to position rn_bit - 1. 118 * (We say the index of n is rn_bit.) 119 * 120 * There is at least one descendant which has a one bit at position rn_bit, 121 * and at least one with a zero there. 122 * 123 * A route is determined by a pair of key and mask. We require that the 124 * bit-wise logical and of the key and mask to be the key. 125 * We define the index of a route associated with the mask to be 126 * the first bit number in the mask where 0 occurs (with bit number 0 127 * representing the highest order bit). 128 * 129 * We say a mask is normal if every bit is 0, past the index of the mask. 130 * If a node n has a descendant (k, m) with index(m) == index(n) == rn_bit, 131 * and m is a normal mask, then the route applies to every descendant of n. 132 * If the index(m) < rn_bit, this implies the trailing last few bits of k 133 * before bit rn_bit are all 0, (and hence consequently true of every 134 * descendant of n), so the route applies to all descendants of the node 135 * as well. 136 * 137 * Similar logic shows that a non-normal mask m such that 138 * index(m) <= index(n) could potentially apply to many children of n. 139 * Thus, for each non-host route, we attach its mask to a list at an internal 140 * node as high in the tree as we can go. 141 * 142 * The present version of the code makes use of normal routes in short- 143 * circuiting an explict mask and compare operation when testing whether 144 * a key satisfies a normal route, and also in remembering the unique leaf 145 * that governs a subtree. 146 */ 147 148 /* 149 * Search key <key> in the subtree from <head> until encountering 150 * a leaf node and return it. 151 * 152 * NOTE: Will never return NULL because the embedded default root node. 153 */ 154 static struct radix_node * 155 rn_search(const void *_key, struct radix_node *head) 156 { 157 struct radix_node *x; 158 const u_char *key; 159 160 key = _key; 161 x = head; 162 while (x->rn_bit >= 0) { 163 if (x->rn_bmask & key[x->rn_offset]) 164 x = x->rn_right; 165 else 166 x = x->rn_left; 167 } 168 return (x); 169 } 170 171 /* 172 * Similar to rn_search() but with the netmask <mask> applied. 173 * 174 * NOTE: The netmask can be the all-zero default mask. 175 */ 176 static struct radix_node * 177 rn_search_m(const void *_key, struct radix_node *head, const void *_mask) 178 { 179 struct radix_node *x; 180 const u_char *key, *mask; 181 182 key = _key; 183 mask = _mask; 184 x = head; 185 while (x->rn_bit >= 0) { 186 if ((x->rn_bmask & mask[x->rn_offset]) && 187 (x->rn_bmask & key[x->rn_offset])) 188 x = x->rn_right; 189 else 190 x = x->rn_left; 191 } 192 return x; 193 } 194 195 /* 196 * Compare the two netmasks and return true if netmask <m> is strictly more 197 * specific than netmask <n>. 198 * 199 * NOTE: Non-contiguous netmask is supported. 200 */ 201 bool 202 rn_refines(const void *_m, const void *_n) 203 { 204 const u_char *m, *n, *lim, *lim2; 205 int longer; 206 bool equal; 207 208 m = _m; 209 n = _n; 210 lim2 = lim = n + clen(n); 211 longer = clen(n++) - clen(m++); 212 if (longer > 0) 213 lim -= longer; 214 215 equal = true; 216 while (n < lim) { 217 if (*n & ~(*m)) 218 return false; 219 if (*n++ != *m++) 220 equal = false; 221 } 222 while (n < lim2) { 223 if (*n++) /* n is longer and more specific */ 224 return false; 225 } 226 if (equal && (longer < 0)) { 227 lim2 = m - longer; 228 while (m < lim2) { 229 if (*m++) /* m is longer and more specific */ 230 return true; 231 } 232 } 233 234 return (!equal); 235 } 236 237 /* 238 * Lookup the longest-prefix match of the key <key> in the tree <head>. 239 * The netmask <mask> can be NULL; if specified, the result must have the 240 * same mask, or NULL is returned. 241 */ 242 struct radix_node * 243 rn_lookup(const void *_key, const void *_mask, struct radix_node_head *head) 244 { 245 struct radix_node *x; 246 const u_char *key, *mask, *netmask; 247 248 key = _key; 249 mask = _mask; 250 netmask = NULL; 251 252 if (mask != NULL) { 253 x = rn_addmask(mask, true, head->rnh_treetop->rn_offset, 254 head->rnh_maskhead); 255 if (x == NULL) /* mask doesn't exist in the mask tree */ 256 return (NULL); 257 netmask = x->rn_key; 258 } 259 260 x = rn_match(key, head); 261 if (x != NULL && netmask != NULL) { 262 /* check the duped-key chain for different masks */ 263 while (x != NULL && x->rn_mask != netmask) 264 x = x->rn_dupedkey; 265 } 266 267 return x; 268 } 269 270 /* 271 * Check whether the key <key> matches the (key, mask) of the given 272 * radix node <leaf>. The <skip> parameter gives the number of bytes 273 * to skip for the keys and mask. 274 */ 275 static bool 276 rn_satisfies_leaf(const void *key, struct radix_node *leaf, int skip) 277 { 278 const u_char *cp, *cp2, *cp3, *cplim; 279 int length; 280 281 cp = key; 282 cp2 = leaf->rn_key; 283 cp3 = leaf->rn_mask; 284 285 length = MIN(clen(cp), clen(cp2)); 286 if (cp3 == NULL) 287 cp3 = rn_ones; 288 else 289 length = MIN(length, clen(cp3)); 290 291 cplim = cp + length; 292 cp2 += skip; 293 cp3 += skip; 294 for (cp += skip; cp < cplim; cp++, cp2++, cp3++) { 295 if ((*cp ^ *cp2) & *cp3) 296 return false; 297 } 298 299 return true; 300 } 301 302 /* 303 * Search for the longest-prefix match of the key <key>. 304 */ 305 struct radix_node * 306 rn_match(const void *key, struct radix_node_head *head) 307 { 308 struct radix_node *top, *t, *saved_t, *x; 309 const u_char *cp, *cp2, *cplim; 310 int klen, off, matched_off, test, b, rn_bit; 311 312 top = head->rnh_treetop; 313 off = top->rn_offset; 314 315 t = rn_search(key, top); 316 /* 317 * See if we match exactly as a host destination 318 * or at least learn how many bits match, for normal mask finesse. 319 * 320 * It doesn't hurt us to limit how many bytes to check 321 * to the length of the mask, since if it matches we had a genuine 322 * match and the leaf we have is the most specific one anyway; 323 * if it didn't match with a shorter length it would fail 324 * with a long one. This wins big for class B&C netmasks which 325 * are probably the most common case... 326 */ 327 if (t->rn_mask != NULL) 328 klen = clen(t->rn_mask); 329 else 330 klen = clen(key); 331 cp = (const u_char *)key + off; 332 cplim = (const u_char *)key + klen; 333 cp2 = t->rn_key + off; 334 for (; cp < cplim; cp++, cp2++) 335 if (*cp != *cp2) 336 goto on1; 337 338 /* 339 * This extra grot is in case we are explicitly asked 340 * to look up the default. Ugh! 341 * 342 * Never return the root node itself, it seems to cause a 343 * lot of confusion. 344 */ 345 if (t->rn_flags & RNF_ROOT) 346 t = t->rn_dupedkey; 347 return t; 348 349 on1: 350 test = (*cp ^ *cp2) & 0xff; /* find first bit that differs */ 351 for (b = 7; (test >>= 1) > 0;) 352 b--; 353 matched_off = cp - (const u_char *)key; 354 b += matched_off << 3; 355 rn_bit = -1 - b; 356 357 /* 358 * If there is a host route in a duped-key chain, it will be first. 359 */ 360 saved_t = t; 361 if (t->rn_mask == NULL) 362 t = t->rn_dupedkey; 363 for (; t != NULL; t = t->rn_dupedkey) { 364 /* 365 * Even if we don't match exactly as a host, 366 * we may match if the leaf we wound up at is 367 * a route to a net. 368 */ 369 if (t->rn_flags & RNF_NORMAL) { 370 if (rn_bit <= t->rn_bit) 371 return t; 372 } else if (rn_satisfies_leaf(key, t, matched_off)) 373 return t; 374 } 375 t = saved_t; 376 377 /* start searching up the tree */ 378 do { 379 struct radix_mask *m; 380 381 t = t->rn_parent; 382 /* 383 * If non-contiguous masks ever become important 384 * we can restore the masking and open coding of 385 * the search and satisfaction test and put the 386 * calculation of "off" back before the "do". 387 */ 388 m = t->rn_mklist; 389 while (m != NULL) { 390 if (m->rm_flags & RNF_NORMAL) { 391 if (rn_bit <= m->rm_bit) 392 return (m->rm_leaf); 393 } else { 394 off = MIN(t->rn_offset, matched_off); 395 x = rn_search_m(key, t, m->rm_mask); 396 while (x != NULL && x->rn_mask != m->rm_mask) 397 x = x->rn_dupedkey; 398 if (x != NULL && rn_satisfies_leaf(key, x, off)) 399 return x; 400 } 401 m = m->rm_next; 402 } 403 } while (t != top); 404 405 return NULL; 406 } 407 408 /* 409 * Whenever to add a new leaf to the tree, another parant node is needed. 410 * So they are allocated as an array of two elements: the first element is 411 * the leaf, the second one is the parant node. 412 * 413 * This function initializes the given pair of nodes <nodes>, so that the 414 * leaf is the left child of the parent node. 415 */ 416 static struct radix_node * 417 rn_newpair(const void *key, int bit, struct radix_node nodes[2]) 418 { 419 struct radix_node *left, *parent; 420 421 left = &nodes[0]; 422 parent = &nodes[1]; 423 424 parent->rn_bit = (short)bit; 425 parent->rn_bmask = 0x80 >> (bit & 0x7); 426 parent->rn_offset = bit >> 3; 427 parent->rn_left = left; 428 parent->rn_flags = RNF_ACTIVE; 429 parent->rn_mklist = NULL; 430 431 left->rn_bit = -1; 432 left->rn_key = key; 433 left->rn_parent = parent; 434 left->rn_flags = parent->rn_flags; 435 left->rn_mklist = NULL; 436 437 #ifdef RN_DEBUG 438 left->rn_info = rn_nodenum++; 439 parent->rn_info = rn_nodenum++; 440 left->rn_twin = parent; 441 left->rn_ybro = rn_clist; 442 rn_clist = left; 443 #endif 444 445 return parent; 446 } 447 448 /* 449 * Insert the key <key> to the radix tree <head>. 450 * 451 * If the key already exists, then set <dupentry> to 'true' and return the 452 * node of the existing duped key. Otherwise, set <dupentry> to 'false', 453 * insert the key to the tree by making use of the given nodes <nodes>, and 454 * return the node of the inserted key (i.e., &nodes[0]). 455 */ 456 static struct radix_node * 457 rn_insert(const void *key, struct radix_node_head *head, bool *dupentry, 458 struct radix_node nodes[2]) 459 { 460 struct radix_node *top, *t, *tt; 461 const u_char *cp; 462 int head_off, klen; 463 unsigned int b; 464 465 top = head->rnh_treetop; 466 head_off = top->rn_offset; 467 klen = clen(key); 468 cp = (const u_char *)key + head_off; 469 t = rn_search(key, top); 470 471 /* 472 * Find first bit at which the key and t->rn_key differ 473 */ 474 { 475 const u_char *cp2 = t->rn_key + head_off; 476 const u_char *cplim = (const u_char *)key + klen; 477 int cmp_res; 478 479 while (cp < cplim) { 480 if (*cp2++ != *cp++) 481 goto on1; 482 } 483 484 *dupentry = true; 485 return t; 486 487 on1: 488 *dupentry = false; 489 cmp_res = (cp[-1] ^ cp2[-1]) & 0xff; 490 for (b = (cp - (const u_char *)key) << 3; cmp_res; b--) 491 cmp_res >>= 1; 492 } 493 { 494 struct radix_node *p, *x = top; 495 496 cp = key; 497 do { 498 p = x; 499 if (cp[x->rn_offset] & x->rn_bmask) 500 x = x->rn_right; 501 else 502 x = x->rn_left; 503 } while (b > (unsigned) x->rn_bit); 504 /* x->rn_bit < b && x->rn_bit >= 0 */ 505 #ifdef RN_DEBUG 506 if (rn_debug) { 507 log(LOG_DEBUG, "%s: Going In:\n", __func__); 508 traverse(p); 509 } 510 #endif 511 t = rn_newpair(key, b, nodes); 512 tt = t->rn_left; 513 if ((cp[p->rn_offset] & p->rn_bmask) == 0) 514 p->rn_left = t; 515 else 516 p->rn_right = t; 517 x->rn_parent = t; 518 t->rn_parent = p; /* frees x, p as temp vars below */ 519 if ((cp[t->rn_offset] & t->rn_bmask) == 0) { 520 t->rn_right = x; 521 } else { 522 t->rn_right = tt; 523 t->rn_left = x; 524 } 525 #ifdef RN_DEBUG 526 if (rn_debug) { 527 log(LOG_DEBUG, "%s: Coming Out:\n", __func__); 528 traverse(p); 529 } 530 #endif 531 } 532 return (tt); 533 } 534 535 /* 536 * Add the netmask <mask> to the mask tree <maskhead>. If <search> is 537 * 'true', then only check the existence of the given mask but don't 538 * actually add it. 539 * 540 * The <skip> parameter specifies the number of bytes to skip in <mask> 541 * to obtain the mask data. (NOTE: The length of a mask key doesn't 542 * count the trailing zero bytes.) 543 * 544 * Return a pointer to the mask node on success; otherwise NULL on error. 545 */ 546 struct radix_node * 547 rn_addmask(const void *_mask, bool search, int skip, 548 struct radix_node_head *maskhead) 549 { 550 struct radix_node *x, *saved_x; 551 const u_char *mask, *cp, *cplim; 552 u_char *p, addmask_key[RN_MAXKEYLEN]; 553 int b, mlen; 554 bool maskduplicated, isnormal; 555 556 mask = _mask; 557 if ((mlen = clen(mask)) > RN_MAXKEYLEN) 558 mlen = RN_MAXKEYLEN; 559 if (skip == 0) 560 skip = 1; 561 if (mlen <= skip) 562 return (maskhead->rnh_nodes); /* all-zero key */ 563 564 bzero(addmask_key, sizeof(addmask_key)); 565 if (skip > 1) 566 bcopy(rn_ones + 1, addmask_key + 1, skip - 1); 567 bcopy(mask + skip, addmask_key + skip, mlen - skip); 568 /* Trim trailing zeroes. */ 569 for (cp = addmask_key + mlen; (cp > addmask_key) && cp[-1] == 0;) 570 cp--; 571 mlen = cp - addmask_key; 572 if (mlen <= skip) 573 return (maskhead->rnh_nodes); /* all-zero key */ 574 575 *addmask_key = mlen; 576 x = rn_search(addmask_key, maskhead->rnh_treetop); 577 if (x->rn_key == NULL) { 578 kprintf("WARNING: radix_node->rn_key is NULL rn=%p\n", x); 579 print_backtrace(-1); 580 x = NULL; 581 } else if (bcmp(addmask_key, x->rn_key, mlen) != 0) { 582 x = NULL; 583 } 584 if (x != NULL || search) 585 return (x); 586 587 R_Malloc(x, struct radix_node *, RN_MAXKEYLEN + 2 * (sizeof *x)); 588 if ((saved_x = x) == NULL) 589 return (NULL); 590 591 bzero(x, RN_MAXKEYLEN + 2 * (sizeof *x)); 592 mask = p = (u_char *)(x + 2); 593 bcopy(addmask_key, p, mlen); 594 x = rn_insert(mask, maskhead, &maskduplicated, x); 595 if (maskduplicated) { 596 log(LOG_ERR, "%s: mask impossibly already in tree", __func__); 597 R_Free(saved_x); 598 return (x); 599 } 600 601 /* 602 * Calculate index of mask, and check for normalcy. 603 */ 604 b = 0; 605 isnormal = true; 606 cplim = mask + mlen; 607 for (cp = mask + skip; cp < cplim && clen(cp) == 0xff;) 608 cp++; 609 if (cp != cplim) { 610 static const u_char normal_chars[] = { 611 0, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff 612 }; 613 u_char j; 614 615 for (j = 0x80; (j & *cp) != 0; j >>= 1) 616 b++; 617 if (*cp != normal_chars[b] || cp != (cplim - 1)) 618 isnormal = false; 619 } 620 b += (cp - mask) << 3; 621 x->rn_bit = -1 - b; 622 if (isnormal) 623 x->rn_flags |= RNF_NORMAL; 624 return (x); 625 } 626 627 /* 628 * Compare the two netmasks and return true if netmask <m> is more 629 * specific than netmask <n>. 630 * 631 * NOTE: arbitrary ordering for non-contiguous masks. 632 */ 633 static bool 634 rn_lexobetter(const void *_m, const void *_n) 635 { 636 const u_char *m, *n, *lim; 637 638 m = _m; 639 n = _n; 640 641 if (clen(m) > clen(n)) { 642 /* not really, but need to check longer one first */ 643 return (true); 644 } 645 646 if (clen(m) == clen(n)) { 647 for (lim = m + clen(m); m < lim; m++, n++) { 648 if (*m > *n) 649 return (true); 650 } 651 } 652 653 return (false); 654 } 655 656 static struct radix_mask * 657 rn_new_radix_mask(struct radix_node *tt, struct radix_mask *nextmask) 658 { 659 struct radix_mask *m; 660 661 m = MKGet(&rn_mkfreelist[mycpuid]); 662 if (m == NULL) { 663 log(LOG_ERR, "Mask for route not entered\n"); 664 return (NULL); 665 } 666 667 bzero(m, sizeof *m); 668 m->rm_bit = tt->rn_bit; 669 m->rm_flags = tt->rn_flags; 670 if (tt->rn_flags & RNF_NORMAL) 671 m->rm_leaf = tt; 672 else 673 m->rm_mask = tt->rn_mask; 674 m->rm_next = nextmask; 675 tt->rn_mklist = m; 676 677 return m; 678 } 679 680 /* 681 * Add the route (key, mask) to the radix tree <head> using the given 682 * nodes <nodes>. The netmask <mask> is NULL for a host route. 683 * 684 * Return the node of the inserted route on success. Otherwise, return 685 * NULL if the following happened: 686 * - failed to add the netmask to the mask tree (e.g., out of memory) 687 * - the identical route already exists 688 * 689 * NOTE: The address <key> and netmask <mask> must be of the same data 690 * structure (e.g., both 'struct sockaddr_in') so that they have the 691 * same skip bytes and data length. 692 */ 693 struct radix_node * 694 rn_addroute(const void *key, const void *mask, 695 struct radix_node_head *head, struct radix_node nodes[2]) 696 { 697 struct radix_node *top, *t, *x, *tt, *saved_tt; 698 struct radix_mask *m, **mp; 699 short b, b_leaf; 700 bool keyduplicated; 701 const void *mmask; 702 703 top = head->rnh_treetop; 704 x = NULL; 705 b = b_leaf = 0; 706 707 /* 708 * In dealing with non-contiguous masks, there may be 709 * many different routes which have the same mask. 710 * We will find it useful to have a unique pointer to 711 * the mask to speed avoiding duplicate references at 712 * nodes and possibly save time in calculating indices. 713 */ 714 if (mask != NULL) { 715 if ((x = rn_addmask(mask, false, top->rn_offset, 716 head->rnh_maskhead)) == NULL) 717 return (NULL); 718 b_leaf = x->rn_bit; 719 b = -1 - x->rn_bit; 720 mask = x->rn_key; 721 } 722 /* 723 * Deal with duplicated keys: attach node to previous instance 724 */ 725 saved_tt = tt = rn_insert(key, head, &keyduplicated, nodes); 726 if (keyduplicated) { 727 for (t = tt; tt; t = tt, tt = tt->rn_dupedkey) { 728 if (tt->rn_mask == mask) 729 return (NULL); 730 if (mask == NULL || 731 (tt->rn_mask && 732 ((b_leaf < tt->rn_bit) /* index(mask) > node */ 733 || rn_refines(mask, tt->rn_mask) 734 || rn_lexobetter(mask, tt->rn_mask)))) 735 break; 736 } 737 /* 738 * If the mask is not duplicated, we wouldn't 739 * find it among possible duplicate key entries 740 * anyway, so the above test doesn't hurt. 741 * 742 * We sort the masks for a duplicated key the same way as 743 * in a masklist -- most specific to least specific. 744 * This may require the unfortunate nuisance of relocating 745 * the head of the list. 746 */ 747 if (tt == saved_tt) { 748 struct radix_node *xx = x; 749 /* link in at head of list */ 750 (tt = nodes)->rn_dupedkey = t; 751 tt->rn_flags = t->rn_flags; 752 tt->rn_parent = x = t->rn_parent; 753 t->rn_parent = tt; /* parent */ 754 if (x->rn_left == t) 755 x->rn_left = tt; 756 else 757 x->rn_right = tt; 758 saved_tt = tt; x = xx; 759 } else { 760 (tt = nodes)->rn_dupedkey = t->rn_dupedkey; 761 t->rn_dupedkey = tt; 762 tt->rn_parent = t; /* parent */ 763 if (tt->rn_dupedkey != NULL) /* parent */ 764 tt->rn_dupedkey->rn_parent = tt; /* parent */ 765 } 766 #ifdef RN_DEBUG 767 t=tt+1; tt->rn_info = rn_nodenum++; t->rn_info = rn_nodenum++; 768 tt->rn_twin = t; tt->rn_ybro = rn_clist; rn_clist = tt; 769 #endif 770 tt->rn_key = key; 771 tt->rn_bit = -1; 772 tt->rn_flags = RNF_ACTIVE; 773 } 774 /* 775 * Put mask in tree. 776 */ 777 if (mask != NULL) { 778 tt->rn_mask = mask; 779 tt->rn_bit = x->rn_bit; 780 tt->rn_flags |= x->rn_flags & RNF_NORMAL; 781 } 782 t = saved_tt->rn_parent; 783 if (keyduplicated) 784 goto on2; 785 b_leaf = -1 - t->rn_bit; 786 if (t->rn_right == saved_tt) 787 x = t->rn_left; 788 else 789 x = t->rn_right; 790 /* Promote general routes from below */ 791 if (x->rn_bit < 0) { 792 mp = &t->rn_mklist; 793 while (x != NULL) { 794 if (x->rn_mask != NULL && 795 x->rn_bit >= b_leaf && 796 x->rn_mklist == NULL) { 797 *mp = m = rn_new_radix_mask(x, NULL); 798 if (m != NULL) 799 mp = &m->rm_next; 800 } 801 x = x->rn_dupedkey; 802 } 803 } else if (x->rn_mklist != NULL) { 804 /* 805 * Skip over masks whose index is > that of new node 806 */ 807 for (mp = &x->rn_mklist; (m = *mp) != NULL; mp = &m->rm_next) { 808 if (m->rm_bit >= b_leaf) 809 break; 810 } 811 t->rn_mklist = m; 812 *mp = NULL; 813 } 814 on2: 815 /* Add new route to highest possible ancestor's list */ 816 if (mask == NULL || b > t->rn_bit) 817 return tt; /* can't lift at all */ 818 b_leaf = tt->rn_bit; 819 do { 820 x = t; 821 t = t->rn_parent; 822 } while (b <= t->rn_bit && x != top); 823 /* 824 * Search through routes associated with node to 825 * insert new route according to index. 826 * Need same criteria as when sorting dupedkeys to avoid 827 * double loop on deletion. 828 */ 829 for (mp = &x->rn_mklist; (m = *mp) != NULL; mp = &m->rm_next) { 830 if (m->rm_bit < b_leaf) 831 continue; 832 if (m->rm_bit > b_leaf) 833 break; 834 if (m->rm_flags & RNF_NORMAL) { 835 mmask = m->rm_leaf->rn_mask; 836 if (tt->rn_flags & RNF_NORMAL) { 837 log(LOG_ERR, 838 "Non-unique normal route, mask not entered\n"); 839 return tt; 840 } 841 } else 842 mmask = m->rm_mask; 843 if (mmask == mask) { 844 m->rm_refs++; 845 tt->rn_mklist = m; 846 return tt; 847 } 848 if (rn_refines(mask, mmask) || rn_lexobetter(mask, mmask)) 849 break; 850 } 851 *mp = rn_new_radix_mask(tt, *mp); 852 return tt; 853 } 854 855 struct radix_node * 856 rn_delete(const void *key, const void *mask, struct radix_node_head *head) 857 { 858 struct radix_node *top, *t, *p, *x, *tt, *saved_tt, *dupedkey; 859 struct radix_mask *m, *saved_m, **mp; 860 int b, head_off, klen, cpu; 861 862 cpu = mycpuid; 863 x = head->rnh_treetop; 864 tt = rn_search(key, x); 865 head_off = x->rn_offset; 866 klen = clen(key); 867 saved_tt = tt; 868 top = x; 869 if (tt == NULL || 870 bcmp((const u_char *)key + head_off, tt->rn_key + head_off, 871 klen - head_off) != 0) 872 return (NULL); 873 874 /* 875 * Delete our route from mask lists. 876 */ 877 if (mask != NULL) { 878 if ((x = rn_addmask(mask, true, head_off, 879 head->rnh_maskhead)) == NULL) 880 return (NULL); 881 mask = x->rn_key; 882 while (tt->rn_mask != mask) { 883 if ((tt = tt->rn_dupedkey) == NULL) 884 return (NULL); 885 } 886 } 887 if (tt->rn_mask == NULL || (saved_m = m = tt->rn_mklist) == NULL) 888 goto on1; 889 if (tt->rn_flags & RNF_NORMAL) { 890 if (m->rm_leaf != tt || m->rm_refs > 0) { 891 log(LOG_ERR, "rn_delete: inconsistent annotation\n"); 892 return (NULL); /* dangling ref could cause disaster */ 893 } 894 } else { 895 if (m->rm_mask != tt->rn_mask) { 896 log(LOG_ERR, "rn_delete: inconsistent annotation\n"); 897 goto on1; 898 } 899 if (--m->rm_refs >= 0) 900 goto on1; 901 } 902 b = -1 - tt->rn_bit; 903 t = saved_tt->rn_parent; 904 if (b > t->rn_bit) 905 goto on1; /* Wasn't lifted at all */ 906 907 do { 908 x = t; 909 t = t->rn_parent; 910 } while (b <= t->rn_bit && x != top); 911 for (mp = &x->rn_mklist; (m = *mp) != NULL; mp = &m->rm_next) 912 if (m == saved_m) { 913 *mp = m->rm_next; 914 MKFree(&rn_mkfreelist[cpu], m); 915 break; 916 } 917 if (m == NULL) { 918 log(LOG_ERR, "rn_delete: couldn't find our annotation\n"); 919 if (tt->rn_flags & RNF_NORMAL) 920 return (NULL); /* Dangling ref to us */ 921 } 922 923 on1: 924 /* 925 * Eliminate us from tree 926 */ 927 if (tt->rn_flags & RNF_ROOT) 928 return (NULL); 929 930 #ifdef RN_DEBUG 931 /* Get us out of the creation list */ 932 for (t = rn_clist; t != NULL && t->rn_ybro != tt; t = t->rn_ybro) 933 ; 934 if (t != NULL) 935 t->rn_ybro = tt->rn_ybro; 936 #endif 937 938 t = tt->rn_parent; 939 dupedkey = saved_tt->rn_dupedkey; 940 if (dupedkey != NULL) { 941 /* 942 * at this point, tt is the deletion target and saved_tt 943 * is the head of the dupekey chain 944 */ 945 if (tt == saved_tt) { 946 /* remove from head of chain */ 947 x = dupedkey; 948 x->rn_parent = t; 949 if (t->rn_left == tt) 950 t->rn_left = x; 951 else 952 t->rn_right = x; 953 } else { 954 /* find node in front of tt on the chain */ 955 for (x = p = saved_tt; p != NULL && p->rn_dupedkey != tt;) 956 p = p->rn_dupedkey; 957 if (p) { 958 p->rn_dupedkey = tt->rn_dupedkey; 959 if (tt->rn_dupedkey) /* parent */ 960 tt->rn_dupedkey->rn_parent = p; 961 /* parent */ 962 } else { 963 log(LOG_ERR, "rn_delete: couldn't find us\n"); 964 } 965 } 966 t = tt + 1; 967 if (t->rn_flags & RNF_ACTIVE) { 968 #ifndef RN_DEBUG 969 *++x = *t; 970 p = t->rn_parent; 971 #else 972 b = t->rn_info; 973 *++x = *t; 974 t->rn_info = b; 975 p = t->rn_parent; 976 #endif 977 if (p->rn_left == t) 978 p->rn_left = x; 979 else 980 p->rn_right = x; 981 x->rn_left->rn_parent = x; 982 x->rn_right->rn_parent = x; 983 } 984 goto out; 985 } 986 if (t->rn_left == tt) 987 x = t->rn_right; 988 else 989 x = t->rn_left; 990 p = t->rn_parent; 991 if (p->rn_right == t) 992 p->rn_right = x; 993 else 994 p->rn_left = x; 995 x->rn_parent = p; 996 /* 997 * Demote routes attached to us. 998 */ 999 if (t->rn_mklist != NULL) { 1000 if (x->rn_bit >= 0) { 1001 for (mp = &x->rn_mklist; (m = *mp) != NULL;) 1002 mp = &m->rm_next; 1003 *mp = t->rn_mklist; 1004 } else { 1005 /* 1006 * If there are any (key, mask) pairs in a sibling 1007 * duped-key chain, some subset will appear sorted 1008 * in the same order attached to our mklist. 1009 */ 1010 for (m = t->rn_mklist; m && x; x = x->rn_dupedkey) 1011 if (m == x->rn_mklist) { 1012 struct radix_mask *mm = m->rm_next; 1013 1014 x->rn_mklist = NULL; 1015 if (--(m->rm_refs) < 0) 1016 MKFree(&rn_mkfreelist[cpu], m); 1017 m = mm; 1018 } 1019 if (m) { 1020 log(LOG_ERR, 1021 "rn_delete: Orphaned Mask %p at %p\n", 1022 (void *)m, (void *)x); 1023 } 1024 } 1025 } 1026 /* 1027 * We may be holding an active internal node in the tree. 1028 */ 1029 x = tt + 1; 1030 if (t != x) { 1031 #ifndef RN_DEBUG 1032 *t = *x; 1033 #else 1034 b = t->rn_info; 1035 *t = *x; 1036 t->rn_info = b; 1037 #endif 1038 t->rn_left->rn_parent = t; 1039 t->rn_right->rn_parent = t; 1040 p = x->rn_parent; 1041 if (p->rn_left == x) 1042 p->rn_left = t; 1043 else 1044 p->rn_right = t; 1045 } 1046 1047 out: 1048 tt->rn_flags &= ~RNF_ACTIVE; 1049 tt[1].rn_flags &= ~RNF_ACTIVE; 1050 return (tt); 1051 } 1052 1053 /* 1054 * This is the same as rn_walktree() except for the parameters and the 1055 * exit. 1056 */ 1057 static int 1058 rn_walktree_from(struct radix_node_head *h, const void *_addr, 1059 const void *_mask, walktree_f_t *f, void *w) 1060 { 1061 struct radix_node *rn, *base, *next, *last; 1062 const u_char *addr, *mask; 1063 bool stopping; 1064 int lastb, error; 1065 1066 addr = _addr; 1067 mask = _mask; 1068 last = NULL; 1069 stopping = false; 1070 /* 1071 * rn_search_m() is sort-of-open-coded here. We cannot use that 1072 * function because we need to keep track of the last node seen. 1073 */ 1074 /* kprintf("about to search\n"); */ 1075 for (rn = h->rnh_treetop; rn->rn_bit >= 0; ) { 1076 last = rn; 1077 /* kprintf("rn_bit %d, rn_bmask %x, mask[rn_offset] %x\n", 1078 rn->rn_bit, rn->rn_bmask, mask[rn->rn_offset]); */ 1079 if (!(rn->rn_bmask & mask[rn->rn_offset])) { 1080 break; 1081 } 1082 if (rn->rn_bmask & addr[rn->rn_offset]) { 1083 rn = rn->rn_right; 1084 } else { 1085 rn = rn->rn_left; 1086 } 1087 } 1088 /* kprintf("done searching\n"); */ 1089 1090 /* 1091 * Two cases: either we stepped off the end of our mask, 1092 * in which case last == rn, or we reached a leaf, in which 1093 * case we want to start from the last node we looked at. 1094 * Either way, last is the node we want to start from. 1095 */ 1096 rn = last; 1097 lastb = rn->rn_bit; 1098 1099 /* kprintf("rn %p, lastb %d\n", rn, lastb);*/ 1100 1101 /* 1102 * This gets complicated because we may delete the node 1103 * while applying the function f to it, so we need to calculate 1104 * the successor node in advance. 1105 */ 1106 while (rn->rn_bit >= 0) 1107 rn = rn->rn_left; 1108 1109 while (!stopping) { 1110 /* kprintf("node %p (%d)\n", rn, rn->rn_bit); */ 1111 base = rn; 1112 /* If at right child go back up, otherwise, go right */ 1113 while (rn->rn_parent->rn_right == rn && 1114 !(rn->rn_flags & RNF_ROOT)) { 1115 rn = rn->rn_parent; 1116 1117 /* if went up beyond last, stop */ 1118 if (rn->rn_bit < lastb) { 1119 stopping = true; 1120 /* kprintf("up too far\n"); */ 1121 } 1122 } 1123 1124 /* Find the next *leaf* since next node might vanish, too */ 1125 for (rn = rn->rn_parent->rn_right; rn->rn_bit >= 0;) 1126 rn = rn->rn_left; 1127 next = rn; 1128 /* Process leaves */ 1129 while ((rn = base) != NULL) { 1130 base = rn->rn_dupedkey; 1131 /* kprintf("leaf %p\n", rn); */ 1132 if (!(rn->rn_flags & RNF_ROOT) && (error = (*f)(rn, w))) 1133 return (error); 1134 } 1135 rn = next; 1136 1137 if (rn->rn_flags & RNF_ROOT) { 1138 /* kprintf("root, stopping"); */ 1139 stopping = true; 1140 } 1141 } 1142 1143 return 0; 1144 } 1145 1146 static int 1147 rn_walktree_at(struct radix_node_head *h, const void *addr, const void *mask, 1148 walktree_f_t *f, void *w) 1149 { 1150 struct radix_node *rn, *base, *next; 1151 int error; 1152 1153 rn = h->rnh_treetop; 1154 1155 /* 1156 * This gets complicated because we may delete the node 1157 * while applying the function f to it, so we need to calculate 1158 * the successor node in advance. 1159 */ 1160 if (addr == NULL) { 1161 /* First time through node, go left */ 1162 while (rn->rn_bit >= 0) 1163 rn = rn->rn_left; 1164 } else { 1165 if (mask != NULL) 1166 rn = rn_search_m(addr, rn, mask); 1167 else 1168 rn = rn_search(addr, rn); 1169 } 1170 for (;;) { 1171 base = rn; 1172 /* If at right child go back up, otherwise, go right */ 1173 while (rn->rn_parent->rn_right == rn && 1174 !(rn->rn_flags & RNF_ROOT)) 1175 rn = rn->rn_parent; 1176 /* Find the next *leaf* since next node might vanish, too */ 1177 for (rn = rn->rn_parent->rn_right; rn->rn_bit >= 0;) 1178 rn = rn->rn_left; 1179 next = rn; 1180 /* Process leaves */ 1181 while ((rn = base)) { 1182 base = rn->rn_dupedkey; 1183 if (!(rn->rn_flags & RNF_ROOT) && (error = (*f)(rn, w))) 1184 return (error); 1185 } 1186 rn = next; 1187 if (rn->rn_flags & RNF_ROOT) 1188 return (0); 1189 } 1190 /* NOTREACHED */ 1191 } 1192 1193 static int 1194 rn_walktree(struct radix_node_head *h, walktree_f_t *f, void *w) 1195 { 1196 return rn_walktree_at(h, NULL, NULL, f, w); 1197 } 1198 1199 /* 1200 * Allocate and initialize an empty radix tree at <head>. 1201 * 1202 * The created radix_node_head embeds 3 nodes in the order of 1203 * {left,root,right}. These nodes are flagged with RNF_ROOT and thus 1204 * cannot be freed. The left and right leaves are initialized with 1205 * all-zero and all-one keys, respectively, and with the significant 1206 * byte starting at <off_bytes>. 1207 * 1208 * The <maskhead> refers to another radix tree for storing the network 1209 * masks (so aka mask tree). It is also created by this function with 1210 * <maskhead>=NULL; the <off_bytes> parameter is ignored and auto set 1211 * to be zero (0). The reason of requiring <off_bytes> be zero is that 1212 * a mask tree can be shared with multiple radix trees of different 1213 * address families that have different offset bytes; e.g., 1214 * offsetof(struct sockaddr_in, sin_addr) != 1215 * offsetof(struct sockaddr_in6, sin6_addr). 1216 * 1217 * Return 1 on success, 0 on error. 1218 */ 1219 int 1220 rn_inithead(struct radix_node_head **head, struct radix_node_head *maskhead, 1221 int off_bytes) 1222 { 1223 struct radix_node_head *rnh; 1224 struct radix_node *root, *left, *right; 1225 1226 if (*head != NULL) /* already initialized */ 1227 return (1); 1228 1229 R_Malloc(rnh, struct radix_node_head *, sizeof *rnh); 1230 if (rnh == NULL) 1231 return (0); 1232 1233 if (maskhead == NULL) /* mask tree initialization */ 1234 off_bytes = 0; 1235 if (off_bytes >= RN_MAXKEYLEN) /* prevent possible misuse */ 1236 panic("%s: invalid off_bytes=%d", __func__, off_bytes); 1237 1238 bzero(rnh, sizeof *rnh); 1239 *head = rnh; 1240 1241 root = rn_newpair(rn_zeros, off_bytes * NBBY, rnh->rnh_nodes); 1242 right = &rnh->rnh_nodes[2]; 1243 root->rn_parent = root; 1244 root->rn_flags = RNF_ROOT | RNF_ACTIVE; 1245 root->rn_right = right; 1246 1247 left = root->rn_left; 1248 left->rn_bit = -1 - off_bytes * NBBY; 1249 left->rn_flags = root->rn_flags; 1250 1251 *right = *left; 1252 right->rn_key = rn_ones; 1253 1254 rnh->rnh_treetop = root; 1255 rnh->rnh_maskhead = maskhead; 1256 1257 rnh->rnh_addaddr = rn_addroute; 1258 rnh->rnh_deladdr = rn_delete; 1259 rnh->rnh_matchaddr = rn_match; 1260 rnh->rnh_lookup = rn_lookup; 1261 rnh->rnh_walktree = rn_walktree; 1262 rnh->rnh_walktree_from = rn_walktree_from; 1263 rnh->rnh_walktree_at = rn_walktree_at; 1264 1265 return (1); 1266 } 1267 1268 /* 1269 * Callback function to be used in rn_flush() to empty a mask tree. 1270 */ 1271 void 1272 rn_freemask(struct radix_node *rn) 1273 { 1274 if (rn->rn_mask != NULL) 1275 panic("%s: not a mask node", __func__); 1276 1277 R_Free(rn); 1278 } 1279 1280 struct rn_flush_ctx { 1281 struct radix_node_head *head; 1282 freenode_f_t *f; 1283 }; 1284 1285 static int 1286 rn_flush_walker(struct radix_node *rn, void *arg) 1287 { 1288 struct rn_flush_ctx *ctx = arg; 1289 struct radix_node *node; 1290 1291 node = ctx->head->rnh_deladdr(rn->rn_key, rn->rn_mask, ctx->head); 1292 if (node != rn) { 1293 panic("%s: deleted wrong node: %p, want: %p", 1294 __func__, node, rn); 1295 } 1296 if (ctx->f) 1297 ctx->f(rn); 1298 1299 return 0; 1300 } 1301 1302 #define IS_EMPTY(head) \ 1303 (((head)->rnh_treetop == &(head)->rnh_nodes[1]) && \ 1304 ((head)->rnh_treetop->rn_left == &(head)->rnh_nodes[0]) && \ 1305 ((head)->rnh_treetop->rn_right == &(head)->rnh_nodes[2])) 1306 1307 /* 1308 * Flush all nodes in the radix tree at <head>. 1309 * If the callback function <f> is specified, it is called against every 1310 * flushed node to allow the caller to do extra cleanups. 1311 */ 1312 void 1313 rn_flush(struct radix_node_head *head, freenode_f_t *f) 1314 { 1315 struct rn_flush_ctx ctx; 1316 1317 if (f == rn_freemask && head->rnh_maskhead != NULL) 1318 panic("%s: rn_freemask() used with non-mask tree", __func__); 1319 1320 ctx.head = head; 1321 ctx.f = f; 1322 head->rnh_walktree(head, rn_flush_walker, &ctx); 1323 1324 if (!IS_EMPTY(head)) 1325 panic("%s: failed to flush all nodes", __func__); 1326 } 1327 1328 /* 1329 * Free an empty radix tree at <head>. 1330 * 1331 * NOTE: The radix tree must be first emptied by rn_flush(). 1332 */ 1333 void 1334 rn_freehead(struct radix_node_head *head) 1335 { 1336 if (!IS_EMPTY(head)) 1337 panic("%s: radix tree not empty", __func__); 1338 1339 R_Free(head); 1340 } 1341 1342 #ifdef _KERNEL 1343 1344 static void 1345 rn_init_handler(netmsg_t msg) 1346 { 1347 int cpu = mycpuid; 1348 1349 ASSERT_NETISR_NCPUS(cpu); 1350 if (rn_inithead(&mask_rnheads[cpu], NULL, 0) == 0) 1351 panic("rn_init 1"); 1352 1353 netisr_forwardmsg(&msg->base, cpu + 1); 1354 } 1355 1356 void 1357 rn_init(void) 1358 { 1359 struct netmsg_base msg; 1360 struct domain *dom; 1361 1362 SLIST_FOREACH(dom, &domains, dom_next) { 1363 if (dom->dom_maxrtkey > RN_MAXKEYLEN) { 1364 panic("domain %s maxkey too big %d/%d", 1365 dom->dom_name, dom->dom_maxrtkey, RN_MAXKEYLEN); 1366 } 1367 } 1368 1369 netmsg_init(&msg, NULL, &curthread->td_msgport, 0, rn_init_handler); 1370 netisr_domsg_global(&msg); 1371 } 1372 1373 struct radix_node_head * 1374 rn_cpumaskhead(int cpu) 1375 { 1376 ASSERT_NETISR_NCPUS(cpu); 1377 KKASSERT(mask_rnheads[cpu] != NULL); 1378 return mask_rnheads[cpu]; 1379 } 1380 1381 #else /* !_KERNEL */ 1382 1383 void 1384 rn_init(void) 1385 { 1386 if (rn_inithead(&mask_rnheads[0], NULL, 0) == 0) 1387 panic("rn_init 2"); 1388 } 1389 1390 struct radix_node_head * 1391 rn_cpumaskhead(int cpu __unused) 1392 { 1393 return mask_rnheads[0]; 1394 } 1395 1396 #endif /* _KERNEL */ 1397