/*	$OpenBSD: rde_update.c,v 1.82 2014/12/18 19:28:44 tedu Exp $ */

/*
 * Copyright (c) 2004 Claudio Jeker <claudio@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
#include <sys/types.h>
#include <sys/queue.h>

#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <siphash.h>

#include "bgpd.h"
#include "rde.h"

in_addr_t	up_get_nexthop(struct rde_peer *, struct rde_aspath *);
int		up_generate_mp_reach(struct rde_peer *, struct update_attr *,
		    struct rde_aspath *, u_int8_t);
int		up_generate_attr(struct rde_peer *, struct update_attr *,
		    struct rde_aspath *, u_int8_t);

/* update stuff. */
struct update_prefix {
	TAILQ_ENTRY(update_prefix)	 prefix_l;
	RB_ENTRY(update_prefix)		 entry;
	struct uplist_prefix		*prefix_h;
	struct bgpd_addr		 prefix;
	int				 prefixlen;
};

struct update_attr {
	TAILQ_ENTRY(update_attr)	 attr_l;
	RB_ENTRY(update_attr)		 entry;
	struct uplist_prefix		 prefix_h;
	u_char				*attr;
	u_char				*mpattr;
	u_int32_t			 attr_hash;
	u_int16_t			 attr_len;
	u_int16_t			 mpattr_len;
};

void	up_clear(struct uplist_attr *, struct uplist_prefix *);
int	up_prefix_cmp(struct update_prefix *, struct update_prefix *);
int	up_attr_cmp(struct update_attr *, struct update_attr *);
int	up_add(struct rde_peer *, struct update_prefix *, struct update_attr *);

RB_PROTOTYPE(uptree_prefix, update_prefix, entry, up_prefix_cmp)
RB_GENERATE(uptree_prefix, update_prefix, entry, up_prefix_cmp)

RB_PROTOTYPE(uptree_attr, update_attr, entry, up_attr_cmp)
RB_GENERATE(uptree_attr, update_attr, entry, up_attr_cmp)

SIPHASH_KEY uptree_key;

void
up_init(struct rde_peer *peer)
{
	u_int8_t	i;

	for (i = 0; i < AID_MAX; i++) {
		TAILQ_INIT(&peer->updates[i]);
		TAILQ_INIT(&peer->withdraws[i]);
	}
	RB_INIT(&peer->up_prefix);
	RB_INIT(&peer->up_attrs);
	peer->up_pcnt = 0;
	peer->up_acnt = 0;
	peer->up_nlricnt = 0;
	peer->up_wcnt = 0;
	arc4random_buf(&uptree_key, sizeof(uptree_key));
}

void
up_clear(struct uplist_attr *updates, struct uplist_prefix *withdraws)
{
	struct update_attr	*ua;
	struct update_prefix	*up;

	while ((ua = TAILQ_FIRST(updates)) != NULL) {
		TAILQ_REMOVE(updates, ua, attr_l);
		while ((up = TAILQ_FIRST(&ua->prefix_h)) != NULL) {
			TAILQ_REMOVE(&ua->prefix_h, up, prefix_l);
			free(up);
		}
		free(ua->attr);
		free(ua->mpattr);
		free(ua);
	}

	while ((up = TAILQ_FIRST(withdraws)) != NULL) {
		TAILQ_REMOVE(withdraws, up, prefix_l);
		free(up);
	}
}

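/*
 * Flush all pending updates and withdraws of a peer whose session went
 * down and reset the bookkeeping counters.
 */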
void
up_down(struct rde_peer *peer)
{
	u_int8_t	i;

	for (i = 0; i < AID_MAX; i++)
		up_clear(&peer->updates[i], &peer->withdraws[i]);

	RB_INIT(&peer->up_prefix);
	RB_INIT(&peer->up_attrs);

	peer->up_pcnt = 0;
	peer->up_acnt = 0;
	peer->up_nlricnt = 0;
	peer->up_wcnt = 0;
}

int
up_prefix_cmp(struct update_prefix *a, struct update_prefix *b)
{
	int	i;

	if (a->prefix.aid < b->prefix.aid)
		return (-1);
	if (a->prefix.aid > b->prefix.aid)
		return (1);

	switch (a->prefix.aid) {
	case AID_INET:
		if (ntohl(a->prefix.v4.s_addr) < ntohl(b->prefix.v4.s_addr))
			return (-1);
		if (ntohl(a->prefix.v4.s_addr) > ntohl(b->prefix.v4.s_addr))
			return (1);
		break;
	case AID_INET6:
		i = memcmp(&a->prefix.v6, &b->prefix.v6,
		    sizeof(struct in6_addr));
		if (i > 0)
			return (1);
		if (i < 0)
			return (-1);
		break;
	case AID_VPN_IPv4:
		if (betoh64(a->prefix.vpn4.rd) < betoh64(b->prefix.vpn4.rd))
			return (-1);
		if (betoh64(a->prefix.vpn4.rd) > betoh64(b->prefix.vpn4.rd))
			return (1);
		if (ntohl(a->prefix.v4.s_addr) < ntohl(b->prefix.v4.s_addr))
			return (-1);
		if (ntohl(a->prefix.v4.s_addr) > ntohl(b->prefix.v4.s_addr))
			return (1);
		if (a->prefixlen < b->prefixlen)
			return (-1);
		if (a->prefixlen > b->prefixlen)
			return (1);
		if (a->prefix.vpn4.labellen < b->prefix.vpn4.labellen)
			return (-1);
		if (a->prefix.vpn4.labellen > b->prefix.vpn4.labellen)
			return (1);
		return (memcmp(a->prefix.vpn4.labelstack,
		    b->prefix.vpn4.labelstack, a->prefix.vpn4.labellen));
	default:
		fatalx("up_prefix_cmp: unknown af");
	}
	if (a->prefixlen < b->prefixlen)
		return (-1);
	if (a->prefixlen > b->prefixlen)
		return (1);
	return (0);
}

int
up_attr_cmp(struct update_attr *a, struct update_attr *b)
{
	int	r;

	if ((r = a->attr_hash - b->attr_hash) != 0)
		return (r);
	if ((r = a->attr_len - b->attr_len) != 0)
		return (r);
	if ((r = a->mpattr_len - b->mpattr_len) != 0)
		return (r);
	if ((r = memcmp(a->mpattr, b->mpattr, a->mpattr_len)) != 0)
		return (r);
	return (memcmp(a->attr, b->attr, a->attr_len));
}

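/*
 * Link prefix p to attribute a (update) or to the withdraw list of the
 * peer (a == NULL), reusing an identical attribute set that is already
 * queued.  Consumes both p and a; returns 0 on success, -1 on failure.
 */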
int
up_add(struct rde_peer *peer, struct update_prefix *p, struct update_attr *a)
{
	struct update_attr	*na = NULL;
	struct update_prefix	*np;
	struct uplist_attr	*upl = NULL;
	struct uplist_prefix	*wdl = NULL;

	upl = &peer->updates[p->prefix.aid];
	wdl = &peer->withdraws[p->prefix.aid];

	/* 1. search for attr */
	if (a != NULL && (na = RB_FIND(uptree_attr, &peer->up_attrs, a)) ==
	    NULL) {
		/* 1.1 if not found -> add */
		TAILQ_INIT(&a->prefix_h);
		if (RB_INSERT(uptree_attr, &peer->up_attrs, a) != NULL) {
			log_warnx("uptree_attr insert failed");
			/* cleanup */
			free(a->attr);
			free(a->mpattr);
			free(a);
			free(p);
			return (-1);
		}
		TAILQ_INSERT_TAIL(upl, a, attr_l);
		peer->up_acnt++;
	} else {
		/* 1.2 if found -> use that, free a */
		if (a != NULL) {
			free(a->attr);
			free(a->mpattr);
			free(a);
			a = na;
			/* move to end of update queue */
			TAILQ_REMOVE(upl, a, attr_l);
			TAILQ_INSERT_TAIL(upl, a, attr_l);
		}
	}

	/* 2. search for prefix */
	if ((np = RB_FIND(uptree_prefix, &peer->up_prefix, p)) == NULL) {
		/* 2.1 if not found -> add */
		if (RB_INSERT(uptree_prefix, &peer->up_prefix, p) != NULL) {
			log_warnx("uptree_prefix insert failed");
			/*
			 * cleanup. But do not free a because it is already
			 * linked or NULL. up_dump_attrnlri() will remove and
			 * free the empty attribute later.
			 */
			free(p);
			return (-1);
		}
		peer->up_pcnt++;
	} else {
		/* 2.2 if found -> use that and free p */
		TAILQ_REMOVE(np->prefix_h, np, prefix_l);
		free(p);
		p = np;
		if (p->prefix_h == wdl)
			peer->up_wcnt--;
		else
			peer->up_nlricnt--;
	}
	/* 3. link prefix to attr */
	if (a == NULL) {
		TAILQ_INSERT_TAIL(wdl, p, prefix_l);
		p->prefix_h = wdl;
		peer->up_wcnt++;
	} else {
		TAILQ_INSERT_TAIL(&a->prefix_h, p, prefix_l);
		p->prefix_h = &a->prefix_h;
		peer->up_nlricnt++;
	}
	return (0);
}

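/*
 * Check whether prefix p may be announced to peer.  Returns 1 if it
 * should be sent, 0 if a previous announcement must be withdrawn
 * instead, and -1 if nothing needs to be done at all.
 */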
int
up_test_update(struct rde_peer *peer, struct prefix *p)
{
	struct bgpd_addr	 addr;
	struct attr		*attr;

	if (peer->state != PEER_UP)
		return (-1);

	if (p == NULL)
		/* no prefix available */
		return (0);

	if (peer == p->aspath->peer)
		/* Do not send routes back to sender */
		return (0);

	if (p->aspath->flags & F_ATTR_PARSE_ERR)
		fatalx("try to send out a botched path");
	if (p->aspath->flags & F_ATTR_LOOP)
		fatalx("try to send out a looped path");

	pt_getaddr(p->prefix, &addr);
	if (peer->capa.mp[addr.aid] == 0)
		return (-1);

	if (!p->aspath->peer->conf.ebgp && !peer->conf.ebgp) {
		/*
		 * route reflector redistribution rules:
		 * 1. if announce is set -> announce
		 * 2. old non-client, new non-client -> no
		 * 3. old client, new non-client -> yes
		 * 4. old non-client, new client -> yes
		 * 5. old client, new client -> yes
		 */
		if (p->aspath->peer->conf.reflector_client == 0 &&
		    peer->conf.reflector_client == 0 &&
		    (p->aspath->flags & F_PREFIX_ANNOUNCED) == 0)
			/* Do not redistribute updates to ibgp peers */
			return (0);
	}

	/* announce type handling */
	switch (peer->conf.announce_type) {
	case ANNOUNCE_UNDEF:
	case ANNOUNCE_NONE:
	case ANNOUNCE_DEFAULT_ROUTE:
		/*
		 * no need to withdraw old prefix as this will be
		 * filtered out as well.
		 */
		return (-1);
	case ANNOUNCE_ALL:
		break;
	case ANNOUNCE_SELF:
		/*
		 * pass only prefixes that have an aspath count of zero;
		 * this is equal to the ^$ regex.
		 */
		if (p->aspath->aspath->ascnt != 0)
			return (0);
		break;
	}

	/* well known communities */
	if (community_match(p->aspath,
	    COMMUNITY_WELLKNOWN, COMMUNITY_NO_ADVERTISE))
		return (0);
	if (peer->conf.ebgp && community_match(p->aspath,
	    COMMUNITY_WELLKNOWN, COMMUNITY_NO_EXPORT))
		return (0);
	if (peer->conf.ebgp && community_match(p->aspath,
	    COMMUNITY_WELLKNOWN, COMMUNITY_NO_EXPSUBCONFED))
		return (0);

	/*
	 * Don't send messages back to the originator; this is not
	 * specified in the RFC but seems logical.
	 */
	if ((attr = attr_optget(p->aspath, ATTR_ORIGINATOR_ID)) != NULL) {
		if (memcmp(attr->data, &peer->remote_bgpid,
		    sizeof(peer->remote_bgpid)) == 0) {
			/* would cause a loop, don't send */
			return (-1);
		}
	}

	return (1);
}

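/*
 * Queue an update (asp != NULL) or a withdraw (asp == NULL) for the
 * given prefix: generate the wire-format path attributes, hash them so
 * up_add() can reuse identical attribute sets, and enqueue everything.
 */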
int
up_generate(struct rde_peer *peer, struct rde_aspath *asp,
    struct bgpd_addr *addr, u_int8_t prefixlen)
{
	struct update_attr	*ua = NULL;
	struct update_prefix	*up;
	SIPHASH_CTX		 ctx;

	if (asp) {
		ua = calloc(1, sizeof(struct update_attr));
		if (ua == NULL)
			fatal("up_generate");

		if (up_generate_attr(peer, ua, asp, addr->aid) == -1) {
			log_warnx("generation of bgp path attributes failed");
			free(ua);
			return (-1);
		}
		/*
		 * hash the attribute buffers so that up_add() can detect
		 * and reuse identical attribute sets.
		 */
		SipHash24_Init(&ctx, &uptree_key);
		SipHash24_Update(&ctx, ua->attr, ua->attr_len);
		if (ua->mpattr)
			SipHash24_Update(&ctx, ua->mpattr, ua->mpattr_len);
		ua->attr_hash = SipHash24_End(&ctx);
	}

	up = calloc(1, sizeof(struct update_prefix));
	if (up == NULL)
		fatal("up_generate");
	up->prefix = *addr;
	up->prefixlen = prefixlen;

	if (up_add(peer, up, ua) == -1)
		return (-1);

	return (0);
}

void
up_generate_updates(struct filter_head *rules, struct rde_peer *peer,
    struct prefix *new, struct prefix *old)
{
	struct rde_aspath	*asp;
	struct bgpd_addr	 addr;

	if (peer->state != PEER_UP)
		return;

	if (new == NULL) {
		if (up_test_update(peer, old) != 1)
			return;

		pt_getaddr(old->prefix, &addr);
		if (rde_filter(rules, NULL, peer, old->aspath, &addr,
		    old->prefix->prefixlen, old->aspath->peer) == ACTION_DENY)
			return;

		/* withdraw prefix */
		up_generate(peer, NULL, &addr, old->prefix->prefixlen);
	} else {
		switch (up_test_update(peer, new)) {
		case 1:
			break;
		case 0:
			up_generate_updates(rules, peer, NULL, old);
			return;
		case -1:
			return;
		}

		pt_getaddr(new->prefix, &addr);
		if (rde_filter(rules, &asp, peer, new->aspath, &addr,
		    new->prefix->prefixlen, new->aspath->peer) ==
		    ACTION_DENY) {
			path_put(asp);
			up_generate_updates(rules, peer, NULL, old);
			return;
		}

		/* generate update */
		if (asp != NULL) {
			up_generate(peer, asp, &addr, new->prefix->prefixlen);
			path_put(asp);
		} else
			up_generate(peer, new->aspath, &addr,
			    new->prefix->prefixlen);
	}
}

/* send a default route to the specified peer */
void
up_generate_default(struct filter_head *rules, struct rde_peer *peer,
    u_int8_t aid)
{
	struct rde_aspath	*asp, *fasp;
	struct bgpd_addr	 addr;

	if (peer->capa.mp[aid] == 0)
		return;

	asp = path_get();
	asp->aspath = aspath_get(NULL, 0);
	asp->origin = ORIGIN_IGP;
	/* the other default values are OK, nexthop is once again NULL */

	/*
	 * XXX apply default overrides. Not yet possible, mainly a parse.y
	 * problem.
	 */
	/* rde_apply_set(asp, set, af, NULL ???, DIR_IN); */

	/* filter as usual */
	bzero(&addr, sizeof(addr));
	addr.aid = aid;

	if (rde_filter(rules, &fasp, peer, asp, &addr, 0, NULL) ==
	    ACTION_DENY) {
		path_put(fasp);
		path_put(asp);
		return;
	}

	/* generate update */
	if (fasp != NULL)
		up_generate(peer, fasp, &addr, 0);
	else
		up_generate(peer, asp, &addr, 0);

	/* no longer needed */
	path_put(fasp);
	path_put(asp);
}

/* generate an EoR marker in the update list. This is a horrible hack. */
int
up_generate_marker(struct rde_peer *peer, u_int8_t aid)
{
	struct update_attr	*ua;
	struct update_attr	*na = NULL;
	struct uplist_attr	*upl = NULL;

	ua = calloc(1, sizeof(struct update_attr));
	if (ua == NULL)
		fatal("up_generate_marker");

	upl = &peer->updates[aid];

	/* 1. search for attr */
	if ((na = RB_FIND(uptree_attr, &peer->up_attrs, ua)) == NULL) {
		/* 1.1 if not found -> add */
		TAILQ_INIT(&ua->prefix_h);
		if (RB_INSERT(uptree_attr, &peer->up_attrs, ua) != NULL) {
			log_warnx("uptree_attr insert failed");
			/* cleanup */
			free(ua);
			return (-1);
		}
		TAILQ_INSERT_TAIL(upl, ua, attr_l);
		peer->up_acnt++;
	} else {
		/* 1.2 if found -> use that, free ua */
		free(ua);
		ua = na;
		/* move to end of update queue */
		TAILQ_REMOVE(upl, ua, attr_l);
		TAILQ_INSERT_TAIL(upl, ua, attr_l);
	}
	return (0);
}

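/*
 * shared scratch buffer used by up_generate_attr() to build the path
 * attributes of one update; attr_write() bounds-checks against it.
 */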
u_char	up_attr_buf[4096];

/* only for IPv4 */
in_addr_t
up_get_nexthop(struct rde_peer *peer, struct rde_aspath *a)
{
	in_addr_t	mask;

	/* nexthop, already network byte order */
	if (a->flags & F_NEXTHOP_NOMODIFY) {
		/* no modify flag set */
		if (a->nexthop == NULL)
			return (peer->local_v4_addr.v4.s_addr);
		else
			return (a->nexthop->exit_nexthop.v4.s_addr);
	} else if (a->flags & F_NEXTHOP_SELF)
		return (peer->local_v4_addr.v4.s_addr);
	else if (!peer->conf.ebgp) {
		/*
		 * If directly connected use peer->local_v4_addr;
		 * this is only true for announced networks.
		 */
		if (a->nexthop == NULL)
			return (peer->local_v4_addr.v4.s_addr);
		else if (a->nexthop->exit_nexthop.v4.s_addr ==
		    peer->remote_addr.v4.s_addr)
			/*
			 * per RFC: if the remote peer address is equal to
			 * the nexthop, set the nexthop to our local address.
			 * This reduces the risk of routing loops.
			 */
			return (peer->local_v4_addr.v4.s_addr);
		else
			return (a->nexthop->exit_nexthop.v4.s_addr);
	} else if (peer->conf.distance == 1) {
		/* ebgp directly connected */
		if (a->nexthop != NULL &&
		    a->nexthop->flags & NEXTHOP_CONNECTED) {
			mask = htonl(
			    prefixlen2mask(a->nexthop->nexthop_netlen));
			if ((peer->remote_addr.v4.s_addr & mask) ==
			    (a->nexthop->nexthop_net.v4.s_addr & mask))
				/* nexthop and peer are in the same net */
				return (a->nexthop->exit_nexthop.v4.s_addr);
			else
				return (peer->local_v4_addr.v4.s_addr);
		} else
			return (peer->local_v4_addr.v4.s_addr);
	} else
		/* ebgp multihop */
		/*
		 * For ebgp multihop nh->flags should never have
		 * NEXTHOP_CONNECTED set so it should be possible to unify the
		 * two ebgp cases. But this is safe and RFC compliant.
		 */
		return (peer->local_v4_addr.v4.s_addr);
}

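/*
 * Build the MP_REACH_NLRI attribute header and nexthop for the given
 * AID into upa->mpattr.  Returns 0 on success and -1 for AIDs that are
 * not handled here.
 */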
int
up_generate_mp_reach(struct rde_peer *peer, struct update_attr *upa,
    struct rde_aspath *a, u_int8_t aid)
{
	u_int16_t	tmp;

	switch (aid) {
	case AID_INET6:
		upa->mpattr_len = 21; /* AFI + SAFI + NH LEN + NH + Reserved */
		upa->mpattr = malloc(upa->mpattr_len);
		if (upa->mpattr == NULL)
			fatal("up_generate_mp_reach");
		if (aid2afi(aid, &tmp, &upa->mpattr[2]))
			fatalx("up_generate_mp_reach: bad AID");
		tmp = htons(tmp);
		memcpy(upa->mpattr, &tmp, sizeof(tmp));
		upa->mpattr[3] = sizeof(struct in6_addr);
		upa->mpattr[20] = 0; /* Reserved must be 0 */

		/* nexthop dance, see also up_get_nexthop() */
		if (a->flags & F_NEXTHOP_NOMODIFY) {
			/* no modify flag set */
			if (a->nexthop == NULL)
				memcpy(&upa->mpattr[4],
				    &peer->local_v6_addr.v6,
				    sizeof(struct in6_addr));
			else
				memcpy(&upa->mpattr[4],
				    &a->nexthop->exit_nexthop.v6,
				    sizeof(struct in6_addr));
		} else if (a->flags & F_NEXTHOP_SELF)
			memcpy(&upa->mpattr[4], &peer->local_v6_addr.v6,
			    sizeof(struct in6_addr));
		else if (!peer->conf.ebgp) {
			/* ibgp */
			if (a->nexthop == NULL ||
			    (a->nexthop->exit_nexthop.aid == AID_INET6 &&
			    !memcmp(&a->nexthop->exit_nexthop.v6,
			    &peer->remote_addr.v6, sizeof(struct in6_addr))))
				memcpy(&upa->mpattr[4],
				    &peer->local_v6_addr.v6,
				    sizeof(struct in6_addr));
			else
				memcpy(&upa->mpattr[4],
				    &a->nexthop->exit_nexthop.v6,
				    sizeof(struct in6_addr));
		} else if (peer->conf.distance == 1) {
			/* ebgp directly connected */
			if (a->nexthop != NULL &&
			    a->nexthop->flags & NEXTHOP_CONNECTED)
				if (prefix_compare(&peer->remote_addr,
				    &a->nexthop->nexthop_net,
				    a->nexthop->nexthop_netlen) == 0) {
					/*
					 * nexthop and peer are in the same
					 * subnet
					 */
					memcpy(&upa->mpattr[4],
					    &a->nexthop->exit_nexthop.v6,
					    sizeof(struct in6_addr));
					return (0);
				}
			memcpy(&upa->mpattr[4], &peer->local_v6_addr.v6,
			    sizeof(struct in6_addr));
		} else
			/* ebgp multihop */
			memcpy(&upa->mpattr[4], &peer->local_v6_addr.v6,
			    sizeof(struct in6_addr));
		return (0);
	case AID_VPN_IPv4:
		upa->mpattr_len = 17; /* AFI + SAFI + NH LEN + NH + Reserved */
		upa->mpattr = calloc(upa->mpattr_len, 1);
		if (upa->mpattr == NULL)
			fatal("up_generate_mp_reach");
		if (aid2afi(aid, &tmp, &upa->mpattr[2]))
			fatalx("up_generate_mp_reach: bad AID");
		tmp = htons(tmp);
		memcpy(upa->mpattr, &tmp, sizeof(tmp));
		upa->mpattr[3] = sizeof(u_int64_t) + sizeof(struct in_addr);

		/* nexthop dance, see also up_get_nexthop() */
		if (a->flags & F_NEXTHOP_NOMODIFY) {
			/* no modify flag set */
			if (a->nexthop == NULL)
				memcpy(&upa->mpattr[12],
				    &peer->local_v4_addr.v4,
				    sizeof(struct in_addr));
			else
				/* nexthops are stored as IPv4 addrs */
				memcpy(&upa->mpattr[12],
				    &a->nexthop->exit_nexthop.v4,
				    sizeof(struct in_addr));
		} else if (a->flags & F_NEXTHOP_SELF)
			memcpy(&upa->mpattr[12], &peer->local_v4_addr.v4,
			    sizeof(struct in_addr));
		else if (!peer->conf.ebgp) {
			/* ibgp */
			if (a->nexthop == NULL ||
			    (a->nexthop->exit_nexthop.aid == AID_INET &&
			    !memcmp(&a->nexthop->exit_nexthop.v4,
			    &peer->remote_addr.v4, sizeof(struct in_addr))))
				memcpy(&upa->mpattr[12],
				    &peer->local_v4_addr.v4,
				    sizeof(struct in_addr));
			else
				memcpy(&upa->mpattr[12],
				    &a->nexthop->exit_nexthop.v4,
				    sizeof(struct in_addr));
		} else if (peer->conf.distance == 1) {
			/* ebgp directly connected */
			if (a->nexthop != NULL &&
			    a->nexthop->flags & NEXTHOP_CONNECTED)
				if (prefix_compare(&peer->remote_addr,
				    &a->nexthop->nexthop_net,
				    a->nexthop->nexthop_netlen) == 0) {
					/*
					 * nexthop and peer are in the same
					 * subnet
					 */
					memcpy(&upa->mpattr[12],
					    &a->nexthop->exit_nexthop.v4,
					    sizeof(struct in_addr));
					return (0);
				}
			memcpy(&upa->mpattr[12], &peer->local_v4_addr.v4,
			    sizeof(struct in_addr));
		} else
			/* ebgp multihop */
			memcpy(&upa->mpattr[12], &peer->local_v4_addr.v4,
			    sizeof(struct in_addr));
		return (0);
	default:
		break;
	}
	return (-1);
}

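/*
 * Write all BGP path attributes for aspath a into the global
 * up_attr_buf and attach a copy to upa.  Returns the total attribute
 * length or -1 if the attributes do not fit into the buffer.
 */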
int
up_generate_attr(struct rde_peer *peer, struct update_attr *upa,
    struct rde_aspath *a, u_int8_t aid)
{
	struct attr	*oa, *newaggr = NULL;
	u_char		*pdata;
	u_int32_t	 tmp32;
	in_addr_t	 nexthop;
	int		 flags, r, ismp = 0, neednewpath = 0;
	u_int16_t	 len = sizeof(up_attr_buf), wlen = 0, plen;
	u_int8_t	 l;

	/* origin */
	if ((r = attr_write(up_attr_buf + wlen, len, ATTR_WELL_KNOWN,
	    ATTR_ORIGIN, &a->origin, 1)) == -1)
		return (-1);
	wlen += r; len -= r;

	/* aspath */
	if (!peer->conf.ebgp ||
	    peer->conf.flags & PEERFLAG_TRANS_AS)
		pdata = aspath_prepend(a->aspath, rde_local_as(), 0, &plen);
	else
		pdata = aspath_prepend(a->aspath, rde_local_as(), 1, &plen);

	if (!rde_as4byte(peer))
		pdata = aspath_deflate(pdata, &plen, &neednewpath);

	if ((r = attr_write(up_attr_buf + wlen, len, ATTR_WELL_KNOWN,
	    ATTR_ASPATH, pdata, plen)) == -1)
		return (-1);
	wlen += r; len -= r;
	free(pdata);

	switch (aid) {
	case AID_INET:
		nexthop = up_get_nexthop(peer, a);
		if ((r = attr_write(up_attr_buf + wlen, len, ATTR_WELL_KNOWN,
		    ATTR_NEXTHOP, &nexthop, 4)) == -1)
			return (-1);
		wlen += r; len -= r;
		break;
	default:
		ismp = 1;
		break;
	}

	/*
	 * The old MED from other peers MUST not be announced to others
	 * unless the MED is originating from us or the peer is an IBGP one.
	 * The only exception is routers with "transparent-as yes" set.
	 */
	if (a->flags & F_ATTR_MED && (!peer->conf.ebgp ||
	    a->flags & F_ATTR_MED_ANNOUNCE ||
	    peer->conf.flags & PEERFLAG_TRANS_AS)) {
		tmp32 = htonl(a->med);
		if ((r = attr_write(up_attr_buf + wlen, len, ATTR_OPTIONAL,
		    ATTR_MED, &tmp32, 4)) == -1)
			return (-1);
		wlen += r; len -= r;
	}

	if (!peer->conf.ebgp) {
		/* local preference, only valid for ibgp */
		tmp32 = htonl(a->lpref);
		if ((r = attr_write(up_attr_buf + wlen, len, ATTR_WELL_KNOWN,
		    ATTR_LOCALPREF, &tmp32, 4)) == -1)
			return (-1);
		wlen += r; len -= r;
	}

	/*
	 * dump all other path attributes. Following rules apply:
	 *  1. well-known attrs: ATTR_ATOMIC_AGGREGATE and ATTR_AGGREGATOR
	 *     pass unmodified (enforce flags to correct values).
	 *     Actually ATTR_AGGREGATOR may be deflated for OLD 2-byte peers.
	 *  2. non-transitive attrs: don't re-announce to ebgp peers
	 *  3. transitive known attrs: announce unmodified
	 *  4. transitive unknown attrs: set partial bit and re-announce
	 */
	for (l = 0; l < a->others_len; l++) {
		if ((oa = a->others[l]) == NULL)
			break;
		switch (oa->type) {
		case ATTR_ATOMIC_AGGREGATE:
			if ((r = attr_write(up_attr_buf + wlen, len,
			    ATTR_WELL_KNOWN, ATTR_ATOMIC_AGGREGATE,
			    NULL, 0)) == -1)
				return (-1);
			break;
		case ATTR_AGGREGATOR:
			if (!rde_as4byte(peer)) {
				/* need to deflate the aggregator */
				u_int8_t	t[6];
				u_int16_t	tas;

				if ((!(oa->flags & ATTR_TRANSITIVE)) &&
				    peer->conf.ebgp) {
					r = 0;
					break;
				}

				memcpy(&tmp32, oa->data, sizeof(tmp32));
				if (ntohl(tmp32) > USHRT_MAX) {
					tas = htons(AS_TRANS);
					newaggr = oa;
				} else
					tas = htons(ntohl(tmp32));

				memcpy(t, &tas, sizeof(tas));
				memcpy(t + sizeof(tas),
				    oa->data + sizeof(tmp32),
				    oa->len - sizeof(tmp32));
				if ((r = attr_write(up_attr_buf + wlen, len,
				    oa->flags, oa->type, &t, sizeof(t))) == -1)
					return (-1);
				break;
			}
			/* FALLTHROUGH */
		case ATTR_COMMUNITIES:
		case ATTR_ORIGINATOR_ID:
		case ATTR_CLUSTER_LIST:
			if ((!(oa->flags & ATTR_TRANSITIVE)) &&
			    peer->conf.ebgp) {
				r = 0;
				break;
			}
			if ((r = attr_write(up_attr_buf + wlen, len,
			    oa->flags, oa->type, oa->data, oa->len)) == -1)
				return (-1);
			break;
		default:
			/* unknown attribute */
			if (!(oa->flags & ATTR_TRANSITIVE)) {
				/*
				 * RFC 1771:
				 * Unrecognized non-transitive optional
				 * attributes must be quietly ignored and
				 * not passed along to other BGP peers.
				 */
				r = 0;
				break;
			}
			if ((r = attr_write(up_attr_buf + wlen, len,
			    oa->flags | ATTR_PARTIAL, oa->type,
			    oa->data, oa->len)) == -1)
				return (-1);
			break;
		}
		wlen += r; len -= r;
	}

	/* NEW to OLD conversion when sending stuff to a 2-byte AS peer */
	if (neednewpath) {
		if (!peer->conf.ebgp ||
		    peer->conf.flags & PEERFLAG_TRANS_AS)
			pdata = aspath_prepend(a->aspath, rde_local_as(), 0,
			    &plen);
		else
			pdata = aspath_prepend(a->aspath, rde_local_as(), 1,
			    &plen);
		flags = ATTR_OPTIONAL|ATTR_TRANSITIVE;
		if (!(a->flags & F_PREFIX_ANNOUNCED))
			flags |= ATTR_PARTIAL;
		if (plen == 0)
			r = 0;
		else if ((r = attr_write(up_attr_buf + wlen, len, flags,
		    ATTR_AS4_PATH, pdata, plen)) == -1)
			return (-1);
		wlen += r; len -= r;
		free(pdata);
	}
	if (newaggr) {
		flags = ATTR_OPTIONAL|ATTR_TRANSITIVE;
		if (!(a->flags & F_PREFIX_ANNOUNCED))
			flags |= ATTR_PARTIAL;
		if ((r = attr_write(up_attr_buf + wlen, len, flags,
		    ATTR_AS4_AGGREGATOR, newaggr->data, newaggr->len)) == -1)
			return (-1);
		wlen += r; len -= r;
	}

	/* write mp attribute to different buffer */
	if (ismp)
		if (up_generate_mp_reach(peer, upa, a, aid) == -1)
			return (-1);

	/* the bgp path attributes are now stored in the global buf */
	upa->attr = malloc(wlen);
	if (upa->attr == NULL)
		fatal("up_generate_attr");
	memcpy(upa->attr, up_attr_buf, wlen);
	upa->attr_len = wlen;
	return (wlen);
}

#define MIN_PREFIX_LEN	5	/* 1 byte prefix length + 4 bytes addr */

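/*
 * Write as many queued prefixes as fit into buf, dequeueing and
 * freeing each one as it is written.  Returns the number of bytes
 * written.
 */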
int
up_dump_prefix(u_char *buf, int len, struct uplist_prefix *prefix_head,
    struct rde_peer *peer)
{
	struct update_prefix	*upp;
	int			 r, wpos = 0;

	while ((upp = TAILQ_FIRST(prefix_head)) != NULL) {
		if ((r = prefix_write(buf + wpos, len - wpos,
		    &upp->prefix, upp->prefixlen)) == -1)
			break;
		wpos += r;
		if (RB_REMOVE(uptree_prefix, &peer->up_prefix, upp) == NULL)
			log_warnx("dequeuing update failed.");
		TAILQ_REMOVE(upp->prefix_h, upp, prefix_l);
		peer->up_pcnt--;
		if (upp->prefix_h == &peer->withdraws[upp->prefix.aid]) {
			peer->up_wcnt--;
			peer->prefix_sent_withdraw++;
		} else {
			peer->up_nlricnt--;
			peer->prefix_sent_update++;
		}
		free(upp);
	}
	return (wpos);
}

int
up_dump_attrnlri(u_char *buf, int len, struct rde_peer *peer)
{
	struct update_attr	*upa;
	int			 r, wpos;
	u_int16_t		 attr_len;

	/*
	 * It is possible that a queued path attribute has no nlri prefix.
	 * Ignore and remove those path attributes.
	 */
	while ((upa = TAILQ_FIRST(&peer->updates[AID_INET])) != NULL)
		if (TAILQ_EMPTY(&upa->prefix_h)) {
			attr_len = upa->attr_len;
			if (RB_REMOVE(uptree_attr, &peer->up_attrs,
			    upa) == NULL)
				log_warnx("dequeuing update failed.");
			TAILQ_REMOVE(&peer->updates[AID_INET], upa, attr_l);
			free(upa->attr);
			free(upa->mpattr);
			free(upa);
			peer->up_acnt--;
			/* XXX horrible hack,
			 * if attr_len is 0, it is an EoR marker */
			if (attr_len == 0)
				return (-1);
		} else
			break;

	if (upa == NULL || upa->attr_len + MIN_PREFIX_LEN > len) {
		/*
		 * either no packet or not enough space.
		 * The length field needs to be set to zero else it would be
		 * an invalid bgp update.
		 */
		bzero(buf, 2);
		return (2);
	}

	/* first dump the 2-byte path attribute length */
	attr_len = htons(upa->attr_len);
	memcpy(buf, &attr_len, 2);
	wpos = 2;

	/* then the path attributes themselves */
	memcpy(buf + wpos, upa->attr, upa->attr_len);
	wpos += upa->attr_len;

	/* last but not least dump the nlri */
	r = up_dump_prefix(buf + wpos, len - wpos, &upa->prefix_h, peer);
	wpos += r;

	/* now check if all prefixes were written */
	if (TAILQ_EMPTY(&upa->prefix_h)) {
		if (RB_REMOVE(uptree_attr, &peer->up_attrs, upa) == NULL)
			log_warnx("dequeuing update failed.");
		TAILQ_REMOVE(&peer->updates[AID_INET], upa, attr_l);
		free(upa->attr);
		free(upa->mpattr);
		free(upa);
		peer->up_acnt--;
	}

	return (wpos);
}

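/*
 * Build a complete MP_UNREACH_NLRI withdraw for the given AID in buf,
 * filling in the message backwards from the prefixes.  Returns a
 * pointer to the start of the message and updates *len, or NULL if
 * nothing fits or no withdraws are pending.
 */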
u_char *
up_dump_mp_unreach(u_char *buf, u_int16_t *len, struct rde_peer *peer,
    u_int8_t aid)
{
	int		wpos;
	u_int16_t	datalen, tmp;
	u_int16_t	attrlen = 2;	/* attribute header (without len) */
	u_int8_t	flags = ATTR_OPTIONAL, safi;

	/*
	 * reserve space for withdraw len, attr len, the attribute header
	 * and the mp attribute header
	 */
	wpos = 2 + 2 + 4 + 3;

	if (*len < wpos)
		return (NULL);

	datalen = up_dump_prefix(buf + wpos, *len - wpos,
	    &peer->withdraws[aid], peer);
	if (datalen == 0)
		return (NULL);

	datalen += 3;	/* afi + safi */

	/* prepend header, need to do it reverse */
	/* safi & afi */
	if (aid2afi(aid, &tmp, &safi))
		fatalx("up_dump_mp_unreach: bad AID");
	buf[--wpos] = safi;
	wpos -= sizeof(u_int16_t);
	tmp = htons(tmp);
	memcpy(buf + wpos, &tmp, sizeof(u_int16_t));

	/* attribute length */
	if (datalen > 255) {
		attrlen += 2 + datalen;
		flags |= ATTR_EXTLEN;
		wpos -= sizeof(u_int16_t);
		tmp = htons(datalen);
		memcpy(buf + wpos, &tmp, sizeof(u_int16_t));
	} else {
		attrlen += 1 + datalen;
		buf[--wpos] = (u_char)datalen;
	}

	/* mp attribute */
	buf[--wpos] = (u_char)ATTR_MP_UNREACH_NLRI;
	buf[--wpos] = flags;

	/* attribute length */
	wpos -= sizeof(u_int16_t);
	tmp = htons(attrlen);
	memcpy(buf + wpos, &tmp, sizeof(u_int16_t));

	/* no IPv4 withdraws */
	wpos -= sizeof(u_int16_t);
	bzero(buf + wpos, sizeof(u_int16_t));

	if (wpos < 0)
		fatalx("up_dump_mp_unreach: buffer underflow");

	/* total length includes the two 2-byte length fields. */
	*len = attrlen + 2 * sizeof(u_int16_t);

	return (buf + wpos);
}

int
up_dump_mp_reach(u_char *buf, u_int16_t *len, struct rde_peer *peer,
    u_int8_t aid)
{
	struct update_attr	*upa;
	int			 wpos;
	u_int16_t		 attr_len, datalen, tmp;
	u_int8_t		 flags = ATTR_OPTIONAL;

	/*
	 * It is possible that a queued path attribute has no nlri prefix.
	 * Ignore and remove those path attributes.
	 */
	while ((upa = TAILQ_FIRST(&peer->updates[aid])) != NULL)
		if (TAILQ_EMPTY(&upa->prefix_h)) {
			attr_len = upa->attr_len;
			if (RB_REMOVE(uptree_attr, &peer->up_attrs,
			    upa) == NULL)
				log_warnx("dequeuing update failed.");
			TAILQ_REMOVE(&peer->updates[aid], upa, attr_l);
			free(upa->attr);
			free(upa->mpattr);
			free(upa);
			peer->up_acnt--;
			/* XXX horrible hack,
			 * if attr_len is 0, it is an EoR marker */
			if (attr_len == 0)
				return (-1);
		} else
			break;

	if (upa == NULL)
		return (-2);

	/*
	 * reserve space for attr len, the attributes, the
	 * mp attribute and the attribute header
	 */
	wpos = 2 + 2 + upa->attr_len + 4 + upa->mpattr_len;
	if (*len < wpos)
		return (-2);

	datalen = up_dump_prefix(buf + wpos, *len - wpos,
	    &upa->prefix_h, peer);
	if (datalen == 0)
		return (-2);

	if (upa->mpattr_len == 0 || upa->mpattr == NULL)
		fatalx("multiprotocol update without MP attrs");

	datalen += upa->mpattr_len;
	wpos -= upa->mpattr_len;
	memcpy(buf + wpos, upa->mpattr, upa->mpattr_len);

	if (datalen > 255) {
		wpos -= 2;
		tmp = htons(datalen);
		memcpy(buf + wpos, &tmp, sizeof(tmp));
		datalen += 4;
		flags |= ATTR_EXTLEN;
	} else {
		buf[--wpos] = (u_char)datalen;
		datalen += 3;
	}
	buf[--wpos] = (u_char)ATTR_MP_REACH_NLRI;
	buf[--wpos] = flags;

	datalen += upa->attr_len;
	wpos -= upa->attr_len;
	memcpy(buf + wpos, upa->attr, upa->attr_len);

	if (wpos < 4)
		fatalx("Grrr, mp_reach buffer fucked up");

	wpos -= 2;
	tmp = htons(datalen);
	memcpy(buf + wpos, &tmp, sizeof(tmp));

	wpos -= 2;
	bzero(buf + wpos, 2);

	/* now check if all prefixes were written */
	if (TAILQ_EMPTY(&upa->prefix_h)) {
		if (RB_REMOVE(uptree_attr, &peer->up_attrs, upa) == NULL)
			log_warnx("dequeuing update failed.");
		TAILQ_REMOVE(&peer->updates[aid], upa, attr_l);
		free(upa->attr);
		free(upa->mpattr);
		free(upa);
		peer->up_acnt--;
	}

	*len = datalen + 4;
	return (wpos);
}