1 /* $OpenBSD: rde_decide.c,v 1.62 2012/07/04 20:43:26 claudio Exp $ */ 2 3 /* 4 * Copyright (c) 2003, 2004 Claudio Jeker <claudio@openbsd.org> 5 * Copyright (c) 2003, 2004 Henning Brauer <henning@openbsd.org> 6 * 7 * Permission to use, copy, modify, and distribute this software for any 8 * purpose with or without fee is hereby granted, provided that the above 9 * copyright notice and this permission notice appear in all copies. 10 * 11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18 */ 19 20 #include <sys/types.h> 21 #include <sys/queue.h> 22 23 #include <string.h> 24 25 #include "bgpd.h" 26 #include "rde.h" 27 28 int prefix_cmp(struct prefix *, struct prefix *); 29 /* 30 * Decision Engine RFC implementation: 31 * Phase 1: 32 * - calculate LOCAL_PREF if needed -- EBGP or IGP learnt routes 33 * - IBGP routes may either use LOCAL_PREF or the local system computes 34 * the degree of preference 35 * - If the route is ineligible, the route MAY NOT serve as an input to 36 * the next phase of route selection 37 * - if the route is eligible the computed value MUST be used as the 38 * LOCAL_PREF value in any IBGP readvertisement 39 * 40 * Phase 2: 41 * - If the NEXT_HOP attribute of a BGP route depicts an address that is 42 * not resolvable the BGP route MUST be excluded from the Phase 2 decision 43 * function. 44 * - If the AS_PATH attribute of a BGP route contains an AS loop, the BGP 45 * route should be excluded from the Phase 2 decision function. 46 * - The local BGP speaker identifies the route that has: 47 * a) the highest degree of preference of any route to the same set 48 * of destinations 49 * b) is the only route to that destination 50 * c) is selected as a result of the Phase 2 tie breaking rules 51 * - The local speaker MUST determine the immediate next-hop address from 52 * the NEXT_HOP attribute of the selected route. 53 * - If either the immediate next hop or the IGP cost to the NEXT_HOP changes, 54 * Phase 2 Route Selection MUST be performed again. 55 * 56 * Route Resolvability Condition 57 * - A route Rte1, referencing only the intermediate network address, is 58 * considered resolvable if the Routing Table contains at least one 59 * resolvable route Rte2 that matches Rte1's intermediate network address 60 * and is not recursively resolved through Rte1. 61 * - Routes referencing interfaces are considered resolvable if the state of 62 * the referenced interface is up and IP processing is enabled. 63 * 64 * Breaking Ties (Phase 2) 65 * 1. Remove from consideration all routes which are not tied for having the 66 * smallest number of AS numbers present in their AS_PATH attributes. 67 * Note, that when counting this number, an AS_SET counts as 1 68 * 2. Remove from consideration all routes which are not tied for having the 69 * lowest Origin number in their Origin attribute. 70 * 3. Remove from consideration routes with less-preferred MULTI_EXIT_DISC 71 * attributes. MULTI_EXIT_DISC is only comparable between routes learned 72 * from the same neighboring AS. 73 * 4. If at least one of the candidate routes was received via EBGP, 74 * remove from consideration all routes which were received via IBGP. 75 * 5. Remove from consideration any routes with less-preferred interior cost. 76 * If the NEXT_HOP hop for a route is reachable, but no cost can be 77 * determined, then this step should be skipped. 78 * 6. Remove from consideration all routes other than the route that was 79 * advertised by the BGP speaker whose BGP Identifier has the lowest value. 80 * 7. Prefer the route received from the lowest peer address. 81 * 82 * Phase 3: Route Dissemination 83 * - All routes in the Loc-RIB are processed into Adj-RIBs-Out according 84 * to configured policy. A route SHALL NOT be installed in the Adj-Rib-Out 85 * unless the destination and NEXT_HOP described by this route may be 86 * forwarded appropriately by the Routing Table. 87 */ 88 89 /* 90 * Decision Engine OUR implementation: 91 * Our implementation has only one RIB. The filtering is done first. The 92 * filtering calculates the preference and stores it in LOCAL_PREF (Phase 1). 93 * Ineligible routes are flagged as ineligible via nexthop_add(). 94 * Phase 3 is done together with Phase 2. 95 * In following cases a prefix needs to be reevaluated: 96 * - update of a prefix (path_update) 97 * - withdraw of a prefix (prefix_remove) 98 * - state change of the nexthop (nexthop-{in}validate) 99 * - state change of session (session down) 100 */ 101 102 /* 103 * Compare two prefixes with equal pt_entry. Returns an integer greater than or 104 * less than 0, according to whether the prefix p1 is more or less preferred 105 * than the prefix p2. p1 should be used for the new prefix and p2 for a 106 * already added prefix. 107 */ 108 int 109 prefix_cmp(struct prefix *p1, struct prefix *p2) 110 { 111 struct rde_aspath *asp1, *asp2; 112 struct attr *a; 113 u_int32_t p1id, p2id; 114 int p1cnt, p2cnt; 115 116 if (p1 == NULL) 117 return (-1); 118 if (p2 == NULL) 119 return (1); 120 121 asp1 = p1->aspath; 122 asp2 = p2->aspath; 123 124 /* pathes with errors are not eligible */ 125 if (asp1->flags & F_ATTR_PARSE_ERR) 126 return (-1); 127 if (asp2->flags & F_ATTR_PARSE_ERR) 128 return (1); 129 130 /* only loop free pathes are eligible */ 131 if (asp1->flags & F_ATTR_LOOP) 132 return (-1); 133 if (asp2->flags & F_ATTR_LOOP) 134 return (1); 135 136 /* 1. check if prefix is eligible a.k.a reachable */ 137 if (asp2->nexthop != NULL && asp2->nexthop->state != NEXTHOP_REACH) 138 return (1); 139 if (asp1->nexthop != NULL && asp1->nexthop->state != NEXTHOP_REACH) 140 return (-1); 141 142 /* 2. local preference of prefix, bigger is better */ 143 if ((asp1->lpref - asp2->lpref) != 0) 144 return (asp1->lpref - asp2->lpref); 145 146 /* 3. aspath count, the shorter the better */ 147 if ((asp2->aspath->ascnt - asp1->aspath->ascnt) != 0) 148 return (asp2->aspath->ascnt - asp1->aspath->ascnt); 149 150 /* 4. origin, the lower the better */ 151 if ((asp2->origin - asp1->origin) != 0) 152 return (asp2->origin - asp1->origin); 153 154 /* 5. MED decision, only comparable between the same neighboring AS */ 155 if (rde_decisionflags() & BGPD_FLAG_DECISION_MED_ALWAYS || 156 aspath_neighbor(asp1->aspath) == aspath_neighbor(asp2->aspath)) 157 /* lowest value wins */ 158 if ((asp2->med - asp1->med) != 0) 159 return (asp2->med - asp1->med); 160 161 /* 162 * 6. EBGP is cooler than IBGP 163 * It is absolutely important that the ebgp value in peer_config.ebgp 164 * is bigger than all other ones (IBGP, confederations) 165 */ 166 if (asp1->peer->conf.ebgp != asp2->peer->conf.ebgp) { 167 if (asp1->peer->conf.ebgp) /* p1 is EBGP other is lower */ 168 return 1; 169 else if (asp2->peer->conf.ebgp) /* p2 is EBGP */ 170 return -1; 171 } 172 173 /* 174 * 7. local tie-breaker, this weight is here to tip equal long AS 175 * paths in one or the other direction. It happens more and more 176 * that AS paths are equally long and so traffic engineering needs 177 * a metric that weights a prefix at a very late stage in the 178 * decision process. 179 */ 180 if ((asp1->weight - asp2->weight) != 0) 181 return (asp1->weight - asp2->weight); 182 183 /* 8. nexthop costs. NOT YET -> IGNORE */ 184 185 /* 186 * 9. older route (more stable) wins but only if route-age 187 * evaluation is enabled. 188 */ 189 if (rde_decisionflags() & BGPD_FLAG_DECISION_ROUTEAGE) 190 if ((p2->lastchange - p1->lastchange) != 0) 191 return (p2->lastchange - p1->lastchange); 192 193 /* 10. lowest BGP Id wins, use ORIGINATOR_ID if present */ 194 if ((a = attr_optget(asp1, ATTR_ORIGINATOR_ID)) != NULL) { 195 memcpy(&p1id, a->data, sizeof(p1id)); 196 p1id = ntohl(p1id); 197 } else 198 p1id = asp1->peer->remote_bgpid; 199 if ((a = attr_optget(asp2, ATTR_ORIGINATOR_ID)) != NULL) { 200 memcpy(&p2id, a->data, sizeof(p2id)); 201 p2id = ntohl(p2id); 202 } else 203 p2id = asp2->peer->remote_bgpid; 204 if ((p2id - p1id) != 0) 205 return (p2id - p1id); 206 207 /* 11. compare CLUSTER_LIST length, shorter is better */ 208 p1cnt = p2cnt = 0; 209 if ((a = attr_optget(asp1, ATTR_CLUSTER_LIST)) != NULL) 210 p1cnt = a->len / sizeof(u_int32_t); 211 if ((a = attr_optget(asp2, ATTR_CLUSTER_LIST)) != NULL) 212 p2cnt = a->len / sizeof(u_int32_t); 213 if ((p2cnt - p1cnt) != 0) 214 return (p2cnt - p1cnt); 215 216 /* 12. lowest peer address wins (IPv4 is better than IPv6) */ 217 if (memcmp(&p1->aspath->peer->remote_addr, 218 &p2->aspath->peer->remote_addr, 219 sizeof(p1->aspath->peer->remote_addr)) != 0) 220 return (-memcmp(&p1->aspath->peer->remote_addr, 221 &p2->aspath->peer->remote_addr, 222 sizeof(p1->aspath->peer->remote_addr))); 223 224 /* 13. for announced prefixes prefer dynamic routes */ 225 if ((asp1->flags & F_ANN_DYNAMIC) != (asp2->flags & F_ANN_DYNAMIC)) { 226 if (asp1->flags & F_ANN_DYNAMIC) 227 return (1); 228 else 229 return (-1); 230 } 231 232 fatalx("Uh, oh a politician in the decision process"); 233 /* NOTREACHED */ 234 } 235 236 /* 237 * Find the correct place to insert the prefix in the prefix list. 238 * If the active prefix has changed we need to send an update. 239 * The to evaluate prefix must not be in the prefix list. 240 */ 241 void 242 prefix_evaluate(struct prefix *p, struct rib_entry *re) 243 { 244 struct prefix *xp; 245 246 if (re->flags & F_RIB_NOEVALUATE || rde_noevaluate()) { 247 /* decision process is turned off */ 248 if (p != NULL) 249 LIST_INSERT_HEAD(&re->prefix_h, p, rib_l); 250 if (re->active != NULL) { 251 re->active->aspath->active_cnt--; 252 re->active = NULL; 253 } 254 return; 255 } 256 257 if (p != NULL) { 258 if (LIST_EMPTY(&re->prefix_h)) 259 LIST_INSERT_HEAD(&re->prefix_h, p, rib_l); 260 else { 261 LIST_FOREACH(xp, &re->prefix_h, rib_l) 262 if (prefix_cmp(p, xp) > 0) { 263 LIST_INSERT_BEFORE(xp, p, rib_l); 264 break; 265 } else if (LIST_NEXT(xp, rib_l) == NULL) { 266 /* if xp last element ... */ 267 LIST_INSERT_AFTER(xp, p, rib_l); 268 break; 269 } 270 } 271 } 272 273 xp = LIST_FIRST(&re->prefix_h); 274 if (xp == NULL || xp->aspath->flags & (F_ATTR_LOOP|F_ATTR_PARSE_ERR) || 275 (xp->aspath->nexthop != NULL && 276 xp->aspath->nexthop->state != NEXTHOP_REACH)) 277 /* xp is ineligible */ 278 xp = NULL; 279 280 if (re->active != xp) { 281 /* need to generate an update */ 282 if (re->active != NULL) 283 re->active->aspath->active_cnt--; 284 285 /* 286 * Send update with remove for re->active and add for xp 287 * but remember that xp may be NULL aka ineligible. 288 * Additional decision may be made by the called functions. 289 */ 290 rde_generate_updates(re->ribid, xp, re->active); 291 if ((re->flags & F_RIB_NOFIB) == 0) 292 rde_send_kroute(xp, re->active, re->ribid); 293 294 re->active = xp; 295 if (xp != NULL) 296 xp->aspath->active_cnt++; 297 } 298 } 299