1 /* 2 * Copyright 1994, 1995 Massachusetts Institute of Technology 3 * 4 * Permission to use, copy, modify, and distribute this software and 5 * its documentation for any purpose and without fee is hereby 6 * granted, provided that both the above copyright notice and this 7 * permission notice appear in all copies, that both the above 8 * copyright notice and this permission notice appear in all 9 * supporting documentation, and that the name of M.I.T. not be used 10 * in advertising or publicity pertaining to distribution of the 11 * software without specific, written prior permission. M.I.T. makes 12 * no representations about the suitability of this software for any 13 * purpose. It is provided "as is" without express or implied 14 * warranty. 15 * 16 * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS 17 * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, 18 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 19 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT 20 * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 23 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 25 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 26 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * $FreeBSD: src/sys/netinet/in_rmx.c,v 1.37.2.3 2002/08/09 14:49:23 ru Exp $ 30 * $DragonFly: src/sys/netinet/in_rmx.c,v 1.5 2004/09/16 23:14:29 joerg Exp $ 31 */ 32 33 /* 34 * This code does two things necessary for the enhanced TCP metrics to 35 * function in a useful manner: 36 * 1) It marks all non-host routes as `cloning', thus ensuring that 37 * every actual reference to such a route actually gets turned 38 * into a reference to a host route to the specific destination 39 * requested. 40 * 2) When such routes lose all their references, it arranges for them 41 * to be deleted in some random collection of circumstances, so that 42 * a large quantity of stale routing data is not kept in kernel memory 43 * indefinitely. See in_rtqtimo() below for the exact mechanism. 44 */ 45 46 #include <sys/param.h> 47 #include <sys/systm.h> 48 #include <sys/kernel.h> 49 #include <sys/sysctl.h> 50 #include <sys/socket.h> 51 #include <sys/mbuf.h> 52 #include <sys/syslog.h> 53 54 #include <net/if.h> 55 #include <net/route.h> 56 #include <netinet/in.h> 57 #include <netinet/in_var.h> 58 #include <netinet/ip_var.h> 59 60 static struct callout in_rtqtimo_ch; 61 62 extern int in_inithead (void **head, int off); 63 64 #define RTPRF_OURS RTF_PROTO3 /* set on routes we manage */ 65 66 /* 67 * Do what we need to do when inserting a route. 68 */ 69 static struct radix_node * 70 in_addroute(void *v_arg, void *n_arg, struct radix_node_head *head, 71 struct radix_node *treenodes) 72 { 73 struct rtentry *rt = (struct rtentry *)treenodes; 74 struct sockaddr_in *sin = (struct sockaddr_in *)rt_key(rt); 75 struct radix_node *ret; 76 77 /* 78 * For IP, all unicast non-host routes are automatically cloning. 79 */ 80 if(IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) 81 rt->rt_flags |= RTF_MULTICAST; 82 83 if(!(rt->rt_flags & (RTF_HOST | RTF_CLONING | RTF_MULTICAST))) { 84 rt->rt_flags |= RTF_PRCLONING; 85 } 86 87 /* 88 * A little bit of help for both IP output and input: 89 * For host routes, we make sure that RTF_BROADCAST 90 * is set for anything that looks like a broadcast address. 91 * This way, we can avoid an expensive call to in_broadcast() 92 * in ip_output() most of the time (because the route passed 93 * to ip_output() is almost always a host route). 94 * 95 * We also do the same for local addresses, with the thought 96 * that this might one day be used to speed up ip_input(). 97 * 98 * We also mark routes to multicast addresses as such, because 99 * it's easy to do and might be useful (but this is much more 100 * dubious since it's so easy to inspect the address). (This 101 * is done above.) 102 */ 103 if (rt->rt_flags & RTF_HOST) { 104 if (in_broadcast(sin->sin_addr, rt->rt_ifp)) { 105 rt->rt_flags |= RTF_BROADCAST; 106 } else { 107 if (satosin(rt->rt_ifa->ifa_addr)->sin_addr.s_addr 108 == sin->sin_addr.s_addr) 109 rt->rt_flags |= RTF_LOCAL; 110 } 111 } 112 113 if (!rt->rt_rmx.rmx_mtu && !(rt->rt_rmx.rmx_locks & RTV_MTU) 114 && rt->rt_ifp) 115 rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu; 116 117 ret = rn_addroute(v_arg, n_arg, head, treenodes); 118 if (ret == NULL && rt->rt_flags & RTF_HOST) { 119 struct rtentry *rt2; 120 /* 121 * We are trying to add a host route, but can't. 122 * Find out if it is because of an 123 * ARP entry and delete it if so. 124 */ 125 rt2 = rtalloc1((struct sockaddr *)sin, 0, 126 RTF_CLONING | RTF_PRCLONING); 127 if (rt2) { 128 if (rt2->rt_flags & RTF_LLINFO && 129 rt2->rt_flags & RTF_HOST && 130 rt2->rt_gateway && 131 rt2->rt_gateway->sa_family == AF_LINK) { 132 rtrequest(RTM_DELETE, 133 (struct sockaddr *)rt_key(rt2), 134 rt2->rt_gateway, 135 rt_mask(rt2), rt2->rt_flags, 0); 136 ret = rn_addroute(v_arg, n_arg, head, 137 treenodes); 138 } 139 RTFREE(rt2); 140 } 141 } 142 143 /* 144 * If the new route created successfully, and we are forwarding, 145 * and there is a cached route, free it. Otherwise, we may end 146 * up using the wrong route. 147 */ 148 if (ret != NULL && ipforwarding && ipforward_rt.ro_rt) { 149 RTFREE(ipforward_rt.ro_rt); 150 ipforward_rt.ro_rt = 0; 151 } 152 153 return ret; 154 } 155 156 /* 157 * This code is the inverse of in_clsroute: on first reference, if we 158 * were managing the route, stop doing so and set the expiration timer 159 * back off again. 160 */ 161 static struct radix_node * 162 in_matroute(void *v_arg, struct radix_node_head *head) 163 { 164 struct radix_node *rn = rn_match(v_arg, head); 165 struct rtentry *rt = (struct rtentry *)rn; 166 167 if(rt && rt->rt_refcnt == 0) { /* this is first reference */ 168 if(rt->rt_flags & RTPRF_OURS) { 169 rt->rt_flags &= ~RTPRF_OURS; 170 rt->rt_rmx.rmx_expire = 0; 171 } 172 } 173 return rn; 174 } 175 176 static int rtq_reallyold = 60*60; 177 /* one hour is ``really old'' */ 178 SYSCTL_INT(_net_inet_ip, IPCTL_RTEXPIRE, rtexpire, CTLFLAG_RW, 179 &rtq_reallyold , 0, 180 "Default expiration time on dynamically learned routes"); 181 182 static int rtq_minreallyold = 10; 183 /* never automatically crank down to less */ 184 SYSCTL_INT(_net_inet_ip, IPCTL_RTMINEXPIRE, rtminexpire, CTLFLAG_RW, 185 &rtq_minreallyold , 0, 186 "Minimum time to attempt to hold onto dynamically learned routes"); 187 188 static int rtq_toomany = 128; 189 /* 128 cached routes is ``too many'' */ 190 SYSCTL_INT(_net_inet_ip, IPCTL_RTMAXCACHE, rtmaxcache, CTLFLAG_RW, 191 &rtq_toomany , 0, "Upper limit on dynamically learned routes"); 192 193 /* 194 * On last reference drop, mark the route as belong to us so that it can be 195 * timed out. 196 */ 197 static void 198 in_clsroute(struct radix_node *rn, struct radix_node_head *head) 199 { 200 struct rtentry *rt = (struct rtentry *)rn; 201 202 if(!(rt->rt_flags & RTF_UP)) 203 return; /* prophylactic measures */ 204 205 if((rt->rt_flags & (RTF_LLINFO | RTF_HOST)) != RTF_HOST) 206 return; 207 208 if((rt->rt_flags & (RTF_WASCLONED | RTPRF_OURS)) 209 != RTF_WASCLONED) 210 return; 211 212 /* 213 * As requested by David Greenman: 214 * If rtq_reallyold is 0, just delete the route without 215 * waiting for a timeout cycle to kill it. 216 */ 217 if(rtq_reallyold != 0) { 218 rt->rt_flags |= RTPRF_OURS; 219 rt->rt_rmx.rmx_expire = time_second + rtq_reallyold; 220 } else { 221 rtrequest(RTM_DELETE, 222 (struct sockaddr *)rt_key(rt), 223 rt->rt_gateway, rt_mask(rt), 224 rt->rt_flags, 0); 225 } 226 } 227 228 struct rtqk_arg { 229 struct radix_node_head *rnh; 230 int draining; 231 int killed; 232 int found; 233 int updating; 234 time_t nextstop; 235 }; 236 237 /* 238 * Get rid of old routes. When draining, this deletes everything, even when 239 * the timeout is not expired yet. When updating, this makes sure that 240 * nothing has a timeout longer than the current value of rtq_reallyold. 241 */ 242 static int 243 in_rtqkill(struct radix_node *rn, void *rock) 244 { 245 struct rtqk_arg *ap = rock; 246 struct rtentry *rt = (struct rtentry *)rn; 247 int err; 248 249 if(rt->rt_flags & RTPRF_OURS) { 250 ap->found++; 251 252 if(ap->draining || rt->rt_rmx.rmx_expire <= time_second) { 253 if(rt->rt_refcnt > 0) 254 panic("rtqkill route really not free"); 255 256 err = rtrequest(RTM_DELETE, 257 (struct sockaddr *)rt_key(rt), 258 rt->rt_gateway, rt_mask(rt), 259 rt->rt_flags, 0); 260 if(err) { 261 log(LOG_WARNING, "in_rtqkill: error %d\n", err); 262 } else { 263 ap->killed++; 264 } 265 } else { 266 if(ap->updating 267 && (rt->rt_rmx.rmx_expire - time_second 268 > rtq_reallyold)) { 269 rt->rt_rmx.rmx_expire = time_second 270 + rtq_reallyold; 271 } 272 ap->nextstop = lmin(ap->nextstop, 273 rt->rt_rmx.rmx_expire); 274 } 275 } 276 277 return 0; 278 } 279 280 #define RTQ_TIMEOUT 60*10 /* run no less than once every ten minutes */ 281 static int rtq_timeout = RTQ_TIMEOUT; 282 283 static void 284 in_rtqtimo(void *rock) 285 { 286 struct radix_node_head *rnh = rock; 287 struct rtqk_arg arg; 288 struct timeval atv; 289 static time_t last_adjusted_timeout = 0; 290 int s; 291 292 arg.found = arg.killed = 0; 293 arg.rnh = rnh; 294 arg.nextstop = time_second + rtq_timeout; 295 arg.draining = arg.updating = 0; 296 s = splnet(); 297 rnh->rnh_walktree(rnh, in_rtqkill, &arg); 298 splx(s); 299 300 /* 301 * Attempt to be somewhat dynamic about this: 302 * If there are ``too many'' routes sitting around taking up space, 303 * then crank down the timeout, and see if we can't make some more 304 * go away. However, we make sure that we will never adjust more 305 * than once in rtq_timeout seconds, to keep from cranking down too 306 * hard. 307 */ 308 if((arg.found - arg.killed > rtq_toomany) 309 && (time_second - last_adjusted_timeout >= rtq_timeout) 310 && rtq_reallyold > rtq_minreallyold) { 311 rtq_reallyold = 2*rtq_reallyold / 3; 312 if(rtq_reallyold < rtq_minreallyold) { 313 rtq_reallyold = rtq_minreallyold; 314 } 315 316 last_adjusted_timeout = time_second; 317 #ifdef DIAGNOSTIC 318 log(LOG_DEBUG, "in_rtqtimo: adjusted rtq_reallyold to %d\n", 319 rtq_reallyold); 320 #endif 321 arg.found = arg.killed = 0; 322 arg.updating = 1; 323 s = splnet(); 324 rnh->rnh_walktree(rnh, in_rtqkill, &arg); 325 splx(s); 326 } 327 328 atv.tv_usec = 0; 329 atv.tv_sec = arg.nextstop - time_second; 330 callout_reset(&in_rtqtimo_ch, tvtohz_high(&atv), in_rtqtimo, rock); 331 } 332 333 void 334 in_rtqdrain(void) 335 { 336 struct radix_node_head *rnh = rt_tables[AF_INET]; 337 struct rtqk_arg arg; 338 int s; 339 arg.found = arg.killed = 0; 340 arg.rnh = rnh; 341 arg.nextstop = 0; 342 arg.draining = 1; 343 arg.updating = 0; 344 s = splnet(); 345 rnh->rnh_walktree(rnh, in_rtqkill, &arg); 346 splx(s); 347 } 348 349 /* 350 * Initialize our routing tree. 351 */ 352 int 353 in_inithead(void **head, int off) 354 { 355 struct radix_node_head *rnh; 356 357 if(!rn_inithead(head, off)) 358 return 0; 359 360 if(head != (void **)&rt_tables[AF_INET]) /* BOGUS! */ 361 return 1; /* only do this for the real routing table */ 362 363 rnh = *head; 364 rnh->rnh_addaddr = in_addroute; 365 rnh->rnh_matchaddr = in_matroute; 366 rnh->rnh_close = in_clsroute; 367 callout_init(&in_rtqtimo_ch); 368 in_rtqtimo(rnh); /* kick off timeout first time */ 369 return 1; 370 } 371 372 373 /* 374 * This zaps old routes when the interface goes down or interface 375 * address is deleted. In the latter case, it deletes static routes 376 * that point to this address. If we don't do this, we may end up 377 * using the old address in the future. The ones we always want to 378 * get rid of are things like ARP entries, since the user might down 379 * the interface, walk over to a completely different network, and 380 * plug back in. 381 */ 382 struct in_ifadown_arg { 383 struct radix_node_head *rnh; 384 struct ifaddr *ifa; 385 int del; 386 }; 387 388 static int 389 in_ifadownkill(struct radix_node *rn, void *xap) 390 { 391 struct in_ifadown_arg *ap = xap; 392 struct rtentry *rt = (struct rtentry *)rn; 393 int err; 394 395 if (rt->rt_ifa == ap->ifa && 396 (ap->del || !(rt->rt_flags & RTF_STATIC))) { 397 /* 398 * We need to disable the automatic prune that happens 399 * in this case in rtrequest() because it will blow 400 * away the pointers that rn_walktree() needs in order 401 * continue our descent. We will end up deleting all 402 * the routes that rtrequest() would have in any case, 403 * so that behavior is not needed there. 404 */ 405 rt->rt_flags &= ~(RTF_CLONING | RTF_PRCLONING); 406 err = rtrequest(RTM_DELETE, (struct sockaddr *)rt_key(rt), 407 rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0); 408 if (err) { 409 log(LOG_WARNING, "in_ifadownkill: error %d\n", err); 410 } 411 } 412 return 0; 413 } 414 415 int 416 in_ifadown(struct ifaddr *ifa, int delete) 417 { 418 struct in_ifadown_arg arg; 419 struct radix_node_head *rnh; 420 421 if (ifa->ifa_addr->sa_family != AF_INET) 422 return 1; 423 424 arg.rnh = rnh = rt_tables[AF_INET]; 425 arg.ifa = ifa; 426 arg.del = delete; 427 rnh->rnh_walktree(rnh, in_ifadownkill, &arg); 428 ifa->ifa_flags &= ~IFA_ROUTE; 429 return 0; 430 } 431