1 /*- 2 * Copyright (c) 1998 The NetBSD Foundation, Inc. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to The NetBSD Foundation 6 * by the 3am Software Foundry ("3am"). It was developed by Matt Thomas. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the NetBSD 19 * Foundation, Inc. and its contributors. 20 * 4. Neither the name of The NetBSD Foundation nor the names of its 21 * contributors may be used to endorse or promote products derived 22 * from this software without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 25 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 26 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 27 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 28 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 29 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 30 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 31 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 32 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 33 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 34 * POSSIBILITY OF SUCH DAMAGE. 35 * 36 * $FreeBSD: src/sys/netinet/ip_flow.c,v 1.9.2.2 2001/11/04 17:35:31 luigi Exp $ 37 * $DragonFly: src/sys/netinet/ip_flow.c,v 1.4 2004/05/03 15:18:25 hmp Exp $ 38 */ 39 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/malloc.h> 43 #include <sys/mbuf.h> 44 #include <sys/globaldata.h> 45 #include <sys/thread.h> 46 #include <sys/protosw.h> 47 #include <sys/socket.h> 48 #include <sys/kernel.h> 49 50 #include <sys/sysctl.h> 51 52 #include <net/if.h> 53 #include <net/route.h> 54 55 #include <netinet/in.h> 56 #include <netinet/in_systm.h> 57 #include <netinet/ip.h> 58 #include <netinet/in_var.h> 59 #include <netinet/ip_var.h> 60 #include <netinet/ip_flow.h> 61 62 #define IPFLOW_TIMER (5 * PR_SLOWHZ) 63 #define IPFLOW_HASHBITS 6 /* should not be a multiple of 8 */ 64 #define IPFLOW_HASHSIZE (1 << IPFLOW_HASHBITS) 65 static LIST_HEAD(ipflowhead, ipflow) ipflows[IPFLOW_HASHSIZE]; 66 static int ipflow_inuse; 67 #define IPFLOW_MAX 256 68 69 static int ipflow_active = 0; 70 SYSCTL_INT(_net_inet_ip, IPCTL_FASTFORWARDING, fastforwarding, CTLFLAG_RW, 71 &ipflow_active, 0, "Enable flow-based IP forwarding"); 72 73 static MALLOC_DEFINE(M_IPFLOW, "ip_flow", "IP flow"); 74 75 static unsigned 76 ipflow_hash( 77 struct in_addr dst, 78 struct in_addr src, 79 unsigned tos) 80 { 81 unsigned hash = tos; 82 int idx; 83 for (idx = 0; idx < 32; idx += IPFLOW_HASHBITS) 84 hash += (dst.s_addr >> (32 - idx)) + (src.s_addr >> idx); 85 return hash & (IPFLOW_HASHSIZE-1); 86 } 87 88 static struct ipflow * 89 ipflow_lookup( 90 const struct ip *ip) 91 { 92 unsigned hash; 93 struct ipflow *ipf; 94 95 hash = ipflow_hash(ip->ip_dst, ip->ip_src, ip->ip_tos); 96 97 ipf = LIST_FIRST(&ipflows[hash]); 98 while (ipf != NULL) { 99 if (ip->ip_dst.s_addr == ipf->ipf_dst.s_addr 100 && ip->ip_src.s_addr == ipf->ipf_src.s_addr 101 && ip->ip_tos == ipf->ipf_tos) 102 break; 103 ipf = LIST_NEXT(ipf, ipf_next); 104 } 105 return ipf; 106 } 107 108 int 109 ipflow_fastforward( 110 struct mbuf *m) 111 { 112 struct ip *ip; 113 struct ipflow *ipf; 114 struct rtentry *rt; 115 struct sockaddr *dst; 116 int error; 117 118 /* 119 * Are we forwarding packets? Big enough for an IP packet? 120 */ 121 if (!ipforwarding || !ipflow_active || m->m_len < sizeof(struct ip)) 122 return 0; 123 /* 124 * IP header with no option and valid version and length 125 */ 126 ip = mtod(m, struct ip *); 127 if (ip->ip_v != IPVERSION || ip->ip_hl != (sizeof(struct ip) >> 2) 128 || ntohs(ip->ip_len) > m->m_pkthdr.len) 129 return 0; 130 /* 131 * Find a flow. 132 */ 133 if ((ipf = ipflow_lookup(ip)) == NULL) 134 return 0; 135 136 /* 137 * Route and interface still up? 138 */ 139 rt = ipf->ipf_ro.ro_rt; 140 if ((rt->rt_flags & RTF_UP) == 0 || (rt->rt_ifp->if_flags & IFF_UP) == 0) 141 return 0; 142 143 /* 144 * Packet size OK? TTL? 145 */ 146 if (m->m_pkthdr.len > rt->rt_ifp->if_mtu || ip->ip_ttl <= IPTTLDEC) 147 return 0; 148 149 /* 150 * Everything checks out and so we can forward this packet. 151 * Modify the TTL and incrementally change the checksum. 152 */ 153 ip->ip_ttl -= IPTTLDEC; 154 if (ip->ip_sum >= htons(0xffff - (IPTTLDEC << 8))) { 155 ip->ip_sum += htons(IPTTLDEC << 8) + 1; 156 } else { 157 ip->ip_sum += htons(IPTTLDEC << 8); 158 } 159 160 /* 161 * Send the packet on its way. All we can get back is ENOBUFS 162 */ 163 ipf->ipf_uses++; 164 ipf->ipf_timer = IPFLOW_TIMER; 165 166 if (rt->rt_flags & RTF_GATEWAY) 167 dst = rt->rt_gateway; 168 else 169 dst = &ipf->ipf_ro.ro_dst; 170 if ((error = (*rt->rt_ifp->if_output)(rt->rt_ifp, m, dst, rt)) != 0) { 171 if (error == ENOBUFS) 172 ipf->ipf_dropped++; 173 else 174 ipf->ipf_errors++; 175 } 176 return 1; 177 } 178 179 static void 180 ipflow_addstats( 181 struct ipflow *ipf) 182 { 183 ipf->ipf_ro.ro_rt->rt_use += ipf->ipf_uses; 184 ipstat.ips_cantforward += ipf->ipf_errors + ipf->ipf_dropped; 185 ipstat.ips_forward += ipf->ipf_uses; 186 ipstat.ips_fastforward += ipf->ipf_uses; 187 } 188 189 static void 190 ipflow_free( 191 struct ipflow *ipf) 192 { 193 int s; 194 /* 195 * Remove the flow from the hash table (at elevated IPL). 196 * Once it's off the list, we can deal with it at normal 197 * network IPL. 198 */ 199 s = splimp(); 200 LIST_REMOVE(ipf, ipf_next); 201 splx(s); 202 ipflow_addstats(ipf); 203 RTFREE(ipf->ipf_ro.ro_rt); 204 ipflow_inuse--; 205 free(ipf, M_IPFLOW); 206 } 207 208 static struct ipflow * 209 ipflow_reap( 210 void) 211 { 212 struct ipflow *ipf, *maybe_ipf = NULL; 213 int idx; 214 int s; 215 216 for (idx = 0; idx < IPFLOW_HASHSIZE; idx++) { 217 ipf = LIST_FIRST(&ipflows[idx]); 218 while (ipf != NULL) { 219 /* 220 * If this no longer points to a valid route 221 * reclaim it. 222 */ 223 if ((ipf->ipf_ro.ro_rt->rt_flags & RTF_UP) == 0) 224 goto done; 225 /* 226 * choose the one that's been least recently used 227 * or has had the least uses in the last 1.5 228 * intervals. 229 */ 230 if (maybe_ipf == NULL 231 || ipf->ipf_timer < maybe_ipf->ipf_timer 232 || (ipf->ipf_timer == maybe_ipf->ipf_timer 233 && ipf->ipf_last_uses + ipf->ipf_uses < 234 maybe_ipf->ipf_last_uses + 235 maybe_ipf->ipf_uses)) 236 maybe_ipf = ipf; 237 ipf = LIST_NEXT(ipf, ipf_next); 238 } 239 } 240 ipf = maybe_ipf; 241 done: 242 /* 243 * Remove the entry from the flow table. 244 */ 245 s = splimp(); 246 LIST_REMOVE(ipf, ipf_next); 247 splx(s); 248 ipflow_addstats(ipf); 249 RTFREE(ipf->ipf_ro.ro_rt); 250 return ipf; 251 } 252 253 void 254 ipflow_slowtimo( 255 void) 256 { 257 struct ipflow *ipf; 258 int idx; 259 260 for (idx = 0; idx < IPFLOW_HASHSIZE; idx++) { 261 ipf = LIST_FIRST(&ipflows[idx]); 262 while (ipf != NULL) { 263 struct ipflow *next_ipf = LIST_NEXT(ipf, ipf_next); 264 if (--ipf->ipf_timer == 0) { 265 ipflow_free(ipf); 266 } else { 267 ipf->ipf_last_uses = ipf->ipf_uses; 268 ipf->ipf_ro.ro_rt->rt_use += ipf->ipf_uses; 269 ipstat.ips_forward += ipf->ipf_uses; 270 ipstat.ips_fastforward += ipf->ipf_uses; 271 ipf->ipf_uses = 0; 272 } 273 ipf = next_ipf; 274 } 275 } 276 } 277 278 void 279 ipflow_create(const struct route *ro, struct mbuf *m) 280 { 281 const struct ip *const ip = mtod(m, struct ip *); 282 struct ipflow *ipf; 283 unsigned hash; 284 int s; 285 286 /* 287 * Don't create cache entries for ICMP messages. 288 */ 289 if (!ipflow_active || ip->ip_p == IPPROTO_ICMP) 290 return; 291 /* 292 * See if an existing flow struct exists. If so remove it from it's 293 * list and free the old route. If not, try to malloc a new one 294 * (if we aren't at our limit). 295 */ 296 ipf = ipflow_lookup(ip); 297 if (ipf == NULL) { 298 if (ipflow_inuse == IPFLOW_MAX) { 299 ipf = ipflow_reap(); 300 } else { 301 ipf = malloc(sizeof(*ipf), M_IPFLOW, 302 M_INTWAIT | M_NULLOK); 303 if (ipf == NULL) 304 return; 305 ipflow_inuse++; 306 } 307 bzero((caddr_t) ipf, sizeof(*ipf)); 308 } else { 309 s = splimp(); 310 LIST_REMOVE(ipf, ipf_next); 311 splx(s); 312 ipflow_addstats(ipf); 313 RTFREE(ipf->ipf_ro.ro_rt); 314 ipf->ipf_uses = ipf->ipf_last_uses = 0; 315 ipf->ipf_errors = ipf->ipf_dropped = 0; 316 } 317 318 /* 319 * Fill in the updated information. 320 */ 321 ipf->ipf_ro = *ro; 322 ro->ro_rt->rt_refcnt++; 323 ipf->ipf_dst = ip->ip_dst; 324 ipf->ipf_src = ip->ip_src; 325 ipf->ipf_tos = ip->ip_tos; 326 ipf->ipf_timer = IPFLOW_TIMER; 327 /* 328 * Insert into the approriate bucket of the flow table. 329 */ 330 hash = ipflow_hash(ip->ip_dst, ip->ip_src, ip->ip_tos); 331 s = splimp(); 332 LIST_INSERT_HEAD(&ipflows[hash], ipf, ipf_next); 333 splx(s); 334 } 335