1 /* $FreeBSD: src/sys/netinet/ip_encap.c,v 1.1.2.5 2003/01/23 21:06:45 sam Exp $ */ 2 /* $KAME: ip_encap.c,v 1.41 2001/03/15 08:35:08 itojun Exp $ */ 3 4 /* 5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the project nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 /* 33 * My grandfather said that there's a devil inside tunnelling technology... 34 * 35 * We have surprisingly many protocols that want packets with IP protocol 36 * #4 or #41. Here's a list of protocols that want protocol #41: 37 * RFC1933 configured tunnel 38 * RFC1933 automatic tunnel 39 * RFC2401 IPsec tunnel 40 * RFC2473 IPv6 generic packet tunnelling 41 * RFC2529 6over4 tunnel 42 * mobile-ip6 (uses RFC2473) 43 * RFC3056 6to4 tunnel 44 * isatap tunnel 45 * Here's a list of protocol that want protocol #4: 46 * RFC1853 IPv4-in-IPv4 tunnelling 47 * RFC2003 IPv4 encapsulation within IPv4 48 * RFC2344 reverse tunnelling for mobile-ip4 49 * RFC2401 IPsec tunnel 50 * Well, what can I say. They impose different en/decapsulation mechanism 51 * from each other, so they need separate protocol handler. The only one 52 * we can easily determine by protocol # is IPsec, which always has 53 * AH/ESP/IPComp header right after outer IP header. 54 * 55 * So, clearly good old protosw does not work for protocol #4 and #41. 56 * The code will let you match protocol via src/dst address pair. 57 */ 58 59 #include "opt_inet.h" 60 #include "opt_inet6.h" 61 62 #include <sys/param.h> 63 #include <sys/systm.h> 64 #include <sys/socket.h> 65 #include <sys/sockio.h> 66 #include <sys/mbuf.h> 67 #include <sys/errno.h> 68 #include <sys/protosw.h> 69 #include <sys/queue.h> 70 71 #include <net/if.h> 72 #include <net/route.h> 73 74 #include <netinet/in.h> 75 #include <netinet/in_systm.h> 76 #include <netinet/ip.h> 77 #include <netinet/ip_var.h> 78 #include <netinet/ip_encap.h> 79 80 #ifdef INET6 81 #include <netinet/ip6.h> 82 #include <netinet6/ip6_var.h> 83 #include <netinet6/ip6protosw.h> 84 #endif 85 86 #include <machine/stdarg.h> 87 88 #include <net/net_osdep.h> 89 90 #include <sys/kernel.h> 91 #include <sys/malloc.h> 92 #include <sys/thread2.h> 93 MALLOC_DEFINE(M_IPENCAP, "IP Encapsulation", "IP Encapsulation"); 94 95 static void encap_add (struct encaptab *); 96 static int mask_match (const struct encaptab *, const struct sockaddr *, 97 const struct sockaddr *); 98 static void encap_fillarg (struct mbuf *, const struct encaptab *); 99 100 #ifndef LIST_HEAD_INITIALIZER 101 /* rely upon BSS initialization */ 102 LIST_HEAD(, encaptab) encaptab; 103 #else 104 LIST_HEAD(, encaptab) encaptab = LIST_HEAD_INITIALIZER(&encaptab); 105 #endif 106 107 int (*ipip_input)(struct mbuf **, int *, int); /* hook for mrouting */ 108 109 void 110 encap_init(void) 111 { 112 static int initialized = 0; 113 114 if (initialized) 115 return; 116 initialized++; 117 #if 0 118 /* 119 * we cannot use LIST_INIT() here, since drivers may want to call 120 * encap_attach(), on driver attach. encap_init() will be called 121 * on AF_INET{,6} initialization, which happens after driver 122 * initialization - using LIST_INIT() here can nuke encap_attach() 123 * from drivers. 124 */ 125 LIST_INIT(&encaptab); 126 #endif 127 } 128 129 #ifdef INET 130 int 131 encap4_input(struct mbuf **mp, int *offp, int proto) 132 { 133 struct mbuf *m = *mp; 134 int off = *offp; 135 struct ip *ip; 136 struct sockaddr_in s, d; 137 const struct protosw *psw; 138 struct encaptab *ep, *match; 139 int prio, matchprio; 140 141 if (!IN_NETISR(0)) { 142 /* 143 * NOTE: 144 * Some NICs, noticeably igb(4) and ix(4), use inner IP 145 * datagram to calculate the packet hash, which leads us 146 * here. 147 */ 148 m->m_flags &= ~M_HASH; 149 m = ip_rehashm(m); 150 if (m != NULL) { 151 lwkt_port_t port = netisr_hashport(m->m_pkthdr.hash); 152 153 KASSERT(port != &curthread->td_msgport, 154 ("mbuf hash recursion")); 155 ip_transport_redispatch(port, m, off); 156 } 157 return (IPPROTO_DONE); 158 } 159 160 ip = mtod(m, struct ip *); 161 *mp = NULL; 162 163 bzero(&s, sizeof s); 164 s.sin_family = AF_INET; 165 s.sin_len = sizeof(struct sockaddr_in); 166 s.sin_addr = ip->ip_src; 167 bzero(&d, sizeof d); 168 d.sin_family = AF_INET; 169 d.sin_len = sizeof(struct sockaddr_in); 170 d.sin_addr = ip->ip_dst; 171 172 match = NULL; 173 matchprio = 0; 174 for (ep = LIST_FIRST(&encaptab); ep; ep = LIST_NEXT(ep, chain)) { 175 if (ep->af != AF_INET) 176 continue; 177 if (ep->proto >= 0 && ep->proto != proto) 178 continue; 179 if (ep->func) 180 prio = (*ep->func)(m, off, proto, ep->arg); 181 else { 182 /* 183 * it's inbound traffic, we need to match in reverse 184 * order 185 */ 186 prio = mask_match(ep, 187 (struct sockaddr *)&d, 188 (struct sockaddr *)&s); 189 } 190 191 /* 192 * We prioritize the matches by using bit length of the 193 * matches. mask_match() and user-supplied matching function 194 * should return the bit length of the matches (for example, 195 * if both src/dst are matched for IPv4, 64 should be returned). 196 * 0 or negative return value means "it did not match". 197 * 198 * The question is, since we have two "mask" portion, we 199 * cannot really define total order between entries. 200 * For example, which of these should be preferred? 201 * mask_match() returns 48 (32 + 16) for both of them. 202 * src=3ffe::/16, dst=3ffe:501::/32 203 * src=3ffe:501::/32, dst=3ffe::/16 204 * 205 * We need to loop through all the possible candidates 206 * to get the best match - the search takes O(n) for 207 * n attachments (i.e. interfaces). 208 */ 209 if (prio <= 0) 210 continue; 211 if (prio > matchprio) { 212 matchprio = prio; 213 match = ep; 214 } 215 } 216 217 if (match) { 218 /* found a match, "match" has the best one */ 219 psw = match->psw; 220 if (psw && psw->pr_input) { 221 encap_fillarg(m, match); 222 *mp = m; 223 (*psw->pr_input)(mp, offp, proto); 224 } else { 225 m_freem(m); 226 } 227 return(IPPROTO_DONE); 228 } 229 230 /* for backward compatibility */ 231 if (proto == IPPROTO_IPV4 && ipip_input) { 232 *mp = m; 233 ipip_input(mp, offp, proto); 234 return(IPPROTO_DONE); 235 } 236 237 /* last resort: inject to raw socket */ 238 *mp = m; 239 rip_input(mp, offp, proto); 240 return(IPPROTO_DONE); 241 } 242 #endif 243 244 #ifdef INET6 245 int 246 encap6_input(struct mbuf **mp, int *offp, int proto) 247 { 248 struct mbuf *m = *mp; 249 struct ip6_hdr *ip6; 250 struct sockaddr_in6 s, d; 251 const struct protosw *psw; 252 struct encaptab *ep, *match; 253 int prio, matchprio; 254 255 ip6 = mtod(m, struct ip6_hdr *); 256 257 bzero(&s, sizeof s); 258 s.sin6_family = AF_INET6; 259 s.sin6_len = sizeof(struct sockaddr_in6); 260 s.sin6_addr = ip6->ip6_src; 261 bzero(&d, sizeof d); 262 d.sin6_family = AF_INET6; 263 d.sin6_len = sizeof(struct sockaddr_in6); 264 d.sin6_addr = ip6->ip6_dst; 265 266 match = NULL; 267 matchprio = 0; 268 for (ep = LIST_FIRST(&encaptab); ep; ep = LIST_NEXT(ep, chain)) { 269 if (ep->af != AF_INET6) 270 continue; 271 if (ep->proto >= 0 && ep->proto != proto) 272 continue; 273 if (ep->func) 274 prio = (*ep->func)(m, *offp, proto, ep->arg); 275 else { 276 /* 277 * it's inbound traffic, we need to match in reverse 278 * order 279 */ 280 prio = mask_match(ep, (struct sockaddr *)&d, 281 (struct sockaddr *)&s); 282 } 283 284 /* see encap4_input() for issues here */ 285 if (prio <= 0) 286 continue; 287 if (prio > matchprio) { 288 matchprio = prio; 289 match = ep; 290 } 291 } 292 293 if (match) { 294 /* found a match */ 295 psw = match->psw; 296 if (psw && psw->pr_input) { 297 encap_fillarg(m, match); 298 return (*psw->pr_input)(mp, offp, proto); 299 } else { 300 m_freem(m); 301 return IPPROTO_DONE; 302 } 303 } 304 305 /* last resort: inject to raw socket */ 306 return rip6_input(mp, offp, proto); 307 } 308 #endif 309 310 static void 311 encap_add(struct encaptab *ep) 312 { 313 314 LIST_INSERT_HEAD(&encaptab, ep, chain); 315 } 316 317 /* 318 * sp (src ptr) is always my side, and dp (dst ptr) is always remote side. 319 * length of mask (sm and dm) is assumed to be same as sp/dp. 320 * Return value will be necessary as input (cookie) for encap_detach(). 321 */ 322 const struct encaptab * 323 encap_attach(int af, int proto, const struct sockaddr *sp, 324 const struct sockaddr *sm, const struct sockaddr *dp, 325 const struct sockaddr *dm, const struct protosw *psw, void *arg) 326 { 327 struct encaptab *ep; 328 329 crit_enter(); 330 /* sanity check on args */ 331 if (sp->sa_len > sizeof ep->src || dp->sa_len > sizeof ep->dst) 332 goto fail; 333 if (sp->sa_len != dp->sa_len) 334 goto fail; 335 if (af != sp->sa_family || af != dp->sa_family) 336 goto fail; 337 338 /* check if anyone have already attached with exactly same config */ 339 for (ep = LIST_FIRST(&encaptab); ep; ep = LIST_NEXT(ep, chain)) { 340 if (ep->af != af) 341 continue; 342 if (ep->proto != proto) 343 continue; 344 if (ep->src.ss_len != sp->sa_len || 345 bcmp(&ep->src, sp, sp->sa_len) != 0 || 346 bcmp(&ep->srcmask, sm, sp->sa_len) != 0) 347 continue; 348 if (ep->dst.ss_len != dp->sa_len || 349 bcmp(&ep->dst, dp, dp->sa_len) != 0 || 350 bcmp(&ep->dstmask, dm, dp->sa_len) != 0) 351 continue; 352 353 goto fail; 354 } 355 356 ep = kmalloc(sizeof *ep, M_IPENCAP, M_INTWAIT | M_ZERO | M_NULLOK); 357 if (ep == NULL) 358 goto fail; 359 360 ep->af = af; 361 ep->proto = proto; 362 bcopy(sp, &ep->src, sp->sa_len); 363 bcopy(sm, &ep->srcmask, sp->sa_len); 364 bcopy(dp, &ep->dst, dp->sa_len); 365 bcopy(dm, &ep->dstmask, dp->sa_len); 366 ep->psw = psw; 367 ep->arg = arg; 368 369 encap_add(ep); 370 371 crit_exit(); 372 return ep; 373 374 fail: 375 crit_exit(); 376 return NULL; 377 } 378 379 const struct encaptab * 380 encap_attach_func(int af, int proto, 381 int (*func)(const struct mbuf *, int, int, void *), 382 const struct protosw *psw, void *arg) 383 { 384 struct encaptab *ep; 385 386 crit_enter(); 387 /* sanity check on args */ 388 if (!func) 389 goto fail; 390 391 ep = kmalloc(sizeof *ep, M_IPENCAP, M_INTWAIT | M_ZERO | M_NULLOK); 392 if (ep == NULL) 393 goto fail; 394 395 ep->af = af; 396 ep->proto = proto; 397 ep->func = func; 398 ep->psw = psw; 399 ep->arg = arg; 400 401 encap_add(ep); 402 403 crit_exit(); 404 return ep; 405 406 fail: 407 crit_exit(); 408 return NULL; 409 } 410 411 int 412 encap_detach(const struct encaptab *cookie) 413 { 414 const struct encaptab *ep = cookie; 415 struct encaptab *p; 416 417 for (p = LIST_FIRST(&encaptab); p; p = LIST_NEXT(p, chain)) { 418 if (p == ep) { 419 LIST_REMOVE(p, chain); 420 kfree(p, M_IPENCAP); /*XXX*/ 421 return 0; 422 } 423 } 424 425 return EINVAL; 426 } 427 428 static int 429 mask_match(const struct encaptab *ep, const struct sockaddr *sp, 430 const struct sockaddr *dp) 431 { 432 struct sockaddr_storage s; 433 struct sockaddr_storage d; 434 int i; 435 const u_int8_t *p, *q; 436 u_int8_t *r; 437 int matchlen; 438 439 if (sp->sa_len > sizeof s || dp->sa_len > sizeof d) 440 return 0; 441 if (sp->sa_family != ep->af || dp->sa_family != ep->af) 442 return 0; 443 if (sp->sa_len != ep->src.ss_len || dp->sa_len != ep->dst.ss_len) 444 return 0; 445 446 matchlen = 0; 447 448 p = (const u_int8_t *)sp; 449 q = (const u_int8_t *)&ep->srcmask; 450 r = (u_int8_t *)&s; 451 for (i = 0 ; i < sp->sa_len; i++) { 452 r[i] = p[i] & q[i]; 453 /* XXX estimate */ 454 matchlen += (q[i] ? 8 : 0); 455 } 456 457 p = (const u_int8_t *)dp; 458 q = (const u_int8_t *)&ep->dstmask; 459 r = (u_int8_t *)&d; 460 for (i = 0 ; i < dp->sa_len; i++) { 461 r[i] = p[i] & q[i]; 462 /* XXX rough estimate */ 463 matchlen += (q[i] ? 8 : 0); 464 } 465 466 /* need to overwrite len/family portion as we don't compare them */ 467 s.ss_len = sp->sa_len; 468 s.ss_family = sp->sa_family; 469 d.ss_len = dp->sa_len; 470 d.ss_family = dp->sa_family; 471 472 if (bcmp(&s, &ep->src, ep->src.ss_len) == 0 && 473 bcmp(&d, &ep->dst, ep->dst.ss_len) == 0) { 474 return matchlen; 475 } else 476 return 0; 477 } 478 479 static void 480 encap_fillarg(struct mbuf *m, const struct encaptab *ep) 481 { 482 struct m_tag *tag; 483 484 tag = m_tag_get(PACKET_TAG_ENCAP, sizeof(void *), M_NOWAIT); 485 if (tag != NULL) { 486 *(void **)m_tag_data(tag) = ep->arg; 487 m_tag_prepend(m, tag); 488 } 489 } 490 491 void * 492 encap_getarg(struct mbuf *m) 493 { 494 void *p = NULL; 495 struct m_tag *tag; 496 497 tag = m_tag_find(m, PACKET_TAG_ENCAP, NULL); 498 if (tag != NULL) { 499 p = *(void **)m_tag_data(tag); 500 m_tag_delete(m, tag); 501 } 502 return p; 503 } 504